From a1f19b61f84f709ba20c350da34956c5455a6956 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Fri, 23 Jul 2021 12:41:07 -0400 Subject: [PATCH 01/35] settings: Implement setting ranges Clamps the setting's values against the specified minimum and maximum values. --- src/common/settings.h | 170 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 152 insertions(+), 18 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index cfc1ab46f7..51f9a179b3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -141,6 +141,67 @@ protected: const std::string label{}; ///< The setting's label }; +/** + * BasicRangedSetting class is intended for use with quantifiable settings that need a more + * restrictive range than implicitly defined by its type. Implements a minimum and maximum that is + * simply used to sanitize SetValue and the assignment overload. + */ +template +class BasicRangedSetting : virtual public BasicSetting { +public: + /** + * Sets a default value, minimum value, maximum value, and label. + * + * @param default_val Intial value of the setting, and default value of the setting + * @param min_val Sets the minimum allowed value of the setting + * @param max_val Sets the maximum allowed value of the setting + * @param name Label for the setting + */ + explicit BasicRangedSetting(const Type& default_val, const Type& min_val, const Type& max_val, + const std::string& name) + : BasicSetting{default_val, name}, minimum{min_val}, maximum{max_val} {} + ~BasicRangedSetting() = default; + + /** + * Like BasicSetting's SetValue, except value is clamped to the range of the setting. 
+ * + * @param value The desired value + */ + void SetValue(const Type& value) { + Type temp; + if (value < minimum) { + temp = std::move(minimum); + } else if (value > maximum) { + temp = std::move(maximum); + } else { + temp = std::move(value); + } + std::swap(this->global, temp); + } + + /** + * Like BasicSetting's assignment overload, except value is clamped to the range of the setting. + * + * @param value The desired value + * @returns A reference to the setting's value + */ + const Type& operator=(const Type& value) { + Type temp; + if (value < minimum) { + temp = std::move(minimum); + } else if (value > maximum) { + temp = std::move(maximum); + } else { + temp = std::move(value); + } + std::swap(this->global, temp); + return this->global; + } + + const Type minimum; ///< Minimum allowed value of the setting + const Type maximum; ///< Maximum allowed value of the setting +}; + /** * The Setting class is a slightly more complex version of the BasicSetting class. This adds a * custom setting to switch to when a guest application specifically requires it. The effect is that @@ -152,7 +213,7 @@ protected: * Like the BasicSetting, this requires setting a default value and label to use. */ template -class Setting final : public BasicSetting { +class Setting : virtual public BasicSetting { public: /** * Sets a default value, label, and setting value. @@ -241,11 +302,80 @@ public: return custom; } -private: +protected: bool use_global{true}; ///< The setting's global state Type custom{}; ///< The custom value of the setting }; +/** + * RangedSetting is a Setting that implements a maximum and minimum value for its setting. Intended + * for use with quantifiable settings. + */ +template +class RangedSetting final : public BasicRangedSetting, public Setting { +public: + /** + * Sets a default value, minimum value, maximum value, and label. 
+ * + * @param default_val Intial value of the setting, and default value of the setting + * @param min_val Sets the minimum allowed value of the setting + * @param max_val Sets the maximum allowed value of the setting + * @param name Label for the setting + */ + explicit RangedSetting(const Type& default_val, const Type& min_val, const Type& max_val, + const std::string& name) + : BasicSetting{default_val, name}, + BasicRangedSetting{default_val, min_val, max_val, name}, Setting{default_val, + name} {} + ~RangedSetting() = default; + + /** + * Like BasicSetting's SetValue, except value is clamped to the range of the setting. Sets the + * appropriate value depending on the global state. + * + * @param value The desired value + */ + void SetValue(const Type& value) { + Type temp; + if (value < this->minimum) { + temp = std::move(this->minimum); + } else if (value > this->maximum) { + temp = std::move(this->maximum); + } else { + temp = std::move(value); + } + if (this->use_global) { + std::swap(this->global, temp); + } else { + std::swap(this->custom, temp); + } + } + + /** + * Like BasicSetting's assignment overload, except value is clamped to the range of the setting. + * Uses the appropriate value depending on the global state. + * + * @param value The desired value + * @returns A reference to the setting's value + */ + const Type& operator=(const Type& value) { + Type temp; + if (value < this->minimum) { + temp = std::move(this->minimum); + } else if (value > this->maximum) { + temp = std::move(this->maximum); + } else { + temp = std::move(value); + } + if (this->use_global) { + std::swap(this->global, temp); + return this->global; + } + std::swap(this->custom, temp); + return this->custom; + } +}; + /** * The InputSetting class allows for getting a reference to either the global or custom members. 
* This is required as we cannot easily modify the values of user-defined types within containers @@ -289,13 +419,14 @@ struct Values { BasicSetting sink_id{"auto", "output_engine"}; BasicSetting audio_muted{false, "audio_muted"}; Setting enable_audio_stretching{true, "enable_audio_stretching"}; - Setting volume{100, "volume"}; + RangedSetting volume{100, 0, 100, "volume"}; // Core Setting use_multi_core{true, "use_multi_core"}; // Cpu - Setting cpu_accuracy{CPUAccuracy::Auto, "cpu_accuracy"}; + RangedSetting cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, + CPUAccuracy::Unsafe, "cpu_accuracy"}; // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 BasicSetting cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; BasicSetting cpu_debug_mode{false, "cpu_debug_mode"}; @@ -317,7 +448,8 @@ struct Values { Setting cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"}; // Renderer - Setting renderer_backend{RendererBackend::OpenGL, "backend"}; + RangedSetting renderer_backend{ + RendererBackend::OpenGL, RendererBackend::OpenGL, RendererBackend::Vulkan, "backend"}; BasicSetting renderer_debug{false, "debug"}; BasicSetting enable_nsight_aftermath{false, "nsight_aftermath"}; BasicSetting disable_shader_loop_safety_checks{false, @@ -327,26 +459,28 @@ struct Values { Setting resolution_factor{1, "resolution_factor"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. 
- Setting fullscreen_mode{ + RangedSetting fullscreen_mode{ #ifdef _WIN32 FullscreenMode::Borderless, #else FullscreenMode::Exclusive, #endif - "fullscreen_mode"}; - Setting aspect_ratio{0, "aspect_ratio"}; - Setting max_anisotropy{0, "max_anisotropy"}; + FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; + RangedSetting aspect_ratio{0, 0, 3, "aspect_ratio"}; + RangedSetting max_anisotropy{0, 0, 4, "max_anisotropy"}; Setting use_speed_limit{true, "use_speed_limit"}; - Setting speed_limit{100, "speed_limit"}; + RangedSetting speed_limit{100, 0, 9999, "speed_limit"}; Setting use_disk_shader_cache{true, "use_disk_shader_cache"}; - Setting gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"}; + RangedSetting gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal, + GPUAccuracy::Extreme, "gpu_accuracy"}; Setting use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; Setting use_nvdec_emulation{true, "use_nvdec_emulation"}; Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; - BasicSetting fps_cap{1000, "fps_cap"}; + BasicRangedSetting fps_cap{1000, 1, 1000, "fps_cap"}; BasicSetting disable_fps_limit{false, "disable_fps_limit"}; - Setting shader_backend{ShaderBackend::GLASM, "shader_backend"}; + RangedSetting shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL, + ShaderBackend::SPIRV, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting use_caches_gc{false, "use_caches_gc"}; @@ -363,10 +497,10 @@ struct Values { std::chrono::seconds custom_rtc_differential; BasicSetting current_user{0, "current_user"}; - Setting language_index{1, "language_index"}; - Setting region_index{1, "region_index"}; - Setting time_zone_index{0, "time_zone_index"}; - Setting sound_index{1, "sound_index"}; + RangedSetting language_index{1, 0, 16, "language_index"}; + RangedSetting region_index{1, 0, 6, "region_index"}; + 
RangedSetting time_zone_index{0, 0, 45, "time_zone_index"}; + RangedSetting sound_index{1, 0, 2, "sound_index"}; // Controls InputSetting> players; @@ -383,7 +517,7 @@ struct Values { "udp_input_servers"}; BasicSetting mouse_panning{false, "mouse_panning"}; - BasicSetting mouse_panning_sensitivity{10, "mouse_panning_sensitivity"}; + BasicRangedSetting mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"}; BasicSetting mouse_enabled{false, "mouse_enabled"}; std::string mouse_device; MouseButtonsRaw mouse_buttons; From 7737bdfd1ac2868397b94ba26a9ccf06ea1dfcba Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Fri, 30 Jul 2021 13:33:35 -0400 Subject: [PATCH 02/35] settings: Fix function virtualization Fixes a theoretical scenario where a Setting is using the BasicSetting's GetValue function. In practice this probably only happens on yuzu-cmd, where there is no need for a Setting's additional features. Need to fix regardless. --- src/common/settings.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 51f9a179b3..f54705a96e 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -81,7 +81,7 @@ public: * * @returns A reference to the setting */ - [[nodiscard]] const Type& GetValue() const { + [[nodiscard]] virtual const Type& GetValue() const { return global; } @@ -90,7 +90,7 @@ public: * * @param value The desired value */ - void SetValue(const Type& value) { + virtual void SetValue(const Type& value) { Type temp{value}; std::swap(global, temp); } @@ -120,7 +120,7 @@ public: * * @returns A reference to the setting */ - const Type& operator=(const Type& value) { + virtual const Type& operator=(const Type& value) { Type temp{value}; std::swap(global, temp); return global; @@ -131,7 +131,7 @@ public: * * @returns A reference to the setting */ - explicit operator const Type&() const { + explicit virtual operator 
const Type&() const { return global; } @@ -167,7 +167,7 @@ public: * * @param value The desired value */ - void SetValue(const Type& value) { + void SetValue(const Type& value) override { Type temp; if (value < minimum) { temp = std::move(minimum); @@ -185,7 +185,7 @@ public: * @param value The desired value * @returns A reference to the setting's value */ - const Type& operator=(const Type& value) { + const Type& operator=(const Type& value) override { Type temp; if (value < minimum) { temp = std::move(minimum); @@ -252,7 +252,13 @@ public: * * @returns The required value of the setting */ - [[nodiscard]] const Type& GetValue(bool need_global = false) const { + [[nodiscard]] const Type& GetValue() const override { + if (use_global) { + return this->global; + } + return custom; + } + [[nodiscard]] const Type& GetValue(bool need_global) const { if (use_global || need_global) { return this->global; } @@ -264,7 +270,7 @@ public: * * @param value The new value */ - void SetValue(const Type& value) { + void SetValue(const Type& value) override { Type temp{value}; if (use_global) { std::swap(this->global, temp); @@ -280,7 +286,7 @@ public: * * @returns A reference to the current setting value */ - const Type& operator=(const Type& value) { + const Type& operator=(const Type& value) override { Type temp{value}; if (use_global) { std::swap(this->global, temp); @@ -295,7 +301,7 @@ public: * * @returns A reference to the current setting value */ - explicit operator const Type&() const { + explicit operator const Type&() const override { if (use_global) { return this->global; } @@ -335,7 +341,7 @@ public: * * @param value The desired value */ - void SetValue(const Type& value) { + void SetValue(const Type& value) override { Type temp; if (value < this->minimum) { temp = std::move(this->minimum); @@ -358,7 +364,7 @@ public: * @param value The desired value * @returns A reference to the setting's value */ - const Type& operator=(const Type& value) { + const Type& 
operator=(const Type& value) override { Type temp; if (value < this->minimum) { temp = std::move(this->minimum); From e9cf08c2411197860330f77650a9aaac586b9725 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Fri, 30 Jul 2021 16:12:15 -0400 Subject: [PATCH 03/35] settings: Remove unnecessary std::move usages Addresses review feedback. Co-authored-by: Mai M. --- src/common/settings.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index f54705a96e..4432b5ddde 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -170,11 +170,11 @@ public: void SetValue(const Type& value) override { Type temp; if (value < minimum) { - temp = std::move(minimum); + temp = minimum; } else if (value > maximum) { - temp = std::move(maximum); + temp = maximum; } else { - temp = std::move(value); + temp = value; } std::swap(this->global, temp); } @@ -188,11 +188,11 @@ public: const Type& operator=(const Type& value) override { Type temp; if (value < minimum) { - temp = std::move(minimum); + temp = minimum; } else if (value > maximum) { - temp = std::move(maximum); + temp = maximum; } else { - temp = std::move(value); + temp = value; } std::swap(this->global, temp); return this->global; @@ -344,11 +344,11 @@ public: void SetValue(const Type& value) override { Type temp; if (value < this->minimum) { - temp = std::move(this->minimum); + temp = this->minimum; } else if (value > this->maximum) { - temp = std::move(this->maximum); + temp = this->maximum; } else { - temp = std::move(value); + temp = value; } if (this->use_global) { std::swap(this->global, temp); @@ -367,11 +367,11 @@ public: const Type& operator=(const Type& value) override { Type temp; if (value < this->minimum) { - temp = std::move(this->minimum); + temp = this->minimum; } else if (value > this->maximum) { - temp = std::move(this->maximum); + temp = this->maximum; } else { - temp = 
std::move(value); + temp = value; } if (this->use_global) { std::swap(this->global, temp); From 3862511a9ad294918682dccf7765f43df166e0d7 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 31 Jul 2021 17:20:12 -0400 Subject: [PATCH 04/35] settings: Use std::clamp where possible Addresses PR review. RangedSetting::SetValue keeps its else branch so that the custom value is only written while the global value is not in use, matching Setting::SetValue and RangedSetting::operator=. Co-authored-by: PixelyIon --- src/common/settings.h | 47 +++++++++-------------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 4432b5ddde..69f4adaebc 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -168,15 +169,7 @@ public: * @param value The desired value */ void SetValue(const Type& value) override { - Type temp; - if (value < minimum) { - temp = minimum; - } else if (value > maximum) { - temp = maximum; - } else { - temp = value; - } - std::swap(this->global, temp); + this->global = std::clamp(value, minimum, maximum); } /** @@ -186,15 +179,7 @@ public: * @returns A reference to the setting's value */ const Type& operator=(const Type& value) override { - Type temp; - if (value < minimum) { - temp = minimum; - } else if (value > maximum) { - temp = maximum; - } else { - temp = value; - } - std::swap(this->global, temp); + this->global = std::clamp(value, minimum, maximum); return this->global; } @@ -342,19 +327,12 @@ public: * @param value The desired value */ void SetValue(const Type& value) override { - Type temp; - if (value < this->minimum) { - temp = this->minimum; - } else if (value > this->maximum) { - temp = this->maximum; - } else { - temp = value; - } + const Type temp = std::clamp(value, this->minimum, this->maximum); if (this->use_global) { - std::swap(this->global, temp); + this->global = temp; } else { - std::swap(this->custom, temp); + this->custom = temp; } } /** @@ -365,19 +343,12 @@ public: * @returns A reference to the setting's value */
const Type& operator=(const Type& value) override { - Type temp; - if (value < this->minimum) { - temp = this->minimum; - } else if (value > this->maximum) { - temp = this->maximum; - } else { - temp = value; - } + const Type temp = std::clamp(value, this->minimum, this->maximum); if (this->use_global) { - std::swap(this->global, temp); + this->global = temp; return this->global; } - std::swap(this->custom, temp); + this->custom = temp; return this->custom; } }; From 15c0c213b1efb63f1d6f4900409fca8c8984e973 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:07:53 -0400 Subject: [PATCH 05/35] astc.h: Move data to cpp implementation Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation. --- src/video_core/textures/astc.cpp | 86 +++++++++++++++++++++++--------- src/video_core/textures/astc.h | 41 --------------- 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 3ab500760f..26c19d75ba 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -521,35 +521,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, - u32 blockHeight) { - // Don't actually care about the void extent, just read the bits... - for (s32 i = 0; i < 4; ++i) { - strm.ReadBits<13>(); +// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] +// is the same as [(num_bits - 1):0] and repeats all the way down. 
+template +static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { + if (num_bits == 0 || to_bit == 0) { + return 0; } - - // Decode the RGBA components and renormalize them to the range [0, 255] - u16 r = static_cast(strm.ReadBits<16>()); - u16 g = static_cast(strm.ReadBits<16>()); - u16 b = static_cast(strm.ReadBits<16>()); - u16 a = static_cast(strm.ReadBits<16>()); - - u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | - (static_cast(a) & 0xFF00) << 16; - - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = rgba; + const IntType v = val & static_cast((1 << num_bits) - 1); + IntType res = v; + u32 reslen = num_bits; + while (reslen < to_bit) { + u32 comp = 0; + if (num_bits > to_bit - reslen) { + u32 newshift = to_bit - reslen; + comp = num_bits - newshift; + num_bits = newshift; } + res = static_cast(res << num_bits); + res = static_cast(res | (v >> comp)); + reslen += num_bits; } + return res; } -static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; - } +static constexpr std::size_t NumReplicateEntries(u32 num_bits) { + return std::size_t(1) << num_bits; +} + +template +static constexpr auto MakeReplicateTable() { + std::array table{}; + for (IntType value = 0; value < static_cast(std::size(table)); ++value) { + table[value] = Replicate(value, num_bits, to_bit); } + return table; } static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); @@ -572,6 +578,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable(); static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = 
MakeReplicateTable(); +static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); +static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback /// to the runtime implementation static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { @@ -1316,6 +1325,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } +static void FillVoidExtentLDR(InputBitStream& strm, std::span outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast(strm.ReadBits<16>()); + u16 g = static_cast(strm.ReadBits<16>()); + u16 b = static_cast(strm.ReadBits<16>()); + u16 a = static_cast(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast(b) & 0xFF00) << 8 | + (static_cast(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} + static void DecompressBlock(std::span inBuf, const u32 blockWidth, const u32 blockHeight, std::span outBuf) { InputBitStream strm(inBuf); diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 0229ae1220..9e148afc4a 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -79,47 +79,6 @@ constexpr std::array MakeEncodedValues() { constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); -// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] -// is the same as 
[(num_bits - 1):0] and repeats all the way down. -template -constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { - if (num_bits == 0 || to_bit == 0) { - return 0; - } - const IntType v = val & static_cast((1 << num_bits) - 1); - IntType res = v; - u32 reslen = num_bits; - while (reslen < to_bit) { - u32 comp = 0; - if (num_bits > to_bit - reslen) { - u32 newshift = to_bit - reslen; - comp = num_bits - newshift; - num_bits = newshift; - } - res = static_cast(res << num_bits); - res = static_cast(res | (v >> comp)); - reslen += num_bits; - } - return res; -} - -constexpr std::size_t NumReplicateEntries(u32 num_bits) { - return std::size_t(1) << num_bits; -} - -template -constexpr auto MakeReplicateTable() { - std::array table{}; - for (IntType value = 0; value < static_cast(std::size(table)); ++value) { - table[value] = Replicate(value, num_bits, to_bit); - } - return table; -} - -constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); -constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:48:41 -0400 Subject: [PATCH 06/35] astc_decoder: Optimize the use EncodingData This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea. 
--- src/video_core/host_shaders/astc_decoder.comp | 50 ++++++------- .../renderer_opengl/util_shaders.cpp | 13 ++-- src/video_core/renderer_opengl/util_shaders.h | 1 - .../renderer_vulkan/vk_compute_pass.cpp | 42 +++-------- src/video_core/textures/astc.cpp | 70 +++++++++++++++++++ src/video_core/textures/astc.h | 70 ------------------- 6 files changed, 108 insertions(+), 138 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index c37f15bfd8..7f4efa31af 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -10,18 +10,16 @@ #define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 -#define BINDING_ENC_BUFFER 1 -#define BINDING_SWIZZLE_BUFFER 2 -#define BINDING_OUTPUT_IMAGE 3 +#define BINDING_SWIZZLE_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS #define UNIFORM(n) layout(location = n) uniform -#define BINDING_SWIZZLE_BUFFER 0 -#define BINDING_INPUT_BUFFER 1 -#define BINDING_ENC_BUFFER 2 +#define BINDING_INPUT_BUFFER 0 +#define BINDING_SWIZZLE_BUFFER 1 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -64,11 +62,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint astc_data[]; }; -// ASTC Encodings data -layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { - EncodingData encoding_values[]; -}; - layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; const uint GOB_SIZE_X = 64; @@ -94,6 +87,19 @@ const int JUST_BITS = 0; const int QUINT = 1; const int TRIT = 2; +// ASTC Encodings data, sorted in ascending order based on their BitLength value +// (see GetBitLength() function) +EncodingData encoding_values[22] = EncodingData[]( + EncodingData(JUST_BITS, 0, 0, 0), EncodingData(JUST_BITS, 1, 0, 0), EncodingData(TRIT, 0, 0, 0), + 
EncodingData(JUST_BITS, 2, 0, 0), EncodingData(QUINT, 0, 0, 0), EncodingData(TRIT, 1, 0, 0), + EncodingData(JUST_BITS, 3, 0, 0), EncodingData(QUINT, 1, 0, 0), EncodingData(TRIT, 2, 0, 0), + EncodingData(JUST_BITS, 4, 0, 0), EncodingData(QUINT, 2, 0, 0), EncodingData(TRIT, 3, 0, 0), + EncodingData(JUST_BITS, 5, 0, 0), EncodingData(QUINT, 3, 0, 0), EncodingData(TRIT, 4, 0, 0), + EncodingData(JUST_BITS, 6, 0, 0), EncodingData(QUINT, 4, 0, 0), EncodingData(TRIT, 5, 0, 0), + EncodingData(JUST_BITS, 7, 0, 0), EncodingData(QUINT, 5, 0, 0), EncodingData(TRIT, 6, 0, 0), + EncodingData(JUST_BITS, 8, 0, 0) +); + // The following constants are expanded variants of the Replicate() // function calls corresponding to the following arguments: // value: index into the generated table @@ -596,22 +602,16 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) { for (uint i = 0; i < num_partitions; i++) { num_values += ((modes[i] >> 2) + 1) << 1; } - int range = 256; - while (--range > 0) { - EncodingData val = encoding_values[range]; + // Find the largest encoding that's within color_data_bits + // TODO(ameerj): profile with binary search + int range = 0; + while (++range < encoding_values.length()) { uint bit_length = GetBitLength(num_values, range); - if (bit_length <= color_data_bits) { - while (--range > 0) { - EncodingData newval = encoding_values[range]; - if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) { - break; - } - } - ++range; + if (bit_length > color_data_bits) { break; } } - DecodeIntegerSequence(range, num_values); + DecodeIntegerSequence(range - 1, num_values); uint out_index = 0; for (int itr = 0; itr < result_index; ++itr) { if (out_index >= num_values) { @@ -1110,10 +1110,10 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { } weight_index -= 2; if ((mode_layout != 9) && ((mode & 0x200) != 0)) { - const int max_weights[6] = int[6](9, 11, 15, 19, 23, 31); + const int max_weights[6] = int[6](7, 8, 9, 10, 11, 
12); params.max_weight = max_weights[weight_index]; } else { - const int max_weights[6] = int[6](1, 2, 3, 4, 5, 7); + const int max_weights[6] = int[6](1, 2, 3, 4, 5, 6); params.max_weight = max_weights[weight_index]; } return params; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 37a4d1d9db..a2b2647005 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -60,19 +60,15 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); - astc_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); - glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES, - 0); } UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; - static constexpr GLuint BINDING_INPUT_BUFFER = 1; - static constexpr GLuint BINDING_ENC_BUFFER = 2; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -81,7 +77,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, }; program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); @@ -103,11 +98,11 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glUniform1ui(6, 
params.block_height); glUniform1ui(7, params.block_height_mask); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, GL_RGBA8); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, GL_RGBA8); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 53d65f368e..ef881e35f6 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -62,7 +62,6 @@ private: ProgramManager& program_manager; OGLBuffer swizzle_table_buffer; - OGLBuffer astc_buffer; OGLProgram astc_decoder_program; OGLProgram block_linear_unswizzle_2d_program; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 561cf5e11f..328813a572 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -30,16 +30,13 @@ namespace Vulkan { using Tegra::Texture::SWIZZLE_TABLE; -using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; -using namespace Tegra::Texture::ASTC; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; -constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; -constexpr size_t ASTC_NUM_BINDINGS = 4; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; +constexpr size_t ASTC_NUM_BINDINGS = 3; template inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ @@ -75,7 +72,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ .score = 2, }; -constexpr std::array 
ASTC_DESCRIPTOR_SET_BINDINGS{{ +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ { .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -83,13 +80,6 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -108,12 +98,12 @@ constexpr std::array ASTC_DESCRIPTOR_SET_BINDIN constexpr DescriptorBankInfo ASTC_BANK_INFO{ .uniform_buffers = 0, - .storage_buffers = 3, + .storage_buffers = 2, .texture_buffers = 0, .image_buffers = 0, .textures = 0, .images = 1, - .score = 4, + .score = 3, }; constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ @@ -135,14 +125,6 @@ constexpr std::array .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_ENC_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -355,7 +337,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); + constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -369,11 +351,7 @@ void 
ASTCDecoderPass::MakeDataBuffer() { data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, - sizeof(ASTC_ENCODINGS_VALUES)); - // Tack on the swizzle table at the end of the buffer - std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE, - sizeof(SWIZZLE_TABLE)); + std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { @@ -443,9 +421,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); - update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), - sizeof(SWIZZLE_TABLE)); + update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{update_descriptor_queue.UpdateData()}; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 26c19d75ba..25161df1f6 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -151,6 +151,76 @@ private: const IntType& m_Bits; }; +enum class IntegerEncoding { JustBits, Quint, Trit }; + +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; + + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} + + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && 
num_bits == other.num_bits; + } + + // Returns the number of bits required to encode num_vals values. + u32 GetBitLength(u32 num_vals) const { + u32 total_bits = num_bits * num_vals; + if (encoding == IntegerEncoding::Trit) { + total_bits += (num_vals * 8 + 4) / 5; + } else if (encoding == IntegerEncoding::Quint) { + total_bits += (num_vals * 7 + 2) / 3; + } + return total_bits; + } + + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 quint_value = 0; + u32 trit_value; + }; +}; + +// Returns a new instance of this struct that corresponds to the +// can take no more than mav_value values +static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { + while (mav_value > 0) { + u32 check = mav_value + 1; + + // Is mav_value a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); + } + + // Is mav_value of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); + } + + // Is mav_value of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); + } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. 
+ mav_value--; + } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} + +static constexpr std::array MakeEncodedValues() { + std::array encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast(i)); + } + return encodings; +} + +static constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); + namespace Tegra::Texture::ASTC { using IntegerEncodedVector = boost::container::static_vector< IntegerEncodedValue, 256, diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 9e148afc4a..14d2beec0c 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -9,76 +9,6 @@ namespace Tegra::Texture::ASTC { -enum class IntegerEncoding { JustBits, Quint, Trit }; - -struct IntegerEncodedValue { - constexpr IntegerEncodedValue() = default; - - constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) - : encoding{encoding_}, num_bits{num_bits_} {} - - constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { - return encoding == other.encoding && num_bits == other.num_bits; - } - - // Returns the number of bits required to encode num_vals values. - u32 GetBitLength(u32 num_vals) const { - u32 total_bits = num_bits * num_vals; - if (encoding == IntegerEncoding::Trit) { - total_bits += (num_vals * 8 + 4) / 5; - } else if (encoding == IntegerEncoding::Quint) { - total_bits += (num_vals * 7 + 2) / 3; - } - return total_bits; - } - - IntegerEncoding encoding{}; - u32 num_bits = 0; - u32 bit_value = 0; - union { - u32 quint_value = 0; - u32 trit_value; - }; -}; - -// Returns a new instance of this struct that corresponds to the -// can take no more than mav_value values -constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { - while (mav_value > 0) { - u32 check = mav_value + 1; - - // Is mav_value a power of two? 
- if (!(check & (check - 1))) { - return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); - } - - // Is mav_value of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); - } - - // Is mav_value of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); - } - - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - mav_value--; - } - return IntegerEncodedValue(IntegerEncoding::JustBits, 0); -} - -constexpr std::array MakeEncodedValues() { - std::array encodings{}; - for (std::size_t i = 0; i < encodings.size(); ++i) { - encodings[i] = CreateEncoding(static_cast(i)); - } - return encodings; -} - -constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); - void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output); From a75d70fa9025baad7a80b700903148d1152b1b84 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 5 Jul 2021 18:51:51 -0400 Subject: [PATCH 07/35] astc_decoder: Simplify Select2DPartition --- src/video_core/host_shaders/astc_decoder.comp | 55 ++++++------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 7f4efa31af..8d8b64fbd5 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -284,14 +284,10 @@ uint Hash52(uint p) { return p; } -uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) { - if (partition_count == 1) { - return 0; - } +uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) { 
if (small_block) { x <<= 1; y <<= 1; - z <<= 1; } seed += (partition_count - 1) * 1024; @@ -305,10 +301,6 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo uint seed6 = uint((rnum >> 20) & 0xF); uint seed7 = uint((rnum >> 24) & 0xF); uint seed8 = uint((rnum >> 28) & 0xF); - uint seed9 = uint((rnum >> 18) & 0xF); - uint seed10 = uint((rnum >> 22) & 0xF); - uint seed11 = uint((rnum >> 26) & 0xF); - uint seed12 = uint(((rnum >> 30) | (rnum << 2)) & 0xF); seed1 = (seed1 * seed1); seed2 = (seed2 * seed2); @@ -318,12 +310,8 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo seed6 = (seed6 * seed6); seed7 = (seed7 * seed7); seed8 = (seed8 * seed8); - seed9 = (seed9 * seed9); - seed10 = (seed10 * seed10); - seed11 = (seed11 * seed11); - seed12 = (seed12 * seed12); - int sh1, sh2, sh3; + uint sh1, sh2; if ((seed & 1) > 0) { sh1 = (seed & 2) > 0 ? 4 : 5; sh2 = (partition_count == 3) ? 6 : 5; @@ -331,25 +319,19 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo sh1 = (partition_count == 3) ? 6 : 5; sh2 = (seed & 2) > 0 ? 4 : 5; } - sh3 = (seed & 0x10) > 0 ? 
sh1 : sh2; + seed1 >>= sh1; + seed2 >>= sh2; + seed3 >>= sh1; + seed4 >>= sh2; + seed5 >>= sh1; + seed6 >>= sh2; + seed7 >>= sh1; + seed8 >>= sh2; - seed1 = (seed1 >> sh1); - seed2 = (seed2 >> sh2); - seed3 = (seed3 >> sh1); - seed4 = (seed4 >> sh2); - seed5 = (seed5 >> sh1); - seed6 = (seed6 >> sh2); - seed7 = (seed7 >> sh1); - seed8 = (seed8 >> sh2); - seed9 = (seed9 >> sh3); - seed10 = (seed10 >> sh3); - seed11 = (seed11 >> sh3); - seed12 = (seed12 >> sh3); - - uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); - uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); - uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); - uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + uint a = seed1 * x + seed2 * y + (rnum >> 14); + uint b = seed3 * x + seed4 * y + (rnum >> 10); + uint c = seed5 * x + seed6 * y + (rnum >> 6); + uint d = seed7 * x + seed8 * y + (rnum >> 2); a &= 0x3F; b &= 0x3F; @@ -374,10 +356,6 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo } } -uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) { - return SelectPartition(seed, x, y, 0, partition_count, small_block); -} - uint ReadBit() { if (current_index >= local_buff.length()) { return 0; @@ -1281,8 +1259,11 @@ void DecompressBlock(ivec3 coord, uint block_index) { for (uint j = 0; j < block_dims.y; j++) { for (uint i = 0; i < block_dims.x; i++) { - uint local_partition = Select2DPartition(partition_index, i, j, num_partitions, + uint local_partition = 0; + if (num_partitions > 1) { + local_partition = Select2DPartition(partition_index, i, j, num_partitions, (block_dims.y * block_dims.x) < 32); + } vec4 p; uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); From b2862e4772489f0ade41f79765d33ec4fc33712c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 8 Jul 2021 00:31:35 -0400 Subject: 
[PATCH 08/35] astc_decoder: Make use of uvec4 for payload data --- src/video_core/host_shaders/astc_decoder.comp | 122 ++++++------------ 1 file changed, 43 insertions(+), 79 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 8d8b64fbd5..392f09c68d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -59,7 +59,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { }; layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { - uint astc_data[]; + uvec4 astc_data[]; }; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; @@ -141,32 +141,28 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] = // Input ASTC texture globals uint current_index = 0; int bitsread = 0; -uint total_bitsread = 0; -uint local_buff[16]; +int total_bitsread = 0; +uvec4 local_buff; // Color data globals -uint color_endpoint_data[16]; +uvec4 color_endpoint_data; int color_bitsread = 0; -uint total_color_bitsread = 0; -int color_index = 0; // Four values, two endpoints, four maximum paritions uint color_values[32]; int colvals_index = 0; // Weight data globals -uint texel_weight_data[16]; +uvec4 texel_weight_data; int texel_bitsread = 0; -uint total_texel_bitsread = 0; -int texel_index = 0; bool texel_flag = false; // Global "vectors" to be pushed into when decoding -EncodingData result_vector[100]; +EncodingData result_vector[144]; int result_index = 0; -EncodingData texel_vector[100]; +EncodingData texel_vector[144]; int texel_vector_index = 0; uint unquantized_texel_weights[2][144]; @@ -176,11 +172,6 @@ uint SwizzleOffset(uvec2 pos) { return swizzle_table[pos.y * 64 + pos.x]; } -uint ReadTexel(uint offset) { - // extract the 8-bit value from the 32-bit packed data. 
- return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); -} - // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] // is the same as [(num_bits - 1):0] and repeats all the way down. uint Replicate(uint val, uint num_bits, uint to_bit) { @@ -356,54 +347,37 @@ uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool sma } } -uint ReadBit() { - if (current_index >= local_buff.length()) { +uint ExtractBits(uvec4 payload, int offset, int bits) { + if (bits <= 0) { return 0; } - uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1); - ++bitsread; - ++total_bitsread; - if (bitsread == 8) { - ++current_index; - bitsread = 0; + int last_offset = offset + bits - 1; + int shifted_offset = offset >> 5; + if ((last_offset >> 5) == shifted_offset) { + return bitfieldExtract(payload[shifted_offset], offset & 31, bits); } - return bit; + int first_bits = 32 - (offset & 31); + int result_first = int(bitfieldExtract(payload[shifted_offset], offset & 31, first_bits)); + int result_second = int(bitfieldExtract(payload[shifted_offset + 1], 0, bits - first_bits)); + return result_first | (result_second << first_bits); } uint StreamBits(uint num_bits) { - uint ret = 0; - for (uint i = 0; i < num_bits; i++) { - ret |= ((ReadBit() & 1) << i); - } + int int_bits = int(num_bits); + uint ret = ExtractBits(local_buff, total_bitsread, int_bits); + total_bitsread += int_bits; return ret; } -uint ReadColorBit() { - uint bit = 0; - if (texel_flag) { - bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1); - ++texel_bitsread; - ++total_texel_bitsread; - if (texel_bitsread == 8) { - ++texel_index; - texel_bitsread = 0; - } - } else { - bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1); - ++color_bitsread; - ++total_color_bitsread; - if (color_bitsread == 8) { - ++color_index; - color_bitsread = 0; - } - } - return bit; -} - uint StreamColorBits(uint num_bits) { uint ret = 0; - for 
(uint i = 0; i < num_bits; i++) { - ret |= ((ReadColorBit() & 1) << i); + int int_bits = int(num_bits); + if (texel_flag) { + ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits); + texel_bitsread += int_bits; + } else { + ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits); + color_bitsread += int_bits; } return ret; } @@ -1006,7 +980,7 @@ int FindLayout(uint mode) { return 5; } -TexelWeightParams DecodeBlockInfo(uint block_index) { +TexelWeightParams DecodeBlockInfo() { TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false); uint mode = StreamBits(11); if ((mode & 0x1ff) == 0x1fc) { @@ -1122,8 +1096,8 @@ void FillVoidExtentLDR(ivec3 coord) { } } -void DecompressBlock(ivec3 coord, uint block_index) { - TexelWeightParams params = DecodeBlockInfo(block_index); +void DecompressBlock(ivec3 coord) { + TexelWeightParams params = DecodeBlockInfo(); if (params.error_state) { FillError(coord); return; @@ -1190,7 +1164,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { // Read color data... 
uint color_data_bits = remaining_bits; while (remaining_bits > 0) { - int nb = int(min(remaining_bits, 8U)); + int nb = int(min(remaining_bits, 32U)); uint b = StreamBits(nb); color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); ++ced_pointer; @@ -1232,25 +1206,20 @@ void DecompressBlock(ivec3 coord, uint block_index) { ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); } - for (uint i = 0; i < 16; i++) { - texel_weight_data[i] = local_buff[i]; - } - for (uint i = 0; i < 8; i++) { -#define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16 - uint a = REVERSE_BYTE(texel_weight_data[i]); - uint b = REVERSE_BYTE(texel_weight_data[15 - i]); -#undef REVERSE_BYTE - texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8)); - texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8)); - } + texel_weight_data = local_buff; + texel_weight_data = bitfieldReverse(texel_weight_data).wzyx; uint clear_byte_start = (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; - texel_weight_data[clear_byte_start - 1] = - texel_weight_data[clear_byte_start - 1] & + + uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) & uint( ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); - for (uint i = 0; i < 16 - clear_byte_start; i++) { - texel_weight_data[clear_byte_start + i] = 0U; + uint vec_index = (clear_byte_start - 1) >> 2; + texel_weight_data[vec_index] = + bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); + for (uint i = clear_byte_start; i < 16; ++i) { + uint idx = i >> 2; + texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8); } texel_flag = true; // use texel "vector" and bit stream in integer decoding DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); @@ -1302,13 +1271,8 @@ void main() { if 
(any(greaterThanEqual(coord, imageSize(dest_image)))) { return; } - uint block_index = - pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; - current_index = 0; bitsread = 0; - for (int i = 0; i < 16; i++) { - local_buff[i] = ReadTexel(offset + i); - } - DecompressBlock(coord, block_index); + local_buff = astc_data[offset / 16]; + DecompressBlock(coord); } From 5ab80535118e593ef3add3ce2b5935437e1dc1d3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 22:24:15 -0400 Subject: [PATCH 09/35] astc_decoder: Compute offset swizzles in-shader Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes. --- src/video_core/host_shaders/astc_decoder.comp | 46 ++++--------- .../renderer_opengl/util_shaders.cpp | 16 ++--- .../renderer_vulkan/vk_compute_pass.cpp | 67 ++----------------- .../renderer_vulkan/vk_compute_pass.h | 5 -- 4 files changed, 25 insertions(+), 109 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 392f09c68d..74ce058a93 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -10,8 +10,7 @@ #define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 -#define BINDING_OUTPUT_IMAGE 2 +#define BINDING_OUTPUT_IMAGE 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -19,7 +18,6 @@ #define END_PUSH_CONSTANTS #define UNIFORM(n) layout(location = n) uniform #define BINDING_INPUT_BUFFER 0 -#define BINDING_SWIZZLE_BUFFER 1 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -28,13 +26,11 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; - -UNIFORM(2) uint bytes_per_block_log2; -UNIFORM(3) uint layer_stride; -UNIFORM(4) uint block_size; -UNIFORM(5) uint x_shift; -UNIFORM(6) uint block_height; 
-UNIFORM(7) uint block_height_mask; +UNIFORM(2) uint layer_stride; +UNIFORM(3) uint block_size; +UNIFORM(4) uint x_shift; +UNIFORM(5) uint block_height; +UNIFORM(6) uint block_height_mask; END_PUSH_CONSTANTS struct EncodingData { @@ -53,35 +49,17 @@ struct TexelWeightParams { bool void_extent_hdr; }; -// Swizzle data -layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { - uint swizzle_table[]; -}; - layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uvec4 astc_data[]; }; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; -const uint GOB_SIZE_X = 64; -const uint GOB_SIZE_Y = 8; -const uint GOB_SIZE_Z = 1; -const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; - const uint GOB_SIZE_X_SHIFT = 6; const uint GOB_SIZE_Y_SHIFT = 3; -const uint GOB_SIZE_Z_SHIFT = 0; -const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT; -const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); - -const int BLOCK_SIZE_IN_BYTES = 16; - -const int BLOCK_INFO_ERROR = 0; -const int BLOCK_INFO_VOID_EXTENT_HDR = 1; -const int BLOCK_INFO_VOID_EXTENT_LDR = 2; -const int BLOCK_INFO_NORMAL = 3; +const uint BYTES_PER_BLOCK_LOG2 = 4; const int JUST_BITS = 0; const int QUINT = 1; @@ -168,8 +146,10 @@ int texel_vector_index = 0; uint unquantized_texel_weights[2][144]; uint SwizzleOffset(uvec2 pos) { - pos = pos & SWIZZLE_MASK; - return swizzle_table[pos.y * 64 + pos.x]; + uint x = pos.x; + uint y = pos.y; + return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + + (y % 2) * 16 + (x % 16); } // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] @@ -1253,7 +1233,7 @@ void DecompressBlock(ivec3 coord) { void main() { uvec3 pos = gl_GlobalInvocationID; - pos.x <<= bytes_per_block_log2; + pos.x <<= BYTES_PER_BLOCK_LOG2; // Read as soon as possible due to its latency 
const uint swizzle = SwizzleOffset(pos.xy); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index a2b2647005..4e6f7cb003 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -68,7 +68,6 @@ UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -76,10 +75,9 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; program_manager.BindComputeProgram(astc_decoder_program.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); + // Ensure buffer data is valid before dispatching glFlush(); for (const SwizzleParameters& swizzle : swizzles) { @@ -90,13 +88,13 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); + ASSERT(params.bytes_per_block_log2 == 4); - glUniform1ui(2, params.bytes_per_block_log2); - glUniform1ui(3, params.layer_stride); - glUniform1ui(4, params.block_size); - glUniform1ui(5, params.x_shift); - glUniform1ui(6, params.block_height); - glUniform1ui(7, params.block_height_mask); + glUniform1ui(2, params.layer_stride); + glUniform1ui(3, params.block_size); + glUniform1ui(4, params.x_shift); + glUniform1ui(5, params.block_height); + glUniform1ui(6, params.block_height_mask); // ASTC texture data 
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 328813a572..d13d58e8cc 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -34,9 +34,8 @@ using Tegra::Texture::SWIZZLE_TABLE; namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; -constexpr size_t ASTC_NUM_BINDINGS = 3; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1; +constexpr size_t ASTC_NUM_BINDINGS = 2; template inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ @@ -80,13 +79,6 @@ constexpr std::array ASTC_DESCR .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_OUTPUT_IMAGE, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, @@ -98,12 +90,12 @@ constexpr std::array ASTC_DESCR constexpr DescriptorBankInfo ASTC_BANK_INFO{ .uniform_buffers = 0, - .storage_buffers = 2, + .storage_buffers = 1, .texture_buffers = 0, .image_buffers = 0, .textures = 0, .images = 1, - .score = 3, + .score = 2, }; constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ @@ -125,14 +117,6 @@ constexpr std::array .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = 
ASTC_BINDING_OUTPUT_IMAGE, .dstArrayElement = 0, @@ -145,7 +129,6 @@ constexpr std::array struct AstcPushConstants { std::array blocks_dims; - u32 bytes_per_block_log2; u32 layer_stride; u32 block_size; u32 x_shift; @@ -336,42 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, ASTCDecoderPass::~ASTCDecoderPass() = default; -void ASTCDecoderPass::MakeDataBuffer() { - constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); - data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = TOTAL_BUFFER_SIZE, - .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); - data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); - - const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); - std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); - - scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, - TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { - static constexpr VkMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - }; - const VkBufferCopy copy{ - .srcOffset = offset, - .dstOffset = 0, - .size = TOTAL_BUFFER_SIZE, - }; - cmdbuf.CopyBuffer(src, dst, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, write_barrier); - }); -} - void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, std::span swizzles) { using namespace VideoCommon::Accelerated; @@ -380,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, 
VideoCore::Surface::DefaultBlockHeight(image.info.format), }; scheduler.RequestOutsideRenderPassOperationContext(); - if (!data_buffer) { - MakeDataBuffer(); - } const VkPipeline vk_pipeline = *pipeline; const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImage vk_image = image.Handle(); @@ -421,7 +365,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{update_descriptor_queue.UpdateData()}; @@ -429,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); + ASSERT(params.bytes_per_block_log2 == 4); scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, - .bytes_per_block_log2 = params.bytes_per_block_log2, .layer_stride = params.layer_stride, .block_size = params.block_size, .x_shift = params.x_shift, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 114aef2bd7..c7b92cce0d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -96,15 +96,10 @@ public: std::span swizzles); private: - void MakeDataBuffer(); - VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; MemoryAllocator& memory_allocator; - - vk::Buffer data_buffer; - MemoryCommit data_buffer_commit; }; } // 
namespace Vulkan From c439fc9be994583801418743ab202fb63d1c83a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 23:55:20 -0400 Subject: [PATCH 10/35] astc_decoder: Reduce workgroup size This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32. --- src/video_core/host_shaders/astc_decoder.comp | 2 +- src/video_core/renderer_opengl/util_shaders.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 74ce058a93..f34c5f5d98 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -22,7 +22,7 @@ #endif -layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; BEGIN_PUSH_CONSTANTS UNIFORM(1) uvec2 block_dims; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 4e6f7cb003..333f35a1c6 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glFlush(); for (const SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d13d58e8cc..3e96c0f603 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -358,8 +358,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, }); for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; update_descriptor_queue.Acquire(); From 3e2614148353342284b06899080e4707804eecf5 Mon Sep 17 00:00:00 2001 From: san Date: Sun, 1 Aug 2021 21:46:13 +0200 Subject: [PATCH 11/35] yuzu-cmd: hide cursor when in fullscreen Exposed the SDL_ShowCursor function to EmuWindow baseclass. When creating the window (GL or VK) in fullscreen it now automatically hides the cursor. --- src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | 4 ++++ src/yuzu_cmd/emu_window/emu_window_sdl2.h | 3 +++ src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | 1 + src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | 1 + 4 files changed, 9 insertions(+) diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index f643a4b0b0..c80f7791c5 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -122,6 +122,10 @@ void EmuWindow_SDL2::OnResize() { UpdateCurrentFramebufferLayout(width, height); } +void EmuWindow_SDL2::ShowCursor(bool show_cursor) { + SDL_ShowCursor(show_cursor ? 
SDL_ENABLE : SDL_DISABLE); +} + void EmuWindow_SDL2::Fullscreen() { switch (Settings::values.fullscreen_mode.GetValue()) { case Settings::FullscreenMode::Exclusive: diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h index aa0d52ae42..4810f87755 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h @@ -67,6 +67,9 @@ protected: /// Called by WaitEvent when any event that may cause the window to be resized occurs void OnResize(); + /// Called when users want to hide the mouse cursor + void ShowCursor(bool show_cursor); + /// Called when user passes the fullscreen parameter flag void Fullscreen(); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 5b98c255ba..a075ad08ae 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -111,6 +111,7 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste if (fullscreen) { Fullscreen(); + ShowCursor(false); } window_context = SDL_GL_CreateContext(render_window); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index cdda375d8d..de40b76bf0 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -45,6 +45,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste if (fullscreen) { Fullscreen(); + ShowCursor(false); } switch (wm.subsystem) { From 1fb158ce9043e36e438e681123e64ee250a32150 Mon Sep 17 00:00:00 2001 From: german77 Date: Wed, 4 Aug 2021 11:40:49 -0500 Subject: [PATCH 12/35] settings_ui: Add emulated joystick position dot to controller preview --- .../configure_input_player_widget.cpp | 61 +++++++++++++------ .../configure_input_player_widget.h | 6 +- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git 
a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp index f50cda2f3c..cd633e45fb 100644 --- a/src/yuzu/configuration/configure_input_player_widget.cpp +++ b/src/yuzu/configuration/configure_input_player_widget.cpp @@ -122,6 +122,7 @@ void PlayerControlPreview::UpdateColors() { colors.slider_arrow = QColor(14, 15, 18); colors.font2 = QColor(255, 255, 255); colors.indicator = QColor(170, 238, 255); + colors.indicator2 = QColor(100, 255, 100); colors.deadzone = QColor(204, 136, 136); colors.slider_button = colors.button; } @@ -139,6 +140,7 @@ void PlayerControlPreview::UpdateColors() { colors.slider_arrow = QColor(65, 68, 73); colors.font2 = QColor(0, 0, 0); colors.indicator = QColor(0, 0, 200); + colors.indicator2 = QColor(0, 150, 0); colors.deadzone = QColor(170, 0, 0); colors.slider_button = QColor(153, 149, 149); } @@ -317,8 +319,7 @@ void PlayerControlPreview::DrawLeftController(QPainter& p, const QPointF center) using namespace Settings::NativeAnalog; DrawJoystick(p, center + QPointF(9, -69) + (axis_values[LStick].value * 8), 1.8f, button_values[Settings::NativeButton::LStick]); - DrawRawJoystick(p, center + QPointF(-140, 90), axis_values[LStick].raw_value, - axis_values[LStick].properties); + DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0)); } using namespace Settings::NativeButton; @@ -432,8 +433,7 @@ void PlayerControlPreview::DrawRightController(QPainter& p, const QPointF center using namespace Settings::NativeAnalog; DrawJoystick(p, center + QPointF(-9, 11) + (axis_values[RStick].value * 8), 1.8f, button_values[Settings::NativeButton::RStick]); - DrawRawJoystick(p, center + QPointF(140, 90), axis_values[RStick].raw_value, - axis_values[RStick].properties); + DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90)); } using namespace Settings::NativeButton; @@ -547,8 +547,7 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center) DrawJoystick(p, 
center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button); DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button); - DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties); - DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties); + DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90)); } using namespace Settings::NativeButton; @@ -634,8 +633,7 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button); DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button); - DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties); - DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties); + DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0)); } using namespace Settings::NativeButton; @@ -728,10 +726,7 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center) button_values[Settings::NativeButton::LStick]); DrawProJoystick(p, center + QPointF(51, 0), axis_values[RStick].value, 11, button_values[Settings::NativeButton::RStick]); - DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value, - axis_values[LStick].properties); - DrawRawJoystick(p, center + QPointF(50, 105), axis_values[RStick].raw_value, - axis_values[RStick].properties); + DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105)); } using namespace Settings::NativeButton; @@ -821,10 +816,7 @@ void PlayerControlPreview::DrawGCController(QPainter& p, const QPointF center) { p.setBrush(colors.font); DrawSymbol(p, center + QPointF(61, 37) + (axis_values[RStick].value * 9.5f), Symbol::C, 1.0f); - DrawRawJoystick(p, center + QPointF(-198, -125), axis_values[LStick].raw_value, - axis_values[LStick].properties); - DrawRawJoystick(p, center + 
QPointF(198, -125), axis_values[RStick].raw_value, - axis_values[RStick].properties); + DrawRawJoystick(p, center + QPointF(-198, -125), center + QPointF(198, -125)); } using namespace Settings::NativeButton; @@ -2358,8 +2350,33 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo DrawCircle(p, center, 7.5f); } -void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value, - const Input::AnalogProperties& properties) { +void PlayerControlPreview::DrawRawJoystick(QPainter& p, QPointF center_left, QPointF center_right) { + using namespace Settings::NativeAnalog; + if (controller_type != Settings::ControllerType::LeftJoycon) { + DrawJoystickProperties(p, center_right, axis_values[RStick].properties); + p.setPen(colors.indicator); + p.setBrush(colors.indicator); + DrawJoystickDot(p, center_right, axis_values[RStick].raw_value, + axis_values[RStick].properties); + p.setPen(colors.indicator2); + p.setBrush(colors.indicator2); + DrawJoystickDot(p, center_right, axis_values[RStick].value, axis_values[RStick].properties); + } + + if (controller_type != Settings::ControllerType::RightJoycon) { + DrawJoystickProperties(p, center_left, axis_values[LStick].properties); + p.setPen(colors.indicator); + p.setBrush(colors.indicator); + DrawJoystickDot(p, center_left, axis_values[LStick].raw_value, + axis_values[LStick].properties); + p.setPen(colors.indicator2); + p.setBrush(colors.indicator2); + DrawJoystickDot(p, center_left, axis_values[LStick].value, axis_values[LStick].properties); + } +} + +void PlayerControlPreview::DrawJoystickProperties(QPainter& p, const QPointF center, + const Input::AnalogProperties& properties) { constexpr float size = 45.0f; const float range = size * properties.range; const float deadzone = size * properties.deadzone; @@ -2376,10 +2393,14 @@ void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, co pen.setColor(colors.deadzone); p.setPen(pen); DrawCircle(p, 
center, deadzone); +} + +void PlayerControlPreview::DrawJoystickDot(QPainter& p, const QPointF center, const QPointF value, + const Input::AnalogProperties& properties) { + constexpr float size = 45.0f; + const float range = size * properties.range; // Dot pointer - p.setPen(colors.indicator); - p.setBrush(colors.indicator); DrawCircle(p, center + (value * range), 2); } diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h index 5fc16d8af0..f4a6a5e1b2 100644 --- a/src/yuzu/configuration/configure_input_player_widget.h +++ b/src/yuzu/configuration/configure_input_player_widget.h @@ -90,6 +90,7 @@ private: QColor highlight2{}; QColor transparent{}; QColor indicator{}; + QColor indicator2{}; QColor led_on{}; QColor led_off{}; QColor slider{}; @@ -139,7 +140,10 @@ private: // Draw joystick functions void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed); void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed); - void DrawRawJoystick(QPainter& p, QPointF center, QPointF value, + void DrawRawJoystick(QPainter& p, QPointF center_left, QPointF center_right); + void DrawJoystickProperties(QPainter& p, QPointF center, + const Input::AnalogProperties& properties); + void DrawJoystickDot(QPainter& p, QPointF center, QPointF value, const Input::AnalogProperties& properties); void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed); void DrawGCJoystick(QPainter& p, QPointF center, bool pressed); From d5bf597436ffa95564975ed21c3cc4cebbaf869f Mon Sep 17 00:00:00 2001 From: german77 Date: Wed, 4 Aug 2021 11:41:58 -0500 Subject: [PATCH 13/35] settings_ui: Use better colors for the light theme --- dist/qt_themes/default/style.qss | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/qt_themes/default/style.qss b/dist/qt_themes/default/style.qss index 9915a40bab..f0908a7f1a 100644 --- 
a/dist/qt_themes/default/style.qss +++ b/dist/qt_themes/default/style.qss @@ -51,11 +51,11 @@ QPushButton#GPUStatusBarButton:hover { } QPushButton#GPUStatusBarButton:checked { - color: #ff8040; + color: #b06020; } QPushButton#GPUStatusBarButton:!checked { - color: #40dd40; + color: #109010; } QPushButton#buttonRefreshDevices { From f9563c8f248677894b886373f18c016fb189e416 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 13:52:30 +0000 Subject: [PATCH 14/35] texture_cache: Split templates out --- src/video_core/CMakeLists.txt | 3 + .../renderer_opengl/gl_texture_cache.cpp | 5 +- .../gl_texture_cache_templates.cpp | 10 + .../renderer_vulkan/vk_texture_cache.cpp | 2 +- .../vk_texture_cache_templates.cpp | 10 + src/video_core/texture_cache/texture_cache.h | 1528 +---------------- .../texture_cache/texture_cache_templates.h | 1507 ++++++++++++++++ 7 files changed, 1533 insertions(+), 1532 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_texture_cache_templates.cpp create mode 100644 src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp create mode 100644 src/video_core/texture_cache/texture_cache_templates.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1eb67c051b..1250cca6f9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -97,6 +97,7 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h + renderer_opengl/gl_texture_cache_templates.cpp renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h @@ -155,6 +156,7 @@ add_library(video_core STATIC renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h + renderer_vulkan/vk_texture_cache_templates.cpp renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp @@ 
-186,6 +188,7 @@ add_library(video_core STATIC texture_cache/samples_helper.h texture_cache/slot_vector.h texture_cache/texture_cache.h + texture_cache/texture_cache_templates.h texture_cache/types.h texture_cache/util.cpp texture_cache/util.h diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c373c9cb43..26b423f5ea 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -18,10 +18,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/surface.h" -#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/samples_helper.h" -#include "video_core/texture_cache/texture_cache.h" -#include "video_core/textures/decoders.h" namespace OpenGL { namespace { diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp new file mode 100644 index 0000000000..00ed064478 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/texture_cache/texture_cache_templates.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8e029bcb34..b0496556d0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp new file mode 100644 index 0000000000..fd89789547 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/texture_cache/texture_cache_templates.h" + +namespace VideoCommon { +template class VideoCommon::TextureCache; +} diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f34c9d9ca9..a4f6e94224 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -164,14 +164,6 @@ public: const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); - /// Invalidate the contents of the color buffer index - /// These contents become unspecified, the cache can assume aggressive optimizations. 
- void InvalidateColorBuffer(size_t index); - - /// Invalidate the contents of the depth buffer - /// These contents become unspecified, the cache can assume aggressive optimizations. - void InvalidateDepthBuffer(); - /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); @@ -407,1522 +399,4 @@ private: typename SlotVector::Iterator deletion_iterator; }; -template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - // Configure null sampler - TSCEntry sampler_descriptor{}; - sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); - sampler_descriptor.cubemap_anisotropy.Assign(1); - - // Make sure the first index is reserved for the null resources - // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); - void(slot_samplers.insert(runtime, sampler_descriptor)); - - deletion_iterator = slot_images.begin(); - - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); - minimum_memory = 0; - } else { - // on OGL we can be more conservatives as the driver takes care. - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; - } -} - -template -void TextureCache

::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } - } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } - } - ++deletion_iterator; - } -} - -template -void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { - RunGarbageCollector(); - } - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; -} - -template -const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { - return slot_image_views[id]; -} - -template -typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { - return slot_image_views[id]; -} - -template -void TextureCache

::MarkModification(ImageId id) noexcept { - MarkModification(slot_images[id]); -} - -template -void TextureCache

::FillGraphicsImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); -} - -template -void TextureCache

::FillComputeImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); -} - -template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - - for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); - } - render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, - }; -} - -template -typename P::Framebuffer* TextureCache

::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; -} - -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span indices, - std::span image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); - do { - has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); - return NULL_IMAGE_VIEW_ID; - } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; - if (is_new) { - image_view_id = FindImageView(descriptor); - } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; -} - -template -FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { - const auto [pair, is_new] = framebuffers.try_emplace(key); - FramebufferId& framebuffer_id = pair->second; - if (!is_new) { - return framebuffer_id; - } - std::array color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; - framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); - return framebuffer_id; -} - -template -void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { - ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template -void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; - ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - if (!image.IsSafeDownload()) { - return; - } - image.flags &= ~ImageFlagBits::GpuModified; - images.push_back(image_id); - }); - if (images.empty()) { - return; - } - std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { - return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; - }); - for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); - } -} - -template -void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - UnregisterImage(id); - DeleteImage(id); - } -} - -template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - -template -void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); - const ImageId dst_id = images.dst_id; - const ImageId src_id = images.src_id; - PrepareImage(src_id, false, false); - PrepareImage(dst_id, true, false); - - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; - - // TODO: Deduplicate - const std::optional src_base = src_image.TryFindBase(src.Address()); - const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; - const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); - const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, - }; - - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, - }; - - // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
- Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - if constexpr (FRAMEBUFFER_BLITS) { - // OpenGL blits from framebuffers, not images - Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; - runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, - copy.filter, copy.operation); - } else { - // Vulkan can blit images, but it lacks format reinterpretations - // Provide a framebuffer in case it's necessary - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, - copy.operation); - } -} - -template -void TextureCache

::InvalidateColorBuffer(size_t index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - color_buffer_id = FindColorBuffer(index, false); - if (!color_buffer_id) { - LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); - return; - } - // When invalidating a color buffer, the old contents are no longer relevant - ImageView& color_buffer = slot_image_views[color_buffer_id]; - Image& image = slot_images[color_buffer.image_id]; - image.flags &= ~ImageFlagBits::CpuModified; - image.flags &= ~ImageFlagBits::GpuModified; - - runtime.InvalidateColorBuffer(color_buffer, index); -} - -template -void TextureCache

::InvalidateDepthBuffer() { - ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; - depth_buffer_id = FindDepthBuffer(false); - if (!depth_buffer_id) { - LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); - return; - } - // When invalidating the depth buffer, the old contents are no longer relevant - ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; - image.flags &= ~ImageFlagBits::CpuModified; - image.flags &= ~ImageFlagBits::GpuModified; - - ImageView& depth_buffer = slot_image_views[depth_buffer_id]; - runtime.InvalidateDepthBuffer(depth_buffer); -} - -template -typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { - // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_BITS); - if (it == page_table.end()) { - return nullptr; - } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; - if (image.cpu_addr != cpu_addr) { - continue; - } - if (image.image_view_ids.empty()) { - continue; - } - return &slot_image_views[image.image_view_ids.at(0)]; - } - return nullptr; -} - -template -bool TextureCache

::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); -} - -template -bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); -} - -template -void TextureCache

::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); - uncommitted_downloads.clear(); -} - -template -void TextureCache

::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - const std::span download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); -} - -template -bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { - bool is_modified = false; - ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - is_modified = true; - return true; - }); - return is_modified; -} - -template -void TextureCache

::RefreshContents(Image& image, ImageId image_id) { - if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images - return; - } - image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); - - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); - return; - } - auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, staging); - runtime.InsertUploadMemoryBarrier(); -} - -template -template -void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { - const std::span mapped_span = staging.mapped_span; - const GPUVAddr gpu_addr = image.gpu_addr; - - if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); - const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(staging, copies); - } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(staging, copies); - } -} - -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - -template -ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { - const ImageInfo info(config); - if (info.type == ImageType::Buffer) { - const ImageViewInfo view_info(config, 0); - return slot_image_views.insert(runtime, info, view_info, config.Address()); - } - const u32 layer_offset = config.BaseLayer() * info.layer_stride; - const GPUVAddr image_gpu_addr = config.Address() - layer_offset; - const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - ImageBase& image = slot_images[image_id]; - const SubresourceBase base = image.TryFindBase(config.Address()).value(); - ASSERT(base.level == 0); - const ImageViewInfo view_info(config, base.layer); - const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); - ImageViewBase& image_view = slot_image_views[image_view_id]; - image_view.flags |= ImageViewFlagBits::Strong; - image.flags |= ImageFlagBits::Strong; - return image_view_id; -} - -template -ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { - return image_id; - } - return InsertImage(info, gpu_addr, options); -} - -template -ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; - const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { - const bool strict_size = False(options & RelaxedOptions::Size) && - True(existing_image.flags & ImageFlagBits::Strong); - const ImageInfo& existing = existing_image.info; - if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && - existing.pitch == info.pitch && - IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { - image_id = existing_image_id; - return true; - } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, - native_bgr)) { - image_id = existing_image_id; - return true; - } - return false; - }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; -} - -template -ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); - } - } - ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); - const Image& image = slot_images[image_id]; - // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different - const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); - if (is_new) { - it->second = slot_image_allocs.insert(); - } - slot_image_allocs[it->second].images.push_back(image_id); - return image_id; -} - -template -ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { - ImageInfo new_info = info; - const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - std::vector overlap_ids; - std::unordered_set overlaps_found; - std::vector left_aliased_ids; - std::vector right_aliased_ids; - std::unordered_set ignore_textures; - std::vector bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type == ImageType::Linear) { - if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { - // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); - } - return; - } - overlaps_found.insert(overlap_id); - static constexpr bool strict_size = true; - const std::optional solution = ResolveOverlap( - new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); - if (solution) { - gpu_addr = solution->gpu_addr; - cpu_addr = solution->cpu_addr; - new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - return; - } - static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; - const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else { - bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; - } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - } - if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); - } - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); - Image& new_image = slot_images[new_image_id]; - - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); - } else { - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - ImageBase& new_image_base = new_image; - for (const ImageId aliased_id : right_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : 
left_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : bad_overlap_ids) { - ImageBase& aliased = slot_images[aliased_id]; - aliased.overlapping_images.push_back(new_image_id); - new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; - } - RegisterImage(new_image_id); - return new_image_id; -} - -template -typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; - const GPUVAddr dst_addr = dst.Address(); - const GPUVAddr src_addr = src.Address(); - ImageInfo dst_info(dst); - ImageInfo src_info(src); - ImageId dst_id; - ImageId src_id; - do { - has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; - const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } - } while (has_deleted_images); - return BlitImages{ - .dst_id = dst_id, - .src_id = src_id, - .dst_format = dst_info.format, - .src_format = src_info.format, - }; -} - -template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { - if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { - return NULL_SAMPLER_ID; - } - const auto [pair, is_new] = samplers.try_emplace(config); - if (is_new) { - pair->second = slot_samplers.insert(runtime, config); - } - return pair->second; -} - -template -ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; - if (index >= regs.rt_control.count) { - return ImageViewId{}; - } - const auto& rt = regs.rt[index]; - const GPUVAddr gpu_addr = rt.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - if (rt.format == Tegra::RenderTargetFormat::NONE) { - return ImageViewId{}; - } - const ImageInfo info(regs, index); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; - if (!regs.zeta_enable) { - return ImageViewId{}; - } - const GPUVAddr gpu_addr = regs.zeta.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - const ImageInfo info(regs); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - Image& image = slot_images[image_id]; - const ImageViewType view_type = RenderTargetImageViewType(info); - SubresourceBase base; - if (image.info.type == ImageType::Linear) { - base = SubresourceBase{.level = 0, .layer = 0}; - } else { - base = image.TryFindBase(gpu_addr).value(); - } - const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; - const SubresourceRange range{ - .base = base, - .extent = {.levels = 1, .layers = layers}, - }; - return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); -} - -template -template -void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - boost::container::small_vector maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { - const auto it = page_table.find(page); - if (it == page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template -template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - -template -ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { - Image& image = slot_images[image_id]; - if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { - return image_view_id; - } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); - image.InsertView(info, image_view_id); - return image_view_id; -} - -template -void TextureCache

::RegisterImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); - image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); -} - -template -void TextureCache

::UnregisterImage(ImageId image_id) { - Image& image = slot_images[image_id]; - ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), - "Trying to unregister an already registered image"); - image.flags &= ~ImageFlagBits::Registered; - image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
- page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); - }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); -} - -template -void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { - ASSERT(False(image.flags & ImageFlagBits::Tracked)); - image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); -} - -template -void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { - ASSERT(True(image.flags & ImageFlagBits::Tracked)); - image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } -} - -template -void TextureCache

::DeleteImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - const GPUVAddr gpu_addr = image.gpu_addr; - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it == image_allocs_table.end()) { - UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); - return; - } - const ImageAllocId alloc_id = alloc_it->second; - std::vector& alloc_images = slot_image_allocs[alloc_id].images; - const auto alloc_image_it = std::ranges::find(alloc_images, image_id); - if (alloc_image_it == alloc_images.end()) { - UNREACHABLE_MSG("Trying to delete an image that does not exist"); - return; - } - ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - - // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - const std::span image_view_ids = image.image_view_ids; - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } - } - RemoveImageViewReferences(image_view_ids); - RemoveFramebuffers(image_view_ids); - - for (const AliasedImage& alias : image.aliased_images) { - ImageBase& other_image = slot_images[alias.id]; - [[maybe_unused]] const size_t num_removed_aliases = - std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { - return other_alias.id == image_id; - }); - other_image.CheckAliasState(); - ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", - num_removed_aliases); - } - for (const ImageId overlap_id : image.overlapping_images) { - ImageBase& 
other_image = slot_images[overlap_id]; - [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( - other_image.overlapping_images, - [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); - other_image.CheckBadOverlapState(); - ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", - num_removed_overlaps); - } - for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); - slot_image_views.erase(image_view_id); - } - sentenced_images.Push(std::move(slot_images[image_id])); - slot_images.erase(image_id); - - alloc_images.erase(alloc_image_it); - if (alloc_images.empty()) { - image_allocs_table.erase(alloc_it); - } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); - } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); - has_deleted_images = true; -} - -template -void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = image_views.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::RemoveFramebuffers(std::span removed_views) { - auto it = framebuffers.begin(); - while (it != framebuffers.end()) { - if (it->first.Contains(removed_views)) { - it = framebuffers.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::MarkModification(ImageBase& image) noexcept { - image.flags |= ImageFlagBits::GpuModified; - image.modification_tick = ++modification_tick; -} - -template -void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; - ImageBase& image = slot_images[image_id]; - u64 most_recent_tick = image.modification_tick; - for (const AliasedImage& aliased : image.aliased_images) { - ImageBase& aliased_image = slot_images[aliased.id]; - if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); - aliased_images.push_back(&aliased); - } - } - if (aliased_images.empty()) { - return; - } - image.modification_tick = most_recent_tick; - std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { - const ImageBase& lhs_image = slot_images[lhs->id]; - const ImageBase& rhs_image = slot_images[rhs->id]; - return lhs_image.modification_tick < rhs_image.modification_tick; - }); - for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); - } -} - -template -void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { - Image& image = slot_images[image_id]; - if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); - if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); - } - } else { - RefreshContents(image, image_id); - SynchronizeAliases(image_id); - } - if (is_modification) { - MarkModification(image); - } - image.frame_tick = frame_tick; -} - -template -void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, - bool invalidate) { - if (!image_view_id) { - return; - } - const ImageViewBase& image_view = slot_image_views[image_view_id]; - if (image_view.IsBuffer()) { - return; - } - PrepareImage(image_view.image_id, is_modification, invalidate); -} - -template -void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { - Image& dst = slot_images[dst_id]; - Image& src = slot_images[src_id]; - const auto dst_format_type = GetFormatType(dst.info.format); - const auto src_format_type = GetFormatType(src.info.format); - if (src_format_type == dst_format_type) { - if constexpr (HAS_EMULATED_COPIES) { - if (!runtime.CanImageBeCopied(dst, src)) { - return runtime.EmulateCopyImage(dst, src, copies); - } - } - return runtime.CopyImage(dst, src, copies); - } - UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); - UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); - for (const ImageCopy& copy : copies) { - UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); - UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - - const SubresourceBase dst_base{ - .level = copy.dst_subresource.base_level, - .layer = copy.dst_subresource.base_layer, - }; - const SubresourceBase src_base{ - .level = copy.src_subresource.base_level, - .layer = copy.src_subresource.base_layer, - }; - const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; - const SubresourceExtent src_extent{.levels = 1, .layers = 1}; - const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; - const SubresourceRange src_range{.base = src_base, .extent = src_extent}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); - const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - [[maybe_unused]] const Extent3D expected_size{ - .width = 
std::min(dst_view.size.width, src_view.size.width), - .height = std::min(dst_view.size.height, src_view.size.height), - .depth = std::min(dst_view.size.depth, src_view.size.depth), - }; - UNIMPLEMENTED_IF(copy.extent != expected_size); - - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); - } -} - -template -void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { - if (*old_id == new_id) { - return; - } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; - if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); - } - } - *old_id = new_id; -} - -template -std::pair TextureCache

::RenderTargetFromImage( - ImageId image_id, const ImageViewInfo& view_info) { - const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); - const ImageBase& image = slot_images[image_id]; - const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; - const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; - const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); - const u32 num_samples = image.info.num_samples; - const auto [samples_x, samples_y] = SamplesLog2(num_samples); - const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ - .color_buffer_ids = {color_view_id}, - .depth_buffer_id = depth_view_id, - .size = {extent.width >> samples_x, extent.height >> samples_y}, - }); - return {framebuffer_id, view_id}; -} - -template -bool TextureCache

::IsFullClear(ImageViewId id) { - if (!id) { - return true; - } - const ImageViewBase& image_view = slot_image_views[id]; - const ImageBase& image = slot_images[image_view.image_id]; - const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; - const auto& scissor = regs.scissor_test[0]; - if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { - // Images with multiple resources can't be cleared in a single call - return false; - } - if (regs.clear_flags.scissor == 0) { - // If scissor testing is disabled, the clear is always full - return true; - } - // Make sure the clear covers all texels in the subresource - return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && - scissor.max_y >= size.height; -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h new file mode 100644 index 0000000000..8440d23d1d --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_templates.h @@ -0,0 +1,1507 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; + +template +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; + critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; + minimum_memory = expected_memory; + } +} + +template +void TextureCache

::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } + } + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; + } + } + ++deletion_iterator; + } +} + +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + flags[Dirty::RenderTargets] = false; + + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} + +template +typename P::Framebuffer* TextureCache

// GetFramebuffer: returns a pointer to the slot_framebuffers entry whose id is obtained from
// GetFramebufferId for the current render_targets state.
::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} + +template +void TextureCache

// FillImageViews: maps every descriptor index in `indices` to an image view id (through
// VisitImageView) and writes the results to `image_view_ids`, which must be at least as large as
// `indices`. The whole pass is repeated while has_deleted_images is set again during the pass.
// NOTE(review): has_deleted_images is written outside this block - presumably when an image is
// deleted while visiting, which would invalidate ids already produced; confirm.
::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} + +template +ImageViewId TextureCache

// VisitImageView: resolves one descriptor index to an image view id.
// Indices greater than table.Limit() are logged at debug level and yield NULL_IMAGE_VIEW_ID.
// When table.Read reports the descriptor as new, the cached id for that index is replaced with
// FindImageView(descriptor). Any non-null id is passed to PrepareImageView(id, false, false)
// before being returned.
::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} + +template +FramebufferId TextureCache

// GetFramebufferId: returns the framebuffer id cached for `key`, creating it on first use.
// try_emplace reserves the map entry; if the key already existed its stored id is returned.
// Otherwise the color/depth view ids in `key` are resolved to ImageView pointers (nullptr for
// empty ids), a framebuffer is inserted into slot_framebuffers and its id is stored in the map.
::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} + +template +void TextureCache

// WriteMemory: marks every image found in the CPU range [cpu_addr, cpu_addr + size) as
// CpuModified. Images already flagged are skipped; newly flagged images that are Tracked are
// untracked.
::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} + +template +void TextureCache

// DownloadMemory: writes GPU-side image contents in the given CPU range back to guest memory.
// Only images for which IsSafeDownload() is true are collected; their GpuModified flag is
// cleared while collecting. The collected images are processed in ascending modification_tick
// order: each is downloaded into a staging buffer of unswizzled_size_bytes, runtime.Finish() is
// called, and the data is swizzled to image.gpu_addr via SwizzleImage.
::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + if (!image.IsSafeDownload()) { + return; + } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } +} + +template +void TextureCache

// UnmapMemory: removes every image found in the CPU range from the cache.
// Ids are gathered first and processed after the region walk has finished; each image is
// untracked (if Tracked), unregistered and deleted.
::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + UnregisterImage(id); + DeleteImage(id); + } +} + +template +void TextureCache

// UnmapGPUMemory: flags every image found in the GPU range as Remapped.
// Images that already carry the flag are skipped; newly flagged images are untracked if they
// were Tracked. Unlike UnmapMemory, nothing is unregistered or deleted here.
::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template +void TextureCache

// BlitImage: performs a Fermi2D blit from `src` to `dst` using the rectangle in `copy`.
// The images come from GetBlitImages; the source is prepared with (false, false) and the
// destination with (true, false). For each side a 2D view with extent {1, 1} is created at the
// subresource found by TryFindBase(surface address), and the copy rectangle coordinates are
// shifted right by the per-axis SamplesLog2(num_samples) values.
// When FRAMEBUFFER_BLITS is set the runtime blits between framebuffers, otherwise it blits image
// views and receives the destination framebuffer as well.
// NOTE(review): TryFindBase(...).value() is unchecked; std::optional::value() throws
// std::bad_optional_access if no base is found - confirm this cannot happen here.
::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; + + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; + + // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} + +template +typename P::ImageView* TextureCache

// TryFindFramebufferImageView: looks for an image starting exactly at `cpu_addr`.
// Only the page_table entry for cpu_addr >> PAGE_BITS is inspected. The first image whose
// cpu_addr matches and that owns at least one view yields a pointer to its first view
// (image_view_ids.at(0)); otherwise nullptr is returned.
::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} + +template +bool TextureCache

// HasUncommittedFlushes: true when uncommitted_downloads holds at least one entry.
::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} + +template +bool TextureCache

// ShouldWaitAsyncFlushes: true when committed_downloads is non-empty and its front element
// (the oldest committed batch) is itself non-empty.
::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} + +template +void TextureCache

// CommitAsyncFlushes: pushes a copy of uncommitted_downloads onto committed_downloads and then
// clears uncommitted_downloads so a new batch can be collected.
::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} + +template +void TextureCache

// PopAsyncFlushes: processes and removes the front batch of committed_downloads.
// Does nothing if there is no batch; an empty batch is simply popped. Otherwise one staging
// buffer sized to the sum of the images' unswizzled_size_bytes is requested, every image is
// downloaded at an increasing download_map.offset, and runtime.Finish() is called. The offset is
// then reset and each image is swizzled back to guest memory from consecutive subspans of the
// mapped span, in the same order, before the batch is popped.
::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop(); +} + +template +bool TextureCache

// IsRegionGpuModified: reports whether any image in the CPU range has the GpuModified flag.
// The callback returns true on the first such image; ForEachImageInRegion forwards a true result
// out of its per-page callback (presumably ending the walk early - confirm in ForEachCPUPage).
::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} + +template +void TextureCache

// RefreshContents: re-uploads guest data for an image flagged CpuModified.
// Images without the flag are left untouched. Otherwise the flag is cleared and the image is
// tracked again. Multisampled images (num_samples > 1) only log a warning and are not uploaded.
// For the rest, an upload staging buffer of MapSizeBytes(image) is filled through
// UploadImageContents, followed by runtime.InsertUploadMemoryBarrier().
::RefreshContents(Image& image, ImageId image_id) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image, image_id); + + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); + runtime.InsertUploadMemoryBarrier(); +} + +template +template +void TextureCache

// UploadImageContents: fills `staging` from guest memory and uploads it into `image`.
// Three paths, selected by image.flags:
//  - AcceleratedUpload: raw guest bytes are read into the staging span and handed to
//    runtime.AccelerateImageUpload together with FullUploadSwizzles(image.info).
//  - Converted: data is unswizzled into a temporary buffer of unswizzled_size_bytes, converted
//    into the staging span with ConvertImage, then uploaded.
//  - otherwise: data is unswizzled directly into the staging span and uploaded.
::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span mapped_span = staging.mapped_span; + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, staging, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(staging, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(staging, copies); + } +} + +template +ImageViewId TextureCache

// FindImageView: returns the image view id cached for a TIC entry, creating it on first use.
// Entries rejected by IsValidEntry yield NULL_IMAGE_VIEW_ID and are not cached. For valid
// entries, try_emplace reserves a slot in image_views and CreateImageView fills it when new.
::FindImageView(const TICEntry& config) { + if (!IsValidEntry(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} + +template +ImageViewId TextureCache

// CreateImageView: builds an image view for a TIC entry.
// Buffer-type entries get a view inserted directly with the entry's address and no backing image.
// For other types the image base address is the entry address minus
// BaseLayer() * layer_stride; the image is found or inserted there, and NULL_IMAGE_VIEW_ID is
// returned if that yields no image. The view is then found or created at the subresource that
// TryFindBase reports for the entry address (asserted to be level 0), and both the view and the
// image are flagged Strong.
::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} + +template +ImageId TextureCache

// FindOrInsertImage: returns the id FindImage yields for (info, gpu_addr, options) when it is
// non-empty; otherwise inserts a new image through InsertImage with the same arguments.
::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template +ImageId TextureCache

// FindImage: searches the cache for an existing image compatible with `info` at `gpu_addr`.
// The GPU address is translated to a CPU address (retrying with the guest size if the first
// translation fails); an empty ImageId is returned if neither works.
// Candidates flagged Remapped are ignored. If either side is Linear, a match requires the same
// gpu_addr, type and pitch, IsPitchLinearSameSize and IsViewCompatible; size is compared strictly
// only when RelaxedOptions::Size is not requested and the existing image is Strong.
// Otherwise IsSubresource decides. The callback returns true on the first match.
::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} + +template +ImageId TextureCache

// InsertImage: creates a new image for (info, gpu_addr) and records it in the allocation table.
// The CPU address comes from GpuToCpuAddress (retried with the guest size). If both attempts
// fail, a placeholder address is taken from ~(1ULL << 40) + virtual_invalid_space, and
// virtual_invalid_space advances by the size aligned up to 32.
// The image itself is produced by JoinImages; it is then appended to the ImageAlloc keyed by the
// resulting image.gpu_addr (created on first use).
// NOTE(review): after the fallback above cpu_addr is always engaged, so the ASSERT_MSG below
// cannot fire as written. `options` is not used in this body.
::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional(fake_addr); + } + } + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} + +template +ImageId TextureCache

// JoinImages: creates the image for `info` and reconciles it with overlapping cached images.
// Pass 1 (ForEachImageInRegion over the CPU range) classifies each overlap:
//  - Remapped overlaps go to ignore_textures;
//  - for Linear `info`, overlaps with the same pitch and gpu_addr become left aliases and
//    nothing else is considered;
//  - overlaps that ResolveOverlap can merge update gpu_addr/cpu_addr/resources of the new image
//    and go to overlap_ids;
//  - remaining overlaps become left or right aliases depending on which side IsSubresource
//    accepts, or are recorded as bad overlaps.
// Pass 2 (ForEachSparseImageInRegion over the GPU range) adds overlaps not seen in pass 1 to
// ignore_textures when they are Remapped or share gpu_addr and guest size with the new image.
// The new image is then inserted (flagged Sparse if its GPU range is not continuous), ignored
// images are untracked/unregistered/deleted, contents are refreshed, merged overlaps are copied
// into the new image (skipped with a warning when sample counts differ) and deleted, alias and
// bad-overlap links are recorded, and finally the new image is registered.
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + std::vector overlap_ids; + std::unordered_set overlaps_found; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + std::unordered_set ignore_textures; + std::vector bad_overlap_ids; + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + overlaps_found.insert(overlap_id); + static constexpr bool strict_size = true; + const std::optional solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { + left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views, native_bgr)) { + right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; + } + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; + + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } + + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + + // TODO: Only upload what we need + RefreshContents(new_image, new_image_id); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : 
left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; + } + RegisterImage(new_image_id); + return new_image_id; +} + +template +typename TextureCache

// GetBlitImages: resolves the destination and source images (and formats) for a Fermi2D blit.
// Each iteration looks both images up with relaxed Format and Samples matching, lets
// DeduceBlitImages adjust dst_info/src_info from whatever was found, and inserts any missing
// image. The loop repeats while has_deleted_images is set during the iteration.
// NOTE(review): when the deduced format types differ, `continue` jumps to the do-while
// condition; if has_deleted_images is false the loop ends without inserting missing images, so
// dst_id/src_id may be returned empty - confirm callers tolerate this.
::BlitImages TextureCache

::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template +SamplerId TextureCache

// FindSampler: returns the sampler id cached for a TSC entry, creating it on first use.
// An entry whose raw words are all zero yields NULL_SAMPLER_ID without touching the cache.
::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} + +template +ImageViewId TextureCache

// FindColorBuffer: resolves color render target `index` to an image view id.
// Returns an empty ImageViewId when the index is at or beyond rt_control.count, when the target
// address is 0, or when its format is RenderTargetFormat::NONE. Otherwise an ImageInfo is built
// from the registers and FindRenderTargetView performs the lookup.
::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

// FindDepthBuffer: resolves the depth (zeta) buffer to an image view id.
// Returns an empty ImageViewId when zeta_enable is off or the zeta address is 0; otherwise an
// ImageInfo is built from the registers and FindRenderTargetView performs the lookup.
::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} + +template +ImageViewId TextureCache

// FindRenderTargetView: finds or creates the image and view backing a render target.
// Sample count matching is relaxed only for clears. NULL_IMAGE_VIEW_ID is returned if no image
// can be found or inserted. Linear images always use level 0 / layer 0 as base; other images use
// the subresource TryFindBase reports for gpu_addr. The view covers one level; its layer count
// is info.size.depth for 3D images and info.resources.layers otherwise.
::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} + +template +template +void TextureCache

// ForEachImageInRegion: calls `func(image_id, image)` once for each image that has a map view
// overlapping the CPU range.
// The range is walked page by page through page_table. Map views are marked `picked` and images
// are flagged Picked as they are visited so that entries reachable from several pages are
// handled only once; both marks are cleared again before returning.
// If `func` returns bool (BOOL_BREAK), a true result is returned from the per-page callback
// (presumably stopping ForEachCPUPage early - confirm there).
// NOTE(review): the template arguments of invoke_result / is_same_v / small_vector were lost in
// extraction of this text.
::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + boost::container::small_vector maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template +template +void TextureCache

// ForEachImageInRegionGPU: calls `func(image_id, image)` once for each image in gpu_page_table
// whose OverlapsGPU(gpu_addr, size) is true.
// Images are flagged Picked while the pages are walked so each is visited once; the flag is
// cleared before returning. A bool-returning `func` can request an early exit by returning true.
::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template +template +void TextureCache

// ForEachSparseImageInRegion: same traversal as ForEachImageInRegionGPU, but over
// sparse_page_table, i.e. only images registered as sparse are visited.
// Each image whose OverlapsGPU(gpu_addr, size) is true is passed to `func` once; the temporary
// Picked flag is cleared before returning.
::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template +template +void TextureCache

// ForEachSparseSegment: calls `func(gpu_addr, cpu_addr, size)` for every segment that
// gpu_memory.GetSubmappedRange reports for the image's GPU range.
// Each segment's GPU address is translated to a CPU address (asserted to succeed). When `func`
// returns bool, returning true stops the iteration.
::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool RETURNS_BOOL = std::is_same_v; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} + +template +ImageViewId TextureCache

// FindOrEmplaceImageView: returns the view of `image_id` matching `info`.
// If image.FindView finds one it is returned; otherwise a new view is inserted into
// slot_image_views and registered on the image with InsertView.
::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template +void TextureCache

// RegisterImage: makes an image discoverable through the page tables.
// Asserts the image is not yet Registered, sets the flag and adds its estimated size to
// total_used_memory: max(guest_size_bytes, unswizzled_size_bytes), replaced by
// EstimatedDecompressedSize for (ASTC with AcceleratedUpload) or Converted images, aligned up
// to 1024. The image id is appended to gpu_page_table for every GPU page it covers.
// Non-sparse images get a single map view that is appended to page_table for every CPU page and
// stored in image.map_view_id. Sparse images get one map view per segment from
// ForEachSparseSegment (kept in sparse_views) and are also appended to sparse_page_table.
::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); +} + +template +void TextureCache

// UnregisterImage: reverses RegisterImage for `image_id`.
// Asserts the image is Registered, clears Registered and BadOverlap, and subtracts the same size
// estimate RegisterImage added from total_used_memory. The image id is erased from
// gpu_page_table for every GPU page (missing pages/ids hit UNREACHABLE_MSG).
// Non-sparse images: the single map view is erased from page_table for every CPU page and from
// slot_map_views. Sparse images: the id is erased from sparse_page_table, then for each map view
// in sparse_views every page_table entry belonging to this image is erased, the map view is
// released, and finally the sparse_views entry is removed.
// NOTE(review): the assertion message reads "already registered" although the check requires the
// Registered flag to be set - presumably "unregistered" was intended; confirm before changing
// the runtime string.
::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map, IdentityHash>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
+ page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); + }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); +} + +template +void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); +} + +template +void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } +} + +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + other_image.CheckAliasState(); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& 
other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); + + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} + +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} + +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} + +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} + +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image, image_id); + } + } else { + RefreshContents(image, image_id); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} + +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } + PrepareImage(image_view.image_id, is_modification, invalidate); +} + +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = 
std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); + + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} + +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} + +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} + +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} + +} // namespace VideoCommon From 6df9611059e21d003fda4a0f8bd6773736438643 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:11:14 +0000 Subject: [PATCH 15/35] memory: Clean up code --- src/core/memory.cpp | 306 +++++++++++--------------------------------- src/core/memory.h | 104 +-------------- 2 files changed, 81 insertions(+), 329 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f285c6f639..7b23c189cb 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1,11 +1,9 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2021 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
#include #include -#include -#include #include "common/assert.h" #include "common/atomic_ops.h" @@ -14,12 +12,10 @@ #include "common/page_table.h" #include "common/settings.h" #include "common/swap.h" -#include "core/arm/arm_interface.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" -#include "core/hle/kernel/physical_memory.h" #include "core/memory.h" #include "video_core/gpu.h" @@ -62,17 +58,7 @@ struct Memory::Impl { } } - bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { - const auto& page_table = process.PageTable().PageTableImpl(); - const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType(); - return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory; - } - - bool IsValidVirtualAddress(VAddr vaddr) const { - return IsValidVirtualAddress(*system.CurrentProcess(), vaddr); - } - - u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const { + [[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const { const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]}; if (!paddr) { @@ -82,7 +68,7 @@ struct Memory::Impl { return system.DeviceMemory().GetPointer(paddr) + vaddr; } - u8* GetPointer(const VAddr vaddr) const { + [[nodiscard]] u8* GetPointer(const VAddr vaddr) const { const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { return pointer + vaddr; @@ -179,7 +165,7 @@ struct Memory::Impl { std::string string; string.reserve(max_length); for (std::size_t i = 0; i < max_length; ++i) { - const char c = Read8(vaddr); + const char c = Read(vaddr); if (c == '\0') { break; } @@ -190,15 +176,14 @@ struct Memory::Impl { return string; } - void ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer, - const std::size_t size) { + void 
WalkBlock(const Kernel::KProcess& process, VAddr addr, const std::size_t size, + auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) { const auto& page_table = process.PageTable().PageTableImpl(); - std::size_t remaining_size = size; - std::size_t page_index = src_addr >> PAGE_BITS; - std::size_t page_offset = src_addr & PAGE_MASK; + std::size_t page_index = addr >> PAGE_BITS; + std::size_t page_offset = addr & PAGE_MASK; - while (remaining_size > 0) { + while (remaining_size) { const std::size_t copy_amount = std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); @@ -206,22 +191,18 @@ struct Memory::Impl { const auto [pointer, type] = page_table.pointers[page_index].PointerType(); switch (type) { case Common::PageType::Unmapped: { - LOG_ERROR(HW_Memory, - "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", - current_vaddr, src_addr, size); - std::memset(dest_buffer, 0, copy_amount); + on_unmapped(copy_amount, current_vaddr); break; } case Common::PageType::Memory: { DEBUG_ASSERT(pointer); - const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS); - std::memcpy(dest_buffer, src_ptr, copy_amount); + u8* mem_ptr = pointer + page_offset + (page_index << PAGE_BITS); + on_memory(copy_amount, mem_ptr); break; } case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; - system.GPU().FlushRegion(current_vaddr, copy_amount); - std::memcpy(dest_buffer, host_ptr, copy_amount); + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + on_rasterizer(current_vaddr, copy_amount, host_ptr); break; } default: @@ -230,199 +211,98 @@ struct Memory::Impl { page_index++; page_offset = 0; - dest_buffer = static_cast(dest_buffer) + copy_amount; + addr += static_cast(copy_amount); + increment(copy_amount); remaining_size -= copy_amount; } } - void 
ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer, - const std::size_t size) { - const auto& page_table = process.PageTable().PageTableImpl(); - - std::size_t remaining_size = size; - std::size_t page_index = src_addr >> PAGE_BITS; - std::size_t page_offset = src_addr & PAGE_MASK; - - while (remaining_size > 0) { - const std::size_t copy_amount = - std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); - const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - - const auto [pointer, type] = page_table.pointers[page_index].PointerType(); - switch (type) { - case Common::PageType::Unmapped: { + template + void ReadBlockImpl(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer, + const std::size_t size) { + WalkBlock( + process, src_addr, size, + [src_addr, size, &dest_buffer](const std::size_t copy_amount, + const VAddr current_vaddr) { LOG_ERROR(HW_Memory, "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", current_vaddr, src_addr, size); std::memset(dest_buffer, 0, copy_amount); - break; - } - case Common::PageType::Memory: { - DEBUG_ASSERT(pointer); - const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS); + }, + [&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) { std::memcpy(dest_buffer, src_ptr, copy_amount); - break; - } - case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + }, + [&system = system, &dest_buffer](const VAddr current_vaddr, + const std::size_t copy_amount, + const u8* const host_ptr) { + if (!UNSAFE) { + system.GPU().FlushRegion(current_vaddr, copy_amount); + } std::memcpy(dest_buffer, host_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - dest_buffer = static_cast(dest_buffer) + copy_amount; - remaining_size -= copy_amount; - } + }, + [&dest_buffer](const 
std::size_t copy_amount) { + dest_buffer = static_cast(dest_buffer) + copy_amount; + }); } void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) { - ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); + ReadBlockImpl(*system.CurrentProcess(), src_addr, dest_buffer, size); } void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { - ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size); + ReadBlockImpl(*system.CurrentProcess(), src_addr, dest_buffer, size); } - void WriteBlock(const Kernel::KProcess& process, const VAddr dest_addr, const void* src_buffer, - const std::size_t size) { - const auto& page_table = process.PageTable().PageTableImpl(); - std::size_t remaining_size = size; - std::size_t page_index = dest_addr >> PAGE_BITS; - std::size_t page_offset = dest_addr & PAGE_MASK; - - while (remaining_size > 0) { - const std::size_t copy_amount = - std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); - const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - - const auto [pointer, type] = page_table.pointers[page_index].PointerType(); - switch (type) { - case Common::PageType::Unmapped: { + template + void WriteBlockImpl(const Kernel::KProcess& process, const VAddr dest_addr, + const void* src_buffer, const std::size_t size) { + WalkBlock( + process, dest_addr, size, + [dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) { LOG_ERROR(HW_Memory, "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", current_vaddr, dest_addr, size); - break; - } - case Common::PageType::Memory: { - DEBUG_ASSERT(pointer); - u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); + }, + [&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) { std::memcpy(dest_ptr, src_buffer, copy_amount); - break; - } - case Common::PageType::RasterizerCachedMemory: { - u8* const 
host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; - system.GPU().InvalidateRegion(current_vaddr, copy_amount); + }, + [&system = system, &src_buffer](const VAddr current_vaddr, + const std::size_t copy_amount, u8* const host_ptr) { + if (!UNSAFE) { + system.GPU().InvalidateRegion(current_vaddr, copy_amount); + } std::memcpy(host_ptr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - src_buffer = static_cast(src_buffer) + copy_amount; - remaining_size -= copy_amount; - } - } - - void WriteBlockUnsafe(const Kernel::KProcess& process, const VAddr dest_addr, - const void* src_buffer, const std::size_t size) { - const auto& page_table = process.PageTable().PageTableImpl(); - std::size_t remaining_size = size; - std::size_t page_index = dest_addr >> PAGE_BITS; - std::size_t page_offset = dest_addr & PAGE_MASK; - - while (remaining_size > 0) { - const std::size_t copy_amount = - std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); - const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - - const auto [pointer, type] = page_table.pointers[page_index].PointerType(); - switch (type) { - case Common::PageType::Unmapped: { - LOG_ERROR(HW_Memory, - "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", - current_vaddr, dest_addr, size); - break; - } - case Common::PageType::Memory: { - DEBUG_ASSERT(pointer); - u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); - std::memcpy(dest_ptr, src_buffer, copy_amount); - break; - } - case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; - std::memcpy(host_ptr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - src_buffer = static_cast(src_buffer) + copy_amount; - remaining_size -= copy_amount; - } + }, + [&src_buffer](const std::size_t copy_amount) { + src_buffer 
= static_cast(src_buffer) + copy_amount; + }); } void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { - WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); + WriteBlockImpl(*system.CurrentProcess(), dest_addr, src_buffer, size); } void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { - WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size); + WriteBlockImpl(*system.CurrentProcess(), dest_addr, src_buffer, size); } void ZeroBlock(const Kernel::KProcess& process, const VAddr dest_addr, const std::size_t size) { - const auto& page_table = process.PageTable().PageTableImpl(); - std::size_t remaining_size = size; - std::size_t page_index = dest_addr >> PAGE_BITS; - std::size_t page_offset = dest_addr & PAGE_MASK; - - while (remaining_size > 0) { - const std::size_t copy_amount = - std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); - const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - - const auto [pointer, type] = page_table.pointers[page_index].PointerType(); - switch (type) { - case Common::PageType::Unmapped: { + WalkBlock( + process, dest_addr, size, + [dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) { LOG_ERROR(HW_Memory, "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", current_vaddr, dest_addr, size); - break; - } - case Common::PageType::Memory: { - DEBUG_ASSERT(pointer); - u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS); + }, + [](const std::size_t copy_amount, u8* const dest_ptr) { std::memset(dest_ptr, 0, copy_amount); - break; - } - case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + }, + [&system = system](const VAddr current_vaddr, const std::size_t copy_amount, + u8* const host_ptr) { system.GPU().InvalidateRegion(current_vaddr, copy_amount); 
std::memset(host_ptr, 0, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - remaining_size -= copy_amount; - } - } - - void ZeroBlock(const VAddr dest_addr, const std::size_t size) { - ZeroBlock(*system.CurrentProcess(), dest_addr, size); + }, + [](const std::size_t copy_amount) {}); } void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr, @@ -432,7 +312,7 @@ struct Memory::Impl { std::size_t page_index = src_addr >> PAGE_BITS; std::size_t page_offset = src_addr & PAGE_MASK; - while (remaining_size > 0) { + while (remaining_size) { const std::size_t copy_amount = std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); @@ -449,13 +329,13 @@ struct Memory::Impl { case Common::PageType::Memory: { DEBUG_ASSERT(pointer); const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS); - WriteBlock(process, dest_addr, src_ptr, copy_amount); + WriteBlockImpl(process, dest_addr, src_ptr, copy_amount); break; } case Common::PageType::RasterizerCachedMemory: { const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; system.GPU().FlushRegion(current_vaddr, copy_amount); - WriteBlock(process, dest_addr, host_ptr, copy_amount); + WriteBlockImpl(process, dest_addr, host_ptr, copy_amount); break; } default: @@ -470,10 +350,6 @@ struct Memory::Impl { } } - void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) { - return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); - } - void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { if (vaddr == 0) { return; @@ -517,7 +393,6 @@ struct Memory::Impl { case Common::PageType::Unmapped: // It is not necessary for a process to have this region mapped into its address // space, for example, a system module need not have a VRAM mapping. 
- break; case Common::PageType::Memory: // There can be more than one GPU region mapped per CPU region, so it's common // that this area is already unmarked as cached. @@ -789,12 +664,11 @@ void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { impl->UnmapRegion(page_table, base, size); } -bool Memory::IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { - return impl->IsValidVirtualAddress(process, vaddr); -} - bool Memory::IsValidVirtualAddress(const VAddr vaddr) const { - return impl->IsValidVirtualAddress(vaddr); + const Kernel::KProcess& process = *system.CurrentProcess(); + const auto& pageTable = process.PageTable().PageTableImpl(); + const auto [pointer, type] = pageTable.pointers[vaddr >> PAGE_BITS].PointerType(); + return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory; } u8* Memory::GetPointer(VAddr vaddr) { @@ -863,64 +737,38 @@ std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) { void Memory::ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer, const std::size_t size) { - impl->ReadBlock(process, src_addr, dest_buffer, size); + impl->ReadBlockImpl(process, src_addr, dest_buffer, size); } void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) { impl->ReadBlock(src_addr, dest_buffer, size); } -void Memory::ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr, - void* dest_buffer, const std::size_t size) { - impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size); -} - void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) { impl->ReadBlockUnsafe(src_addr, dest_buffer, size); } void Memory::WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer, std::size_t size) { - impl->WriteBlock(process, dest_addr, src_buffer, size); + impl->WriteBlockImpl(process, dest_addr, src_buffer, size); } void 
Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { impl->WriteBlock(dest_addr, src_buffer, size); } -void Memory::WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr, - const void* src_buffer, std::size_t size) { - impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size); -} - void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) { impl->WriteBlockUnsafe(dest_addr, src_buffer, size); } -void Memory::ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) { - impl->ZeroBlock(process, dest_addr, size); -} - -void Memory::ZeroBlock(VAddr dest_addr, std::size_t size) { - impl->ZeroBlock(dest_addr, size); -} - void Memory::CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr, const std::size_t size) { impl->CopyBlock(process, dest_addr, src_addr, size); } -void Memory::CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) { - impl->CopyBlock(dest_addr, src_addr, size); -} - void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { impl->RasterizerMarkRegionCached(vaddr, size, cached); } -bool IsKernelVirtualAddress(const VAddr vaddr) { - return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END; -} - } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index c91eeced96..9122cbab9c 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -1,4 +1,4 @@ -// Copyright 2014 Citra Emulator Project +// Copyright 2021 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -39,11 +39,6 @@ enum : VAddr { /// Application stack DEFAULT_STACK_SIZE = 0x100000, - - /// Kernel Virtual Address Range - KERNEL_REGION_VADDR = 0xFFFFFF8000000000, - KERNEL_REGION_SIZE = 0x7FFFE00000, - KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, }; /// Central class that handles all memory operations and state. 
@@ -56,7 +51,7 @@ public: Memory& operator=(const Memory&) = delete; Memory(Memory&&) = default; - Memory& operator=(Memory&&) = default; + Memory& operator=(Memory&&) = delete; /** * Resets the state of the Memory system. @@ -90,17 +85,6 @@ public: */ void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size); - /** - * Checks whether or not the supplied address is a valid virtual - * address for the given process. - * - * @param process The emulated process to check the address against. - * @param vaddr The virtual address to check the validity of. - * - * @returns True if the given virtual address is valid, false otherwise. - */ - bool IsValidVirtualAddress(const Kernel::KProcess& process, VAddr vaddr) const; - /** * Checks whether or not the supplied address is a valid virtual * address for the current process. @@ -109,7 +93,7 @@ public: * * @returns True if the given virtual address is valid, false otherwise. */ - bool IsValidVirtualAddress(VAddr vaddr) const; + [[nodiscard]] bool IsValidVirtualAddress(VAddr vaddr) const; /** * Gets a pointer to the given address. @@ -134,7 +118,7 @@ public: * @returns The pointer to the given address, if the address is valid. * If the address is not valid, nullptr will be returned. */ - const u8* GetPointer(VAddr vaddr) const; + [[nodiscard]] const u8* GetPointer(VAddr vaddr) const; template const T* GetPointer(VAddr vaddr) const { @@ -327,27 +311,6 @@ public: void ReadBlock(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer, std::size_t size); - /** - * Reads a contiguous block of bytes from a specified process' address space. - * This unsafe version does not trigger GPU flushing. - * - * @param process The process to read the data from. - * @param src_addr The virtual address to begin reading from. - * @param dest_buffer The buffer to place the read bytes into. - * @param size The amount of data to read, in bytes. 
- * - * @note If a size of 0 is specified, then this function reads nothing and - * no attempts to access memory are made at all. - * - * @pre dest_buffer must be at least size bytes in length, otherwise a - * buffer overrun will occur. - * - * @post The range [dest_buffer, size) contains the read bytes from the - * process' address space. - */ - void ReadBlockUnsafe(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer, - std::size_t size); - /** * Reads a contiguous block of bytes from the current process' address space. * @@ -408,26 +371,6 @@ public: void WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer, std::size_t size); - /** - * Writes a range of bytes into a given process' address space at the specified - * virtual address. - * This unsafe version does not invalidate GPU Memory. - * - * @param process The process to write data into the address space of. - * @param dest_addr The destination virtual address to begin writing the data at. - * @param src_buffer The data to write into the process' address space. - * @param size The size of the data to write, in bytes. - * - * @post The address range [dest_addr, size) in the process' address space - * contains the data that was within src_buffer. - * - * @post If an attempt is made to write into an unmapped region of memory, the writes - * will be ignored and an error will be logged. - * - */ - void WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer, - std::size_t size); - /** * Writes a range of bytes into the current process' address space at the specified * virtual address. @@ -467,29 +410,6 @@ public: */ void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size); - /** - * Fills the specified address range within a process' address space with zeroes. - * - * @param process The process that will have a portion of its memory zeroed out. 
- * @param dest_addr The starting virtual address of the range to zero out. - * @param size The size of the address range to zero out, in bytes. - * - * @post The range [dest_addr, size) within the process' address space is - * filled with zeroes. - */ - void ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size); - - /** - * Fills the specified address range within the current process' address space with zeroes. - * - * @param dest_addr The starting virtual address of the range to zero out. - * @param size The size of the address range to zero out, in bytes. - * - * @post The range [dest_addr, size) within the current process' address space is - * filled with zeroes. - */ - void ZeroBlock(VAddr dest_addr, std::size_t size); - /** * Copies data within a process' address space to another location within the * same address space. @@ -505,19 +425,6 @@ public: void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr, std::size_t size); - /** - * Copies data within the current process' address space to another location within the - * same address space. - * - * @param dest_addr The destination virtual address to begin copying the data into. - * @param src_addr The source virtual address to begin copying the data from. - * @param size The size of the data to copy, in bytes. - * - * @post The range [dest_addr, size) within the current process' address space - * contains the same data within the range [src_addr, size). - */ - void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size); - /** * Marks each page within the specified address range as cached or uncached. 
* @@ -535,7 +442,4 @@ private: std::unique_ptr impl; }; -/// Determines if the given VAddr is a kernel address -bool IsKernelVirtualAddress(VAddr vaddr); - } // namespace Core::Memory From 4edfa6ad8ff65e91dd6363e10546e1b894d3460b Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:29:43 +0000 Subject: [PATCH 16/35] memory: Address lioncash's review --- src/core/memory.cpp | 13 +++++++------ src/core/memory.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 7b23c189cb..2e578f1891 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 Citra Emulator Project +// Copyright 2015 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -235,7 +235,7 @@ struct Memory::Impl { [&system = system, &dest_buffer](const VAddr current_vaddr, const std::size_t copy_amount, const u8* const host_ptr) { - if (!UNSAFE) { + if constexpr (!UNSAFE) { system.GPU().FlushRegion(current_vaddr, copy_amount); } std::memcpy(dest_buffer, host_ptr, copy_amount); @@ -268,7 +268,7 @@ struct Memory::Impl { }, [&system = system, &src_buffer](const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { - if (!UNSAFE) { + if constexpr (!UNSAFE) { system.GPU().InvalidateRegion(current_vaddr, copy_amount); } std::memcpy(host_ptr, src_buffer, copy_amount); @@ -390,9 +390,10 @@ struct Memory::Impl { } else { // Switch page type to uncached if now uncached switch (page_type) { - case Common::PageType::Unmapped: + case Common::PageType::Unmapped: // NOLINT(bugprone-branch-clone) // It is not necessary for a process to have this region mapped into its address // space, for example, a system module need not have a VRAM mapping. 
+ break; case Common::PageType::Memory: // There can be more than one GPU region mapped per CPU region, so it's common // that this area is already unmarked as cached. @@ -666,8 +667,8 @@ void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { bool Memory::IsValidVirtualAddress(const VAddr vaddr) const { const Kernel::KProcess& process = *system.CurrentProcess(); - const auto& pageTable = process.PageTable().PageTableImpl(); - const auto [pointer, type] = pageTable.pointers[vaddr >> PAGE_BITS].PointerType(); + const auto& page_table = process.PageTable().PageTableImpl(); + const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType(); return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory; } diff --git a/src/core/memory.h b/src/core/memory.h index 9122cbab9c..b5721b7405 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -1,4 +1,4 @@ -// Copyright 2021 Citra Emulator Project +// Copyright 2014 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
From 5566f3dbc0db1de41fcd291f5b7588d9e055ba85 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:46:24 +0000 Subject: [PATCH 17/35] texture_cache: Address ameerj's review --- src/video_core/CMakeLists.txt | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_texture_cache.h | 2 +- ...emplates.cpp => gl_texture_cache_base.cpp} | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.h | 2 +- ...emplates.cpp => vk_texture_cache_base.cpp} | 2 +- .../texture_cache/image_view_info.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 1775 +++++++++++++---- .../texture_cache/texture_cache_base.h | 402 ++++ .../texture_cache/texture_cache_templates.h | 1507 -------------- 12 files changed, 1853 insertions(+), 1853 deletions(-) rename src/video_core/renderer_opengl/{gl_texture_cache_templates.cpp => gl_texture_cache_base.cpp} (82%) rename src/video_core/renderer_vulkan/{vk_texture_cache_templates.cpp => vk_texture_cache_base.cpp} (82%) create mode 100644 src/video_core/texture_cache/texture_cache_base.h delete mode 100644 src/video_core/texture_cache/texture_cache_templates.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1250cca6f9..2f6cdd216d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -97,7 +97,7 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h - renderer_opengl/gl_texture_cache_templates.cpp + renderer_opengl/gl_texture_cache_base.cpp renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h @@ -156,7 +156,7 @@ add_library(video_core STATIC renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h - renderer_vulkan/vk_texture_cache_templates.cpp + 
renderer_vulkan/vk_texture_cache_base.cpp renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp @@ -188,7 +188,7 @@ add_library(video_core STATIC texture_cache/samples_helper.h texture_cache/slot_vector.h texture_cache/texture_cache.h - texture_cache/texture_cache_templates.h + texture_cache/texture_cache_base.h texture_cache/types.h texture_cache/util.cpp texture_cache/util.h diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index fac0034fb8..bccb37a586 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 41d2b73f4e..b909c387e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 921072ebee..4a4f6301c3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -12,7 +12,7 @@ #include "shader_recompiler/shader_info.h" #include 
"video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp similarity index 82% rename from src/video_core/renderer_opengl/gl_texture_cache_templates.cpp rename to src/video_core/renderer_opengl/gl_texture_cache_base.cpp index 00ed064478..385358fea3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCommon { template class VideoCommon::TextureCache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23cef2996e..3ac18ea541 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0b73d55f8b..5fe6b7ba3e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -9,7 +9,7 @@ #include "shader_recompiler/shader_info.h" #include 
"video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp similarity index 82% rename from src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp rename to src/video_core/renderer_vulkan/vk_texture_cache_base.cpp index fd89789547..44e6883422 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCommon { template class VideoCommon::TextureCache; diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index faf5b151f3..f14a925655 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -6,7 +6,7 @@ #include "common/assert.h" #include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4f6e94224..5884fa16e3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,48 +4,7 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - 
-#include "common/alignment.h" -#include "common/common_types.h" -#include "common/literals.h" -#include "common/logging/log.h" -#include "common/settings.h" -#include "video_core/compatible_formats.h" -#include "video_core/delayed_destruction_ring.h" -#include "video_core/dirty_flags.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/descriptor_table.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/formatter.h" -#include "video_core/texture_cache/image_base.h" -#include "video_core/texture_cache/image_info.h" -#include "video_core/texture_cache/image_view_base.h" -#include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/samples_helper.h" -#include "video_core/texture_cache/slot_vector.h" -#include "video_core/texture_cache/types.h" -#include "video_core/texture_cache/util.h" -#include "video_core/textures/texture.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace VideoCommon { @@ -62,341 +21,1487 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template -class TextureCache { - /// Address shift for caching images into a hash table - static constexpr u64 PAGE_BITS = 20; +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); - /// Enables debugging features to the texture cache - static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; - /// Implement blits as copies between framebuffers - static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; - /// True when some copies have to be emulated - static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; - /// True when the API can provide info about the memory of the device. 
- static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; + deletion_iterator = slot_images.begin(); - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; - static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. 
+ expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; + critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; + minimum_memory = expected_memory; + } +} - using Runtime = typename P::Runtime; - using Image = typename P::Image; - using ImageAlloc = typename P::ImageAlloc; - using ImageView = typename P::ImageView; - using Sampler = typename P::Sampler; - using Framebuffer = typename P::Framebuffer; - - struct BlitImages { - ImageId dst_id; - ImageId src_id; - PixelFormat dst_format; - PixelFormat src_format; - }; - - template - struct IdentityHash { - [[nodiscard]] size_t operator()(T value) const noexcept { - return static_cast(value); - } - }; - -public: - explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, - Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - - /// Notify the cache that a new frame has been queued - void TickFrame(); - - /// Return a constant reference to the given image view id - [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - - /// Return a reference to the given image view id - [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - - /// Mark an image as modified from the GPU - void MarkModification(ImageId id) noexcept; - - /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span indices, - std::span image_view_ids); - - /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span indices, std::span image_view_ids); - - /// Get the sampler from the graphics descriptor table in the specified index - Sampler* GetGraphicsSampler(u32 index); - - /// Get the sampler from the compute descriptor table in the specified index - Sampler* GetComputeSampler(u32 index); - - /// Refresh the state for graphics image view and sampler descriptors - void SynchronizeGraphicsDescriptors(); - - /// Refresh the state for compute image view and sampler descriptors - void 
SynchronizeComputeDescriptors(); - - /// Update bound render targets and upload memory if necessary - /// @param is_clear True when the render targets are being used for clears - void UpdateRenderTargets(bool is_clear); - - /// Find a framebuffer with the currently bound render targets - /// UpdateRenderTargets should be called before this - Framebuffer* GetFramebuffer(); - - /// Mark images in a range as modified from the CPU - void WriteMemory(VAddr cpu_addr, size_t size); - - /// Download contents of host images to guest memory in a region - void DownloadMemory(VAddr cpu_addr, size_t size); - - /// Remove images in a region - void UnmapMemory(VAddr cpu_addr, size_t size); - - /// Remove images in a region - void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); - - /// Blit an image with the given parameters - void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy); - - /// Try to find a cached image view in the given CPU address - [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); - - /// Return true when there are uncommitted images to be downloaded - [[nodiscard]] bool HasUncommittedFlushes() const noexcept; - - /// Return true when the caller should wait for async downloads - [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; - - /// Commit asynchronous downloads - void CommitAsyncFlushes(); - - /// Pop asynchronous downloads - void PopAsyncFlushes(); - - /// Return true when a CPU region is modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); - - std::mutex mutex; - -private: - /// Iterate over all page indices in a range - template - static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr 
(RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); +template +void TextureCache

::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; } } - } - - template - static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; } - } else { - func(page); + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; } } + ++deletion_iterator; + } +} + +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} + +template +const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} + +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + +template +void TextureCache

::FillGraphicsImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} + +template +void TextureCache

::FillComputeImageViews(std::span indices, + std::span image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} + +template +typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} + +template +void TextureCache

::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} + +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + flags[Dirty::RenderTargets] = false; + + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} + +template +typename P::Framebuffer* TextureCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} + +template +void TextureCache

/* FillImageViews: writes one image view id per entry of indices into image_view_ids, each obtained from VisitImageView(table, cached_image_view_ids, index). Asserts that image_view_ids is at least as long as indices. */ ::FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, + std::span indices, + std::span image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + /* has_deleted_images is reset before each pass and re-checked afterwards; it is not written in this function, so presumably a callee sets it when an image is deleted mid-pass, forcing the whole transform to be redone - TODO confirm. */ do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} + +template +ImageViewId TextureCache

/* VisitImageView: resolves the image view id for descriptor slot index. An index greater than table.Limit() is logged and yields NULL_IMAGE_VIEW_ID. Otherwise the descriptor is read from the table; if table.Read reports it as new, the cached slot is refreshed with FindImageView(descriptor). A non-null result is passed to PrepareImageView(id, false, false) before being returned. */ ::VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + /* Reference into the caller-provided cache: assigning below updates the cache in place. NOTE(review): index is only checked against table.Limit(), not against cached_image_view_ids.size() - assumes the cache is sized to cover the limit; confirm. */ ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} + +template +FramebufferId TextureCache

/* GetFramebufferId: memoized framebuffer lookup keyed by RenderTargets. try_emplace inserts a default-constructed id for an unseen key; when the key already existed the stored id is returned as-is. For a new key the color/depth ImageView pointers are gathered (nullptr for null ids), a framebuffer is inserted into slot_framebuffers, and its id is stored in the map entry. */ ::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} + +template +void TextureCache

/* WriteMemory: for every image that ForEachImageInRegion reports for (cpu_addr, size), sets ImageFlagBits::CpuModified and, if the image is Tracked, calls UntrackImage. Images that already carry CpuModified are left untouched. */ ::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + /* Already marked by an earlier write: nothing more to do for this image. */ if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} + +template +void TextureCache

/* DownloadMemory: writes GPU-side image contents back to guest memory for the given CPU range. Images for which IsSafeDownload() is false are skipped; the rest have GpuModified cleared and are collected. The collected images are processed in ascending modification_tick order: each is downloaded into its own staging buffer, runtime.Finish() is called, and the data is written to guest memory with SwizzleImage. */ ::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + if (!image.IsSafeDownload()) { + return; + } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; + } + /* Oldest modification first, so where images overlap the most recently modified one is written last. */ std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + /* runtime.Finish() is called once per image, before the staging data is read by SwizzleImage. */ runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } +} + +template +void TextureCache

/* UnmapMemory: removes every image that ForEachImageInRegion reports for the CPU range. Each one is untracked if it is Tracked, then unregistered and deleted. */ ::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector deleted_images; + /* Ids are gathered first and processed after the region walk has finished; presumably because UnregisterImage/DeleteImage modify the tables the walk iterates - confirm. */ ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + UnregisterImage(id); + DeleteImage(id); + } +} + +template +void TextureCache

/* UnmapGPUMemory: flags every image that ForEachImageInRegionGPU reports for the GPU range as ImageFlagBits::Remapped and untracks it if it is Tracked. Images already flagged Remapped are skipped. Nothing is unregistered or deleted here. */ ::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + /* NOTE(review): despite its name, deleted_images only holds the ids to be flagged; no image is deleted in this function. */ for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template +void TextureCache

/* BlitImage: performs a Fermi2D blit from src to dst. GetBlitImages resolves both images and their formats; the source is prepared with PrepareImage(src_id, false, false) and the destination with PrepareImage(dst_id, true, false). For each side a one-level, one-layer 2D view is described at the subresource found for the surface address, RenderTargetFromImage supplies a framebuffer id and view id, and the copy rectangle is right-shifted by the SamplesLog2 factors of the image. The blit is then issued through runtime.BlitFramebuffer when FRAMEBUFFER_BLITS is set, otherwise through runtime.BlitImage. */ ::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + /* NOTE(review): .value() below is called without a has_value() check; with std::optional it throws std::bad_optional_access if TryFindBase finds no subresource for the address. */ // TODO: Deduplicate + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; + + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; + + // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+ /* Framebuffer pointers are taken only now, after both RenderTargetFromImage calls have returned their ids. */ Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} + +template +typename P::ImageView* TextureCache

/* TryFindFramebufferImageView: looks up the page_table entry for cpu_addr >> PAGE_BITS and scans the map views registered there. It returns a pointer to the first view (image_view_ids.at(0)) of the first image whose cpu_addr equals cpu_addr exactly and which has at least one view. Returns nullptr when the page has no entry or no image qualifies. */ ::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} + +template +bool TextureCache

/* HasUncommittedFlushes: true when uncommitted_downloads is not empty. */ ::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} + +template +bool TextureCache

/* ShouldWaitAsyncFlushes: true when committed_downloads has at least one entry and its front entry is itself non-empty. */ ::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} + +template +void TextureCache

/* CommitAsyncFlushes: pushes the current uncommitted_downloads onto committed_downloads and then clears uncommitted_downloads. A push also happens when the list is empty. */ ::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} + +template +void TextureCache

/* PopAsyncFlushes: processes and removes the front entry of committed_downloads. It does nothing when the queue is empty, and an empty front entry is simply popped. Otherwise it sums unswizzled_size_bytes over the listed images, requests one staging buffer of that total size, and issues each image download at an increasing download_map.offset. After runtime.Finish() it writes each image back to guest memory with SwizzleImage from consecutive sub-spans of the mapped span, then pops the entry. */ ::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + /* Remember the starting offset so it can be restored before the write-back loop, which advances the offset a second time. */ const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + download_map.offset = original_offset; + std::span download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop(); +} + +template +bool TextureCache

/* IsRegionGpuModified: returns true if any image that ForEachImageInRegion reports for (addr, size) has ImageFlagBits::GpuModified set. The callback returns bool: false to keep going, true once a modified image is found (ForEachImageInRegion propagates a true result out of its per-page callback; presumably that stops the walk - confirm against ForEachCPUPage). */ ::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} + +template +void TextureCache

/* RefreshContents: re-uploads guest data into image when it is flagged CpuModified; otherwise returns immediately. The flag is cleared and TrackImage is called before the MSAA check, so an image with num_samples > 1 ends up tracked and unflagged even though its upload is skipped with a warning. For other images a staging buffer of MapSizeBytes(image) is requested, UploadImageContents fills and uploads it, and runtime.InsertUploadMemoryBarrier() is called. */ ::RefreshContents(Image& image, ImageId image_id) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image, image_id); + + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); + runtime.InsertUploadMemoryBarrier(); +} + +template +template +void TextureCache

/* UploadImageContents: fills staging.mapped_span from guest memory at image.gpu_addr and uploads it, choosing one of three paths by image flags. (1) AcceleratedUpload: guest bytes are read as-is with ReadBlockUnsafe and handed to runtime.AccelerateImageUpload together with FullUploadSwizzles(image.info). (2) Converted: the data is unswizzled into a temporary vector of unswizzled_size_bytes, ConvertImage writes the result into the mapped span, and image.UploadMemory uploads it. (3) Otherwise the data is unswizzled directly into the mapped span and uploaded with image.UploadMemory. */ ::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span mapped_span = staging.mapped_span; + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, staging, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + /* copies is deliberately non-const here: it is passed on to ConvertImage before being used for the upload. */ auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(staging, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(staging, copies); + } +} + +template +ImageViewId TextureCache

/* FindImageView: returns NULL_IMAGE_VIEW_ID when IsValidEntry(gpu_memory, config) rejects the descriptor. Otherwise the result of CreateImageView(config) is memoized in image_views, keyed by the descriptor; an existing entry is returned without calling CreateImageView again. */ ::FindImageView(const TICEntry& config) { + if (!IsValidEntry(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} + +template +ImageViewId TextureCache

/* CreateImageView: builds an image view for a guest texture descriptor. Buffer-type descriptors get a view inserted directly, with no backing image lookup. For other types the backing image is searched for or created at config.Address() minus BaseLayer() * layer_stride; a null image id yields NULL_IMAGE_VIEW_ID. The subresource base for config.Address() is then resolved (asserted to be mip level 0), the view is found or created via FindOrEmplaceImageView, and both the view and the image are flagged Strong. */ ::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + /* Step back from the addressed layer to the start of the image so the lookup uses the image base address. */ const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + /* NOTE(review): .value() is called on the TryFindBase result without checking that it holds a value. */ const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} + +template +ImageId TextureCache

/* FindOrInsertImage: returns the id from FindImage(info, gpu_addr, options) when it is non-null; otherwise returns the result of InsertImage with the same arguments. */ ::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} + +template +ImageId TextureCache

/* FindImage: searches the registered images overlapping the guest range of info for one that can serve (info, gpu_addr). The CPU address is translated from gpu_addr, first without and then with the guest size; if both translations fail a null ImageId is returned. Images flagged Remapped are never matched. When either side is a Linear image, a match requires the same gpu_addr, type and pitch, IsPitchLinearSameSize and IsViewCompatible. Otherwise IsSubresource decides. The callback returns true on the first match. */ ::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + /* Size is compared strictly only when the caller did not relax Size and the existing image is flagged Strong. */ const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} + +template +ImageId TextureCache

/* InsertImage: creates an image for (info, gpu_addr) through JoinImages and records its id in the allocation entry keyed by the resulting image's gpu_addr (creating that entry in slot_image_allocs on first use). If gpu_addr cannot be translated to a CPU address, with or without the guest size, a placeholder address is derived from virtual_invalid_space, which is then advanced by the size aligned up to 32. The options parameter is not read in this function. */ ::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + /* NOTE(review): ~(1ULL << 40ULL) sets every bit except bit 40; confirm this is the intended base for placeholder addresses. */ const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional(fake_addr); + } + } + /* NOTE(review): cpu_addr always holds a value at this point (translated or placeholder), so this assertion cannot fire. */ ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} + +template +ImageId TextureCache

/* JoinImages: creates a new image for (info, gpu_addr, cpu_addr), reconciles it with the existing images it overlaps, registers it and returns its id. Phase 1 (region_check, over CPU-overlapping images): Remapped overlaps go to ignore_textures. For a Linear request only overlaps with equal pitch and gpu_addr are kept, as left aliases. Otherwise ResolveOverlap may merge the overlap, updating gpu_addr, cpu_addr and new_info.resources and queuing the overlap in overlap_ids; failing that, the overlap becomes a left alias, a right alias or a bad overlap depending on the IsSubresource checks. Phase 2 (region_check_gpu, over sparse images in the GPU range): overlaps not seen in phase 1 that are Remapped, or that have the same gpu_addr and guest size, go to ignore_textures. Phase 3: the new image is inserted and flagged Sparse if its GPU range is not contiguous; ignored images are untracked, unregistered and deleted; the new image is refreshed; merged overlaps are copied into it (skipped with a warning when sample counts differ) and then removed; alias and bad-overlap links are recorded; finally the image is registered. */ ::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + std::vector overlap_ids; + std::unordered_set overlaps_found; + std::vector left_aliased_ids; + std::vector right_aliased_ids; + std::unordered_set ignore_textures; + std::vector bad_overlap_ids; + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + overlaps_found.insert(overlap_id); + static constexpr bool strict_size = true; + const std::optional solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { + left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views, native_bgr)) { + right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; + } + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId
overlap_id, ImageBase& overlap) { + /* Only images not already classified by the CPU-range pass are considered here. */ if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; + + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; } - /// Runs the Garbage Collector. - void RunGarbageCollector(); + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } - /// Fills image_view_ids in the image views in indices - void FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, std::span indices, - std::span image_view_ids); + // TODO: Only upload what we need + RefreshContents(new_image, new_image_id); - /// Find or create an image view in the guest descriptor table - ImageViewId VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, u32 index); + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & ImageFlagBits::Tracked))
{ + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + /* Alias bookkeeping: AddImageAlias receives the new image as first argument for right aliases and as second argument for left aliases. */ ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; + } + RegisterImage(new_image_id); + return new_image_id; +} - /// Find or create a framebuffer with the given render target parameters - FramebufferId GetFramebufferId(const RenderTargets& key); +template +typename TextureCache

::BlitImages TextureCache

/* GetBlitImages: resolves the destination and source images for a Fermi2D blit and returns their ids together with the formats deduced for each side. Each pass resets has_deleted_images, looks both images up with relaxed Format and Samples matching, lets DeduceBlitImages adjust dst_info/src_info from whichever images exist, and inserts any image that is still missing. The pass is repeated while has_deleted_images is set. */ ::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + /* NOTE(review): in a do-while, continue jumps to the loop condition; if has_deleted_images is still false the loop exits here without inserting missing images, so dst_id/src_id may be returned null - confirm this is intended. */ if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} - /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image, ImageId image_id); +template +SamplerId TextureCache

/* FindSampler: returns NULL_SAMPLER_ID when every word of config.raw is zero. Otherwise the sampler is memoized in samplers keyed by the descriptor: a new key gets a sampler inserted into slot_samplers, an existing key returns the stored id. */ ::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} - /// Upload data from guest to an image - template - void UploadImageContents(Image& image, StagingBuffer& staging_buffer); +template +ImageViewId TextureCache

/* FindColorBuffer: returns the image view for color render target index, or a null ImageViewId when the target is unused. A target counts as unused when index is not below rt_control.count, when its GPU address is 0, or when its format is RenderTargetFormat::NONE. Otherwise an ImageInfo is built from the registers and resolved through FindRenderTargetView. */ ::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Find or create an image view from a guest descriptor - [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); +template +ImageViewId TextureCache

/* FindDepthBuffer: returns the image view for the depth (zeta) buffer, or a null ImageViewId when regs.zeta_enable is false or the zeta GPU address is 0. Otherwise an ImageInfo is built from the registers and resolved through FindRenderTargetView. */ ::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Create a new image view from a guest descriptor - [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); +template +ImageViewId TextureCache

/* FindRenderTargetView: finds or creates the image for (info, gpu_addr) and returns a single-level view of it suitable as a render target. The sample count is relaxed during the image lookup only when is_clear is true. A null image id yields NULL_IMAGE_VIEW_ID. Linear images always use level 0 / layer 0 as base; other images use the subresource found for gpu_addr. The view covers info.size.depth layers when the found image is 3D, otherwise info.resources.layers. */ ::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + /* NOTE(review): .value() is called on the TryFindBase result without checking that it holds a value. */ base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} - /// Find or create an image from the given parameters - [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options = RelaxedOptions{}); +template +template +void TextureCache

/* ForEachImageInRegion: invokes func(image_id, image) once for each image that has a map view overlapping the CPU range (cpu_addr, size). ForEachCPUPage walks the pages of the range; for each page the map views listed in page_table are examined. map.picked and ImageFlagBits::Picked are set on first visit so a map or image that spans several pages is handled only once; both markers are cleared again before returning. When func returns bool (BOOL_BREAK), a true result is returned from the per-page callback and a false is returned otherwise; presumably ForEachCPUPage stops on true - confirm. func is captured by copy in the page callback. */ ::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + boost::container::small_vector maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + /* An image can be reached through more than one map view; the Picked flag ensures func sees it only once. */ if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + /* Undo the visit markers set during the walk. */ for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} - /// Find an image from the given parameters - [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template +template +void TextureCache

/* ForEachImageInRegionGPU: invokes func(image_id, image) once for each image listed in gpu_page_table for the pages of (gpu_addr, size) whose OverlapsGPU(gpu_addr, size) is true. ImageFlagBits::Picked marks images already visited so that an image spanning several pages is reported once; the flag is cleared on all visited images before returning. When func returns bool (BOOL_BREAK), a true result is returned from the per-page callback; presumably ForEachGPUPage stops on true - confirm. */ ::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + /* Undo the visit markers set during the walk. */ for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Create an image from the given parameters - [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template +template +void TextureCache

/* ForEachSparseImageInRegion: same walk as the GPU-range iteration, but over sparse_page_table. It invokes func(image_id, image) once for each image listed there for the pages of (gpu_addr, size) whose OverlapsGPU(gpu_addr, size) is true, using ImageFlagBits::Picked to avoid repeat visits and clearing it afterwards. When func returns bool (BOOL_BREAK), a true result is returned from the per-page callback; presumably ForEachGPUPage stops on true - confirm. */ ::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + /* Undo the visit markers set during the walk. */ for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Create a new image and join perfectly matching existing images - /// Remove joined images from the cache - [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); +template +template +void TextureCache

/* ForEachSparseSegment: obtains the (gpu_addr, size) segments that gpu_memory.GetSubmappedRange returns for the image's GPU range, translates each segment start to a CPU address (asserted to succeed) and calls func(gpu_addr, cpu_addr, size). When func returns bool, a true result stops the iteration. */ ::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool RETURNS_BOOL = std::is_same_v; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} - /// Return a blit image pair from the given guest blit parameters - [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src); +template +ImageViewId TextureCache

/* FindOrEmplaceImageView: returns the id of the view that image.FindView(info) reports for image_id; if none exists, a new view is inserted into slot_image_views, recorded on the image with InsertView, and its id is returned. */ ::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} - /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); +template +void TextureCache

/* RegisterImage: makes image_id discoverable through the page tables and accounts for its memory. It asserts the image is not yet Registered and sets the flag. It then adds a size estimate to total_used_memory: the larger of guest and unswizzled size, passed through EstimatedDecompressedSize for ASTC images with AcceleratedUpload or for Converted images, aligned up to 1024. The id is appended to gpu_page_table for every GPU page of the image. A non-sparse image gets one map view covering its CPU range, appended to page_table for every CPU page and stored in image.map_view_id. A Sparse image gets one map view per segment from ForEachSparseSegment, stored in sparse_views, and its id is also appended to sparse_page_table for every GPU page. */ ::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + /* Non-sparse images need exactly one CPU mapping; the early return skips the sparse bookkeeping below. */ if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); +} - /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); +template +void TextureCache

/* UnregisterImage: reverses the registration of image_id. It asserts the image is currently Registered, then clears the Registered and BadOverlap flags and subtracts from total_used_memory the size estimate computed from the image's current sizes, format and flags (larger of guest and unswizzled size, optionally passed through EstimatedDecompressedSize, aligned up to 1024). The id is erased from gpu_page_table for every GPU page of the image. A non-sparse image has its single map view erased from page_table for every CPU page and from slot_map_views. A Sparse image is erased from sparse_page_table, every map view recorded in sparse_views is removed from page_table and slot_map_views, and the sparse_views entry is erased. Missing pages or ids are reported with UNREACHABLE_MSG and skipped. The assertion message previously said "already registered" although the assertion fails precisely when the image is not registered; the text now matches the condition. */ ::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already unregistered image"); + image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); + /* Shared helper for the two image-id page tables (gpu_page_table and sparse_page_table): erases image_id from the list stored for page. */ const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map, IdentityHash>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); + }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_map_ids = page_it->second; + /* Erase every map view on this page that belongs to image_id, not only map_view_id. NOTE(review): map.picked is set to true just before the entry is erased and is never reset here - confirm the purpose. */ auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); +} - /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); +template +void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); +} - /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); +template +void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } +} - /// Iterates over all the images in a region calling func - template - void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); +template +void TextureCache

::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - template - void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); - template - void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + other_image.CheckAliasState(); + 
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); - /// Iterates over all the images in a region calling func - template - void ForEachSparseSegment(ImageBase& image, Func&& func); + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} - /// Find or create an image view in the given image with the passed parameters - [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); +template +void TextureCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} - /// Register image in the page table - void RegisterImage(ImageId image); +template +void TextureCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} - /// Unregister image from the page table - void UnregisterImage(ImageId image); +template +void TextureCache

::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} - /// Track CPU reads and writes for image - void TrackImage(ImageBase& image, ImageId image_id); +template +void TextureCache

::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} - /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image, ImageId image_id); +template +void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image, image_id); + } + } else { + RefreshContents(image, image_id); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} - /// Delete image from the cache - void DeleteImage(ImageId image); +template +void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } + PrepareImage(image_view.image_id, is_modification, invalidate); +} - /// Remove image views references from the cache - void RemoveImageViewReferences(std::span removed_views); +template +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - /// Remove framebuffers using the given image views from the cache - void RemoveFramebuffers(std::span removed_views); + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = 
slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); - /// Mark an image as modified from the GPU - void MarkModification(ImageBase& image) noexcept; + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} - /// Synchronize image aliases, copying data if needed - void SynchronizeAliases(ImageId image_id); +template +void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} - /// Prepare an image to be used - void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); +template +std::pair TextureCache

::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} - /// Prepare an image view to be used - void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); - - /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); - - /// Bind an image view as render target, downloading resources preemtively if needed - void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); - - /// Create a render target from a given image and image view parameters - [[nodiscard]] std::pair RenderTargetFromImage( - ImageId, const ImageViewInfo& view_info); - - /// Returns true if the current clear parameters clear the whole image of a given image view - [[nodiscard]] bool IsFullClear(ImageViewId id); - - Runtime& runtime; - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector 
graphics_image_view_ids; - - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; - - RenderTargets render_targets; - - std::unordered_map image_views; - std::unordered_map samplers; - std::unordered_map framebuffers; - - std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> gpu_page_table; - std::unordered_map, IdentityHash> sparse_page_table; - - std::unordered_map> sparse_views; - - VAddr virtual_invalid_space{}; - - bool has_deleted_images = false; - u64 total_used_memory = 0; - u64 minimum_memory; - u64 expected_memory; - u64 critical_memory; - - SlotVector slot_images; - SlotVector slot_map_views; - SlotVector slot_image_views; - SlotVector slot_image_allocs; - SlotVector slot_samplers; - SlotVector slot_framebuffers; - - // TODO: This data structure is not optimal and it should be reworked - std::vector uncommitted_downloads; - std::queue> committed_downloads; - - static constexpr size_t TICKS_TO_DESTROY = 6; - DelayedDestructionRing sentenced_images; - DelayedDestructionRing sentenced_image_view; - DelayedDestructionRing sentenced_framebuffers; - - std::unordered_map image_allocs_table; - - u64 modification_tick = 0; - u64 frame_tick = 0; - typename SlotVector::Iterator deletion_iterator; -}; +template +bool TextureCache

::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h new file mode 100644 index 0000000000..a4f6e94224 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/literals.h" +#include "common/logging/log.h" +#include "common/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/descriptor_table.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; + +template +class TextureCache { + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_BITS = 20; + + /// Enables 
debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } + }; + +public: + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); + + /// Notify the cache that a new frame has been queued + void TickFrame(); + + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; + + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span 
indices, + std::span image_view_ids); + + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span indices, std::span image_view_ids); + + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); + + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); + + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); + + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); + + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); + + /// Find a framebuffer with the currently bound render targets + /// UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); + + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); + + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); + + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); + + /// Return true when there are uncommitted images to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; + + 
/// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + + std::mutex mutex; + +private: + /// Iterate over all page indices in a range + template + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Runs the Garbage Collector. 
+ void RunGarbageCollector(); + + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable& table, + std::span cached_image_view_ids, std::span indices, + std::span image_view_ids); + + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable& table, + std::span cached_image_view_ids, u32 index); + + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); + + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image, ImageId image_id); + + /// Upload data from guest to an image + template + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); + + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); + + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); + + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = RelaxedOptions{}); + + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); + + /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] 
SamplerId FindSampler(const TSCEntry& config); + + /// Find or create an image view for the given color buffer index + [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); + + /// Iterates over all the images in a region calling func + template + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + + template + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template + void ForEachSparseSegment(ImageBase& image, Func&& func); + + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); + + /// Register image in the page table + void RegisterImage(ImageId image); + + /// Unregister image from the page table + void UnregisterImage(ImageId image); + + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image, ImageId image_id); + + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image, ImageId image_id); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void 
SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; + + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + RenderTargets render_targets; + + std::unordered_map image_views; + std::unordered_map samplers; + std::unordered_map framebuffers; + + std::unordered_map, IdentityHash> page_table; + std::unordered_map, IdentityHash> gpu_page_table; + std::unordered_map, IdentityHash> sparse_page_table; + + std::unordered_map> sparse_views; + + VAddr virtual_invalid_space{}; + + bool has_deleted_images = false; + u64 total_used_memory = 0; + u64 minimum_memory; + u64 expected_memory; + u64 critical_memory; + + SlotVector slot_images; + 
SlotVector slot_map_views; + SlotVector slot_image_views; + SlotVector slot_image_allocs; + SlotVector slot_samplers; + SlotVector slot_framebuffers; + + // TODO: This data structure is not optimal and it should be reworked + std::vector uncommitted_downloads; + std::queue> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing sentenced_images; + DelayedDestructionRing sentenced_image_view; + DelayedDestructionRing sentenced_framebuffers; + + std::unordered_map image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; + typename SlotVector::Iterator deletion_iterator; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h deleted file mode 100644 index 8440d23d1d..0000000000 --- a/src/video_core/texture_cache/texture_cache_templates.h +++ /dev/null @@ -1,1507 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; -using Tegra::Texture::TICEntry; -using Tegra::Texture::TSCEntry; -using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceType; -using namespace Common::Literals; - -template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - // Configure null sampler - TSCEntry sampler_descriptor{}; - sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); - sampler_descriptor.cubemap_anisotropy.Assign(1); - - // Make sure the first index is reserved for the null resources - // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); - void(slot_samplers.insert(runtime, sampler_descriptor)); - - deletion_iterator = slot_images.begin(); - - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); - minimum_memory = 0; - } else { - // on OGL we can be more conservatives as the driver takes care. - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; - } -} - -template -void TextureCache

::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } - } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? 
ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } - } - ++deletion_iterator; - } -} - -template -void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { - RunGarbageCollector(); - } - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; -} - -template -const typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) const noexcept { - return slot_image_views[id]; -} - -template -typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { - return slot_image_views[id]; -} - -template -void TextureCache

::MarkModification(ImageId id) noexcept { - MarkModification(slot_images[id]); -} - -template -void TextureCache

::FillGraphicsImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); -} - -template -void TextureCache

::FillComputeImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); -} - -template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template -void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template -void TextureCache

::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - - for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); - } - render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, - }; -} - -template -typename P::Framebuffer* TextureCache

::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; -} - -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span indices, - std::span image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); - do { - has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); - return NULL_IMAGE_VIEW_ID; - } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; - if (is_new) { - image_view_id = FindImageView(descriptor); - } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; -} - -template -FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { - const auto [pair, is_new] = framebuffers.try_emplace(key); - FramebufferId& framebuffer_id = pair->second; - if (!is_new) { - return framebuffer_id; - } - std::array color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; - framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); - return framebuffer_id; -} - -template -void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { - ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template -void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; - ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - if (!image.IsSafeDownload()) { - return; - } - image.flags &= ~ImageFlagBits::GpuModified; - images.push_back(image_id); - }); - if (images.empty()) { - return; - } - std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { - return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; - }); - for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); - } -} - -template -void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - UnregisterImage(id); - DeleteImage(id); - } -} - -template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - -template -void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); - const ImageId dst_id = images.dst_id; - const ImageId src_id = images.src_id; - PrepareImage(src_id, false, false); - PrepareImage(dst_id, true, false); - - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; - - // TODO: Deduplicate - const std::optional src_base = src_image.TryFindBase(src.Address()); - const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; - const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); - const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, - }; - - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, - }; - - // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 
- Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - if constexpr (FRAMEBUFFER_BLITS) { - // OpenGL blits from framebuffers, not images - Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; - runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, - copy.filter, copy.operation); - } else { - // Vulkan can blit images, but it lacks format reinterpretations - // Provide a framebuffer in case it's necessary - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, - copy.operation); - } -} - -template -typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_addr) { - // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_BITS); - if (it == page_table.end()) { - return nullptr; - } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; - if (image.cpu_addr != cpu_addr) { - continue; - } - if (image.image_view_ids.empty()) { - continue; - } - return &slot_image_views[image.image_view_ids.at(0)]; - } - return nullptr; -} - -template -bool TextureCache

::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); -} - -template -bool TextureCache

::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); -} - -template -void TextureCache

::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); - uncommitted_downloads.clear(); -} - -template -void TextureCache

::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - const std::span download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); -} - -template -bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { - bool is_modified = false; - ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - is_modified = true; - return true; - }); - return is_modified; -} - -template -void TextureCache

::RefreshContents(Image& image, ImageId image_id) { - if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images - return; - } - image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); - - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); - return; - } - auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, staging); - runtime.InsertUploadMemoryBarrier(); -} - -template -template -void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) { - const std::span mapped_span = staging.mapped_span; - const GPUVAddr gpu_addr = image.gpu_addr; - - if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); - const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(staging, copies); - } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(staging, copies); - } -} - -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - -template -ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { - const ImageInfo info(config); - if (info.type == ImageType::Buffer) { - const ImageViewInfo view_info(config, 0); - return slot_image_views.insert(runtime, info, view_info, config.Address()); - } - const u32 layer_offset = config.BaseLayer() * info.layer_stride; - const GPUVAddr image_gpu_addr = config.Address() - layer_offset; - const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - ImageBase& image = slot_images[image_id]; - const SubresourceBase base = image.TryFindBase(config.Address()).value(); - ASSERT(base.level == 0); - const ImageViewInfo view_info(config, base.layer); - const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); - ImageViewBase& image_view = slot_image_views[image_view_id]; - image_view.flags |= ImageViewFlagBits::Strong; - image.flags |= ImageFlagBits::Strong; - return image_view_id; -} - -template -ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { - return image_id; - } - return InsertImage(info, gpu_addr, options); -} - -template -ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; - const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { - const bool strict_size = False(options & RelaxedOptions::Size) && - True(existing_image.flags & ImageFlagBits::Strong); - const ImageInfo& existing = existing_image.info; - if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && - existing.pitch == info.pitch && - IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { - image_id = existing_image_id; - return true; - } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, - native_bgr)) { - image_id = existing_image_id; - return true; - } - return false; - }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; -} - -template -ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); - } - } - ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); - const Image& image = slot_images[image_id]; - // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different - const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); - if (is_new) { - it->second = slot_image_allocs.insert(); - } - slot_image_allocs[it->second].images.push_back(image_id); - return image_id; -} - -template -ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { - ImageInfo new_info = info; - const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - std::vector overlap_ids; - std::unordered_set overlaps_found; - std::vector left_aliased_ids; - std::vector right_aliased_ids; - std::unordered_set ignore_textures; - std::vector bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type == ImageType::Linear) { - if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { - // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); - } - return; - } - overlaps_found.insert(overlap_id); - static constexpr bool strict_size = true; - const std::optional solution = ResolveOverlap( - new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); - if (solution) { - gpu_addr = solution->gpu_addr; - cpu_addr = solution->cpu_addr; - new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - return; - } - static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; - const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else { - bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; - } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId 
overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - } - if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); - } - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); - Image& new_image = slot_images[new_image_id]; - - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); - } else { - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - ImageBase& new_image_base = new_image; - for (const ImageId aliased_id : right_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : 
left_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : bad_overlap_ids) { - ImageBase& aliased = slot_images[aliased_id]; - aliased.overlapping_images.push_back(new_image_id); - new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; - } - RegisterImage(new_image_id); - return new_image_id; -} - -template -typename TextureCache

::BlitImages TextureCache

::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; - const GPUVAddr dst_addr = dst.Address(); - const GPUVAddr src_addr = src.Address(); - ImageInfo dst_info(dst); - ImageInfo src_info(src); - ImageId dst_id; - ImageId src_id; - do { - has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; - const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } - } while (has_deleted_images); - return BlitImages{ - .dst_id = dst_id, - .src_id = src_id, - .dst_format = dst_info.format, - .src_format = src_info.format, - }; -} - -template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { - if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { - return NULL_SAMPLER_ID; - } - const auto [pair, is_new] = samplers.try_emplace(config); - if (is_new) { - pair->second = slot_samplers.insert(runtime, config); - } - return pair->second; -} - -template -ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; - if (index >= regs.rt_control.count) { - return ImageViewId{}; - } - const auto& rt = regs.rt[index]; - const GPUVAddr gpu_addr = rt.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - if (rt.format == Tegra::RenderTargetFormat::NONE) { - return ImageViewId{}; - } - const ImageInfo info(regs, index); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; - if (!regs.zeta_enable) { - return ImageViewId{}; - } - const GPUVAddr gpu_addr = regs.zeta.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - const ImageInfo info(regs); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template -ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - Image& image = slot_images[image_id]; - const ImageViewType view_type = RenderTargetImageViewType(info); - SubresourceBase base; - if (image.info.type == ImageType::Linear) { - base = SubresourceBase{.level = 0, .layer = 0}; - } else { - base = image.TryFindBase(gpu_addr).value(); - } - const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; - const SubresourceRange range{ - .base = base, - .extent = {.levels = 1, .layers = layers}, - }; - return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); -} - -template -template -void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - boost::container::small_vector maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { - const auto it = page_table.find(page); - if (it == page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template -template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - -template -ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { - Image& image = slot_images[image_id]; - if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { - return image_view_id; - } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); - image.InsertView(info, image_view_id); - return image_view_id; -} - -template -void TextureCache

::RegisterImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); - image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); -} - -template -void TextureCache

::UnregisterImage(ImageId image_id) { - Image& image = slot_images[image_id]; - ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), - "Trying to unregister an already registered image"); - image.flags &= ~ImageFlagBits::Registered; - image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", 
- page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); - }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); -} - -template -void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { - ASSERT(False(image.flags & ImageFlagBits::Tracked)); - image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); -} - -template -void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { - ASSERT(True(image.flags & ImageFlagBits::Tracked)); - image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } -} - -template -void TextureCache

::DeleteImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - const GPUVAddr gpu_addr = image.gpu_addr; - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it == image_allocs_table.end()) { - UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); - return; - } - const ImageAllocId alloc_id = alloc_it->second; - std::vector& alloc_images = slot_image_allocs[alloc_id].images; - const auto alloc_image_it = std::ranges::find(alloc_images, image_id); - if (alloc_image_it == alloc_images.end()) { - UNREACHABLE_MSG("Trying to delete an image that does not exist"); - return; - } - ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - - // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - const std::span image_view_ids = image.image_view_ids; - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } - } - RemoveImageViewReferences(image_view_ids); - RemoveFramebuffers(image_view_ids); - - for (const AliasedImage& alias : image.aliased_images) { - ImageBase& other_image = slot_images[alias.id]; - [[maybe_unused]] const size_t num_removed_aliases = - std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { - return other_alias.id == image_id; - }); - other_image.CheckAliasState(); - ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", - num_removed_aliases); - } - for (const ImageId overlap_id : image.overlapping_images) { - ImageBase& 
other_image = slot_images[overlap_id]; - [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( - other_image.overlapping_images, - [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); - other_image.CheckBadOverlapState(); - ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", - num_removed_overlaps); - } - for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); - slot_image_views.erase(image_view_id); - } - sentenced_images.Push(std::move(slot_images[image_id])); - slot_images.erase(image_id); - - alloc_images.erase(alloc_image_it); - if (alloc_images.empty()) { - image_allocs_table.erase(alloc_it); - } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); - } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); - has_deleted_images = true; -} - -template -void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = image_views.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::RemoveFramebuffers(std::span removed_views) { - auto it = framebuffers.begin(); - while (it != framebuffers.end()) { - if (it->first.Contains(removed_views)) { - it = framebuffers.erase(it); - } else { - ++it; - } - } -} - -template -void TextureCache

::MarkModification(ImageBase& image) noexcept { - image.flags |= ImageFlagBits::GpuModified; - image.modification_tick = ++modification_tick; -} - -template -void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; - ImageBase& image = slot_images[image_id]; - u64 most_recent_tick = image.modification_tick; - for (const AliasedImage& aliased : image.aliased_images) { - ImageBase& aliased_image = slot_images[aliased.id]; - if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); - aliased_images.push_back(&aliased); - } - } - if (aliased_images.empty()) { - return; - } - image.modification_tick = most_recent_tick; - std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { - const ImageBase& lhs_image = slot_images[lhs->id]; - const ImageBase& rhs_image = slot_images[rhs->id]; - return lhs_image.modification_tick < rhs_image.modification_tick; - }); - for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); - } -} - -template -void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { - Image& image = slot_images[image_id]; - if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); - if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); - } - } else { - RefreshContents(image, image_id); - SynchronizeAliases(image_id); - } - if (is_modification) { - MarkModification(image); - } - image.frame_tick = frame_tick; -} - -template -void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, - bool invalidate) { - if (!image_view_id) { - return; - } - const ImageViewBase& image_view = slot_image_views[image_view_id]; - if (image_view.IsBuffer()) { - return; - } - PrepareImage(image_view.image_id, is_modification, invalidate); -} - -template -void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { - Image& dst = slot_images[dst_id]; - Image& src = slot_images[src_id]; - const auto dst_format_type = GetFormatType(dst.info.format); - const auto src_format_type = GetFormatType(src.info.format); - if (src_format_type == dst_format_type) { - if constexpr (HAS_EMULATED_COPIES) { - if (!runtime.CanImageBeCopied(dst, src)) { - return runtime.EmulateCopyImage(dst, src, copies); - } - } - return runtime.CopyImage(dst, src, copies); - } - UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); - UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); - for (const ImageCopy& copy : copies) { - UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); - UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - - const SubresourceBase dst_base{ - .level = copy.dst_subresource.base_level, - .layer = copy.dst_subresource.base_layer, - }; - const SubresourceBase src_base{ - .level = copy.src_subresource.base_level, - .layer = copy.src_subresource.base_layer, - }; - const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; - const SubresourceExtent src_extent{.levels = 1, .layers = 1}; - const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; - const SubresourceRange src_range{.base = src_base, .extent = src_extent}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); - const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - [[maybe_unused]] const Extent3D expected_size{ - .width = 
std::min(dst_view.size.width, src_view.size.width), - .height = std::min(dst_view.size.height, src_view.size.height), - .depth = std::min(dst_view.size.depth, src_view.size.depth), - }; - UNIMPLEMENTED_IF(copy.extent != expected_size); - - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); - } -} - -template -void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { - if (*old_id == new_id) { - return; - } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; - if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); - } - } - *old_id = new_id; -} - -template -std::pair TextureCache

::RenderTargetFromImage( - ImageId image_id, const ImageViewInfo& view_info) { - const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); - const ImageBase& image = slot_images[image_id]; - const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; - const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; - const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); - const u32 num_samples = image.info.num_samples; - const auto [samples_x, samples_y] = SamplesLog2(num_samples); - const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ - .color_buffer_ids = {color_view_id}, - .depth_buffer_id = depth_view_id, - .size = {extent.width >> samples_x, extent.height >> samples_y}, - }); - return {framebuffer_id, view_id}; -} - -template -bool TextureCache

::IsFullClear(ImageViewId id) { - if (!id) { - return true; - } - const ImageViewBase& image_view = slot_image_views[id]; - const ImageBase& image = slot_images[image_view.image_id]; - const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; - const auto& scissor = regs.scissor_test[0]; - if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { - // Images with multiple resources can't be cleared in a single call - return false; - } - if (regs.clear_flags.scissor == 0) { - // If scissor testing is disabled, the clear is always full - return true; - } - // Make sure the clear covers all texels in the subresource - return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && - scissor.max_y >= size.height; -} - -} // namespace VideoCommon From 02e98f6c93e4c6b360934e154f453d5b01394104 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 20:52:12 +0000 Subject: [PATCH 18/35] texture_cache: Don't change copyright year --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 2 +- src/video_core/texture_cache/texture_cache_base.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 26b423f5ea..53848ca5ea 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index b0496556d0..8e029bcb34 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5884fa16e3..d7fe875148 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a4f6e94224..09474b8237 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,4 +1,4 @@ -// Copyright 2021 yuzu Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
From 8ba551e1cdadd59ce830463356f3e5b710a9768b Mon Sep 17 00:00:00 2001 From: gidoly <66776795+gidoly@users.noreply.github.com> Date: Thu, 5 Aug 2021 07:10:08 +0900 Subject: [PATCH 19/35] Update configure_graphics_advanced.ui add description too fast gpu time --- src/yuzu/configuration/configure_graphics_advanced.ui | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 379dc5d2e8..4fe6b86ae7 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,14 +82,17 @@ Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental. - Use asynchronous shader building + Use asynchronous shader building (hack) + + Enables Fast GPU Time. This option will force most games to run at their highest native resolution. + - Use Fast GPU Time + Use Fast GPU Time (hack) From e611f522c28edcc6f6850a5884e8b1bb41004a57 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Thu, 5 Aug 2021 21:09:08 +0000 Subject: [PATCH 20/35] memory: Clean up CopyBlock too --- src/core/memory.cpp | 51 +++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2e578f1891..0b8e36b081 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -176,7 +176,7 @@ struct Memory::Impl { return string; } - void WalkBlock(const Kernel::KProcess& process, VAddr addr, const std::size_t size, + void WalkBlock(const Kernel::KProcess& process, const VAddr addr, const std::size_t size, auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) { const auto& page_table = process.PageTable().PageTableImpl(); std::size_t remaining_size = size; @@ -211,7 +211,6 @@ struct Memory::Impl { page_index++; page_offset = 0; - addr += 
static_cast(copy_amount); increment(copy_amount); remaining_size -= copy_amount; } @@ -307,47 +306,27 @@ struct Memory::Impl { void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr, const std::size_t size) { - const auto& page_table = process.PageTable().PageTableImpl(); - std::size_t remaining_size = size; - std::size_t page_index = src_addr >> PAGE_BITS; - std::size_t page_offset = src_addr & PAGE_MASK; - - while (remaining_size) { - const std::size_t copy_amount = - std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); - const auto current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - - const auto [pointer, type] = page_table.pointers[page_index].PointerType(); - switch (type) { - case Common::PageType::Unmapped: { + WalkBlock( + process, dest_addr, size, + [this, &process, &dest_addr, &src_addr, size](const std::size_t copy_amount, + const VAddr current_vaddr) { LOG_ERROR(HW_Memory, "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", current_vaddr, src_addr, size); ZeroBlock(process, dest_addr, copy_amount); - break; - } - case Common::PageType::Memory: { - DEBUG_ASSERT(pointer); - const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS); + }, + [this, &process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) { WriteBlockImpl(process, dest_addr, src_ptr, copy_amount); - break; - } - case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)}; + }, + [this, &system = system, &process, &dest_addr]( + const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) { system.GPU().FlushRegion(current_vaddr, copy_amount); WriteBlockImpl(process, dest_addr, host_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - dest_addr += static_cast(copy_amount); - src_addr += static_cast(copy_amount); - remaining_size -= copy_amount; - } + 
}, + [&dest_addr, &src_addr](const std::size_t copy_amount) { + dest_addr += static_cast(copy_amount); + src_addr += static_cast(copy_amount); + }); } void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { From d20c5ac720a0d26457c070b6d135c780af73107a Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 6 Aug 2021 00:41:55 -0400 Subject: [PATCH 21/35] common: uuid: Add hash function for UUID Used when UUID is a key in an unordered_map. The hash is produced by XORing the high and low 64-bits of the UUID together. --- src/common/uuid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/common/uuid.h b/src/common/uuid.h index aeb36939a3..2353179d87 100644 --- a/src/common/uuid.h +++ b/src/common/uuid.h @@ -69,3 +69,14 @@ struct UUID { static_assert(sizeof(UUID) == 16, "UUID is an invalid size!"); } // namespace Common + +namespace std { + +template <> +struct hash { + size_t operator()(const Common::UUID& uuid) const noexcept { + return uuid.uuid[1] ^ uuid.uuid[0]; + } +}; + +} // namespace std From e80323b8b070c42a8846a23200f9a36787f790d8 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Sat, 7 Aug 2021 01:27:47 +0000 Subject: [PATCH 22/35] texture_cache: Address ameerj's review --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 ++ src/video_core/texture_cache/texture_cache.h | 2 ++ src/video_core/texture_cache/texture_cache_base.h | 10 ---------- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 53848ca5ea..b0aee6cc12 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/util_shaders.h" 
#include "video_core/surface.h" +#include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" namespace OpenGL { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8e029bcb34..8f4df71223 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -19,6 +19,8 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d7fe875148..40953afb70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include "video_core/dirty_flags.h" +#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" namespace VideoCommon { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 09474b8237..b72448c0db 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -4,12 +4,8 @@ #pragma once -#include #include -#include -#include #include -#include #include #include #include @@ -22,11 +18,9 @@ #include "common/alignment.h" #include "common/common_types.h" #include "common/literals.h" -#include "common/logging/log.h" #include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" -#include 
"video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" @@ -34,14 +28,10 @@ #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/texture_cache/descriptor_table.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" -#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" #include "video_core/texture_cache/util.h" From 70cc4c0f46ed68a4d660aa9867b5b8de41b77549 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Sat, 7 Aug 2021 01:32:06 +0000 Subject: [PATCH 23/35] memory: Dedup Read and Write and fix logging bugs --- src/core/memory.cpp | 256 +++++++++++++++++++++----------------------- 1 file changed, 121 insertions(+), 135 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 0b8e36b081..778d152dd5 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -5,6 +5,10 @@ #include #include +#define BOOST_HANA_CONFIG_ENABLE_STRING_UDL +#include +#undef BOOST_HANA_CONFIG_ENABLE_STRING_UDL + #include "common/assert.h" #include "common/atomic_ops.h" #include "common/common_types.h" @@ -19,6 +23,8 @@ #include "core/memory.h" #include "video_core/gpu.h" +using namespace boost::hana::literals; + namespace Core::Memory { // Implementation class used to keep the specifics of the memory subsystem hidden @@ -68,18 +74,6 @@ struct Memory::Impl { return system.DeviceMemory().GetPointer(paddr) + vaddr; } - [[nodiscard]] u8* GetPointer(const VAddr vaddr) const { - const 
uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); - if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - return pointer + vaddr; - } - const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer); - if (type == Common::PageType::RasterizerCachedMemory) { - return GetPointerFromRasterizerCachedMemory(vaddr); - } - return nullptr; - } - u8 Read8(const VAddr addr) { return Read(addr); } @@ -452,6 +446,83 @@ struct Memory::Impl { } } + /** + * Returns a message like "Unmapped NameBits @ 0x{:016X}Suffix". + * + * @tparam NAME The caller name like "Read"_s or "Write"_s. + * @tparam BYTES The number of bits written. 0 is for read and sizeof(T) is for write. + * @tparam SUFFIX A suffix. ""_s is for read and " = 0x{:016X}" is for write. + */ + template + static consteval const char* GetPointerImplError() { + constexpr auto unmapped_fmt = ([]() { + constexpr auto prefix = "Unmapped "_s + NAME; + constexpr auto suffix = " @ 0x{:016X}"_s + SUFFIX; + const char* result = nullptr; + switch (BYTES * 8) { + case 0: + result = (prefix + suffix).c_str(); + break; +#define BITS_CASE(x) \ + case x: \ + result = (prefix + BOOST_HANA_STRING(#x) + suffix).c_str(); \ + break; + BITS_CASE(8) + BITS_CASE(16) + BITS_CASE(32) + BITS_CASE(64) + BITS_CASE(128) +#undef BITS_CASE + default: + break; + } + return result; + })(); + static_assert(unmapped_fmt); + return unmapped_fmt; + } + + [[nodiscard]] u8* GetPointerImpl(VAddr vaddr, auto on_unmapped, auto on_rasterizer) const { + // AARCH64 masks the upper 16 bit of all memory accesses + vaddr &= 0xffffffffffffLL; + + if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) { + on_unmapped(); + return nullptr; + } + + // Avoid adding any extra logic to this fast-path block + const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); + if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { + return 
&pointer[vaddr]; + } + switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { + case Common::PageType::Unmapped: + on_unmapped(); + return nullptr; + case Common::PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr); + return nullptr; + case Common::PageType::RasterizerCachedMemory: { + u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; + on_rasterizer(); + return host_ptr; + } + default: + UNREACHABLE(); + } + return nullptr; + } + + [[nodiscard]] u8* GetPointer(const VAddr vaddr) const { + return GetPointerImpl( + vaddr, + [vaddr]() { + LOG_ERROR(HW_Memory, GetPointerImplError<"GetPointer"_s, 0, ""_s>(), vaddr); + }, + []() {}); + } + /** * Reads a particular data type out of memory at the given virtual address. * @@ -465,39 +536,17 @@ struct Memory::Impl { */ template T Read(VAddr vaddr) { - // AARCH64 masks the upper 16 bit of all memory accesses - vaddr &= 0xffffffffffffLL; - - if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); - return 0; + T result = 0; + const u8* const ptr = GetPointerImpl( + vaddr, + [vaddr]() { + LOG_ERROR(HW_Memory, GetPointerImplError<"Read"_s, sizeof(T), ""_s>(), vaddr); + }, + [&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); }); + if (ptr) { + std::memcpy(&result, ptr, sizeof(T)); } - - // Avoid adding any extra logic to this fast-path block - const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); - if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - T value; - std::memcpy(&value, &pointer[vaddr], sizeof(T)); - return value; - } - switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); - return 0; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory 
page without a pointer @ {:016X}", vaddr); - break; - case Common::PageType::RasterizerCachedMemory: { - const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; - system.GPU().FlushRegion(vaddr, sizeof(T)); - T value; - std::memcpy(&value, host_ptr, sizeof(T)); - return value; - } - default: - UNREACHABLE(); - } - return {}; + return result; } /** @@ -511,110 +560,47 @@ struct Memory::Impl { */ template void Write(VAddr vaddr, const T data) { - // AARCH64 masks the upper 16 bit of all memory accesses - vaddr &= 0xffffffffffffLL; - - if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) { - LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast(data), vaddr); - return; - } - - // Avoid adding any extra logic to this fast-path block - const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); - if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - std::memcpy(&pointer[vaddr], &data, sizeof(T)); - return; - } - switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast(data), vaddr); - return; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); - break; - case Common::PageType::RasterizerCachedMemory: { - u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; - system.GPU().InvalidateRegion(vaddr, sizeof(T)); - std::memcpy(host_ptr, &data, sizeof(T)); - break; - } - default: - UNREACHABLE(); + u8* const ptr = GetPointerImpl( + vaddr, + [vaddr, data]() { + LOG_ERROR(HW_Memory, GetPointerImplError<"Write"_s, sizeof(T), " = 0x{:016X}"_s>(), + vaddr, static_cast(data)); + }, + [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); + if (ptr) { + std::memcpy(ptr, &data, sizeof(T)); } } template bool WriteExclusive(VAddr 
vaddr, const T data, const T expected) { - // AARCH64 masks the upper 16 bit of all memory accesses - vaddr &= 0xffffffffffffLL; - - if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) { - LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast(data), vaddr); - return true; - } - - const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); - if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - // NOTE: Avoid adding any extra logic to this fast-path block - const auto volatile_pointer = reinterpret_cast(&pointer[vaddr]); + u8* const ptr = GetPointerImpl( + vaddr, + [vaddr, data]() { + LOG_ERROR(HW_Memory, GetPointerImplError<"Write"_s, sizeof(T), " = 0x{:016X}"_s>(), + vaddr, static_cast(data)); + }, + [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); + if (ptr) { + const auto volatile_pointer = reinterpret_cast(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); } - switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast(data), vaddr); - return true; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); - break; - case Common::PageType::RasterizerCachedMemory: { - u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; - system.GPU().InvalidateRegion(vaddr, sizeof(T)); - auto* pointer = reinterpret_cast(&host_ptr); - return Common::AtomicCompareAndSwap(pointer, data, expected); - } - default: - UNREACHABLE(); - } return true; } bool WriteExclusive128(VAddr vaddr, const u128 data, const u128 expected) { - // AARCH64 masks the upper 16 bit of all memory accesses - vaddr &= 0xffffffffffffLL; - - if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) { - LOG_ERROR(HW_Memory, "Unmapped Write{} 
0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast(data[0]), vaddr); - return true; - } - - const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw(); - if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { - // NOTE: Avoid adding any extra logic to this fast-path block - const auto volatile_pointer = reinterpret_cast(&pointer[vaddr]); + u8* const ptr = GetPointerImpl( + vaddr, + [vaddr, data]() { + LOG_ERROR(HW_Memory, + GetPointerImplError<"Write"_s, sizeof(u128), " = 0x{:016X}{:016X}"_s>(), + vaddr, static_cast(data[1]), static_cast(data[0])); + }, + [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); }); + if (ptr) { + const auto volatile_pointer = reinterpret_cast(ptr); return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); } - switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, - static_cast(data[1]), static_cast(data[0]), vaddr); - return true; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); - break; - case Common::PageType::RasterizerCachedMemory: { - u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; - system.GPU().InvalidateRegion(vaddr, sizeof(u128)); - auto* pointer = reinterpret_cast(&host_ptr); - return Common::AtomicCompareAndSwap(pointer, data, expected); - } - default: - UNREACHABLE(); - } return true; } From 5f97f74a9aeed0b00b7592becef58a6fc4943d6a Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Sat, 7 Aug 2021 03:03:21 +0000 Subject: [PATCH 24/35] memory: Address lioncash's review --- src/core/memory.cpp | 58 +++++---------------------------------------- 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 778d152dd5..51c4dea26c 100644 
--- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -5,10 +5,6 @@ #include #include -#define BOOST_HANA_CONFIG_ENABLE_STRING_UDL -#include -#undef BOOST_HANA_CONFIG_ENABLE_STRING_UDL - #include "common/assert.h" #include "common/atomic_ops.h" #include "common/common_types.h" @@ -23,8 +19,6 @@ #include "core/memory.h" #include "video_core/gpu.h" -using namespace boost::hana::literals; - namespace Core::Memory { // Implementation class used to keep the specifics of the memory subsystem hidden @@ -446,42 +440,6 @@ struct Memory::Impl { } } - /** - * Returns a message like "Unmapped NameBits @ 0x{:016X}Suffix". - * - * @tparam NAME The caller name like "Read"_s or "Write"_s. - * @tparam BYTES The number of bits written. 0 is for read and sizeof(T) is for write. - * @tparam SUFFIX A suffix. ""_s is for read and " = 0x{:016X}" is for write. - */ - template - static consteval const char* GetPointerImplError() { - constexpr auto unmapped_fmt = ([]() { - constexpr auto prefix = "Unmapped "_s + NAME; - constexpr auto suffix = " @ 0x{:016X}"_s + SUFFIX; - const char* result = nullptr; - switch (BYTES * 8) { - case 0: - result = (prefix + suffix).c_str(); - break; -#define BITS_CASE(x) \ - case x: \ - result = (prefix + BOOST_HANA_STRING(#x) + suffix).c_str(); \ - break; - BITS_CASE(8) - BITS_CASE(16) - BITS_CASE(32) - BITS_CASE(64) - BITS_CASE(128) -#undef BITS_CASE - default: - break; - } - return result; - })(); - static_assert(unmapped_fmt); - return unmapped_fmt; - } - [[nodiscard]] u8* GetPointerImpl(VAddr vaddr, auto on_unmapped, auto on_rasterizer) const { // AARCH64 masks the upper 16 bit of all memory accesses vaddr &= 0xffffffffffffLL; @@ -516,10 +474,7 @@ struct Memory::Impl { [[nodiscard]] u8* GetPointer(const VAddr vaddr) const { return GetPointerImpl( - vaddr, - [vaddr]() { - LOG_ERROR(HW_Memory, GetPointerImplError<"GetPointer"_s, 0, ""_s>(), vaddr); - }, + vaddr, [vaddr]() { LOG_ERROR(HW_Memory, "Unmapped GetPointer @ 0x{:016X}", vaddr); }, []() {}); } @@ 
-540,7 +495,7 @@ struct Memory::Impl { const u8* const ptr = GetPointerImpl( vaddr, [vaddr]() { - LOG_ERROR(HW_Memory, GetPointerImplError<"Read"_s, sizeof(T), ""_s>(), vaddr); + LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, vaddr); }, [&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); }); if (ptr) { @@ -563,7 +518,7 @@ struct Memory::Impl { u8* const ptr = GetPointerImpl( vaddr, [vaddr, data]() { - LOG_ERROR(HW_Memory, GetPointerImplError<"Write"_s, sizeof(T), " = 0x{:016X}"_s>(), + LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, vaddr, static_cast(data)); }, [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); @@ -577,8 +532,8 @@ struct Memory::Impl { u8* const ptr = GetPointerImpl( vaddr, [vaddr, data]() { - LOG_ERROR(HW_Memory, GetPointerImplError<"Write"_s, sizeof(T), " = 0x{:016X}"_s>(), - vaddr, static_cast(data)); + LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", + sizeof(T) * 8, vaddr, static_cast(data)); }, [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); }); if (ptr) { @@ -592,8 +547,7 @@ struct Memory::Impl { u8* const ptr = GetPointerImpl( vaddr, [vaddr, data]() { - LOG_ERROR(HW_Memory, - GetPointerImplError<"Write"_s, sizeof(u128), " = 0x{:016X}{:016X}"_s>(), + LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", vaddr, static_cast(data[1]), static_cast(data[0])); }, [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); }); From 928b64d2ce91d45f65e34071653f235d2bd5eff6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 7 Aug 2021 01:08:33 -0400 Subject: [PATCH 25/35] nvdec: Better logging for unimplemented codecs --- src/video_core/command_classes/nvdec.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/command_classes/nvdec.cpp 
b/src/video_core/command_classes/nvdec.cpp index b5e3b70fcc..b5c55f14aa 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -39,7 +39,7 @@ void Nvdec::Execute() { codec->Decode(); break; default: - UNIMPLEMENTED_MSG("Unknown codec {}", static_cast(codec->GetCurrentCodec())); + UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName()); break; } } From bb29dcb7f2dad1e0ed67b32387912d56e40a5e5d Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Sat, 7 Aug 2021 15:15:01 +0200 Subject: [PATCH 26/35] vulkan_memory_allocator: Respect bufferImageGranularity --- src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 8 ++++++-- src/video_core/vulkan_common/vulkan_memory_allocator.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index aa173d19ef..300a61205e 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -228,7 +228,9 @@ void MemoryCommit::Release() { MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, - export_allocations{export_allocations_} {} + export_allocations{export_allocations_}, + buffer_image_granularity{ + device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} MemoryAllocator::~MemoryAllocator() = default; @@ -258,7 +260,9 @@ MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage } MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) { - auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage); + VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image); + requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity); + auto commit = 
Commit(requirements, usage); image.BindMemory(commit.Memory(), commit.Offset()); return commit; } diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index b61e931e06..86e8ed1196 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -123,6 +123,8 @@ private: const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. const bool export_allocations; ///< True when memory allocations have to be exported. std::vector> allocations; ///< Current allocations. + VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers + // and optimal images }; /// Returns true when a memory usage is guaranteed to be host visible. From acce512ae80fc19a63391a097a68ab1ef6cb68f6 Mon Sep 17 00:00:00 2001 From: german77 Date: Sat, 7 Aug 2021 15:45:29 -0500 Subject: [PATCH 27/35] main: Avoid stopping emulation when taking a screenshot --- src/yuzu/main.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9544f0fb00..5940e0cfda 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2814,8 +2814,6 @@ void GMainWindow::OnToggleFilterBar() { } void GMainWindow::OnCaptureScreenshot() { - OnPauseGame(); - const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); const auto screenshot_path = QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); @@ -2827,23 +2825,22 @@ void GMainWindow::OnCaptureScreenshot() { .arg(date); if (!Common::FS::CreateDir(screenshot_path.toStdString())) { - OnStartGame(); return; } #ifdef _WIN32 if (UISettings::values.enable_screenshot_save_as) { + OnPauseGame(); filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename, tr("PNG Image (*.png)")); + OnStartGame(); if (filename.isEmpty()) { - OnStartGame(); return; 
} } #endif render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(), filename); - OnStartGame(); } // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant From 48b6d41f1b95cd61d6c60c3db1a5b89ac788a66a Mon Sep 17 00:00:00 2001 From: german77 Date: Thu, 22 Jul 2021 19:59:26 -0500 Subject: [PATCH 28/35] input_common: Improve SDL joystick and hide toggle option --- src/input_common/main.cpp | 8 +- src/input_common/mouse/mouse_poller.cpp | 1 + src/input_common/sdl/sdl_impl.cpp | 87 ++++++++++++++----- .../configuration/configure_input_player.cpp | 13 +-- 4 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 8de3d4520e..ff23230f00 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -304,10 +304,10 @@ std::vector> InputSubsystem::GetPollers([ } std::string GenerateKeyboardParam(int key_code) { - Common::ParamPackage param{ - {"engine", "keyboard"}, - {"code", std::to_string(key_code)}, - }; + Common::ParamPackage param; + param.Set("engine", "keyboard"); + param.Set("code", key_code); + param.Set("toggle", false); return param.Serialize(); } diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp index efcdd85d2c..090b26972d 100644 --- a/src/input_common/mouse/mouse_poller.cpp +++ b/src/input_common/mouse/mouse_poller.cpp @@ -57,6 +57,7 @@ Common::ParamPackage MouseButtonFactory::GetNextInput() const { if (pad.button != MouseInput::MouseButton::Undefined) { params.Set("engine", "mouse"); params.Set("button", static_cast(pad.button)); + params.Set("toggle", false); return params; } } diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index 70a0ba09c6..f1f950d8a7 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -82,6 +82,12 @@ public: state.buttons.insert_or_assign(button, value); } + void PreSetButton(int 
button) { + if (!state.buttons.contains(button)) { + SetButton(button, false); + } + } + void SetMotion(SDL_ControllerSensorEvent event) { constexpr float gravity_constant = 9.80665f; std::lock_guard lock{mutex}; @@ -155,9 +161,16 @@ public: state.axes.insert_or_assign(axis, value); } - float GetAxis(int axis, float range) const { + void PreSetAxis(int axis) { + if (!state.axes.contains(axis)) { + SetAxis(axis, 0); + } + } + + float GetAxis(int axis, float range, float offset) const { std::lock_guard lock{mutex}; - return static_cast(state.axes.at(axis)) / (32767.0f * range); + const float value = static_cast(state.axes.at(axis)) / 32767.0f; + return (value + offset) / range; } bool RumblePlay(u16 amp_low, u16 amp_high) { @@ -174,9 +187,10 @@ public: return false; } - std::tuple GetAnalog(int axis_x, int axis_y, float range) const { - float x = GetAxis(axis_x, range); - float y = GetAxis(axis_y, range); + std::tuple GetAnalog(int axis_x, int axis_y, float range, float offset_x, + float offset_y) const { + float x = GetAxis(axis_x, range, offset_x); + float y = GetAxis(axis_y, range, offset_y); y = -y; // 3DS uses an y-axis inverse from SDL // Make sure the coordinates are in the unit circle, @@ -483,7 +497,7 @@ public: trigger_if_greater(trigger_if_greater_) {} bool GetStatus() const override { - const float axis_value = joystick->GetAxis(axis, 1.0f); + const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f); if (trigger_if_greater) { return axis_value > threshold; } @@ -500,12 +514,14 @@ private: class SDLAnalog final : public Input::AnalogDevice { public: explicit SDLAnalog(std::shared_ptr joystick_, int axis_x_, int axis_y_, - bool invert_x_, bool invert_y_, float deadzone_, float range_) + bool invert_x_, bool invert_y_, float deadzone_, float range_, + float offset_x_, float offset_y_) : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), - invert_y(invert_y_), deadzone(deadzone_), range(range_) {} + invert_y(invert_y_), 
deadzone(deadzone_), range(range_), offset_x(offset_x_), + offset_y(offset_y_) {} std::tuple GetStatus() const override { - auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); + auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range, offset_x, offset_y); const float r = std::sqrt((x * x) + (y * y)); if (invert_x) { x = -x; @@ -522,8 +538,8 @@ public: } std::tuple GetRawStatus() const override { - const float x = joystick->GetAxis(axis_x, range); - const float y = joystick->GetAxis(axis_y, range); + const float x = joystick->GetAxis(axis_x, range, offset_x); + const float y = joystick->GetAxis(axis_y, range, offset_y); return {x, -y}; } @@ -555,6 +571,8 @@ private: const bool invert_y; const float deadzone; const float range; + const float offset_x; + const float offset_y; }; class SDLVibration final : public Input::VibrationDevice { @@ -621,7 +639,7 @@ public: trigger_if_greater(trigger_if_greater_) {} Input::MotionStatus GetStatus() const override { - const float axis_value = joystick->GetAxis(axis, 1.0f); + const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f); bool trigger = axis_value < threshold; if (trigger_if_greater) { trigger = axis_value > threshold; @@ -720,13 +738,13 @@ public: LOG_ERROR(Input, "Unknown direction {}", direction_name); } // This is necessary so accessing GetAxis with axis won't crash - joystick->SetAxis(axis, 0); + joystick->PreSetAxis(axis); return std::make_unique(joystick, axis, threshold, trigger_if_greater); } const int button = params.Get("button", 0); // This is necessary so accessing GetButton with button won't crash - joystick->SetButton(button, false); + joystick->PreSetButton(button); return std::make_unique(joystick, button, toggle); } @@ -757,13 +775,15 @@ public: const std::string invert_y_value = params.Get("invert_y", "+"); const bool invert_x = invert_x_value == "-"; const bool invert_y = invert_y_value == "-"; + const float offset_x = params.Get("offset_x", 0.0f); + const float offset_y = 
params.Get("offset_y", 0.0f); auto joystick = state.GetSDLJoystickByGUID(guid, port); // This is necessary so accessing GetAxis with axis_x and axis_y won't crash - joystick->SetAxis(axis_x, 0); - joystick->SetAxis(axis_y, 0); + joystick->PreSetAxis(axis_x); + joystick->PreSetAxis(axis_y); return std::make_unique(joystick, axis_x, axis_y, invert_x, invert_y, deadzone, - range); + range, offset_x, offset_y); } private: @@ -844,13 +864,13 @@ public: LOG_ERROR(Input, "Unknown direction {}", direction_name); } // This is necessary so accessing GetAxis with axis won't crash - joystick->SetAxis(axis, 0); + joystick->PreSetAxis(axis); return std::make_unique(joystick, axis, threshold, trigger_if_greater); } const int button = params.Get("button", 0); // This is necessary so accessing GetButton with button won't crash - joystick->SetButton(button, false); + joystick->PreSetButton(button); return std::make_unique(joystick, button); } @@ -995,6 +1015,7 @@ Common::ParamPackage BuildButtonParamPackageForButton(int port, std::string guid params.Set("port", port); params.Set("guid", std::move(guid)); params.Set("button", button); + params.Set("toggle", false); return params; } @@ -1134,13 +1155,15 @@ Common::ParamPackage BuildParamPackageForBinding(int port, const std::string& gu } Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& guid, int axis_x, - int axis_y) { + int axis_y, float offset_x, float offset_y) { Common::ParamPackage params; params.Set("engine", "sdl"); params.Set("port", port); params.Set("guid", guid); params.Set("axis_x", axis_x); params.Set("axis_y", axis_y); + params.Set("offset_x", offset_x); + params.Set("offset_y", offset_y); params.Set("invert_x", "+"); params.Set("invert_y", "+"); return params; @@ -1342,24 +1365,39 @@ AnalogMapping SDLState::GetAnalogMappingForDevice(const Common::ParamPackage& pa const auto& binding_left_y = SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_LEFTY); if (params.Has("guid2")) { + 
joystick2->PreSetAxis(binding_left_x.value.axis); + joystick2->PreSetAxis(binding_left_y.value.axis); + const auto left_offset_x = -joystick2->GetAxis(binding_left_x.value.axis, 1.0f, 0); + const auto left_offset_y = -joystick2->GetAxis(binding_left_y.value.axis, 1.0f, 0); mapping.insert_or_assign( Settings::NativeAnalog::LStick, BuildParamPackageForAnalog(joystick2->GetPort(), joystick2->GetGUID(), - binding_left_x.value.axis, binding_left_y.value.axis)); + binding_left_x.value.axis, binding_left_y.value.axis, + left_offset_x, left_offset_y)); } else { + joystick->PreSetAxis(binding_left_x.value.axis); + joystick->PreSetAxis(binding_left_y.value.axis); + const auto left_offset_x = -joystick->GetAxis(binding_left_x.value.axis, 1.0f, 0); + const auto left_offset_y = -joystick->GetAxis(binding_left_y.value.axis, 1.0f, 0); mapping.insert_or_assign( Settings::NativeAnalog::LStick, BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(), - binding_left_x.value.axis, binding_left_y.value.axis)); + binding_left_x.value.axis, binding_left_y.value.axis, + left_offset_x, left_offset_y)); } const auto& binding_right_x = SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTX); const auto& binding_right_y = SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTY); + joystick->PreSetAxis(binding_right_x.value.axis); + joystick->PreSetAxis(binding_right_y.value.axis); + const auto right_offset_x = -joystick->GetAxis(binding_right_x.value.axis, 1.0f, 0); + const auto right_offset_y = -joystick->GetAxis(binding_right_y.value.axis, 1.0f, 0); mapping.insert_or_assign(Settings::NativeAnalog::RStick, BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(), binding_right_x.value.axis, - binding_right_y.value.axis)); + binding_right_y.value.axis, right_offset_x, + right_offset_y)); return mapping; } @@ -1563,8 +1601,9 @@ public: } if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) { + // Set offset to zero 
since the joystick is not on center auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(), - first_axis, axis); + first_axis, axis, 0, 0); first_axis = -1; return params; } diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 6b9bd05f18..7527c068b9 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -309,11 +309,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i buttons_param[button_id].Clear(); button_map[button_id]->setText(tr("[not set]")); }); - context_menu.addAction(tr("Toggle button"), [&] { - const bool toggle_value = !buttons_param[button_id].Get("toggle", false); - buttons_param[button_id].Set("toggle", toggle_value); - button_map[button_id]->setText(ButtonToText(buttons_param[button_id])); - }); + if (buttons_param[button_id].Has("toggle")) { + context_menu.addAction(tr("Toggle button"), [&] { + const bool toggle_value = + !buttons_param[button_id].Get("toggle", false); + buttons_param[button_id].Set("toggle", toggle_value); + button_map[button_id]->setText(ButtonToText(buttons_param[button_id])); + }); + } if (buttons_param[button_id].Has("threshold")) { context_menu.addAction(tr("Set threshold"), [&] { const int button_threshold = static_cast( From 8e0cc3e59a066bc731598d0ce31cb498e7a48382 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 8 Aug 2021 01:00:40 -0400 Subject: [PATCH 29/35] configure_general: Swap positions of speed limit and frame limit options --- src/yuzu/configuration/configure_general.ui | 60 ++++++++++----------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index 8ce97edeca..69b6c2d668 100644 --- a/src/yuzu/configuration/configure_general.ui +++ 
b/src/yuzu/configuration/configure_general.ui @@ -24,6 +24,36 @@ + + + + + + Framerate Cap + + + Requires the use of the FPS Limiter Toggle hotkey to take effect. + + + + + + + x + + + 1 + + + 1000 + + + 500 + + + + + @@ -51,36 +81,6 @@ - - - - - - Framerate Cap - - - Requires the use of the FPS Limiter Toggle hotkey to take effect. - - - - - - - x - - - 1 - - - 1000 - - - 500 - - - - - From c4eafcc86142e1de764748659d74a27c97930dc8 Mon Sep 17 00:00:00 2001 From: yzct12345 <87620833+yzct12345@users.noreply.github.com> Date: Sun, 8 Aug 2021 11:02:51 +0000 Subject: [PATCH 30/35] texture_cache: Address ameerj's review --- src/video_core/texture_cache/image_view_info.cpp | 2 ++ src/video_core/texture_cache/texture_cache.h | 2 ++ src/video_core/texture_cache/texture_cache_base.h | 7 ------- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index f14a925655..6527e14c84 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -14,6 +14,8 @@ namespace VideoCommon { namespace { +using Tegra::Texture::TextureType; + constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits::max(); [[nodiscard]] u8 CastSwizzle(SwizzleSource source) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 40953afb70..a087498ff5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include "common/alignment.h" +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index b72448c0db..e4ae351cb2 100644 --- a/src/video_core/texture_cache/texture_cache_base.h 
+++ b/src/video_core/texture_cache/texture_cache_base.h @@ -10,15 +10,10 @@ #include #include #include -#include #include -#include - -#include "common/alignment.h" #include "common/common_types.h" #include "common/literals.h" -#include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -40,7 +35,6 @@ namespace VideoCommon { using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; using VideoCore::Surface::GetFormatType; @@ -48,7 +42,6 @@ using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template From fa226957050816280bfd7aad3458fe8408118ece Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 8 Aug 2021 13:49:00 -0400 Subject: [PATCH 31/35] vp9: Ensure the first frame is complete Silences a runtime error due to the first frame missing the frame data, and being set to hidden despite being a key-frame. 
--- src/video_core/command_classes/codecs/vp9.cpp | 4 ++-- src/video_core/command_classes/codecs/vp9_types.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 7eecb39915..70030066ae 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -397,14 +397,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) next_frame = std::move(temp); } else { next_frame.info = current_frame.info; - next_frame.bit_stream = std::move(current_frame.bit_stream); + next_frame.bit_stream = current_frame.bit_stream; } return current_frame; } std::vector VP9::ComposeCompressedHeader() { VpxRangeEncoder writer{}; - const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame; + const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame; if (!current_frame_info.lossless) { if (static_cast(current_frame_info.transform_mode) >= 3) { writer.Write(3, 2); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 6820afa266..87eafdb038 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -176,7 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = false, + .show_frame = true, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, From 23ca1eb82ee47e533e92ee9ccdc82a92647d1009 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 8 Aug 2021 17:34:43 +0200 Subject: [PATCH 32/35] yuzu-cmd/CMakeLists: Correct attribution for this function. 
--- src/yuzu_cmd/CMakeLists.txt | 1 + src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt index e55a196494..74fc249725 100644 --- a/src/yuzu_cmd/CMakeLists.txt +++ b/src/yuzu_cmd/CMakeLists.txt @@ -1,5 +1,6 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) +# Credits to Samantas5855 and others for this function. function(create_resource file output filename) # Read hex data from file file(READ ${file} filedata HEX) diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index c80f7791c5..87fce0c230 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -232,6 +232,7 @@ void EmuWindow_SDL2::WaitEvent() { } } +// Credits to Samantas5855 and others for this function. void EmuWindow_SDL2::SetWindowIcon() { SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size); if (yuzu_icon_stream == nullptr) { From a779cede7c9f8860ed005cbc16158c97c69765e7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 8 Aug 2021 00:51:25 -0400 Subject: [PATCH 33/35] vic: Specify sws_scale height stride. Silences a sws_scale runtime warning about unaligned strides. 
--- src/video_core/command_classes/vic.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index d5e77941cd..0ee07f3982 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -96,12 +96,11 @@ void Vic::Execute() { if (!converted_frame_buffer) { converted_frame_buffer = AVMallocPtr{static_cast(av_malloc(linear_size)), av_free}; } - - const int converted_stride{frame->width * 4}; + const std::array converted_stride{frame->width * 4, frame->height * 4, 0, 0}; u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, - &converted_frame_buf_addr, &converted_stride); + &converted_frame_buf_addr, converted_stride.data()); const u32 blk_kind = static_cast(config.block_linear_kind); if (blk_kind != 0) { From fe2e7100031c64525c8ad893c7584d045269b137 Mon Sep 17 00:00:00 2001 From: german77 Date: Tue, 10 Aug 2021 19:16:30 -0500 Subject: [PATCH 34/35] externals: Update sdl2 to 2.0.16 --- CMakeLists.txt | 4 ++-- externals/SDL | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de2413843a..826bc42c08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -376,7 +376,7 @@ if (ENABLE_SDL2) if (YUZU_USE_BUNDLED_SDL2) # Detect toolchain and platform if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64) - set(SDL2_VER "SDL2-2.0.15-prerelease") + set(SDL2_VER "SDL2-2.0.16") else() message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. 
Disable YUZU_USE_BUNDLED_SDL2 and provide your own.") endif() @@ -396,7 +396,7 @@ if (ENABLE_SDL2) elseif (YUZU_USE_EXTERNAL_SDL2) message(STATUS "Using SDL2 from externals.") else() - find_package(SDL2 2.0.15 REQUIRED) + find_package(SDL2 2.0.16 REQUIRED) # Some installations don't set SDL2_LIBRARIES if("${SDL2_LIBRARIES}" STREQUAL "") diff --git a/externals/SDL b/externals/SDL index 2f248a2a31..25f9ed87ff 160000 --- a/externals/SDL +++ b/externals/SDL @@ -1 +1 @@ -Subproject commit 2f248a2a31c3323ecc37c00ad5e269e347ae392a +Subproject commit 25f9ed87ff6947d9576fc9d79dee0784e638ac58 From 5be2d6fd2829e1bbb056f38101a0d6106736217a Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Wed, 11 Aug 2021 15:49:01 -0700 Subject: [PATCH 35/35] settings: Fix MSVC issues According to https://stackoverflow.com/questions/469508, we run into a MSVC bug (since VS 2005) when using diamond inheritance for RangedSetting. This explicitly implements those functions in RangedSetting. GetValue is implemented as just calling the inherited version. The explicit conversion operator is reimplemented. I opted for this over ignoring the warning with a pragma since this specifies the inherited behavior, and I have now less faith in MSVC to pick the right one. In addition, we mark destructors as virtual to silence what I believe is a fair MSVC compilation error. --- src/common/settings.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index c4afff50b5..1ba9b606c8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -75,7 +75,7 @@ public: */ explicit BasicSetting(const Type& default_val, const std::string& name) : default_value{default_val}, global{default_val}, label{name} {} - ~BasicSetting() = default; + virtual ~BasicSetting() = default; /** * Returns a reference to the setting's value.
@@ -161,7 +161,7 @@ public: explicit BasicRangedSetting(const Type& default_val, const Type& min_val, const Type& max_val, const std::string& name) : BasicSetting{default_val, name}, minimum{min_val}, maximum{max_val} {} - ~BasicRangedSetting() = default; + virtual ~BasicRangedSetting() = default; /** * Like BasicSetting's SetValue, except value is clamped to the range of the setting. @@ -208,7 +208,7 @@ public: */ explicit Setting(const Type& default_val, const std::string& name) : BasicSetting(default_val, name) {} - ~Setting() = default; + virtual ~Setting() = default; /** * Tells this setting to represent either the global or custom setting when other member @@ -237,13 +237,13 @@ public: * * @returns The required value of the setting */ - [[nodiscard]] const Type& GetValue() const override { + [[nodiscard]] virtual const Type& GetValue() const override { if (use_global) { return this->global; } return custom; } - [[nodiscard]] const Type& GetValue(bool need_global) const { + [[nodiscard]] virtual const Type& GetValue(bool need_global) const { if (use_global || need_global) { return this->global; } @@ -286,7 +286,7 @@ public: * * @returns A reference to the current setting value */ - explicit operator const Type&() const override { + virtual explicit operator const Type&() const override { if (use_global) { return this->global; } @@ -318,7 +318,22 @@ public: : BasicSetting{default_val, name}, BasicRangedSetting{default_val, min_val, max_val, name}, Setting{default_val, name} {} - ~RangedSetting() = default; + virtual ~RangedSetting() = default; + + // The following are needed to avoid a MSVC bug + // (source: https://stackoverflow.com/questions/469508) + [[nodiscard]] const Type& GetValue() const override { + return Setting::GetValue(); + } + [[nodiscard]] const Type& GetValue(bool need_global) const override { + return Setting::GetValue(need_global); + } + explicit operator const Type&() const override { + if (this->use_global) { + return this->global; + } + 
return this->custom; + } /** * Like BasicSetting's SetValue, except value is clamped to the range of the setting. Sets the