From ba82dc57d64fe8fc1b268e24fa583b350404b9a6 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sun, 1 Feb 2026 02:06:05 +0100 Subject: [PATCH] OpenGL+Vulkan: Use unified uniform block layout for output shaders (#1785) --- .../LegacyShaderDecompiler/LatteDecompiler.h | 2 +- .../LatteDecompilerEmitGLSLHeader.hpp | 2 +- .../LatteDecompilerEmitMSLHeader.hpp | 2 +- .../Renderer/Metal/RendererShaderMtl.cpp | 26 ---- .../Latte/Renderer/Metal/RendererShaderMtl.h | 6 - .../Latte/Renderer/OpenGL/OpenGLRenderer.cpp | 13 +- .../HW/Latte/Renderer/OpenGL/OpenGLRenderer.h | 3 + .../OpenGL/OpenGLRendererUniformData.cpp | 3 +- .../Latte/Renderer/OpenGL/RendererShaderGL.h | 10 +- .../HW/Latte/Renderer/RendererOuputShader.cpp | 87 +++---------- .../HW/Latte/Renderer/RendererOuputShader.h | 20 +-- src/Cafe/HW/Latte/Renderer/RendererShader.h | 6 - .../Renderer/Vulkan/RendererShaderVk.cpp | 26 ---- .../Latte/Renderer/Vulkan/RendererShaderVk.h | 5 - .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 87 ++++++------- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 1 + .../Renderer/Vulkan/VulkanRendererCore.cpp | 123 +++++++++--------- 17 files changed, 155 insertions(+), 267 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 475bacb0..b370123e 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -219,7 +219,7 @@ struct LatteDecompilerShader float ufCurrentValueFragCoordScale[2]; sint32 loc_verticesPerInstance; sint32 loc_streamoutBufferBase[LATTE_NUM_STREAMOUT_BUFFER]; - sint32 uniformRangeSize; // entire size of uniform variable block + uint32 uniformRangeSize; // entire size of uniform variable block }uniform{ 0 }; // fast access struct _RemappedUniformBufferGroup diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp index 07b1fd34..add25195 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp @@ -16,7 +16,7 @@ namespace LatteDecompiler } } - sint32 uniformCurrentOffset = 0; + uint32 uniformCurrentOffset = 0; auto shader = decompilerContext->shader; auto shaderType = decompilerContext->shader->shaderType; auto shaderSrc = decompilerContext->shaderSource; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index f61abcb1..ac48f1f7 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -14,7 +14,7 @@ namespace LatteDecompiler src->add("struct SupportBuffer {" _CRLF); - sint32 uniformCurrentOffset = 0; + uint32 uniformCurrentOffset = 0; auto shader = decompilerContext->shader; auto shaderType = decompilerContext->shader->shaderType; if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index a05bcf03..f00af85a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -231,32 +231,6 @@ RendererShaderMtl::~RendererShaderMtl() m_function->release(); } -sint32 RendererShaderMtl::GetUniformLocation(const char* name) -{ - cemu_assert_suspicious(); - return 0; -} - -void RendererShaderMtl::SetUniform1i(sint32 location, sint32 value) -{ - cemu_assert_suspicious(); -} - -void RendererShaderMtl::SetUniform1f(sint32 location, float value) -{ - cemu_assert_suspicious(); -} - -void RendererShaderMtl::SetUniform2fv(sint32 location, void* data, sint32 count) -{ - cemu_assert_suspicious(); -} - -void RendererShaderMtl::SetUniform4iv(sint32 location, void* data, sint32 count) -{ - cemu_assert_suspicious(); -} - void RendererShaderMtl::PreponeCompilation(bool isRenderThread) { shaderMtlThreadPool.s_compilationQueueMutex.lock(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 9527dc8a..1a0c33a9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -36,12 +36,6 @@ public: return m_function; } - sint32 GetUniformLocation(const char* name) override; - void SetUniform1i(sint32 location, sint32 value) override; - void SetUniform1f(sint32 location, float value) override; - void SetUniform2fv(sint32 location, void* data, sint32 count) override; - void SetUniform4iv(sint32 location, void* data, sint32 count) override; - void PreponeCompilation(bool isRenderThread) override; bool IsCompiled() override; bool WaitForCompiled() override; diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.cpp b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.cpp index 76f67692..a4d1966d 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.cpp @@ -145,6 +145,8 @@ OpenGLRenderer::~OpenGLRenderer() { if(m_pipeline != 0) glDeleteProgramPipelines(1, &m_pipeline); + + glDeleteBuffers(1, &m_backbufferBlit_uniformBuffer); } OpenGLRenderer* OpenGLRenderer::GetInstance() @@ -371,6 +373,10 @@ void OpenGLRenderer::Initialize() glBindFramebuffer(GL_FRAMEBUFFER_EXT, 0); } + // create uniform buffers for backbufferblit + glCreateBuffers(1, &m_backbufferBlit_uniformBuffer); + glNamedBufferStorage(m_backbufferBlit_uniformBuffer, sizeof(RendererOutputShader::OutputUniformVariables), nullptr, GL_DYNAMIC_STORAGE_BIT); + draw_init(); catchOpenGLError(); @@ -603,7 +609,12 @@ void OpenGLRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutpu shader_unbind(RendererShader::ShaderType::kGeometry); shader_bind(shader->GetVertexShader()); shader_bind(shader->GetFragmentShader()); - shader->SetUniformParameters(*texView, {imageWidth, imageHeight}, padView); + + // update and bind uniform buffer + auto uniformBuffer = shader->FillUniformBlockBuffer(*texView, {imageWidth, imageHeight}, padView); + glNamedBufferSubData(m_backbufferBlit_uniformBuffer, 0, sizeof(uniformBuffer), &uniformBuffer); + + glBindBufferBase(GL_UNIFORM_BUFFER, 0, m_backbufferBlit_uniformBuffer); // set viewport glViewportIndexedf(0, imageX, imageY, imageWidth, imageHeight); diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h index 3ed4c27b..5b9ccd41 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h @@ -209,6 +209,9 @@ private: sint32 activeTextureUnit = 0; void* m_latteBoundTextures[Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE * 3]{}; + // backbuffer blit + GLuint m_backbufferBlit_uniformBuffer; + // attribute stream GLuint glAttributeCacheAB{}; GLuint _boundArrayBuffer{}; diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererUniformData.cpp b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererUniformData.cpp index 73da3a3b..656536df 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererUniformData.cpp +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererUniformData.cpp @@ -1,3 +1,4 @@ +#include "RendererShaderGL.h" #include "Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h" #include "Cafe/HW/Latte/Core/LatteShader.h" @@ -28,7 +29,7 @@ void OpenGLRenderer::uniformData_update() if (!shader) continue; - auto hostShader = shader->shader; + auto hostShader = (RendererShaderGL*)shader->shader; if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED) { diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h b/src/Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h index db69c14f..1e8784ff 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h @@ -17,12 +17,12 @@ public: GLuint GetProgram() const { cemu_assert_debug(m_isCompiled); return m_program; } GLuint GetShaderObject() const { cemu_assert_debug(m_isCompiled); return m_shader_object; } - sint32 GetUniformLocation(const char* name) override; + sint32 GetUniformLocation(const char* name); - void SetUniform1i(sint32 location, sint32 value) override; - void SetUniform1f(sint32 location, float value) override; - void SetUniform2fv(sint32 location, void* data, sint32 count) override; - void SetUniform4iv(sint32 location, void* data, sint32 count) override; + void SetUniform1i(sint32 location, sint32 value); + void SetUniform1f(sint32 location, float value); + void SetUniform2fv(sint32 location, void* data, sint32 count); + void SetUniform4iv(sint32 location, void* data, sint32 count); static void ShaderCacheLoading_begin(uint64 cacheTitleId); static void ShaderCacheLoading_end(); diff --git a/src/Cafe/HW/Latte/Renderer/RendererOuputShader.cpp b/src/Cafe/HW/Latte/Renderer/RendererOuputShader.cpp index 8c4dd5d7..a4f538a8 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererOuputShader.cpp +++ b/src/Cafe/HW/Latte/Renderer/RendererOuputShader.cpp @@ -263,69 +263,24 @@ RendererOutputShader::RendererOutputShader(const std::string& vertex_source, con if(!m_fragment_shader->WaitForCompiled()) throw std::exception(); - if (g_renderer->GetType() == RendererAPI::OpenGL) - { - m_uniformLocations[0].m_loc_textureSrcResolution = m_vertex_shader->GetUniformLocation("textureSrcResolution"); - m_uniformLocations[0].m_loc_nativeResolution = m_vertex_shader->GetUniformLocation("nativeResolution"); - m_uniformLocations[0].m_loc_outputResolution = m_vertex_shader->GetUniformLocation("outputResolution"); - m_uniformLocations[0].m_loc_applySRGBEncoding = m_vertex_shader->GetUniformLocation("applySRGBEncoding"); - m_uniformLocations[0].m_loc_targetGamma = m_fragment_shader->GetUniformLocation("targetGamma"); - m_uniformLocations[0].m_loc_displayGamma = m_fragment_shader->GetUniformLocation("displayGamma"); - - m_uniformLocations[1].m_loc_textureSrcResolution = m_fragment_shader->GetUniformLocation("textureSrcResolution"); - m_uniformLocations[1].m_loc_nativeResolution = m_fragment_shader->GetUniformLocation("nativeResolution"); - m_uniformLocations[1].m_loc_outputResolution = m_fragment_shader->GetUniformLocation("outputResolution"); - m_uniformLocations[1].m_loc_applySRGBEncoding = m_fragment_shader->GetUniformLocation("applySRGBEncoding"); - m_uniformLocations[1].m_loc_targetGamma = m_fragment_shader->GetUniformLocation("targetGamma"); - m_uniformLocations[1].m_loc_displayGamma = m_fragment_shader->GetUniformLocation("displayGamma"); - } } -void RendererOutputShader::SetUniformParameters(const LatteTextureView& texture_view, const Vector2i& output_res, const bool padView) const +RendererOutputShader::OutputUniformVariables RendererOutputShader::FillUniformBlockBuffer(const LatteTextureView& texture_view, const Vector2i& output_res, const bool padView) const { + OutputUniformVariables vars; + sint32 effectiveWidth, effectiveHeight; texture_view.baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, 0); - auto setUniforms = [&](RendererShader* shader, const UniformLocations& locations){ - float res[2]; - if (locations.m_loc_textureSrcResolution != -1) - { - res[0] = (float)effectiveWidth; - res[1] = (float)effectiveHeight; - shader->SetUniform2fv(locations.m_loc_textureSrcResolution, res, 1); - } + vars.textureSrcResolution = {(float)effectiveWidth, (float)effectiveHeight}; - if (locations.m_loc_nativeResolution != -1) - { - res[0] = (float)texture_view.baseTexture->width; - res[1] = (float)texture_view.baseTexture->height; - shader->SetUniform2fv(locations.m_loc_nativeResolution, res, 1); - } + vars.nativeResolution = {(float)texture_view.baseTexture->width, (float)texture_view.baseTexture->height}; + vars.outputResolution = output_res; - if (locations.m_loc_outputResolution != -1) - { - res[0] = (float)output_res.x; - res[1] = (float)output_res.y; - shader->SetUniform2fv(locations.m_loc_outputResolution, res, 1); - } + vars.applySRGBEncoding = padView ? LatteGPUState.drcBufferUsesSRGB : LatteGPUState.tvBufferUsesSRGB; + vars.targetGamma = padView ? ActiveSettings::GetDRCGamma() : ActiveSettings::GetTVGamma(); + vars.displayGamma = GetConfig().userDisplayGamma; - if (locations.m_loc_applySRGBEncoding != -1) - { - shader->SetUniform1i(locations.m_loc_applySRGBEncoding, padView ? LatteGPUState.drcBufferUsesSRGB : LatteGPUState.tvBufferUsesSRGB); - } - - if (locations.m_loc_targetGamma != -1) - { - shader->SetUniform1f(locations.m_loc_targetGamma, padView ? ActiveSettings::GetDRCGamma() : ActiveSettings::GetTVGamma()); - } - - if (locations.m_loc_displayGamma != -1) - { - shader->SetUniform1f(locations.m_loc_displayGamma, GetConfig().userDisplayGamma); - } - - }; - setUniforms(m_vertex_shader.get(), m_uniformLocations[0]); - setUniforms(m_fragment_shader.get(), m_uniformLocations[1]); + return vars; } RendererOutputShader* RendererOutputShader::s_copy_shader; @@ -478,27 +433,23 @@ vertex VertexOut main0(ushort vid [[vertex_id]]) { std::string RendererOutputShader::PrependFragmentPreamble(const std::string& shaderSrc) { return R"(#version 430 +layout(location = 0) smooth in vec2 passUV; +layout(binding = 0) uniform sampler2D textureSrc; +layout(location = 0) out vec4 colorOut0; + #ifdef VULKAN -layout(push_constant) uniform pc { - vec2 textureSrcResolution; - vec2 nativeResolution; - vec2 outputResolution; - bool applySRGBEncoding; // true = app requested sRGB encoding - float targetGamma; - float displayGamma; -}; +layout (binding = 1, std140) #else +layout (binding = 0, std140) +#endif +uniform parameters { uniform vec2 textureSrcResolution; uniform vec2 nativeResolution; uniform vec2 outputResolution; uniform bool applySRGBEncoding; uniform float targetGamma; uniform float displayGamma; -#endif - -layout(location = 0) smooth in vec2 passUV; -layout(binding = 0) uniform sampler2D textureSrc; -layout(location = 0) out vec4 colorOut0; +}; float sRGBEncode(float linear) { diff --git a/src/Cafe/HW/Latte/Renderer/RendererOuputShader.h b/src/Cafe/HW/Latte/Renderer/RendererOuputShader.h index 2322fc6a..433a3308 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererOuputShader.h +++ b/src/Cafe/HW/Latte/Renderer/RendererOuputShader.h @@ -8,6 +8,15 @@ class RendererOutputShader { public: + struct OutputUniformVariables + { + Vector2f textureSrcResolution; + Vector2f nativeResolution; + Vector2f outputResolution; + uint32 applySRGBEncoding; + float targetGamma; + float displayGamma; + }; enum Shader { kCopy, @@ -17,7 +26,7 @@ public: RendererOutputShader(const std::string& vertex_source, const std::string& fragment_source); virtual ~RendererOutputShader() = default; - void SetUniformParameters(const LatteTextureView& texture_view, const Vector2i& output_res, const bool padView) const; + OutputUniformVariables FillUniformBlockBuffer(const LatteTextureView& texture_view, const Vector2i& output_res, const bool padView) const; RendererShader* GetVertexShader() const { @@ -51,15 +60,6 @@ protected: std::unique_ptr m_vertex_shader; std::unique_ptr m_fragment_shader; - struct UniformLocations - { - sint32 m_loc_textureSrcResolution = -1; - sint32 m_loc_nativeResolution = -1; - sint32 m_loc_outputResolution = -1; - sint32 m_loc_applySRGBEncoding = -1; - sint32 m_loc_targetGamma = -1; - sint32 m_loc_displayGamma = -1; - } m_uniformLocations[2]{}; private: static const std::string s_copy_shader_source; diff --git a/src/Cafe/HW/Latte/Renderer/RendererShader.h b/src/Cafe/HW/Latte/Renderer/RendererShader.h index b3d6d62b..2bc66769 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererShader.h +++ b/src/Cafe/HW/Latte/Renderer/RendererShader.h @@ -18,12 +18,6 @@ public: virtual bool IsCompiled() = 0; virtual bool WaitForCompiled() = 0; - virtual sint32 GetUniformLocation(const char* name) = 0; - - virtual void SetUniform1i(sint32 location, sint32 value) = 0; - virtual void SetUniform1f(sint32 location, float value) = 0; - virtual void SetUniform2fv(sint32 location, void* data, sint32 count) = 0; - virtual void SetUniform4iv(sint32 location, void* data, sint32 count) = 0; protected: // if isGameShader is true, then baseHash and auxHash are valid diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp index c58470e0..15fd66e0 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp @@ -226,32 +226,6 @@ void RendererShaderVk::Shutdown() ShaderVkThreadPool.StopThreads(); } -sint32 RendererShaderVk::GetUniformLocation(const char* name) -{ - cemu_assert_suspicious(); - return 0; -} - -void RendererShaderVk::SetUniform1i(sint32 location, sint32 value) -{ - cemu_assert_suspicious(); -} - -void RendererShaderVk::SetUniform1f(sint32 location, float value) -{ - cemu_assert_suspicious(); -} - -void RendererShaderVk::SetUniform2fv(sint32 location, void* data, sint32 count) -{ - cemu_assert_suspicious(); -} - -void RendererShaderVk::SetUniform4iv(sint32 location, void* data, sint32 count) -{ - cemu_assert_suspicious(); -} - void RendererShaderVk::CreateVkShaderModule(std::span spirvBuffer) { VkShaderModuleCreateInfo createInfo{}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h index 7b9fc34b..e2e1169a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h @@ -31,11 +31,6 @@ public: static void Init(); static void Shutdown(); - sint32 GetUniformLocation(const char* name) override; - void SetUniform1i(sint32 location, sint32 value) override; - void SetUniform1f(sint32 location, float value) override; - void SetUniform2fv(sint32 location, void* data, sint32 count) override; - void SetUniform4iv(sint32 location, void* data, sint32 count) override; VkShaderModule& GetShaderModule() { return m_shader_module; } static inline FSpinlock s_dependencyLock; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index a7a94800..251fd69c 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -2225,14 +2225,20 @@ void VulkanRenderer::CreatePipelineCache() void VulkanRenderer::swapchain_createDescriptorSetLayout() { - VkDescriptorSetLayoutBinding samplerLayoutBinding = {}; + VkDescriptorSetLayoutBinding bindings[2]{}; + VkDescriptorSetLayoutBinding& samplerLayoutBinding = bindings[0]; samplerLayoutBinding.binding = 0; samplerLayoutBinding.descriptorCount = 1; samplerLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; samplerLayoutBinding.pImmutableSamplers = nullptr; samplerLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - VkDescriptorSetLayoutBinding bindings[] = { samplerLayoutBinding }; + VkDescriptorSetLayoutBinding& uniformBufferBinding = bindings[1]; + uniformBufferBinding.binding = 1; + uniformBufferBinding.descriptorCount = 1; + uniformBufferBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + uniformBufferBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + VkDescriptorSetLayoutCreateInfo layoutInfo = {}; layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; layoutInfo.bindingCount = std::size(bindings); @@ -2638,20 +2644,10 @@ VkPipeline VulkanRenderer::backbufferBlit_createGraphicsPipeline(VkDescriptorSet colorBlending.blendConstants[2] = 0.0f; colorBlending.blendConstants[3] = 0.0f; - VkPushConstantRange pushConstantRange{ - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .offset = 0, - .size = 3 * sizeof(float) * 2 // 3 vec2's - + 4 // + 1 VkBool32 - + 4 * 2 // + 2 float - }; - VkPipelineLayoutCreateInfo pipelineLayoutInfo{}; pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipelineLayoutInfo.setLayoutCount = 1; pipelineLayoutInfo.pSetLayouts = &descriptorLayout; - pipelineLayoutInfo.pushConstantRangeCount = 1; - pipelineLayoutInfo.pPushConstantRanges = &pushConstantRange; VkResult result; if (m_pipelineLayout == VK_NULL_HANDLE) @@ -3027,37 +3023,12 @@ void VulkanRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutpu vkCmdBindPipeline(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); m_state.currentPipeline = pipeline; - vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &descriptSet, 0, nullptr); + auto outputUniforms = shader->FillUniformBlockBuffer(*texView, {imageWidth, imageHeight}, padView); + auto outputUniformOffset = uniformData_uploadUniformDataBufferGetOffset({(uint8*)&outputUniforms, sizeof(decltype(outputUniforms))}); - // update push constants - struct - { - Vector2f vecs[3]; - VkBool32 applySRGBEncoding; - float targetGamma; - float displayGamma; - } pushData; - - // textureSrcResolution - sint32 effectiveWidth, effectiveHeight; - texView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, 0); - pushData.vecs[0] = {(float)effectiveWidth, (float)effectiveHeight}; - - // nativeResolution - pushData.vecs[1] = { - (float)texViewVk->baseTexture->width, - (float)texViewVk->baseTexture->height, - }; - - // outputResolution - pushData.vecs[2] = {(float)imageWidth,(float)imageHeight}; - - pushData.applySRGBEncoding = padView ? LatteGPUState.drcBufferUsesSRGB : LatteGPUState.tvBufferUsesSRGB; - pushData.targetGamma = padView ? ActiveSettings::GetDRCGamma() : ActiveSettings::GetTVGamma(); - pushData.displayGamma = GetConfig().userDisplayGamma; - - vkCmdPushConstants(m_state.currentCommandBuffer, m_pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(pushData), &pushData); + vkCmdBindDescriptorSets(m_state.currentCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &descriptSet, + 1, &outputUniformOffset); vkCmdDraw(m_state.currentCommandBuffer, 6, 1, 0, 0); @@ -3119,16 +3090,32 @@ VkDescriptorSet VulkanRenderer::backbufferBlit_createDescriptorSet(VkDescriptorS imageInfo.imageView = texViewVk->GetViewRGBA()->m_textureImageView; imageInfo.sampler = texViewVk->GetDefaultTextureSampler(useLinearTexFilter); - VkWriteDescriptorSet descriptorWrites = {}; - descriptorWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptorWrites.dstSet = result; - descriptorWrites.dstBinding = 0; - descriptorWrites.dstArrayElement = 0; - descriptorWrites.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptorWrites.descriptorCount = 1; - descriptorWrites.pImageInfo = &imageInfo; + VkWriteDescriptorSet descriptorWrites[2]{}; - vkUpdateDescriptorSets(m_logicalDevice, 1, &descriptorWrites, 0, nullptr); + VkWriteDescriptorSet& samplerWrite = descriptorWrites[0]; + samplerWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + samplerWrite.dstSet = result; + samplerWrite.dstBinding = 0; + samplerWrite.dstArrayElement = 0; + samplerWrite.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + samplerWrite.descriptorCount = 1; + samplerWrite.pImageInfo = &imageInfo; + + VkWriteDescriptorSet& uniformBufferWrite = descriptorWrites[1]; + uniformBufferWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + uniformBufferWrite.dstSet = result; + uniformBufferWrite.dstBinding = 1; + uniformBufferWrite.descriptorCount = 1; + uniformBufferWrite.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + + VkDescriptorBufferInfo uniformBufferInfo{}; + uniformBufferInfo.buffer = m_uniformVarBuffer; + uniformBufferInfo.offset = 0; + uniformBufferInfo.range = sizeof(RendererOutputShader::OutputUniformVariables); + uniformBufferWrite.pBufferInfo = &uniformBufferInfo; + + + vkUpdateDescriptorSets(m_logicalDevice, std::size(descriptorWrites), descriptorWrites, 0, nullptr); performanceMonitor.vk.numDescriptorSamplerTextures.increment(); m_backbufferBlitDescriptorSetCache[hash] = result; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index cae4bd10..2c7af53b 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -554,6 +554,7 @@ private: VkCommandBuffer getCurrentCommandBuffer() const { return m_state.currentCommandBuffer; } // uniform + uint32 uniformData_uploadUniformDataBufferGetOffset(std::span data); void uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader); // misc diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index b41b7c37..23fb910c 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -376,6 +376,68 @@ void VulkanRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation) float s_vkUniformData[512 * 4]; +uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span data) +{ + const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; + const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1; + + auto waitWhileCondition = [&](std::function condition) { + while (condition()) + { + if (m_commandBufferSyncIndex == m_commandBufferIndex) + { + if (m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] != m_uniformVarBufferReadIndex) + { + draw_endRenderPass(); + SubmitCommandBuffer(); + } + else + { + // submitting work would not change readIndex, so there's no way for conditions based on it to change + cemuLog_log(LogType::Force, "draw call overflowed and corrupted uniform ringbuffer. expect visual corruption"); + cemu_assert_suspicious(); + break; + } + } + WaitForNextFinishedCommandBuffer(); + } + }; + + // wrap around if it doesnt fit consecutively + if (m_uniformVarBufferWriteIndex + uniformSize > UNIFORMVAR_RINGBUFFER_SIZE) + { + waitWhileCondition([&]() { + return m_uniformVarBufferReadIndex > m_uniformVarBufferWriteIndex || m_uniformVarBufferReadIndex == 0; + }); + m_uniformVarBufferWriteIndex = 0; + } + + auto ringBufRemaining = [&]() { + ssize_t ringBufferUsedBytes = (ssize_t)m_uniformVarBufferWriteIndex - m_uniformVarBufferReadIndex; + if (ringBufferUsedBytes < 0) + ringBufferUsedBytes += UNIFORMVAR_RINGBUFFER_SIZE; + return UNIFORMVAR_RINGBUFFER_SIZE - 1 - ringBufferUsedBytes; + }; + waitWhileCondition([&]() { + return ringBufRemaining() < uniformSize; + }); + + const uint32 uniformOffset = m_uniformVarBufferWriteIndex; + memcpy(m_uniformVarBufferPtr + uniformOffset, data.data(), data.size()); + m_uniformVarBufferWriteIndex += uniformSize; + // flush if not coherent + if (!m_uniformVarBufferMemoryIsCoherent) + { + VkMappedMemoryRange flushedRange{}; + flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + flushedRange.memory = m_uniformVarBufferMemory; + flushedRange.offset = uniformOffset; + flushedRange.size = uniformSize; + vkFlushMappedMemoryRanges(m_logicalDevice, 1, &flushedRange); + } + return uniformOffset; +} + void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader) { auto GET_UNIFORM_DATA_PTR = [](size_t index) { return s_vkUniformData + (index / 4); }; @@ -453,66 +515,7 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt } } } - // upload - const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; - const uint32 uniformSize = (shader->uniform.uniformRangeSize + bufferAlignmentM1) & ~bufferAlignmentM1; - - auto waitWhileCondition = [&](std::function condition) { - while (condition()) - { - if (m_commandBufferSyncIndex == m_commandBufferIndex) - { - if (m_cmdBufferUniformRingbufIndices[m_commandBufferIndex] != m_uniformVarBufferReadIndex) - { - draw_endRenderPass(); - SubmitCommandBuffer(); - } - else - { - // submitting work would not change readIndex, so there's no way for conditions based on it to change - cemuLog_log(LogType::Force, "draw call overflowed and corrupted uniform ringbuffer. expect visual corruption"); - cemu_assert_suspicious(); - break; - } - } - WaitForNextFinishedCommandBuffer(); - } - }; - - // wrap around if it doesnt fit consecutively - if (m_uniformVarBufferWriteIndex + uniformSize > UNIFORMVAR_RINGBUFFER_SIZE) - { - waitWhileCondition([&]() { - return m_uniformVarBufferReadIndex > m_uniformVarBufferWriteIndex || m_uniformVarBufferReadIndex == 0; - }); - m_uniformVarBufferWriteIndex = 0; - } - - auto ringBufRemaining = [&]() { - ssize_t ringBufferUsedBytes = (ssize_t)m_uniformVarBufferWriteIndex - m_uniformVarBufferReadIndex; - if (ringBufferUsedBytes < 0) - ringBufferUsedBytes += UNIFORMVAR_RINGBUFFER_SIZE; - return UNIFORMVAR_RINGBUFFER_SIZE - 1 - ringBufferUsedBytes; - }; - waitWhileCondition([&]() { - return ringBufRemaining() < uniformSize; - }); - - const uint32 uniformOffset = m_uniformVarBufferWriteIndex; - memcpy(m_uniformVarBufferPtr + uniformOffset, s_vkUniformData, shader->uniform.uniformRangeSize); - m_uniformVarBufferWriteIndex += uniformSize; - // update dynamic offset - dynamicOffsetInfo.uniformVarBufferOffset[shaderStageIndex] = uniformOffset; - // flush if not coherent - if (!m_uniformVarBufferMemoryIsCoherent) - { - VkMappedMemoryRange flushedRange{}; - flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - flushedRange.memory = m_uniformVarBufferMemory; - flushedRange.offset = uniformOffset; - flushedRange.size = uniformSize; - vkFlushMappedMemoryRanges(m_logicalDevice, 1, &flushedRange); - } + dynamicOffsetInfo.uniformVarBufferOffset[shaderStageIndex] = uniformData_uploadUniformDataBufferGetOffset({(uint8*)s_vkUniformData, shader->uniform.uniformRangeSize}); } }