diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index efac22b3ed..ab63cb2ea1 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -169,7 +169,11 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) { - OS << "uniform sampler2D frag_depth;\n"; + const auto frag_depth_type = (m_prog.ctrl & RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER) + ? "sampler2DMS" + : "sampler2D"; + + OS << "uniform " << frag_depth_type << " frag_depth;\n"; } OS << "\n"; @@ -244,6 +248,7 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); m_shader_props.require_color_format_convert = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT); m_shader_props.emulate_depth_compare = !!(m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE); + m_shader_props.depth_buffer_multisampled = !!(m_prog.ctrl & RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER); glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 34c2ca72d3..1a59f0438b 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -116,7 +116,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /* rsx::rtt_config_dirty | rsx::rtt_config_contested | rsx::rtt_config_valid | - rsx::rtt_cache_state_dirty); + rsx::rtt_cache_state_dirty | + rsx::pipeline_config_dirty); get_framebuffer_layout(context, m_framebuffer_layout); if (!m_graphics_state.test(rsx::rtt_config_valid)) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 9387f4f207..3cc40efed1 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -272,15 +272,6 @@ namespace rsx } } - 
void set_depth_func(context* ctx, u32 reg, u32 arg) - { - if (arg != REGS(ctx)->latch) - { - set_surface_options_dirty_bit(ctx, reg, arg); - RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; - } - } - void set_color_mask(context* ctx, u32 reg, u32 arg) { if (arg == REGS(ctx)->latch) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index aced1244e9..a5a434e47d 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -89,8 +89,6 @@ namespace rsx void set_aa_control(context* ctx, u32 reg, u32 arg); - void set_depth_func(context* ctx, u32 reg, u32 arg); - #define RSX(ctx) ctx->rsxthr #define REGS(ctx) (&rsx::method_registers) diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index 7749b24ac8..4c6d8625fa 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -247,6 +247,11 @@ namespace glsl { enabled_options.push_back("_ENABLE_DEPTH_COMPARE"); } + + if (props.depth_buffer_multisampled) + { + enabled_options.push_back("_ENABLE_DEPTH_BUFFER_MULTISAMPLED"); + } } // Import common header diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl index 6009c342e5..af2df8115e 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl @@ -39,7 +39,11 @@ R"( #endif #ifdef _ENABLE_DEPTH_COMPARE +#ifdef _ENABLE_DEPTH_BUFFER_MULTISAMPLED + float dstDepth = texelFetch(frag_depth, ivec2(gl_FragCoord.xy), gl_SampleID).r; +#else float dstDepth = texelFetch(frag_depth, ivec2(gl_FragCoord.xy), 0).r; +#endif float srcDepth = gl_FragCoord.z; float scale = _test_bit(rop_control, FRAG_DEPTH_24_BIT) ? 
float(0xffffffu) : float(0xffffu); diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 468f69b10e..a470ad5e3b 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -38,6 +38,7 @@ namespace glsl bool require_fog_read : 1; bool emulate_shadow_compare : 1; bool emulate_depth_compare : 1; + bool depth_buffer_multisampled : 1; bool low_precision_tests : 1; bool disable_early_discard : 1; bool supports_native_fp16 : 1; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index ddb9e0848d..cfd8a58dc1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1681,15 +1681,6 @@ namespace rsx m_frame_stats.framebuffer_stats.add(layout.width, layout.height, aa_mode); } - // Flags - if (g_cfg.video.emulate_depth_compare && - m_framebuffer_layout.zeta_address && - method_registers.depth_func() == rsx::comparison_function::equal) - { - current_fragment_program.ctrl |= RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE; - current_fragment_program.ctrl &= ~RSX_SHADER_CONTROL_DISABLE_EARLY_Z; - } - // Check if anything has changed bool really_changed = false; @@ -1975,6 +1966,36 @@ namespace rsx return true; } + rsx::flags32_t thread::get_fragment_program_export_config() + { + if (!g_cfg.video.emulate_depth_compare) [[ likely ]] + { + return 0; + } + + if (m_ctx->register_state->current_draw_clause.classify_mode() != primitive_class::polygon) + { + return 0; + } + + u32 expected_ctrl = 0; + + if (m_framebuffer_layout.zeta_address && + m_ctx->register_state->depth_test_enabled() && + m_ctx->register_state->depth_func() == rsx::comparison_function::equal) + { + expected_ctrl |= RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE; + + if (backend_config.supports_hw_msaa && + m_ctx->register_state->surface_antialias() != rsx::surface_antialiasing::center_1_sample) + { + expected_ctrl |= RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER; + } + } + + return expected_ctrl; + } + void 
thread::prefetch_fragment_program() { if (!m_graphics_state.test(rsx::pipeline_state::fragment_program_ucode_dirty)) @@ -2069,6 +2090,18 @@ namespace rsx { m_program_cache_hint.invalidate(m_graphics_state.load()); + constexpr u32 fs_export_config_mask = (RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE | RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER); + if (u32 export_ctrl = get_fragment_program_export_config(); + (current_fragment_program.ctrl & fs_export_config_mask) != export_ctrl) + { + // Update control bits for immediate consumers + current_fragment_program.ctrl &= ~fs_export_config_mask; + current_fragment_program.ctrl |= export_ctrl; + + // Signal backend to reload pipeline + m_graphics_state.set(rsx::pipeline_state::fragment_state_dirty); + } + prefetch_vertex_program(); prefetch_fragment_program(); } @@ -2163,12 +2196,7 @@ namespace rsx } } - if (g_cfg.video.emulate_depth_compare && - m_framebuffer_layout.zeta_address && - method_registers.depth_func() == rsx::comparison_function::equal) - { - current_fragment_program.ctrl |= RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE; - } + current_fragment_program.ctrl |= get_fragment_program_export_config(); } else if (m_ctx->register_state->point_sprite_enabled() && m_ctx->register_state->current_draw_clause.primitive == primitive_type::points) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 2438fc0f57..30ff3bd346 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -280,11 +280,13 @@ namespace rsx // Prefetch and analyze the currently active vertex program ucode void prefetch_vertex_program(); + // Computes the fragment program export configuration flags. The caller applies them and may invalidate the current program. + rsx::flags32_t get_fragment_program_export_config(); + + // Gets the current vertex program and associated state. Can invalidate the bound program. 
void get_current_vertex_program(const std::array, rsx::limits::vertex_textures_count>& sampler_descriptors); - /** - * Gets current fragment program and associated fragment state - */ + // Gets current fragment program and associated fragment state. Can invalidate the bound program. void get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors); public: diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 43294b798e..8a6789213b 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -607,6 +607,16 @@ void VKGSRender::load_texture_env() m_samplers_dirty.store(false); + if (current_fragment_program.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) + { + // Transition our FBO to a loop-friendly format. + // We can also convert it into an input attachment, but for now this is easier. + auto ds = ensure(m_rtts.m_bound_depth_stencil.second, "Invalid FS export configuration."); + ds->texture_barrier(*m_current_command_buffer); + + check_for_cyclic_refs = true; + } + if (check_for_cyclic_refs) { // Regenerate renderpass key @@ -771,8 +781,6 @@ bool VKGSRender::bind_texture_env() if (current_fragment_program.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) { auto ds = ensure(m_rtts.m_bound_depth_stencil.second); - vk::insert_texture_barrier(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, true); - auto view = ds->get_view(rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT); m_program->bind_uniform({ *view, vk::null_sampler() }, vk::glsl::binding_set_index_fragment, m_fs_binding_table->frag_depth_input_location); } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 7f7168322a..fda0690d33 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -247,7 +247,11 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (m_prog.ctrl & 
RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) { - OS << "layout(set=" << vk::glsl::binding_set_index_fragment << ", binding=" << vk_prog->binding_table.frag_depth_input_location << ") uniform sampler2D frag_depth;\n"; + const auto frag_depth_type = (m_prog.ctrl & RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER) + ? "sampler2DMS" + : "sampler2D"; + + OS << "layout(set=" << vk::glsl::binding_set_index_fragment << ", binding=" << vk_prog->binding_table.frag_depth_input_location << ") uniform " << frag_depth_type << " frag_depth;\n"; auto in = vk::glsl::program_input::make( glsl::glsl_fragment_program, @@ -361,6 +365,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); m_shader_props.require_color_format_convert = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT); m_shader_props.emulate_depth_compare = !!(m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE); + m_shader_props.depth_buffer_multisampled = !!(m_prog.ctrl & RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER); // Declare global constants if (m_shader_props.require_fog_read) diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index cd27a76a78..aaae097684 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -443,33 +443,35 @@ namespace gcm enum { - CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT = 0xe, ///< shader program exports the depth of the shaded fragment - CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS = 0x40, ///< shader program exports 32 bits registers values (instead of 16 bits ones) + CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT = 0x0000000e, ///< shader program exports the depth of the shaded fragment + CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS = 0x00000040, ///< shader program exports 32 bits registers values (instead of 16 bits ones) // Other known flags - RSX_SHADER_CONTROL_USED_REGS_MASK = 0xf, - RSX_SHADER_CONTROL_USED_TEMP_REGS_MASK = 0xff << 24, - 
RSX_SHADER_CONTROL_USES_KIL = 0x80, // program uses KIL op - RSX_SHADER_CONTROL_UNKNOWN0 = 0x400, // seemingly always set - RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used?? + RSX_SHADER_CONTROL_USED_REGS_MASK = 0x0000000f, + RSX_SHADER_CONTROL_USED_TEMP_REGS_MASK = 0xff000000, + + RSX_SHADER_CONTROL_USES_KIL = 0x00000080, // program uses KIL op + RSX_SHADER_CONTROL_UNKNOWN0 = 0x00000400, // seemingly always set + RSX_SHADER_CONTROL_UNKNOWN1 = 0x00008000, // seemingly set when srgb packer is used?? // Custom - RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x00010000, // Rasterizing triangles and not lines or points - RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x00020000, // Support instance ID offsets when loading constants - RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x00040000, // Compile internals expecting interpreter + RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x00010000, // Rasterizing triangles and not lines or points + RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x00020000, // Support instance ID offsets when loading constants + RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x00040000, // Compile internals expecting interpreter - RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x00080000, // Quantize outputs to 8-bit FBO - RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x00100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate. + RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x00080000, // Quantize outputs to 8-bit FBO + RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x00100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate. 
- RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x00200000, // Uses alpha kill on texture input - RSX_SHADER_CONTROL_ALPHA_TEST = 0x00400000, // Uses alpha test on the outputs - RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x00800000, // Uses polygon stipple for dithered rendering - RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x01000000, // Alpha to coverage + RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x00200000, // Uses alpha kill on texture input + RSX_SHADER_CONTROL_ALPHA_TEST = 0x00400000, // Uses alpha test on the outputs + RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x00800000, // Uses polygon stipple for dithered rendering + RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x01000000, // Alpha to coverage - RSX_SHADER_CONTROL_DISABLE_EARLY_Z = 0x02000000, // Do not allow early-Z optimizations on this shader + RSX_SHADER_CONTROL_DISABLE_EARLY_Z = 0x02000000, // Do not allow early-Z optimizations on this shader - RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT = 0x04000000, // Allow format conversions (BX2, SNORM, SRGB, RENORM) - RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE = 0x08000000, // Emulate depth comparisons + RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT = 0x04000000, // Allow format conversions (BX2, SNORM, SRGB, RENORM) + RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE = 0x08000000, // Emulate depth comparisons + RSX_SHADER_CONTROL_MULTISAMPLED_ZBUFFER = 0x10000000, // Z buffer is multisampled. Only affects depth comparison behavior at this time. 
// Meta RSX_SHADER_CONTROL_META_USES_DISCARD = (RSX_SHADER_CONTROL_USES_KIL | RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL | RSX_SHADER_CONTROL_ALPHA_TEST | RSX_SHADER_CONTROL_POLYGON_STIPPLE | RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 2c2552e088..03bbd1e52a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -1686,7 +1686,7 @@ namespace rsx bind(NV4097_SET_ZPASS_PIXEL_COUNT_ENABLE, nv4097::set_zcull_pixel_count_enable); bind(NV4097_CLEAR_ZCULL_SURFACE, nv4097::clear_zcull); bind(NV4097_SET_DEPTH_TEST_ENABLE, nv4097::set_surface_options_dirty_bit); - bind(NV4097_SET_DEPTH_FUNC, nv4097::set_depth_func); + bind(NV4097_SET_DEPTH_FUNC, nv4097::set_surface_options_dirty_bit); bind(NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit); bind(NV4097_SET_COLOR_MASK, nv4097::set_color_mask); bind(NV4097_SET_COLOR_MASK_MRT, nv4097::set_surface_options_dirty_bit);