diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 3cc40efed1..9387f4f207 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -272,6 +272,15 @@ namespace rsx } } + void set_depth_func(context* ctx, u32 reg, u32 arg) + { + if (arg != REGS(ctx)->latch) + { + set_surface_options_dirty_bit(ctx, reg, arg); + RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty; + } + } + void set_color_mask(context* ctx, u32 reg, u32 arg) { if (arg == REGS(ctx)->latch) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index a5a434e47d..aced1244e9 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -89,6 +89,8 @@ namespace rsx void set_aa_control(context* ctx, u32 reg, u32 arg); + void set_depth_func(context* ctx, u32 reg, u32 arg); + #define RSX(ctx) ctx->rsxthr #define REGS(ctx) (&rsx::method_registers) diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index febb5c93bd..7749b24ac8 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -200,6 +200,8 @@ namespace glsl { "ALPHA_TEST_FUNC_LENGTH ", rsx::ROP_control_bits::ALPHA_FUNC_NUM_BITS }, { "MSAA_SAMPLE_CTRL_OFFSET ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_OFFSET }, { "MSAA_SAMPLE_CTRL_LENGTH ", rsx::ROP_control_bits::MSAA_SAMPLE_CTRL_NUM_BITS }, + { "FRAG_DEPTH_24_BIT ", rsx::ROP_control_bits::FRAG_DEPTH_24_BIT }, + { "FRAG_DEPTH_FLOAT_BIT ", rsx::ROP_control_bits::FRAG_DEPTH_FLOAT_BIT }, { "ROP_CMD_MASK ", rsx::ROP_control_bits::ROP_CMD_MASK } }); @@ -240,6 +242,11 @@ namespace glsl { enabled_options.push_back("_ENABLE_POLYGON_STIPPLE"); } + + if (props.emulate_depth_compare) + { + enabled_options.push_back("_ENABLE_DEPTH_COMPARE"); + } } // Import common header diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.h b/rpcs3/Emu/RSX/Program/GLSLCommon.h index 81b5043fc9..b71dc8c80d 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.h @@ -21,10 +21,12 @@ namespace rsx // Auxilliary config INT_FRAMEBUFFER_BIT = 16, MSAA_WRITE_ENABLE_BIT = 17, + FRAG_DEPTH_24_BIT = 18, + FRAG_DEPTH_FLOAT_BIT = 19, // Data - ALPHA_FUNC_OFFSET = 18, - MSAA_SAMPLE_CTRL_OFFSET = 21, + ALPHA_FUNC_OFFSET = 20, + MSAA_SAMPLE_CTRL_OFFSET = 23, // Data lengths ALPHA_FUNC_NUM_BITS = 3, diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl index 240186cd29..6009c342e5 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl @@ -38,6 +38,22 @@ R"( } #endif +#ifdef _ENABLE_DEPTH_COMPARE + float dstDepth = texelFetch(frag_depth, ivec2(gl_FragCoord.xy), 0).r; + float srcDepth = gl_FragCoord.z; + float scale = _test_bit(rop_control, FRAG_DEPTH_24_BIT) ? float(0xffffffu) : float(0xffffu); + + // Quantize and compare + if (abs(srcDepth - dstDepth) < (1.f / scale)) + { + gl_FragDepth = dstDepth; + } + else + { + gl_FragDepth = srcDepth; + } +#endif + #ifdef _ENABLE_PROGRAMMABLE_BLENDING switch (framebufferCount) { diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 685d87fafd..468f69b10e 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -37,6 +37,7 @@ namespace glsl bool require_linear_to_srgb : 1; bool require_fog_read : 1; bool emulate_shadow_compare : 1; + bool emulate_depth_compare : 1; bool low_precision_tests : 1; bool disable_early_discard : 1; bool supports_native_fp16 : 1; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index a97fa70258..fa54fe1080 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1681,6 +1681,15 @@ namespace rsx m_frame_stats.framebuffer_stats.add(layout.width, layout.height, aa_mode); } + // Flags + if (g_cfg.video.emulate_depth_compare && + m_framebuffer_layout.zeta_address && + method_registers.depth_func() == rsx::comparison_function::equal) + { + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE; + current_fragment_program.ctrl &= ~RSX_SHADER_CONTROL_DISABLE_EARLY_Z; + } + // Check if anything has changed bool really_changed = false; @@ -2153,6 +2162,12 @@ namespace rsx current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE; } } + + if (m_framebuffer_layout.zeta_address && + method_registers.depth_func() == rsx::comparison_function::equal) + { + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE; + } } else if (m_ctx->register_state->point_sprite_enabled() && m_ctx->register_state->current_draw_clause.primitive == primitive_type::points) @@ -2321,7 +2336,7 @@ namespace rsx { m_graphics_state |= rsx::zeta_address_is_cyclic; - if (!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD)) && + if (!(current_fragment_program.ctrl & (CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_META_USES_DISCARD | RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE)) && m_framebuffer_layout.zeta_write_enabled) { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_DISABLE_EARLY_Z; diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index f7dffc3029..43294b798e 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -768,6 +768,15 @@ bool VKGSRender::bind_texture_env() m_vs_binding_table->vtex_location[i]); } + if (current_fragment_program.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) + { + auto ds = ensure(m_rtts.m_bound_depth_stencil.second); + vk::insert_texture_barrier(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, true); + + auto view = ds->get_view(rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT); + m_program->bind_uniform({ *view, vk::null_sampler() }, vk::glsl::binding_set_index_fragment, m_fs_binding_table->frag_depth_input_location); + } + return out_of_memory; } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 853f9f8683..7f7168322a 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -91,6 +91,11 @@ void VKFragmentDecompilerThread::prepareBindingTable() } } } + + if (m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) + { + vk_prog->binding_table.frag_depth_input_location = location++; + } } void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) @@ -240,6 +245,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } + if (m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE) + { + OS << "layout(set=" << vk::glsl::binding_set_index_fragment << ", binding=" << vk_prog->binding_table.frag_depth_input_location << ") uniform sampler2D frag_depth;\n"; + + auto in = vk::glsl::program_input::make( + glsl::glsl_fragment_program, + "frag_depth", + vk::glsl::input_type_texture, + vk::glsl::binding_set_index_fragment, + vk_prog->binding_table.frag_depth_input_location + ); + inputs.push_back(in); + } + // Draw params are always provided by vertex program. Instead of pointer chasing, they're provided as varyings. if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INTERPRETER_MODEL)) { @@ -341,6 +360,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj; m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); m_shader_props.require_color_format_convert = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT); + m_shader_props.emulate_depth_compare = !!(m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE); // Declare global constants if (m_shader_props.require_fog_read) @@ -465,7 +485,8 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "void main()\n"; OS << "{\n"; - if (m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST) + if ((m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST) || + (m_prog.ctrl & RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE)) { OS << " const uint rop_control = fs_contexts[_fs_context_offset].rop_control;\n" diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index 50469aeca4..0412561340 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -75,6 +75,7 @@ public: u32 polygon_stipple_params_location = umax; // Polygon stipple settings u32 ftex_location[16]; // Texture locations array u32 ftex_stencil_location[16]; // Texture stencil mirror array + u32 frag_depth_input_location = umax; // Fragment depth compare } binding_table; diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index 542151e340..cd27a76a78 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -454,21 +454,22 @@ namespace gcm RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used?? // Custom - RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x0010000, // Rasterizing triangles and not lines or points - RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x0020000, // Support instance ID offsets when loading constants - RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x0040000, // Compile internals expecting interpreter + RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x00010000, // Rasterizing triangles and not lines or points + RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x00020000, // Support instance ID offsets when loading constants + RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x00040000, // Compile internals expecting interpreter - RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x0080000, // Quantize outputs to 8-bit FBO - RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x0100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate. + RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x00080000, // Quantize outputs to 8-bit FBO + RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x00100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate. - RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x0200000, // Uses alpha kill on texture input - RSX_SHADER_CONTROL_ALPHA_TEST = 0x0400000, // Uses alpha test on the outputs - RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x0800000, // Uses polygon stipple for dithered rendering - RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x1000000, // Alpha to coverage + RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x00200000, // Uses alpha kill on texture input + RSX_SHADER_CONTROL_ALPHA_TEST = 0x00400000, // Uses alpha test on the outputs + RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x00800000, // Uses polygon stipple for dithered rendering + RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x01000000, // Alpha to coverage - RSX_SHADER_CONTROL_DISABLE_EARLY_Z = 0x2000000, // Do not allow early-Z optimizations on this shader + RSX_SHADER_CONTROL_DISABLE_EARLY_Z = 0x02000000, // Do not allow early-Z optimizations on this shader - RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT = 0x4000000, // Allow format conversions (BX2, SNORM, SRGB, RENORM) + RSX_SHADER_CONTROL_TEXTURE_FORMAT_CONVERT = 0x04000000, // Allow format conversions (BX2, SNORM, SRGB, RENORM) + RSX_SHADER_CONTROL_EMULATE_DEPTH_COMPARE = 0x08000000, // Emulate depth comparisons // Meta RSX_SHADER_CONTROL_META_USES_DISCARD = (RSX_SHADER_CONTROL_USES_KIL | RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL | RSX_SHADER_CONTROL_ALPHA_TEST | RSX_SHADER_CONTROL_POLYGON_STIPPLE | RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE) diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 03bbd1e52a..2c2552e088 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -1686,7 +1686,7 @@ namespace rsx bind(NV4097_SET_ZPASS_PIXEL_COUNT_ENABLE, nv4097::set_zcull_pixel_count_enable); bind(NV4097_CLEAR_ZCULL_SURFACE, nv4097::clear_zcull); bind(NV4097_SET_DEPTH_TEST_ENABLE, nv4097::set_surface_options_dirty_bit); - bind(NV4097_SET_DEPTH_FUNC, nv4097::set_surface_options_dirty_bit); + bind(NV4097_SET_DEPTH_FUNC, nv4097::set_depth_func); bind(NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit); bind(NV4097_SET_COLOR_MASK, nv4097::set_color_mask); bind(NV4097_SET_COLOR_MASK_MRT, nv4097::set_surface_options_dirty_bit);