This commit is contained in:
kd-11 2025-12-15 14:28:29 +01:00 committed by GitHub
commit a7dcd0a30e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 482 additions and 374 deletions

View File

@ -680,79 +680,9 @@ namespace rsx
ROP_control_t rop_control{};
alignas(16) fragment_context_t payload{};
if (REGS(m_ctx)->alpha_test_enabled())
{
const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func());
rop_control.set_alpha_test_func(alpha_func);
rop_control.enable_alpha_test();
}
if (REGS(m_ctx)->polygon_stipple_enabled())
{
rop_control.enable_polygon_stipple();
}
auto can_use_hw_a2c = [&]() -> bool
{
const auto& config = RSX(m_ctx)->get_backend_config();
if (!config.supports_hw_a2c)
{
return false;
}
if (config.supports_hw_a2c_1spp)
{
return true;
}
return REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample;
};
if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !can_use_hw_a2c())
{
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
// Alpha values generate a coverage mask for order independent blending
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
// Simulated using combined alpha blend and alpha test
rop_control.enable_alpha_to_coverage();
if (REGS(m_ctx)->msaa_sample_mask())
{
rop_control.enable_MSAA_writes();
}
// Sample configuration bits
switch (REGS(m_ctx)->surface_antialias())
{
case rsx::surface_antialiasing::center_1_sample:
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
rop_control.set_msaa_control(1u);
break;
default:
rop_control.set_msaa_control(3u);
break;
}
}
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (REGS(m_ctx)->surface_color())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats.
rop_control.enable_framebuffer_INT();
// Check if we want sRGB conversion.
if (REGS(m_ctx)->framebuffer_srgb_enabled())
{
rop_control.enable_framebuffer_sRGB();
}
break;
}
// Always encode the alpha function. Toggling alpha-test is not guaranteed to trigger context param reload anymore.
const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func());
rop_control.set_alpha_test_func(alpha_func);
// Generate wpos coefficients
// wpos equation is now as follows (ignoring pixel center offset):
@ -766,7 +696,6 @@ namespace rsx
payload.rop_control = rop_control.value;
payload.alpha_ref = REGS(m_ctx)->alpha_ref();
const auto window_origin = REGS(m_ctx)->shader_window_origin();
const u32 window_height = REGS(m_ctx)->shader_window_height();
const auto pixel_center = REGS(m_ctx)->pixel_center();

View File

@ -103,8 +103,19 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
const auto reg_type = float_type ? "vec4" : getHalfTypeName(4);
for (uint i = 0; i < std::size(table); ++i)
{
if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n";
if (!m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
{
continue;
}
if (i >= m_prog.mrt_buffers_count)
{
// Dead writes. Declare as temp variables for DCE to clean up.
OS << "vec4 " << table[i].first << "; // Unused\n";
continue;
}
OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n";
}
}
@ -190,7 +201,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
" uvec4 stipple_pattern[8];\n"
"};\n\n"
"#define texture_base_index 0\n\n";
"#define texture_base_index 0\n"
"#define TEX_PARAM(index) texture_parameters[index]\n\n";
}
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
@ -207,16 +219,24 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.require_fog_read = properties.in_register_mask & in_fogc;
m_shader_props.emulate_coverage_tests = !rsx::get_renderer_backend_config().supports_hw_a2c_1spp;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low;
m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER);
m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER);
m_shader_props.ROP_alpha_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST);
m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE);
m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE);
m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL);
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj;
m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL);
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}

View File

@ -300,6 +300,7 @@ namespace gl
}
builder << "\n"
"#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n"
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"

View File

@ -250,8 +250,12 @@ namespace rsx
const auto current = REGS(ctx)->decode<NV4097_SET_SURFACE_FORMAT>(arg);
const auto previous = REGS(ctx)->decode<NV4097_SET_SURFACE_FORMAT>(REGS(ctx)->latch);
if (*current.antialias() != *previous.antialias() || // Antialias control has changed, update ROP parameters
current.is_integer_color_format() != previous.is_integer_color_format()) // The type of color format also requires ROP control update
if (current.is_integer_color_format() != previous.is_integer_color_format()) // Different ROP emulation
{
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
}
if (*current.antialias() != *previous.antialias()) // Antialias control has changed, update ROP parameters
{
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
}
@ -302,6 +306,34 @@ namespace rsx
REGS(ctx)->decode(reg, REGS(ctx)->latch);
}
// Reacts to a new anti-aliasing control value (`arg`) by comparing it against the
// currently latched value and flagging whatever state must be rebuilt.
//   ctx - context providing the RSX thread (via RSX()) and register state (via REGS())
//   reg - register index; not used by this handler
//   arg - incoming register value
void set_aa_control(context* ctx, u32 reg, u32 arg)
{
	const auto previous = REGS(ctx)->latch;
	if (previous != arg)
	{
		// Any change to the value requires the pipeline to be reconfigured.
		RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty;

		// With full hardware A2C support (including the 1spp case) nothing else is needed;
		// the pipeline configuration is expected to take care of it.
		const auto& caps = RSX(ctx)->get_backend_config();
		const bool full_hw_a2c = caps.supports_hw_a2c && caps.supports_hw_a2c_1spp;

		if (!full_hw_a2c)
		{
			// Missing or partial hardware support: the current program has to be
			// invalidated whenever the alpha-to-coverage bit flips.
			const auto was_enabled = REGS(ctx)->decode<NV4097_SET_ANTI_ALIASING_CONTROL>(previous).msaa_alpha_to_coverage();
			const auto now_enabled = REGS(ctx)->decode<NV4097_SET_ANTI_ALIASING_CONTROL>(arg).msaa_alpha_to_coverage();

			if (was_enabled != now_enabled)
			{
				RSX(ctx)->m_graphics_state |= rsx::fragment_program_state_dirty;
			}
		}
	}
}
///// Draw call setup (vertex, etc)
void set_array_element16(context* ctx, u32, u32 arg)

View File

@ -87,6 +87,8 @@ namespace rsx
void set_transform_constant_load(context* ctx, u32 reg, u32 arg);
void set_aa_control(context* ctx, u32 reg, u32 arg);
#define RSX(ctx) ctx->rsxthr
#define REGS(ctx) (&rsx::method_registers)

View File

@ -551,18 +551,22 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
{
std::string ret;
u32 precision_modifier = 0;
u32 register_index = umax;
if constexpr (std::is_same_v<T, SRC0>)
{
precision_modifier = src1.src0_prec_mod;
register_index = 0;
}
else if constexpr (std::is_same_v<T, SRC1>)
{
precision_modifier = src1.src1_prec_mod;
register_index = 1;
}
else if constexpr (std::is_same_v<T, SRC2>)
{
precision_modifier = src1.src2_prec_mod;
register_index = 2;
}
switch (src.reg_type)
@ -645,18 +649,29 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
{
// TEX0 - TEX9
// Texcoord 2d mask seems to reset the last 2 arguments to 0 and w if set
// Opt: Skip emitting w dependency unless w coord is actually being sampled
ensure(register_index != umax);
const auto lane_mask = FP::get_src_vector_lane_mask_shuffled(m_prog, m_instruction, register_index);
const auto touches_z = !!(lane_mask & (1u << 2));
const bool touches_w = !!(lane_mask & (1u << 3));
const u8 texcoord = u8(register_id) - 4;
if (m_prog.texcoord_is_point_coord(texcoord))
{
// Point sprite coord generation. Stacks with the 2D override mask.
if (m_prog.texcoord_is_2d(texcoord))
if (!m_prog.texcoord_is_2d(texcoord))
{
ret += getFloatTypeName(4) + "(gl_PointCoord, 0., in_w)";
properties.has_w_access = true;
ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)";
}
else if (!touches_w)
{
ret += getFloatTypeName(4) + "(gl_PointCoord, 0., 0.)";
}
else
{
ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)";
ret += getFloatTypeName(4) + "(gl_PointCoord, 0., in_w)";
properties.has_w_access = true;
}
}
else if (src2.perspective_corr)
@ -673,14 +688,19 @@ template<typename T> std::string FragmentProgramDecompiler::GetSRC(T src)
}
else
{
if (m_prog.texcoord_is_2d(texcoord))
const bool skip_zw_load = !touches_z && !touches_w;
if (!m_prog.texcoord_is_2d(texcoord) || skip_zw_load)
{
ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)";
properties.has_w_access = true;
ret += reg_var;
}
else if (!touches_w)
{
ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., 0.)";
}
else
{
ret += reg_var;
ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)";
properties.has_w_access = true;
}
}
break;

View File

@ -216,12 +216,12 @@ namespace glsl
enabled_options.push_back("_32_BIT_OUTPUT");
}
if (!props.fp32_outputs)
if (props.ROP_sRGB_packing)
{
enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
}
if (props.disable_early_discard)
if (props.disable_early_discard && props.ROP_discard)
{
enabled_options.push_back("_DISABLE_EARLY_DISCARD");
}
@ -231,7 +231,15 @@ namespace glsl
enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
}
enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
if (props.ROP_alpha_test)
{
enabled_options.push_back("_ENABLE_ALPHA_TEST");
}
if (props.ROP_polygon_stipple_test)
{
enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
}
}
// Import common header
@ -276,12 +284,12 @@ namespace glsl
return;
}
if (props.emulate_coverage_tests)
if (props.ROP_alpha_to_coverage_test)
{
enabled_options.push_back("_EMULATE_COVERAGE_TEST");
enabled_options.push_back("_ENABLE_ALPHA_TO_COVERAGE_TEST");
}
if (!props.fp32_outputs || props.require_linear_to_srgb)
if (props.ROP_sRGB_packing || props.require_linear_to_srgb)
{
enabled_options.push_back("_ENABLE_LINEAR_TO_SRGB");
}
@ -296,6 +304,11 @@ namespace glsl
enabled_options.push_back("_ENABLE_WPOS");
}
if (props.ROP_alpha_test || (props.require_msaa_ops && props.require_tex_shadow_ops))
{
enabled_options.push_back("_ENABLE_COMPARISON_FUNC");
}
if (props.require_fog_read)
{
program_common::define_glsl_constants<rsx::fog_mode>(OS,
@ -385,6 +398,11 @@ namespace glsl
enabled_options.push_back("_ENABLE_SHADOWPROJ");
}
if (props.require_alpha_kill)
{
enabled_options.push_back("_ENABLE_TEXTURE_ALPHA_KILL");
}
program_common::define_glsl_switches(OS, enabled_options);
enabled_options.clear();

View File

@ -81,7 +81,7 @@ vec4 fetch_fog_value(const in uint mode)
}
#endif
#ifdef _EMULATE_COVERAGE_TEST
#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST
// Purely stochastic
bool coverage_test_passes(const in vec4 _sample)
{
@ -109,6 +109,7 @@ vec4 srgb_to_linear(const in vec4 cs)
}
#endif
#ifdef _ENABLE_COMPARISON_FUNC
// Required by all fragment shaders for alpha test
bool comparison_passes(const in float a, const in float b, const in uint func)
{
@ -125,5 +126,6 @@ bool comparison_passes(const in float a, const in float b, const in uint func)
case 7: return true; //always
}
}
#endif
)"

View File

@ -1,8 +1,8 @@
R"(
#define ZS_READ(index, coord) vec2(texture(TEX_NAME(index), coord).r, float(texture(TEX_NAME_STENCIL(index), coord).x))
#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index))
// NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
// The A component (Z) is useless (should contain stencil8 or just 1)

View File

@ -1,10 +1,10 @@
R"(
#define ZCOMPARE_FUNC(index) _get_bits(TEX_FLAGS(index), DEPTH_COMPARE, 3)
#define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, index).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, index).x))
#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index))
#define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index)))
#define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, TEX_PARAM(index)).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, TEX_PARAM(index)).x))
#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, TEX_PARAM(index)), TEX_FLAGS(index))
#define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, TEX_PARAM(index)).x, coord3.z, ZCOMPARE_FUNC(index)))
#define TEX2D_SHADOWPROJ_MS(index, coord4) TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w))
#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index))
#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index))
vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step)
{

View File

@ -1,5 +1,5 @@
R"(
vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, const in ivec2 icoords, const in int index, const in ivec2 offset)
vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, const in ivec2 icoords, const in ivec2 offset)
{
const vec2 resolve_coords = vec2(icoords + offset);
const vec2 aa_coords = floor(resolve_coords / sample_count); // AA coords = real_coords / sample_count
@ -8,15 +8,15 @@ vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, cons
return texelFetch(tex, ivec2(aa_coords), int(sample_index));
}
vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const in int index)
vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const in sampler_info tex_params)
{
const uint flags = TEX_FLAGS(index);
const vec2 scaled_coords = COORD_SCALE2(index, coords);
const uint flags = tex_params.flags;
const vec2 scaled_coords = _texcoord_xform(coords, tex_params);
const vec2 normalized_coords = texture2DMSCoord(scaled_coords, flags);
const vec2 sample_count = vec2(2., textureSamples(tex) * 0.5);
const vec2 image_size = textureSize(tex) * sample_count;
const ivec2 icoords = ivec2(normalized_coords * image_size);
const vec4 sample0 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0));
const vec4 sample0 = texelFetch2DMS(tex, sample_count, icoords, ivec2(0));
if (_get_bits(flags, FILTERED_MAG_BIT, 2) == 0)
{
@ -35,7 +35,7 @@ vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const i
vec4 a, b;
float factor;
const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0, 1)); // Top left
const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, ivec2(0, 1)); // Top left
if (no_filter.x)
{
@ -46,21 +46,21 @@ vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const i
else
{
// Filter required, sample more data
const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 0)); // Bottom right
const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 1)); // Top right
const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, ivec2(1, 0)); // Bottom right
const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, ivec2(1, 1)); // Top right
if (actual_step.x > uv_step.x)
{
// Downscale in X, centered
const vec3 weights = compute2x2DownsampleWeights(normalized_coords.x, uv_step.x, actual_step.x);
const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 0)); // Further bottom right
a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum
const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, ivec2(2, 0)); // Further bottom right
a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum
if (!no_filter.y)
{
const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 1)); // Further top right
b = fma(sample2, weights.xxxx, sample3 * weights.y) + (sample5 * weights.z); // Weighted sum
const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, ivec2(2, 1)); // Further top right
b = fma(sample2, weights.xxxx, sample3 * weights.y) + (sample5 * weights.z); // Weighted sum
}
}
else if (actual_step.x < uv_step.x)

View File

@ -24,17 +24,17 @@ R"(
uint _texture_flag_override = 0;
#define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK
#define _disable_texture_expand() _texture_flag_override = 0
#define TEX_FLAGS(index) (texture_parameters[index + texture_base_index].flags | _texture_flag_override)
#define TEX_FLAGS(index) (TEX_PARAM(index).flags | _texture_flag_override)
#else
#define TEX_FLAGS(index) texture_parameters[index + texture_base_index].flags
#define TEX_FLAGS(index) TEX_PARAM(index).flags
#endif
#define TEX_NAME(index) tex##index
#define TEX_NAME_STENCIL(index) tex##index##_stencil
#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, texture_parameters[index + texture_base_index])
#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, texture_parameters[index + texture_base_index])
#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, texture_parameters[index + texture_base_index])
#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, TEX_PARAM(index))
#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, TEX_PARAM(index))
#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, TEX_PARAM(index))
#define COORD_PROJ1(index, coord2) COORD_SCALE1(index, coord2.x / coord2.y)
#define COORD_PROJ2(index, coord3) COORD_SCALE2(index, coord3.xy / coord3.z)
#define COORD_PROJ3(index, coord4) COORD_SCALE3(index, coord4.xyz / coord4.w)
@ -57,9 +57,9 @@ R"(
#ifdef _ENABLE_SHADOW
#ifdef _EMULATED_TEXSHADOW
#define SHADOW_COORD(index, coord3) _texcoord_xform_shadow(coord3, texture_parameters[index + texture_base_index])
#define SHADOW_COORD4(index, coord4) _texcoord_xform_shadow(coord4, texture_parameters[index + texture_base_index])
#define SHADOW_COORD_PROJ(index, coord4) _texcoord_xform_shadow(coord4.xyz / coord4.w, texture_parameters[index + texture_base_index])
#define SHADOW_COORD(index, coord3) _texcoord_xform_shadow(coord3, TEX_PARAM(index))
#define SHADOW_COORD4(index, coord4) _texcoord_xform_shadow(coord4, TEX_PARAM(index))
#define SHADOW_COORD_PROJ(index, coord4) _texcoord_xform_shadow(coord4.xyz / coord4.w, TEX_PARAM(index))
#define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(index, coord3))
#define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(index, coord4))
@ -188,6 +188,7 @@ vec4 _process_texel(in vec4 rgba, const in uint control_bits)
return rgba;
}
#ifdef _ENABLE_TEXTURE_ALPHA_KILL
if (_test_bit(control_bits, ALPHAKILL))
{
// Alphakill
@ -197,6 +198,7 @@ vec4 _process_texel(in vec4 rgba, const in uint control_bits)
return rgba;
}
}
#endif
if (_test_bit(control_bits, RENORMALIZE))
{

View File

@ -8,43 +8,33 @@ R"(
#endif
#ifdef _ENABLE_FRAMEBUFFER_SRGB
if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))
{
col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb;
col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb;
col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb;
col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb;
}
col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb;
col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb;
col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb;
col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb;
#endif
#ifdef _ENABLE_ROP_OUTPUT_ROUNDING
if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))
{
col0 = round_to_8bit(col0);
col1 = round_to_8bit(col1);
col2 = round_to_8bit(col2);
col3 = round_to_8bit(col3);
}
col0 = round_to_8bit(col0);
col1 = round_to_8bit(col1);
col2 = round_to_8bit(col2);
col3 = round_to_8bit(col3);
#endif
// Post-output stages
// Alpha Testing
if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))
#ifdef _ENABLE_ALPHA_TEST
const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
if (!comparison_passes(col0.a, alpha_ref, alpha_func))
{
const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
if (!comparison_passes(col0.a, alpha_ref, alpha_func))
{
discard;
}
discard;
}
#endif
#ifdef _EMULATE_COVERAGE_TEST
if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))
#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST
if (!coverage_test_passes(col0))
{
if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0))
{
discard;
}
discard;
}
#endif

View File

@ -1,24 +1,21 @@
R"(
#ifdef _ENABLE_POLYGON_STIPPLE
if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT))
{
// Convert x,y to linear address
const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);
const uint address = stipple_coord.y * 32u + stipple_coord.x;
const uint bit_offset = (address & 31u);
#ifdef VULKAN
// In vulkan we have a unified array with a dynamic offset
const uint word_index = _get_bits(address, 7, 3) + _fs_stipple_pattern_array_offset;
#else
const uint word_index = _get_bits(address, 7, 3);
#endif
const uint sub_index = _get_bits(address, 5, 2);
// Convert x,y to linear address
const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32);
const uint address = stipple_coord.y * 32u + stipple_coord.x;
const uint bit_offset = (address & 31u);
#ifdef VULKAN
// In vulkan we have a unified array with a dynamic offset
const uint word_index = _get_bits(address, 7, 3) + _fs_stipple_pattern_array_offset;
#else
const uint word_index = _get_bits(address, 7, 3);
#endif
const uint sub_index = _get_bits(address, 5, 2);
if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))
{
_kill();
}
if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset)))
{
_kill();
}
#endif

View File

@ -36,12 +36,18 @@ namespace glsl
bool require_srgb_to_linear : 1;
bool require_linear_to_srgb : 1;
bool require_fog_read : 1;
bool emulate_coverage_tests : 1;
bool emulate_shadow_compare : 1;
bool low_precision_tests : 1;
bool disable_early_discard : 1;
bool supports_native_fp16 : 1;
// ROP control flags
bool ROP_output_rounding : 1;
bool ROP_sRGB_packing : 1;
bool ROP_alpha_test : 1;
bool ROP_alpha_to_coverage_test : 1;
bool ROP_polygon_stipple_test : 1;
bool ROP_discard : 1;
// Texturing spec
bool require_texture_ops : 1; // Global switch to enable/disable all texture code
@ -53,5 +59,6 @@ namespace glsl
bool require_tex2D_ops : 1; // Include 2D texture stuff
bool require_tex3D_ops : 1; // Include 3D texture stuff (including cubemap)
bool require_shadowProj_ops : 1; // Include shadow2DProj projection textures (1D is unsupported anyway)
bool require_alpha_kill : 1; // Include alpha kill checking code
};
};

View File

@ -2052,220 +2052,270 @@ namespace rsx
m_graphics_state.clear(rsx::pipeline_state::fragment_program_dirty);
current_fragment_program.ctrl = m_ctx->register_state->shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
current_fragment_program.ctrl = m_ctx->register_state->shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_USES_KIL);
current_fragment_program.texcoord_control_mask = m_ctx->register_state->texcoord_control_mask();
current_fragment_program.two_sided_lighting = m_ctx->register_state->two_side_light_en();
current_fragment_program.mrt_buffers_count = rsx::utility::get_mrt_buffers_count(m_ctx->register_state->surface_color_target());
if (method_registers.current_draw_clause.classify_mode() == primitive_class::polygon)
if (m_ctx->register_state->current_draw_clause.classify_mode() == primitive_class::polygon)
{
if (!backend_config.supports_normalized_barycentrics)
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION;
}
if (m_ctx->register_state->alpha_test_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TEST;
}
if (m_ctx->register_state->polygon_stipple_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_POLYGON_STIPPLE;
}
if (m_ctx->register_state->msaa_alpha_to_coverage_enabled())
{
const bool is_multiple_samples = m_ctx->register_state->surface_antialias() != rsx::surface_antialiasing::center_1_sample;
if (!backend_config.supports_hw_a2c || (!is_multiple_samples && !backend_config.supports_hw_a2c_1spp))
{
// Emulation required
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE;
}
}
}
else if (method_registers.point_sprite_enabled() &&
method_registers.current_draw_clause.primitive == primitive_type::points)
else if (m_ctx->register_state->point_sprite_enabled() &&
m_ctx->register_state->current_draw_clause.primitive == primitive_type::points)
{
// Set high word of the control mask to store point sprite control
current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16;
current_fragment_program.texcoord_control_mask |= u32(m_ctx->register_state->point_sprite_control_mask()) << 16;
}
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (m_ctx->register_state->surface_color())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats. These can support sRGB output as well as some special rules for output quantization.
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER;
if (!(current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) && // Cannot output sRGB from 32-bit registers
m_ctx->register_state->framebuffer_srgb_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER;
}
break;
}
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1)) continue;
auto &tex = rsx::method_registers.fragment_textures[i];
auto &tex = m_ctx->register_state->fragment_textures[i];
current_fp_texture_state.clear(i);
if (tex.enabled() && sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_UNDEFINED)
if (!tex.enabled() || sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_UNDEFINED)
{
std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 6 * sizeof(f32));
current_fragment_program.texture_params[i].remap = tex.remap();
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
u32 texture_control = 0;
current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);
if (sampler_descriptors[i]->texcoord_xform.clamp)
{
std::memcpy(current_fragment_program.texture_params[i].clamp_min, sampler_descriptors[i]->texcoord_xform.clamp_min, 4 * sizeof(f32));
texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT);
}
if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
texture_control |= (1 << texture_control_bits::ALPHAKILL);
}
//const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 raw_format = tex.format();
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
if (raw_format & CELL_GCM_TEXTURE_UN)
{
if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
tex.mag_filter() == rsx::texture_magnify_filter::nearest)
{
// Subpixel offset so that (X + bias) * scale will round correctly.
// This is done to work around fdiv precision issues in some GPUs (NVIDIA)
// We apply the simplification where (x + bias) * z = xz + zbias here.
constexpr auto subpixel_bias = 0.01f;
current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]);
current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]);
current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]);
}
}
if (backend_config.supports_hw_msaa && sampler_descriptors[i]->samples > 1)
{
current_fp_texture_state.multisampled_textures |= (1 << i);
texture_control |= (static_cast<u32>(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP);
texture_control |= (static_cast<u32>(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG);
texture_control |= (static_cast<u32>(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN);
texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS);
if (rsx::is_texcoord_wrapping_mode(tex.wrap_s()))
{
texture_control |= (1 << texture_control_bits::WRAP_S);
}
if (rsx::is_texcoord_wrapping_mode(tex.wrap_t()))
{
texture_control |= (1 << texture_control_bits::WRAP_T);
}
if (rsx::is_texcoord_wrapping_mode(tex.wrap_r()))
{
texture_control |= (1 << texture_control_bits::WRAP_R);
}
}
if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
{
switch (sampler_descriptors[i]->format_class)
{
case RSX_FORMAT_CLASS_DEPTH16_FLOAT:
case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32:
texture_control |= (1 << texture_control_bits::DEPTH_FLOAT);
break;
default:
break;
}
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
{
// Emulate bitcast in shader
current_fp_texture_state.redirected_textures |= (1 << i);
const auto float_en = (sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0;
texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT);
break;
}
case CELL_GCM_TEXTURE_X16:
{
// A simple way to quickly read DEPTH16 data without shadow comparison
break;
}
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
{
// Natively supported Z formats with shadow comparison feature
const auto compare_mode = tex.zfunc();
if (!tex.alpha_kill_enabled() &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
{
current_fp_texture_state.shadow_textures |= (1 << i);
}
break;
}
default:
rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
else if (!backend_config.supports_hw_renormalization /* &&
tex.min_filter() == rsx::texture_minify_filter::nearest &&
tex.mag_filter() == rsx::texture_magnify_filter::nearest*/)
{
// FIXME: This check should only apply to point-sampled textures. However, it severely regresses some games (id tech 5).
// This is because even when filtering is active, the error from the PS3 texture expansion still applies.
// A proper fix is to expand these formats into BGRA8 when high texture precision is required. That requires different GUI settings and inflation shaders, so it will be handled separately.
switch (format)
{
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_R6G5B5:
texture_control |= (1 << texture_control_bits::RENORMALIZE);
break;
default:
break;
}
}
if (rsx::is_int8_remapped_format(format))
{
	// 8-bit formats accept extra per-channel conversions: gamma correction and sign conversion.
	// NOTE: unsigned_remap=bias flags the texture as a compressed normal (2n-1 / BX2) and does range decompression (UE3)
	// NOTE: ARGB8_signed reinterprets the raw bytes as signed, which is not the same thing as unsigned_remap=bias.
	//       It is a separate method of setting the format to signed mode without doing so per-channel
	// Precedence = SNORM > GAMMA > UNSIGNED_REMAP (See Resistance 3 for GAMMA/BX2 relationship, UE3 for BX2 effect)
	const u32 argb8_signed = tex.argb_signed();    // _SNORM
	const u32 gamma = tex.gamma() & ~argb8_signed; // _SRGB, dropped on channels already taken by SNORM

	// _BX2: only the channels that are neither SNORM nor SRGB remain eligible
	const u32 unsigned_remap = (tex.unsigned_remap() == CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL)
		? 0u
		: (~(gamma | argb8_signed) & 0xF);

	// Gamma bits go in as-is, the sign conversion bits are merged in below
	u32 argb8_convert = gamma;

	// Sanity check: the three per-channel options must never overlap
	ensure((argb8_signed & gamma) == 0);
	ensure((argb8_signed & unsigned_remap) == 0);
	ensure((gamma & unsigned_remap) == 0);

	// Merges the requested per-channel bits into argb8_convert at the given bit position.
	// A channel is accepted only when the upper bit of its 2-bit field in (remap >> 8) is set.
	const auto apply_sign_convert_mask = [&](u32 channel_bits, u32 shift)
	{
		// TODO: Use actual remap mask to account for 0 and 1 overrides in default mapping
		// TODO: Replace this pile of texture control bits with a matrix transformation
		const auto remap_ctrl = (tex.remap() >> 8) & 0xAA;

		if (remap_ctrl == 0xAA)
		{
			// All four channels qualify, take the whole nibble at once
			argb8_convert |= (channel_bits & 0xFu) << shift;
			return;
		}

		// Otherwise test each channel's 2-bit selector individually
		for (u32 channel = 0; channel < 4; ++channel)
		{
			const u32 selector = (remap_ctrl >> (channel * 2)) & 0x3;
			if (selector == 0x2)
			{
				argb8_convert |= (channel_bits & (1u << channel)) << shift;
			}
		}
	};

	if (argb8_signed)
	{
		// Integer sign extension from uint8 to sint8, followed by renormalization
		apply_sign_convert_mask(argb8_signed, texture_control_bits::SEXT_OFFSET);
	}

	if (unsigned_remap)
	{
		// Sign expansion in the compressed normal-map style (2n - 1)
		apply_sign_convert_mask(unsigned_remap, texture_control_bits::EXPAND_OFFSET);
	}

	texture_control |= argb8_convert;
}
current_fragment_program.texture_params[i].control = texture_control;
continue;
}
std::memcpy(
current_fragment_program.texture_params[i].scale,
sampler_descriptors[i]->texcoord_xform.scale,
sizeof(sampler_descriptors[i]->texcoord_xform.scale) * 2); // Copy scale and bias together
current_fragment_program.texture_params[i].remap = tex.remap();
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
u32 texture_control = 0;
current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);
if (sampler_descriptors[i]->texcoord_xform.clamp)
{
std::memcpy(
current_fragment_program.texture_params[i].clamp_min,
sampler_descriptors[i]->texcoord_xform.clamp_min,
sizeof(sampler_descriptors[i]->texcoord_xform.clamp_min) * 2); // Copy clamp_min and clamp_max together
texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT);
}
if (tex.alpha_kill_enabled())
{
//alphakill can be ignored unless a valid comparison function is set
texture_control |= (1 << texture_control_bits::ALPHAKILL);
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL;
}
//const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
const u32 raw_format = tex.format();
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
if (raw_format & CELL_GCM_TEXTURE_UN)
{
if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
tex.mag_filter() == rsx::texture_magnify_filter::nearest)
{
// Subpixel offset so that (X + bias) * scale will round correctly.
// This is done to work around fdiv precision issues in some GPUs (NVIDIA)
// We apply the simplification where (x + bias) * z = xz + zbias here.
constexpr auto subpixel_bias = 0.01f;
current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]);
current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]);
current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]);
}
}
if (backend_config.supports_hw_msaa && sampler_descriptors[i]->samples > 1)
{
current_fp_texture_state.multisampled_textures |= (1 << i);
texture_control |= (static_cast<u32>(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP);
texture_control |= (static_cast<u32>(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG);
texture_control |= (static_cast<u32>(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN);
texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS);
if (rsx::is_texcoord_wrapping_mode(tex.wrap_s()))
{
texture_control |= (1 << texture_control_bits::WRAP_S);
}
if (rsx::is_texcoord_wrapping_mode(tex.wrap_t()))
{
texture_control |= (1 << texture_control_bits::WRAP_T);
}
if (rsx::is_texcoord_wrapping_mode(tex.wrap_r()))
{
texture_control |= (1 << texture_control_bits::WRAP_R);
}
}
if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
{
switch (sampler_descriptors[i]->format_class)
{
case RSX_FORMAT_CLASS_DEPTH16_FLOAT:
case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32:
texture_control |= (1 << texture_control_bits::DEPTH_FLOAT);
break;
default:
break;
}
switch (format)
{
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_D8R8G8B8:
{
// Emulate bitcast in shader
current_fp_texture_state.redirected_textures |= (1 << i);
const auto float_en = (sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0;
texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT);
break;
}
case CELL_GCM_TEXTURE_X16:
{
// A simple way to quickly read DEPTH16 data without shadow comparison
break;
}
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
{
// Natively supported Z formats with shadow comparison feature
const auto compare_mode = tex.zfunc();
if (!tex.alpha_kill_enabled() &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
{
current_fp_texture_state.shadow_textures |= (1 << i);
}
break;
}
default:
rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format);
}
}
else if (!backend_config.supports_hw_renormalization /* &&
tex.min_filter() == rsx::texture_minify_filter::nearest &&
tex.mag_filter() == rsx::texture_magnify_filter::nearest*/)
{
// FIXME: This check should only apply to point-sampled textures. However, it severely regresses some games (id tech 5).
// This is because even when filtering is active, the error from the PS3 texture expansion still applies.
// A proper fix is to expand these formats into BGRA8 when high texture precision is required. That requires different GUI settings and inflation shaders, so it will be handled separately.
switch (format)
{
case CELL_GCM_TEXTURE_A1R5G5B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_D1R5G5B5:
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_R6G5B5:
texture_control |= (1 << texture_control_bits::RENORMALIZE);
break;
default:
break;
}
}
if (rsx::is_int8_remapped_format(format))
{
	// 8-bit formats accept extra per-channel conversions: gamma correction and sign conversion.
	// NOTE: unsigned_remap=bias flags the texture as a compressed normal (2n-1 / BX2) and does range decompression (UE3)
	// NOTE: ARGB8_signed reinterprets the raw bytes as signed, which is not the same thing as unsigned_remap=bias.
	//       It is a separate method of setting the format to signed mode without doing so per-channel
	// Precedence = SNORM > GAMMA > UNSIGNED_REMAP (See Resistance 3 for GAMMA/BX2 relationship, UE3 for BX2 effect)
	const u32 argb8_signed = tex.argb_signed();    // _SNORM
	const u32 gamma = tex.gamma() & ~argb8_signed; // _SRGB, dropped on channels already taken by SNORM

	// _BX2: only the channels that are neither SNORM nor SRGB remain eligible
	const u32 unsigned_remap = (tex.unsigned_remap() == CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL)
		? 0u
		: (~(gamma | argb8_signed) & 0xF);

	// Gamma bits go in as-is, the sign conversion bits are merged in below
	u32 argb8_convert = gamma;

	// Sanity check: the three per-channel options must never overlap
	ensure((argb8_signed & gamma) == 0);
	ensure((argb8_signed & unsigned_remap) == 0);
	ensure((gamma & unsigned_remap) == 0);

	// Merges the requested per-channel bits into argb8_convert at the given bit position.
	// A channel is accepted only when the upper bit of its 2-bit field in (remap >> 8) is set.
	const auto apply_sign_convert_mask = [&](u32 channel_bits, u32 shift)
	{
		// TODO: Use actual remap mask to account for 0 and 1 overrides in default mapping
		// TODO: Replace this pile of texture control bits with a matrix transformation
		const auto remap_ctrl = (tex.remap() >> 8) & 0xAA;

		if (remap_ctrl == 0xAA)
		{
			// All four channels qualify, take the whole nibble at once
			argb8_convert |= (channel_bits & 0xFu) << shift;
			return;
		}

		// Otherwise test each channel's 2-bit selector individually
		for (u32 channel = 0; channel < 4; ++channel)
		{
			const u32 selector = (remap_ctrl >> (channel * 2)) & 0x3;
			if (selector == 0x2)
			{
				argb8_convert |= (channel_bits & (1u << channel)) << shift;
			}
		}
	};

	if (argb8_signed)
	{
		// Integer sign extension from uint8 to sint8, followed by renormalization
		apply_sign_convert_mask(argb8_signed, texture_control_bits::SEXT_OFFSET);
	}

	if (unsigned_remap)
	{
		// Sign expansion in the compressed normal-map style (2n - 1)
		apply_sign_convert_mask(unsigned_remap, texture_control_bits::EXPAND_OFFSET);
	}

	texture_control |= argb8_convert;
}
current_fragment_program.texture_params[i].control = texture_control;
}
// Update texture configuration
@ -2275,7 +2325,7 @@ namespace rsx
if (current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
//Check that the depth stage is not disabled
if (!rsx::method_registers.depth_test_enabled())
if (!m_ctx->register_state->depth_test_enabled())
{
rsx_log.trace("FS exports depth component but depth test is disabled (INVALID_OPERATION)");
}

View File

@ -150,17 +150,27 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
{ "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? "r4" : "h8" },
};
//NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3)
// NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3)
u8 output_index = 0;
const bool float_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support;
const auto reg_type = float_type ? "vec4" : getHalfTypeName(4);
for (uint i = 0; i < std::size(table); ++i)
{
if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
if (!m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second))
{
OS << "layout(location=" << std::to_string(output_index++) << ") " << "out vec4 " << table[i].first << ";\n";
vk_prog->output_color_masks[i] = -1;
continue;
}
if (i >= m_prog.mrt_buffers_count)
{
// Dead writes. Declare as temp variables for DCE to clean up.
OS << "vec4 " << table[i].first << "; // Unused\n";
vk_prog->output_color_masks[i] = 0;
continue;
}
OS << "layout(location=" << std::to_string(output_index++) << ") " << "out vec4 " << table[i].first << ";\n";
vk_prog->output_color_masks[i] = -1;
}
}
@ -308,16 +318,24 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.require_fog_read = properties.in_register_mask & in_fogc;
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = device_props.has_low_precision_rounding && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
m_shader_props.disable_early_discard = !vk::is_NVIDIA(vk::get_driver_vendor());
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low;
m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER);
m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER);
m_shader_props.ROP_alpha_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST);
m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE);
m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE);
m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL);
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj;
m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL);
// Declare global constants
if (m_shader_props.require_fog_read)
@ -336,7 +354,8 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
}
OS <<
"#define texture_base_index _fs_texture_base_index\n\n";
"#define texture_base_index _fs_texture_base_index\n"
"#define TEX_PARAM(index) texture_parameters_##index\n\n";
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}
@ -422,6 +441,16 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS)
if (properties.in_register_mask & in_spec_color)
OS << " vec4 spec_color = gl_FrontFacing ? spec_color1 : spec_color0;\n";
}
for (u16 i = 0, mask = (properties.common_access_sampler_mask | properties.shadow_sampler_mask); mask != 0; ++i, mask >>= 1)
{
if (!(mask & 1))
{
continue;
}
OS << " const sampler_info texture_parameters_" << i << " = texture_parameters[texture_base_index + " << i << "];\n";
}
}
void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)

View File

@ -264,6 +264,7 @@ namespace vk
}
builder << "\n"
"#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n"
"#define IS_TEXTURE_RESIDENT(index) true\n"
"#define SAMPLER1D(index) sampler1D_array[index]\n"
"#define SAMPLER2D(index) sampler2D_array[index]\n"

View File

@ -454,9 +454,17 @@ namespace gcm
RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used??
// Custom
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants
RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x40000, // Compile internals expecting interpreter
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x0010000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x0020000, // Support instance ID offsets when loading constants
RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x0040000, // Compile internals expecting interpreter
RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x0080000, // Quantize outputs to 8-bit FBO
RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x0100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate.
RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x0200000, // Uses alpha kill on texture input
RSX_SHADER_CONTROL_ALPHA_TEST = 0x0400000, // Uses alpha test on the outputs
RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x0800000, // Uses polygon stipple for dithered rendering
RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x1000000, // Alpha to coverage
};
// GCM Reports

View File

@ -716,9 +716,8 @@ namespace rsx
state_signals[NV4097_SET_POINT_SIZE] = rsx::vertex_state_dirty;
state_signals[NV4097_SET_ALPHA_FUNC] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_ALPHA_REF] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_ALPHA_TEST_ENABLE] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_ANTI_ALIASING_CONTROL] = rsx::fragment_state_dirty | rsx::pipeline_config_dirty;
state_signals[NV4097_SET_SHADER_PACKER] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_ALPHA_TEST_ENABLE] = rsx::fragment_program_state_dirty;
state_signals[NV4097_SET_SHADER_PACKER] = rsx::fragment_program_state_dirty;
state_signals[NV4097_SET_SHADER_WINDOW] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_FOG_MODE] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_SCISSOR_HORIZONTAL] = rsx::scissor_config_state_dirty;
@ -733,7 +732,7 @@ namespace rsx
state_signals[NV4097_SET_VIEWPORT_OFFSET + 0] = rsx::vertex_state_dirty;
state_signals[NV4097_SET_VIEWPORT_OFFSET + 1] = rsx::vertex_state_dirty;
state_signals[NV4097_SET_VIEWPORT_OFFSET + 2] = rsx::vertex_state_dirty;
state_signals[NV4097_SET_POLYGON_STIPPLE] = rsx::fragment_state_dirty;
state_signals[NV4097_SET_POLYGON_STIPPLE] = rsx::fragment_program_state_dirty;
state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 0] = rsx::polygon_stipple_pattern_dirty;
state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 1] = rsx::polygon_stipple_pattern_dirty;
state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 2] = rsx::polygon_stipple_pattern_dirty;
@ -1714,6 +1713,7 @@ namespace rsx
bind(NV4097_SET_BLEND_EQUATION, nv4097::set_blend_equation);
bind(NV4097_SET_BLEND_FUNC_SFACTOR, nv4097::set_blend_factor);
bind(NV4097_SET_BLEND_FUNC_DFACTOR, nv4097::set_blend_factor);
bind(NV4097_SET_ANTI_ALIASING_CONTROL, nv4097::set_aa_control);
//NV308A (0xa400..0xbffc!)
bind_array(NV308A_COLOR, 1, 256 * 7, nv308a::color::impl);