video_core: Add patches for 3D Classics shaders (#1593)

This commit is contained in:
PabloMK7 2026-01-09 18:28:40 +01:00 committed by GitHub
parent 7de2d7b846
commit 5c863db0f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 131 additions and 0 deletions

View File

@ -158,6 +158,29 @@ HackManager hack_manager = {
0x0004013020003203, // New 3DS safe mode
},
}},
{HackType::REQUIRES_SHADER_FIXUP,
HackEntry{
.mode = HackAllowMode::FORCE,
.affected_title_ids =
{
// 3D Thunder Blade
0x0004000000128A00, // JPN
0x0004000000158200, // EUR
0x0004000000158C00, // USA
// 3D After Burner II
0x0004000000114200, // JPN
0x0004000000157A00, // EUR
0x0004000000158900, // USA
// 3D Classics
0x0004000000154000, // 1 (JPN)
0x0004000000180E00, // 2 (JPN)
0x000400000019A700, // 2 (EUR)
0x0004000000185E00, // 2 (USA)
0x00040000001AA300, // 3 (JPN)
},
}},
}};
}

View File

@ -70,10 +70,118 @@ u64 ShaderSetup::GetSwizzleDataHash() {
}
void ShaderSetup::DoProgramCodeFixup() {
// This function holds shader fixups that are required for proper emulation of some games. As
// emulation gets more accurate the goal is to have this function as empty as possible.
// WARNING: If the hashing method is changed, the hashes of the different shaders will need
// adjustment.
if (!requires_fixup || !program_code_pending_fixup) {
return;
}
program_code_pending_fixup = false;
auto prepare_for_fixup = [this]() {
memcpy(program_code_fixup.data(), program_code.data(), program_code.size());
has_fixup = true;
program_code_hash_dirty = true;
};
/**
* Affected games:
* Some Sega 3D Classics games.
*
* Problem:
* The geometry shaders used by some of the Sega 3D Classics have a shader
* (gf2_five_color_gshader.vsh) that presents two separate issues.
*
* - The shader does not have an "end" instruction. This causes the execution
* to be unbounded and start running whatever is in the code buffer after the end of the
* program. Usually this is filled with zeroes, which are add instructions that have no effect
* due to no more vertices being emitted. It's not clear what happens when the PC reaches
* 0x3FFC and is incremented. The most likely scenario is that it overflows back to the start,
* where there is an end instruction close by. This causes the game to execute around 4K
* instructions per vertex, which is very slow on emulator.
*
* - The shader relies on a HW bug or quirk that we do not currently understand or implement.
* The game builds a quad (4 vertices) using two inputs, the upper left coordinate, and the
* bottom right coordinate. The generated vertex coordinates are put in output register o0
* before being emitted. Here is the pseudocode of the shader:
* o0.xyzw = leftTop.xyzw
* emit <- Emits the top left vertex
* o0._yzw = leftTop.xyzw
* o0.x___ = rightBottom.xyzw
* emit <- Emits the top right vertex
* o0._y__ = rightBottom.xyzw
* emit <- Emits the bottom left vertex (!)
* o0.xyzw = rightBottom.xyzw
* emit <- Emits the bottom right vertex
*
* This shader code has a bug. When the bottom left vertex is emitted, the y element is
* updated to the bottom coordinate, but the x element is left untouched. One would say that
* since the x element was last set to the RIGHT coordinate, the vertex would end up being
* drawn to the bottom RIGHT instead of the intended bottom LEFT (which is what we observe on
* the emulator). But on real HW, the vertex is drawn to the bottom LEFT instead. This
* suggests a HW bug or quirk that is triggered whenever some elements of an output register
* are not written to between emits. In order for the quad to look proper, the xzw elements
* should somehow keep the contents from the first emit, where the top left coordinate was
* written. The specifics of the HW bug that causes this are unknown.
*
* Solution:
* The following patches are made to fix the shaders:
*
* - An end instruction is inserted at the end of the shader to prevent unbounded execution.
*
* - Before the third vertex is emited and the y element of o0 is adjusted, a mov o0.xywz
* leftTop.xywz instruction is inserted to update the xzw elements of the output register to
* the expected values. This requires making room in the shader code, but luckily there are no
* jumps that need relocation.
*
*/
constexpr u64 SEGA_3D_CLASSICS_THUNDER_BLADE = 0x0797513756f2c8c9;
constexpr u64 SEGA_3D_CLASSICS_AFTER_BURNER = 0x188e959fbe31324d;
constexpr u64 SEGA_3D_CLASSICS_COLLECTION_TB = 0x5954c8e4d13cdd86;
constexpr u64 SEGA_3D_CLASSICS_COLLECTION_PD = 0x27496993b307355b;
const auto fix_3d_classics_common = [this](u32 offset_base, u32 mov_swizzle) {
offset_base /= 4;
// Make some room to insert an instruction
std::memmove(program_code_fixup.data() + offset_base + 1,
program_code_fixup.data() + offset_base, 0x1C);
// mov o0.xyzw v0.xyzw (out.pos <- vLeftTop)
program_code_fixup[offset_base] = 0x4c000000 | mov_swizzle;
// end
program_code_fixup[offset_base + 0x20] = 0x88000000;
// Adjust biggest program size
if (biggest_program_size <= offset_base + 0x20) {
biggest_program_size = offset_base + 0x20 + 1;
}
};
// Select shader fixup
switch (GetProgramCodeHash()) {
case SEGA_3D_CLASSICS_THUNDER_BLADE: {
prepare_for_fixup();
fix_3d_classics_common(0x510, 0xA);
} break;
case SEGA_3D_CLASSICS_AFTER_BURNER: {
prepare_for_fixup();
fix_3d_classics_common(0x50C, 0xA);
} break;
case SEGA_3D_CLASSICS_COLLECTION_TB: {
prepare_for_fixup();
fix_3d_classics_common(0xAE0, 0xC);
} break;
case SEGA_3D_CLASSICS_COLLECTION_PD: {
prepare_for_fixup();
fix_3d_classics_common(0xAF0, 0xC);
} break;
default:
break;
}
}
} // namespace Pica