shader_jit: Emit LG2/EX2 subroutines on-demand (#2046)

Rather than emitting these subroutine functions for _every_ shader, only emit
the subroutines when the `LG2` and `EX2` instructions are actually used.
This saves a good chunk of memory across all shaders.

Inspired by Tanuki3DS.
This commit is contained in:
Wunk 2026-04-24 11:34:46 -07:00 committed by GitHub
parent 37b6c91de6
commit 91128d6625
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 40 additions and 43 deletions

View File

@ -508,6 +508,7 @@ void JitShader::Compile_DPH(Instruction instr) {
void JitShader::Compile_EX2(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
STR(X30, SP, POST_INDEXED, -16);
exp2_used = true;
BL(exp2_subroutine);
LDR(X30, SP, PRE_INDEXED, 16);
Compile_DestEnable(instr, SRC1);
@ -516,6 +517,7 @@ void JitShader::Compile_EX2(Instruction instr) {
void JitShader::Compile_LG2(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
STR(X30, SP, POST_INDEXED, -16);
log2_used = true;
BL(log2_subroutine);
LDR(X30, SP, PRE_INDEXED, 16);
Compile_DestEnable(instr, SRC1);
@ -994,6 +996,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Compile entire program
Compile_Block(static_cast<u32>(program_code->size()));
// Compile utility functions
if (log2_used) {
Compile_Log2(log2_subroutine);
}
if (exp2_used) {
Compile_Exp2(exp2_subroutine);
}
// Free memory that's no longer needed
program_code = nullptr;
swizzle_data = nullptr;
@ -1021,18 +1031,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
code_vec.shrink_to_fit();
}
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {
CompilePrelude();
}
void JitShader::CompilePrelude() {
log2_subroutine = CompilePrelude_Log2();
exp2_subroutine = CompilePrelude_Exp2();
}
Label JitShader::CompilePrelude_Log2() {
Label subroutine;
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {}
void JitShader::Compile_Log2(Label subroutine) {
// We perform this approximation by first performing a range reduction into the range
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
// evaluated. We multiply the result by (x - 1) then restore the result into the appropriate
@ -1136,13 +1137,9 @@ Label JitShader::CompilePrelude_Log2() {
DUP(SRC1.S4(), SRC1.Selem()[0]);
RET();
return subroutine;
}
Label JitShader::CompilePrelude_Exp2() {
Label subroutine;
void JitShader::Compile_Exp2(Label subroutine) {
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
// result into the appropriate range.
@ -1241,8 +1238,6 @@ Label JitShader::CompilePrelude_Exp2() {
DUP(SRC1.S4(), SRC1.Selem()[0]);
RET();
return subroutine;
}
} // namespace Pica::Shader

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -123,9 +123,8 @@ private:
/**
* Emits data and code for utility functions.
*/
void CompilePrelude();
oaknut::Label CompilePrelude_Log2();
oaknut::Label CompilePrelude_Exp2();
void Compile_Log2(oaknut::Label subroutine);
void Compile_Exp2(oaknut::Label subroutine);
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
@ -146,6 +145,10 @@ private:
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
CompiledShader* program = nullptr;
/// Library functions, emitted as used
bool log2_used : 1 = false;
bool exp2_used : 1 = false;
oaknut::Label log2_subroutine;
oaknut::Label exp2_subroutine;
};

View File

@ -511,12 +511,14 @@ void JitShader::Compile_DPH(Instruction instr) {
/// Emits code for the EX2 shader instruction.
///
/// The exp2 helper routine is not emitted here: this only flags it as required and emits a call
/// to its label. JitShader::Compile() emits the routine once, after the program body, when
/// exp2_used is set.
void JitShader::Compile_EX2(Instruction instr) {
// Place source operand 1 (instr.common.src1) in SRC1, the register handed to the helper routine.
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
// Request on-demand emission of the exp2 routine for this shader.
exp2_used = true;
// The label is referenced before the routine exists; presumably Compile_Exp2() binds it later.
call(exp2_subroutine);
// The result is taken from SRC1 and handed to the destination write.
Compile_DestEnable(instr, SRC1);
}
/// Emits code for the LG2 shader instruction.
///
/// The log2 helper routine is not emitted here: this only flags it as required and emits a call
/// to its label. JitShader::Compile() emits the routine once, after the program body, when
/// log2_used is set.
void JitShader::Compile_LG2(Instruction instr) {
// Place source operand 1 (instr.common.src1) in SRC1, the register handed to the helper routine.
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
// Request on-demand emission of the log2 routine for this shader.
log2_used = true;
// The label is referenced before the routine exists; presumably Compile_Log2() binds it later.
call(log2_subroutine);
// The result is taken from SRC1 and handed to the destination write.
Compile_DestEnable(instr, SRC1);
}
@ -1038,6 +1040,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Compile entire program
Compile_Block(static_cast<u32>(program_code->size()));
// Compile utility functions
if (log2_used) {
Compile_Log2(log2_subroutine);
}
if (exp2_used) {
Compile_Exp2(exp2_subroutine);
}
// Free memory that's no longer needed
program_code = nullptr;
swizzle_data = nullptr;
@ -1050,18 +1060,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
LOG_DEBUG(HW_GPU, "Compiled shader size={}", getSize());
}
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {
CompilePrelude();
}
void JitShader::CompilePrelude() {
log2_subroutine = CompilePrelude_Log2();
exp2_subroutine = CompilePrelude_Exp2();
}
Xbyak::Label JitShader::CompilePrelude_Log2() {
Xbyak::Label subroutine;
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
void JitShader::Compile_Log2(Xbyak::Label subroutine) {
// SSE does not have a log instruction, thus we must approximate.
// We perform this approximation by first performing a range reduction into the range [1.0, 2.0).
// A minimax polynomial which was fit for the function log2(x) / (x - 1) is then evaluated.
@ -1163,12 +1164,9 @@ Xbyak::Label JitShader::CompilePrelude_Log2() {
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
ret();
return subroutine;
}
Xbyak::Label JitShader::CompilePrelude_Exp2() {
Xbyak::Label subroutine;
void JitShader::Compile_Exp2(Xbyak::Label subroutine) {
// SSE does not have an exp instruction, thus we must approximate.
// We perform this approximation by first performing a range reduction into the range [-0.5, 0.5).
@ -1271,8 +1269,6 @@ Xbyak::Label JitShader::CompilePrelude_Exp2() {
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
ret();
return subroutine;
}
} // namespace Pica::Shader

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -115,9 +115,8 @@ private:
/**
* Emits data and code for utility functions.
*/
void CompilePrelude();
Xbyak::Label CompilePrelude_Log2();
Xbyak::Label CompilePrelude_Exp2();
void Compile_Log2(Xbyak::Label subroutine);
void Compile_Exp2(Xbyak::Label subroutine);
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
@ -138,6 +137,10 @@ private:
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr;
/// Library functions, emitted as used
bool log2_used : 1 = false;
bool exp2_used : 1 = false;
Xbyak::Label log2_subroutine;
Xbyak::Label exp2_subroutine;
};