mirror of
https://github.com/Lime3DS/Lime3DS.git
synced 2026-04-28 05:55:11 -06:00
shader_jit: Emit LG2/EX2 subroutines on-demand (#2046)
Rather than emitting these subroutine functions for _every_ shader, only emit the subroutines when the `LG2` and `EX2` instructions are actually used. This saves a good chunk of memory across all shaders. Inspired by Tanuki3DS.
This commit is contained in:
parent
37b6c91de6
commit
91128d6625
@ -508,6 +508,7 @@ void JitShader::Compile_DPH(Instruction instr) {
|
||||
void JitShader::Compile_EX2(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
STR(X30, SP, POST_INDEXED, -16);
|
||||
exp2_used = true;
|
||||
BL(exp2_subroutine);
|
||||
LDR(X30, SP, PRE_INDEXED, 16);
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
@ -516,6 +517,7 @@ void JitShader::Compile_EX2(Instruction instr) {
|
||||
void JitShader::Compile_LG2(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
STR(X30, SP, POST_INDEXED, -16);
|
||||
log2_used = true;
|
||||
BL(log2_subroutine);
|
||||
LDR(X30, SP, PRE_INDEXED, 16);
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
@ -994,6 +996,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||
// Compile entire program
|
||||
Compile_Block(static_cast<u32>(program_code->size()));
|
||||
|
||||
// Compile utility functions
|
||||
if (log2_used) {
|
||||
Compile_Log2(log2_subroutine);
|
||||
}
|
||||
if (exp2_used) {
|
||||
Compile_Exp2(exp2_subroutine);
|
||||
}
|
||||
|
||||
// Free memory that's no longer needed
|
||||
program_code = nullptr;
|
||||
swizzle_data = nullptr;
|
||||
@ -1021,18 +1031,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||
code_vec.shrink_to_fit();
|
||||
}
|
||||
|
||||
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {
|
||||
CompilePrelude();
|
||||
}
|
||||
|
||||
void JitShader::CompilePrelude() {
|
||||
log2_subroutine = CompilePrelude_Log2();
|
||||
exp2_subroutine = CompilePrelude_Exp2();
|
||||
}
|
||||
|
||||
Label JitShader::CompilePrelude_Log2() {
|
||||
Label subroutine;
|
||||
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {}
|
||||
|
||||
void JitShader::Compile_Log2(Label subroutine) {
|
||||
// We perform this approximation by first performing a range reduction into the range
|
||||
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
|
||||
// evaluated. We multiply the result by (x - 1) then restore the result into the appropriate
|
||||
@ -1136,13 +1137,9 @@ Label JitShader::CompilePrelude_Log2() {
|
||||
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
||||
|
||||
RET();
|
||||
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
Label JitShader::CompilePrelude_Exp2() {
|
||||
Label subroutine;
|
||||
|
||||
void JitShader::Compile_Exp2(Label subroutine) {
|
||||
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax
|
||||
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
|
||||
// result into the appropriate range.
|
||||
@ -1241,8 +1238,6 @@ Label JitShader::CompilePrelude_Exp2() {
|
||||
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
||||
|
||||
RET();
|
||||
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
} // namespace Pica::Shader
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Citra Emulator Project
|
||||
// Copyright Citra Emulator Project / Azahar Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@ -123,9 +123,8 @@ private:
|
||||
/**
|
||||
* Emits data and code for utility functions.
|
||||
*/
|
||||
void CompilePrelude();
|
||||
oaknut::Label CompilePrelude_Log2();
|
||||
oaknut::Label CompilePrelude_Exp2();
|
||||
void Compile_Log2(oaknut::Label subroutine);
|
||||
void Compile_Exp2(oaknut::Label subroutine);
|
||||
|
||||
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
||||
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
||||
@ -146,6 +145,10 @@ private:
|
||||
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
|
||||
CompiledShader* program = nullptr;
|
||||
|
||||
/// Library functions, emitted as used
|
||||
bool log2_used : 1 = false;
|
||||
bool exp2_used : 1 = false;
|
||||
|
||||
oaknut::Label log2_subroutine;
|
||||
oaknut::Label exp2_subroutine;
|
||||
};
|
||||
|
||||
@ -511,12 +511,14 @@ void JitShader::Compile_DPH(Instruction instr) {
|
||||
|
||||
// Emits code for the EX2 instruction: swizzles src1 into SRC1, calls the exp2 subroutine,
// then passes SRC1 to Compile_DestEnable.
void JitShader::Compile_EX2(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
// Record the use so that Compile() emits the exp2 subroutine after the program body;
// the call below refers to a label that is only bound at that point.
exp2_used = true;
|
||||
call(exp2_subroutine);
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
}
|
||||
|
||||
// Emits code for the LG2 instruction: swizzles src1 into SRC1, calls the log2 subroutine,
// then passes SRC1 to Compile_DestEnable.
void JitShader::Compile_LG2(Instruction instr) {
|
||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||
// Record the use so that Compile() emits the log2 subroutine after the program body;
// the call below refers to a label that is only bound at that point.
log2_used = true;
|
||||
call(log2_subroutine);
|
||||
Compile_DestEnable(instr, SRC1);
|
||||
}
|
||||
@ -1038,6 +1040,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||
// Compile entire program
|
||||
Compile_Block(static_cast<u32>(program_code->size()));
|
||||
|
||||
// Compile utility functions
|
||||
if (log2_used) {
|
||||
Compile_Log2(log2_subroutine);
|
||||
}
|
||||
if (exp2_used) {
|
||||
Compile_Exp2(exp2_subroutine);
|
||||
}
|
||||
|
||||
// Free memory that's no longer needed
|
||||
program_code = nullptr;
|
||||
swizzle_data = nullptr;
|
||||
@ -1050,18 +1060,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||
LOG_DEBUG(HW_GPU, "Compiled shader size={}", getSize());
|
||||
}
|
||||
|
||||
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {
|
||||
CompilePrelude();
|
||||
}
|
||||
|
||||
void JitShader::CompilePrelude() {
|
||||
log2_subroutine = CompilePrelude_Log2();
|
||||
exp2_subroutine = CompilePrelude_Exp2();
|
||||
}
|
||||
|
||||
Xbyak::Label JitShader::CompilePrelude_Log2() {
|
||||
Xbyak::Label subroutine;
|
||||
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
|
||||
|
||||
void JitShader::Compile_Log2(Xbyak::Label subroutine) {
|
||||
// SSE does not have a log instruction, thus we must approximate.
|
||||
// We perform this approximation by first performing a range reduction into the range [1.0, 2.0).
|
||||
// A minimax polynomial which was fit for the function log2(x) / (x - 1) is then evaluated.
|
||||
@ -1163,12 +1164,9 @@ Xbyak::Label JitShader::CompilePrelude_Log2() {
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
ret();
|
||||
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
Xbyak::Label JitShader::CompilePrelude_Exp2() {
|
||||
Xbyak::Label subroutine;
|
||||
void JitShader::Compile_Exp2(Xbyak::Label subroutine) {
|
||||
|
||||
// SSE does not have an exp instruction, thus we must approximate.
|
||||
// We perform this approximation by first performing a range reduction into the range [-0.5, 0.5).
|
||||
@ -1271,8 +1269,6 @@ Xbyak::Label JitShader::CompilePrelude_Exp2() {
|
||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
ret();
|
||||
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
} // namespace Pica::Shader
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Copyright Citra Emulator Project / Azahar Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@ -115,9 +115,8 @@ private:
|
||||
/**
|
||||
* Emits data and code for utility functions.
|
||||
*/
|
||||
void CompilePrelude();
|
||||
Xbyak::Label CompilePrelude_Log2();
|
||||
Xbyak::Label CompilePrelude_Exp2();
|
||||
void Compile_Log2(Xbyak::Label subroutine);
|
||||
void Compile_Exp2(Xbyak::Label subroutine);
|
||||
|
||||
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
||||
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
||||
@ -138,6 +137,10 @@ private:
|
||||
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
||||
CompiledShader* program = nullptr;
|
||||
|
||||
/// Library functions, emitted as used
|
||||
bool log2_used : 1 = false;
|
||||
bool exp2_used : 1 = false;
|
||||
|
||||
Xbyak::Label log2_subroutine;
|
||||
Xbyak::Label exp2_subroutine;
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user