Compare commits

...

12 Commits

Author SHA1 Message Date
jbm11208
a6e918d4e6
Merge 641691c497 into 000530c028 2026-04-06 10:08:19 +00:00
SiniKraft
000530c028 android : Fix navigation bar overlapping the Show Home Menu apps button 2026-04-06 11:25:14 +02:00
SiniKraft
df05b5f3db android : Fix emulation exit showing an Invalid Rom Format error 2026-04-06 11:25:14 +02:00
Wunkolo
06a535f50e shader_jit: Add SETEMIT unit test 2026-04-05 23:02:56 +02:00
Wunkolo
4cbd75b413 shader_jit: Optimize GeometryEmitter SETEMIT state
The `SETEMIT`/`SETE` instruction only encodes 4 bits of state, but this is
currently expanded into three separate bytes of data (four with padding), which
requires three separate byte-writes from the x64 and a64 JITs. These 4 bits can
instead be packed into a single byte at JIT-time and stored with a single
1-byte write; unpacking this data is then done by `GeometryEmitter::Emit`.
This also allows the serializer to use a single byte for all 3 fields.
2026-04-05 23:02:56 +02:00
Wunk
60f331b43b
cmake: Allow Catch test discovery (#1997)
Allows individual unit-tests to be discovered, tested, and debugged by IDEs without having to run _all_ of the unit-tests just to debug one specific test.
2026-04-05 23:01:05 +02:00
OpenSauce04
ba6f8cb744 Disable Vulkan renderer on NetBSD because OS Vulkan support doesn't exist 2026-04-05 13:11:40 +01:00
OpenSauce04
118579adb3 Fixed launch failures on NetBSD due to PaX MPROTECT restrictions 2026-04-05 13:11:40 +01:00
OpenSauce04
23393904e0 Fixed NetBSD build issues
- Added missing include
- Fixed X11 include directory not being included
2026-04-05 13:11:40 +01:00
jbm11208
641691c497
Merge branch 'master' into Shader-JIT-Multithreading 2026-01-18 15:51:44 -05:00
jbm11208
88d63eb176 Remove Unused Function 2025-11-30 18:46:51 +00:00
jbm11208
9a3ab6a2db Shader JIT Multithreading 2025-11-30 18:46:51 +00:00
18 changed files with 233 additions and 46 deletions

View File

@ -126,7 +126,8 @@ CMAKE_DEPENDENT_OPTION(ENABLE_LIBUSB "Enable libusb for GameCube Adapter support
CMAKE_DEPENDENT_OPTION(ENABLE_SOFTWARE_RENDERER "Enables the software renderer" ON "NOT ANDROID" OFF)
CMAKE_DEPENDENT_OPTION(ENABLE_OPENGL "Enables the OpenGL renderer" ${DEFAULT_ENABLE_OPENGL} "NOT APPLE" OFF)
option(ENABLE_VULKAN "Enables the Vulkan renderer" ON)
# NetBSD doesn't support Vulkan yet, remove this check when it does.
CMAKE_DEPENDENT_OPTION(ENABLE_VULKAN "Enables the Vulkan renderer" ON "NOT (BSD MATCHES \"NetBSD\")" OFF)
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)

View File

@ -0,0 +1,11 @@
# Adds a POST_BUILD step to `target` that runs `paxctl +m` on the built file,
# which disables PaX MPROTECT restrictions for that binary.
#
# This helper is NetBSD-only. Calling it on any other platform is treated as a
# build-script error and aborts configuration.
function(disable_pax_mprotect target)
    if (NOT (BSD STREQUAL "NetBSD"))
        message(FATAL_ERROR "disable_pax_mprotect only applies on NetBSD.")
    else()
        add_custom_command(
            TARGET ${target}
            POST_BUILD
            COMMAND paxctl +m "$<TARGET_FILE:${target}>"
            COMMENT "Disabling PaX MPROTECT restrictions for '${target}'"
            VERBATIM
        )
    endif()
endfunction()

View File

@ -60,6 +60,7 @@ if (ENABLE_TESTS)
add_subdirectory(catch2)
endif()
target_link_libraries(catch2 INTERFACE Catch2::Catch2WithMain)
include(Catch)
endif()
# Crypto++
@ -499,6 +500,15 @@ if (ENABLE_VULKAN)
else()
target_include_directories(vulkan-headers INTERFACE ./vulkan-headers/include)
target_disable_warnings(vulkan-headers)
if (BSD STREQUAL "NetBSD")
# There may be a better way to do this with
# find_package(X11), but I couldn't get
# CMake to do it, so we're depending on
# the x11-links package and assuming the
# prefix location. -OS
target_include_directories(vulkan-headers INTERFACE
/usr/pkg/share/x11-links/include)
endif()
endif()
# adrenotools
@ -519,4 +529,4 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|armv8")
else()
target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_SCALAR)
message(STATUS "Disabling SIMD for xxHash")
endif()
endif()

View File

@ -504,8 +504,9 @@ object NativeLibrary {
const val ErrorSystemFiles = 8
const val ErrorSavestate = 9
const val ErrorArticDisconnected = 10
const val ShutdownRequested = 11
const val ErrorUnknown = 12
const val ErrorN3DSApplication = 11
const val ShutdownRequested = 12
const val ErrorUnknown = 13
fun newInstance(resultCode: Int): EmulationErrorDialogFragment {
val args = Bundle()

View File

@ -5,13 +5,13 @@
android:id="@+id/coordinator_about"
android:layout_width="match_parent"
android:layout_height="match_parent"
android:fitsSystemWindows="true"
android:background="?attr/colorSurface">
<com.google.android.material.appbar.AppBarLayout
android:id="@+id/appbar_about"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:fitsSystemWindows="true">
android:layout_height="wrap_content">
<com.google.android.material.appbar.MaterialToolbar
android:id="@+id/toolbar_system_files"

View File

@ -31,6 +31,11 @@ add_library(azahar_libretro SHARED
create_target_directory_groups(azahar_libretro)
# NetBSD only: run `paxctl +m` on the built library after linking (via the
# DisablePaxMprotect module) so PaX MPROTECT restrictions are disabled for it.
# NOTE(review): azahar_libretro is a SHARED library; confirm that flags set on
# the shared object (rather than on the frontend executable that loads it)
# actually take effect.
if (BSD STREQUAL "NetBSD")
include(DisablePaxMprotect)
disable_pax_mprotect(azahar_libretro)
endif()
target_link_libraries(citra_common PRIVATE libretro)
target_link_libraries(citra_core PRIVATE libretro)
target_link_libraries(video_core PRIVATE libretro)

View File

@ -6,6 +6,11 @@ add_executable(citra_meta
set_target_properties(citra_meta PROPERTIES OUTPUT_NAME "azahar")
# NetBSD only: run `paxctl +m` on the built executable after linking (via the
# DisablePaxMprotect module) so PaX MPROTECT restrictions are disabled for it.
if (BSD STREQUAL "NetBSD")
include(DisablePaxMprotect)
disable_pax_mprotect(citra_meta)
endif()
if (APPLE)
set(DIST_DIR "../../dist/apple")
set(APPLE_RESOURCES

View File

@ -4,6 +4,11 @@ add_executable(citra_room_standalone
set_target_properties(citra_room_standalone PROPERTIES OUTPUT_NAME "azahar-room")
# NetBSD only: run `paxctl +m` on the built executable after linking (via the
# DisablePaxMprotect module) so PaX MPROTECT restrictions are disabled for it.
if (BSD STREQUAL "NetBSD")
include(DisablePaxMprotect)
disable_pax_mprotect(citra_room_standalone)
endif()
target_link_libraries(citra_room_standalone PRIVATE citra_room)
if(UNIX AND NOT APPLE)

View File

@ -23,6 +23,11 @@ add_executable(tests
create_target_directory_groups(tests)
# NetBSD only: run `paxctl +m` on the built test executable after linking (via
# the DisablePaxMprotect module) so PaX MPROTECT restrictions are disabled for it.
if (BSD STREQUAL "NetBSD")
include(DisablePaxMprotect)
disable_pax_mprotect(tests)
endif()
target_link_libraries(tests PRIVATE citra_common citra_core video_core audio_core)
target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} catch2 nihstro-headers Threads::Threads)
@ -31,6 +36,10 @@ if (ENABLE_LIBRETRO)
endif()
add_test(NAME tests COMMAND tests)
# Register each Catch2 test case of the `tests` target individually, so that a
# single test can be run or debugged without running the whole suite.
# NOTE(review): presumably skipped on Android because test discovery has to
# execute the freshly built test binary on the build host -- confirm.
if(NOT ANDROID)
catch_discover_tests(tests)
endif()
if (CITRA_USE_PRECOMPILED_HEADERS)
target_precompile_headers(tests PRIVATE precompiled_headers.h)

View File

@ -481,6 +481,39 @@ SHADER_TEST_CASE("RSQ", "[video_core][shader]") {
REQUIRE(shader.Run({0.0625f}).x == Catch::Approx(4.0f).margin(0.004f));
}
SHADER_TEST_CASE("SETEMIT", "[video_core][shader]") {
    Pica::GeometryEmitter geometry_emitter;

    // Enumerate all 16 (winding, prim_emit, vertex_id) combinations with winding
    // varying slowest and vertex_id fastest. The bit split of `combo` below is
    // only how this test walks the cases; it is not the instruction encoding.
    for (u8 combo = 0; combo < 16; ++combo) {
        const u8 winding = static_cast<u8>((combo >> 3) & 1);
        const u8 prim_emit = static_cast<u8>((combo >> 2) & 1);
        const u8 vertex_id = static_cast<u8>(combo & 3);

        // nihstro does not support the SETEMIT instruction, so a NOP placeholder
        // is assembled in slot 0 and the instruction binary is patched in manually.
        auto shader_setup = CompileShaderSetup({
            {OpCode::Id::NOP}, // replaced by SETEMIT below
            {OpCode::Id::END},
        });

        nihstro::Instruction setemit_instr = {};
        setemit_instr.opcode = nihstro::OpCode(nihstro::OpCode::Id::SETEMIT);
        setemit_instr.setemit.winding.Assign(winding);
        setemit_instr.setemit.prim_emit.Assign(prim_emit);
        setemit_instr.setemit.vertex_id.Assign(vertex_id);
        shader_setup->UpdateProgramCode(0, setemit_instr.hex);

        auto shader = TestType(std::move(shader_setup));

        Pica::ShaderUnit shader_unit(&geometry_emitter);
        shader.Run(shader_unit, 1.0f);

        // The emitter must hold exactly the state encoded in the instruction.
        REQUIRE(geometry_emitter.emit_state.winding == winding);
        REQUIRE(geometry_emitter.emit_state.prim_emit == prim_emit);
        REQUIRE(geometry_emitter.emit_state.vertex_id == vertex_id);
    }
}
SHADER_TEST_CASE("Uniform Read", "[video_core][shader]") {
const auto sh_input = SourceRegister::MakeInput(0);
const auto sh_c0 = SourceRegister::MakeFloat(0);

View File

@ -1,9 +1,10 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <bit>
#include <boost/serialization/binary_object.hpp>
#include "common/vector_math.h"

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -29,15 +29,15 @@ void ShaderUnit::WriteOutput(const ShaderRegs& config, AttributeBuffer& buffer)
}
void GeometryEmitter::Emit(std::span<Common::Vec4<f24>, 16> output_regs) {
ASSERT(vertex_id < 3);
ASSERT(emit_state.vertex_id < 3);
u32 output_index{};
for (u32 reg : Common::BitSet<u32>(output_mask)) {
buffer[vertex_id][output_index++] = output_regs[reg];
buffer[emit_state.vertex_id][output_index++] = output_regs[reg];
}
if (prim_emit) {
if (winding) {
if (emit_state.prim_emit) {
if (emit_state.winding) {
handlers->winding_setter();
}
for (std::size_t i = 0; i < buffer.size(); ++i) {

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -75,11 +75,18 @@ struct GeometryEmitter {
void Emit(std::span<Common::Vec4<f24>, 16> output_regs);
public:
std::array<AttributeBuffer, 3> buffer;
u8 vertex_id;
bool prim_emit;
bool winding;
union EmitState {
struct {
bool winding : 1;
bool prim_emit : 1;
u8 vertex_id : 2;
};
u8 raw;
} emit_state;
static_assert(sizeof(emit_state) == 1);
u32 output_mask;
std::array<AttributeBuffer, 3> buffer;
Handlers* handlers;
private:
@ -87,9 +94,7 @@ private:
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar & buffer;
ar & vertex_id;
ar & prim_emit;
ar & winding;
ar & emit_state.raw;
ar & output_mask;
}
};

View File

@ -671,9 +671,9 @@ static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state,
case OpCode::Id::SETEMIT: {
auto* emitter = state.emitter_ptr;
ASSERT_MSG(emitter, "Execute SETEMIT on VS");
emitter->vertex_id = instr.setemit.vertex_id;
emitter->prim_emit = instr.setemit.prim_emit != 0;
emitter->winding = instr.setemit.winding != 0;
emitter->emit_state.vertex_id = instr.setemit.vertex_id;
emitter->emit_state.prim_emit = instr.setemit.prim_emit != 0;
emitter->emit_state.winding = instr.setemit.winding != 0;
break;
}

View File

@ -16,11 +16,54 @@
#if CITRA_ARCH(x86_64)
#include "video_core/shader/shader_jit_x64_compiler.h"
#endif
#include <future>
namespace Pica::Shader {
JitEngine::JitEngine() = default;
JitEngine::~JitEngine() = default;
// Allocates the (not yet compiled) stub shader and starts the worker pool.
//
// std::thread::hardware_concurrency() is allowed to return 0 when the number of
// hardware threads cannot be determined. Fall back to a single worker in that
// case so the pool is never empty and a queued job cannot wait forever.
JitEngine::JitEngine() {
    stub_shader = std::make_unique<JitShader>();
    // Optionally, compile a minimal stub shader here if needed

    const unsigned int detected_threads = std::thread::hardware_concurrency();
    StartThreadPool(detected_threads != 0 ? detected_threads : 1);
}

// Stops and joins every worker thread before the members they use are destroyed.
JitEngine::~JitEngine() {
    StopThreadPool();
}
// Clears the stop flag and launches `num_threads` workers, each of which runs
// ThreadWorker() on this engine.
void JitEngine::StartThreadPool(size_t num_threads) {
    stop_threads = false;
    for (size_t spawned = 0; spawned != num_threads; ++spawned) {
        thread_pool.emplace_back(&JitEngine::ThreadWorker, this);
    }
}
void JitEngine::StopThreadPool() {
{
std::lock_guard<std::mutex> lock(queue_mutex);
stop_threads = true;
}
queue_cv.notify_all();
for (auto& t : thread_pool) {
if (t.joinable())
t.join();
}
thread_pool.clear();
}
// Worker loop: sleeps until a job is queued or a stop is requested, then runs
// jobs one at a time outside the queue lock. Jobs already in the queue when a
// stop is requested are still executed before the worker returns.
void JitEngine::ThreadWorker() {
    for (;;) {
        std::function<void()> next_job;
        {
            std::unique_lock<std::mutex> guard(queue_mutex);
            while (!stop_threads && compile_queue.empty()) {
                queue_cv.wait(guard);
            }
            // The loop above only exits with an empty queue when stop_threads is
            // set, so an empty queue here means it is time to shut down.
            if (compile_queue.empty()) {
                return;
            }
            next_job = std::move(compile_queue.front());
            compile_queue.pop();
        }
        next_job();
    }
}
void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
@ -29,23 +72,54 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
setup.DoProgramCodeFixup();
const u64 code_hash = setup.GetProgramCodeHash();
const u64 swizzle_hash = setup.GetSwizzleDataHash();
const u64 cache_key = Common::HashCombine(code_hash, swizzle_hash);
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
setup.cached_shader = iter->second.get();
} else {
auto shader = std::make_unique<JitShader>();
shader->Compile(&setup.GetProgramCode(), &setup.GetSwizzleData());
setup.cached_shader = shader.get();
cache.emplace_hint(iter, cache_key, std::move(shader));
std::shared_future<std::unique_ptr<JitShader>> shader_future;
{
std::lock_guard<std::mutex> lock(cache_mutex);
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
shader_future = iter->second;
UpdateLRU(cache_key);
} else {
// Compile synchronously and store the result
auto shader = std::make_unique<JitShader>();
shader->Compile(&setup.GetProgramCode(), &setup.GetSwizzleData());
auto ready_future = std::make_shared<std::promise<std::unique_ptr<JitShader>>>();
ready_future->set_value(std::move(shader));
shader_future = ready_future->get_future().share();
cache[cache_key] = shader_future;
lru_list.push_front(cache_key);
}
}
// Wait for the shader to be ready (if compiling in background)
setup.cached_shader = shader_future.get().get();
}
void JitEngine::EvictLRU() {
if (lru_list.empty()) {
return;
}
const u64 key = lru_list.back();
lru_list.pop_back();
cache.erase(key);
}
// Marks `key` as most recently used by placing it at the front of lru_list.
// A key that is not yet tracked is inserted; one that is already tracked is
// moved (only its first occurrence, found by a linear search).
void JitEngine::UpdateLRU(u64 key) {
    const auto pos = std::find(lru_list.begin(), lru_list.end(), key);
    if (pos == lru_list.end()) {
        lru_list.push_front(key);
        return;
    }
    // Relink the existing node at the head rather than erasing and re-inserting.
    lru_list.splice(lru_list.begin(), lru_list, pos);
}
MICROPROFILE_DECLARE(GPU_Shader);
void JitEngine::Run(const ShaderSetup& setup, ShaderUnit& state) const {
ASSERT(setup.cached_shader != nullptr);
// Null check: skip draw if shader is not ready
if (!setup.cached_shader) {
return;
}
MICROPROFILE_SCOPE(GPU_Shader);

View File

@ -1,4 +1,4 @@
// Copyright 2016 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -7,8 +7,14 @@
#include "common/arch.h"
#if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
#include <condition_variable>
#include <functional>
#include <future>
#include <list>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
@ -25,7 +31,24 @@ public:
void Run(const ShaderSetup& setup, ShaderUnit& state) const override;
private:
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
static constexpr size_t MAX_CACHE_SIZE = 1000; // Maximum number of shaders to cache
std::unordered_map<u64, std::shared_future<std::unique_ptr<JitShader>>> cache;
std::list<u64> lru_list; // Track LRU order of shaders
mutable std::mutex cache_mutex;
// Parallel compilation support
std::vector<std::thread> thread_pool;
std::queue<std::function<void()>> compile_queue;
std::mutex queue_mutex;
std::condition_variable queue_cv;
bool stop_threads = false;
std::unique_ptr<JitShader> stub_shader;
void EvictLRU();
void UpdateLRU(u64 key);
void ThreadWorker();
void StartThreadPool(size_t num_threads);
void StopThreadPool();
};
} // namespace Pica::Shader

View File

@ -865,12 +865,13 @@ void JitShader::Compile_SETE(Instruction instr) {
l(have_emitter);
MOV(XSCRATCH1.toW(), instr.setemit.vertex_id);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, vertex_id)));
MOV(XSCRATCH1.toW(), instr.setemit.prim_emit);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, prim_emit)));
MOV(XSCRATCH1.toW(), instr.setemit.winding);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, winding)));
const GeometryEmitter::EmitState new_state{
.winding = instr.setemit.winding != 0,
.prim_emit = instr.setemit.prim_emit != 0,
.vertex_id = static_cast<uint8_t>(instr.setemit.vertex_id),
};
MOV(XSCRATCH1.toW(), new_state.raw);
STRB(XSCRATCH1.toW(), XSCRATCH0, u32(offsetof(GeometryEmitter, emit_state)));
l(end);
}

View File

@ -905,9 +905,12 @@ void JitShader::Compile_SETE(Instruction instr) {
jmp(end);
L(have_emitter);
mov(byte[rax + offsetof(GeometryEmitter, vertex_id)], instr.setemit.vertex_id);
mov(byte[rax + offsetof(GeometryEmitter, prim_emit)], instr.setemit.prim_emit);
mov(byte[rax + offsetof(GeometryEmitter, winding)], instr.setemit.winding);
const GeometryEmitter::EmitState new_state{
.winding = instr.setemit.winding != 0,
.prim_emit = instr.setemit.prim_emit != 0,
.vertex_id = static_cast<uint8_t>(instr.setemit.vertex_id),
};
mov(byte[rax + offsetof(GeometryEmitter, emit_state)], new_state.raw);
L(end);
}