Compare commits

...

5 Commits

Author SHA1 Message Date
kd-11
a7dcd0a30e
Merge a7e0e4d76e into cf87f24587 2025-12-15 14:28:29 +01:00
Megamouse
cf87f24587 cellGem: improve bayer demosaicing
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
2025-12-15 14:20:34 +01:00
Megamouse
12a3818fcf Fix logging of gem configs 2025-12-15 14:20:34 +01:00
kd-11
d9da5f26c0 vk: Ignore memory pressure handling when allocating data heaps into ReBAR memory. 2025-12-15 11:53:47 +00:00
kd-11
1a3e150a62 vk: Extend memory allocation system to explicitly allow requesting no VRAM recovery on allocation fail. 2025-12-15 11:53:47 +00:00
8 changed files with 229 additions and 101 deletions

View File

@ -626,9 +626,9 @@ public:
cellGem.notice("Could not load mouse gem config. Using defaults."); cellGem.notice("Could not load mouse gem config. Using defaults.");
} }
cellGem.notice("Real gem config=\n", g_cfg_gem_real.to_string()); cellGem.notice("Real gem config=%s", g_cfg_gem_real.to_string());
cellGem.notice("Fake gem config=\n", g_cfg_gem_fake.to_string()); cellGem.notice("Fake gem config=%s", g_cfg_gem_fake.to_string());
cellGem.notice("Mouse gem config=\n", g_cfg_gem_mouse.to_string()); cellGem.notice("Mouse gem config=%s", g_cfg_gem_mouse.to_string());
} }
}; };
@ -719,43 +719,120 @@ namespace gem
constexpr u32 in_pitch = 640; constexpr u32 in_pitch = 640;
constexpr u32 out_pitch = 640 * 4; constexpr u32 out_pitch = 640 * 4;
for (u32 y = 0; y < 480 - 1; y += 2) // HamiltonAdams demosaicing
for (s32 y = 0; y < 480; y++)
{ {
const u8* src0 = src + y * in_pitch; const bool is_even_y = (y % 2) == 0;
const u8* src1 = src0 + in_pitch; const u8* srcc = src + y * in_pitch;
const u8* srcu = src + std::max(0, y - 1) * in_pitch;
const u8* srcd = src + std::min(480 - 1, y + 1) * in_pitch;
u8* dst0 = dst + y * out_pitch; u8* dst0 = dst + y * out_pitch;
u8* dst1 = dst0 + out_pitch;
for (u32 x = 0; x < 640 - 1; x += 2, src0 += 2, src1 += 2, dst0 += 8, dst1 += 8) // Split loops (roughly twice the performance by removing one condition)
if (is_even_y)
{ {
u8 b = src0[0]; for (s32 x = 0; x < 640; x++, dst0 += 4)
u8 g0 = src0[1];
u8 g1 = src1[0];
u8 r = src1[1];
if constexpr (use_gain)
{ {
b = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f)); const bool is_even_x = (x % 2) == 0;
g0 = static_cast<u8>(std::clamp(g0 * gain_g, 0.0f, 255.0f)); const int xl = std::max(0, x - 1);
g1 = static_cast<u8>(std::clamp(g1 * gain_g, 0.0f, 255.0f)); const int xr = std::min(640 - 1, x + 1);
r = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
u8 r, b, g;
if (is_even_x)
{
// Blue pixel
const u8 up = srcu[x];
const u8 down = srcd[x];
const u8 left = srcc[xl];
const u8 right = srcc[xr];
const int dh = std::abs(int(left) - int(right));
const int dv = std::abs(int(up) - int(down));
r = (srcu[xl] + srcu[xr] + srcd[xl] + srcd[xr]) / 4;
if (dh < dv)
g = (left + right) / 2;
else if (dv < dh)
g = (up + down) / 2;
else
g = (up + down + left + right) / 4;
b = srcc[x];
}
else
{
// Green (on blue row)
r = (srcu[x] + srcd[x]) / 2;
g = srcc[x];
b = (srcc[xl] + srcc[xr]) / 2;
}
// Write the interpolated pixel as R, G, B into bytes 0..2 of the output texel.
// Both branches must use the same channel order: green goes to byte 1 and
// blue to byte 2, each scaled by its own gain.
if constexpr (use_gain)
{
	// Apply the per-channel gain in float, clamp to the u8 range, then narrow.
	dst0[0] = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
	dst0[1] = static_cast<u8>(std::clamp(g * gain_g, 0.0f, 255.0f));
	dst0[2] = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f));
}
else
{
	dst0[0] = r;
	dst0[1] = g;
	dst0[2] = b;
}
dst0[3] = alpha;
} }
}
else
{
for (s32 x = 0; x < 640; x++, dst0 += 4)
{
const bool is_even_x = (x % 2) == 0;
const int xl = std::max(0, x - 1);
const int xr = std::min(640 - 1, x + 1);
const u8 top[4] = { r, g0, b, alpha }; u8 r, b, g;
const u8 bottom[4] = { r, g1, b, alpha };
// Top-Left if (is_even_x)
std::memcpy(dst0, top, 4); {
// Green (on red row)
r = (srcc[xl] + srcc[xr]) / 2;
g = srcc[x];
b = (srcu[x] + srcd[x]) / 2;
}
else
{
// Red pixel
const u8 up = srcu[x];
const u8 down = srcd[x];
const u8 left = srcc[xl];
const u8 right = srcc[xr];
const int dh = std::abs(int(left) - int(right));
const int dv = std::abs(int(up) - int(down));
// Top-Right Pixel r = srcc[x];
std::memcpy(dst0 + 4, top, 4); if (dh < dv)
g = (left + right) / 2;
else if (dv < dh)
g = (up + down) / 2;
else
g = (up + down + left + right) / 4;
b = (srcu[xl] + srcu[xr] + srcd[xl] + srcd[xr]) / 4;
}
// Bottom-Left Pixel if constexpr (use_gain)
std::memcpy(dst1, bottom, 4); {
dst0[0] = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
// Bottom-Right Pixel dst0[1] = static_cast<u8>(std::clamp(g * gain_g, 0.0f, 255.0f));
std::memcpy(dst1 + 4, bottom, 4); dst0[2] = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f));
}
else
{
dst0[0] = r;
dst0[1] = g;
dst0[2] = b;
}
dst0[3] = alpha;
}
} }
} }
} }

View File

@ -50,6 +50,7 @@ namespace vk
: m_device(dev) : m_device(dev)
{ {
const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3); const bool nullable = !!(flags & VK_BUFFER_CREATE_ALLOW_NULL_RPCS3);
const bool no_vmem_recovery = !!(flags & VK_BUFFER_CREATE_IGNORE_VMEM_PRESSURE_RPCS3);
info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3; info.flags = flags & ~VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3;
@ -69,18 +70,27 @@ namespace vk
fmt::throw_exception("No compatible memory type was found!"); fmt::throw_exception("No compatible memory type was found!");
} }
memory = std::make_unique<memory_block>(m_device, memory_reqs.size, memory_reqs.alignment, allocation_type_info, allocation_pool, nullable); memory_allocation_request request
{
.size = memory_reqs.size,
.alignment = memory_reqs.alignment,
.memory_type = &allocation_type_info,
.pool = allocation_pool,
.throw_on_fail = !nullable,
.recover_vmem_on_fail = !no_vmem_recovery
};
memory = std::make_unique<memory_block>(m_device, request);
if (auto device_memory = memory->get_vk_device_memory(); if (auto device_memory = memory->get_vk_device_memory();
device_memory != VK_NULL_HANDLE) device_memory != VK_NULL_HANDLE)
{ {
vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset()); vkBindBufferMemory(dev, value, device_memory, memory->get_vk_device_memory_offset());
return;
} }
else
{ ensure(nullable);
ensure(nullable); vkDestroyBuffer(m_device, value, nullptr);
vkDestroyBuffer(m_device, value, nullptr); value = VK_NULL_HANDLE;
value = VK_NULL_HANDLE;
}
} }
buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size) buffer::buffer(const vk::render_device& dev, VkBufferUsageFlags usage, void* host_pointer, u64 size)

View File

@ -9,9 +9,10 @@ namespace vk
{ {
enum : u32 enum : u32
{ {
VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x80000000, VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 = 0x10000000, // If we cannot allocate memory for the buffer, just return an empty but valid object with a null handle.
VK_BUFFER_CREATE_IGNORE_VMEM_PRESSURE_RPCS3 = 0x20000000, // If we cannot allocate memory for the buffer, do not run recovery routine to recover VRAM. Crash or return empty handle immediately instead.
VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3) VK_BUFFER_CREATE_SPECIAL_FLAGS_RPCS3 = (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 | VK_BUFFER_CREATE_IGNORE_VMEM_PRESSURE_RPCS3)
}; };
struct buffer_view : public unique_resource struct buffer_view : public unique_resource

View File

@ -53,7 +53,7 @@ namespace vk
VkFlags create_flags = 0; VkFlags create_flags = 0;
if (m_prefer_writethrough) if (m_prefer_writethrough)
{ {
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; create_flags |= (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 | VK_BUFFER_CREATE_IGNORE_VMEM_PRESSURE_RPCS3);
} }
heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); heap = std::make_unique<buffer>(*g_render_device, size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);
@ -146,7 +146,7 @@ namespace vk
VkFlags create_flags = 0; VkFlags create_flags = 0;
if (m_prefer_writethrough) if (m_prefer_writethrough)
{ {
create_flags |= VK_BUFFER_CREATE_ALLOW_NULL_RPCS3; create_flags |= (VK_BUFFER_CREATE_ALLOW_NULL_RPCS3 | VK_BUFFER_CREATE_IGNORE_VMEM_PRESSURE_RPCS3);
} }
heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM); heap = std::make_unique<buffer>(*g_render_device, aligned_new_size, memory_index, memory_flags, usage, create_flags, VMM_ALLOCATION_POOL_SYSTEM);

View File

@ -128,7 +128,16 @@ namespace vk
fmt::throw_exception("No compatible memory type was found!"); fmt::throw_exception("No compatible memory type was found!");
} }
memory = std::make_shared<vk::memory_block>(m_device, memory_req.size, memory_req.alignment, allocation_type_info, allocation_pool, nullable); memory_allocation_request alloc_request
{
.size = memory_req.size,
.alignment = memory_req.alignment,
.memory_type = &allocation_type_info,
.pool = allocation_pool,
.throw_on_fail = !nullable
};
memory = std::make_shared<vk::memory_block>(m_device, alloc_request);
if (auto device_mem = memory->get_vk_device_memory(); if (auto device_mem = memory->get_vk_device_memory();
device_mem != VK_NULL_HANDLE) [[likely]] device_mem != VK_NULL_HANDLE) [[likely]]
{ {

View File

@ -224,7 +224,7 @@ namespace vk
vmaDestroyAllocator(m_allocator); vmaDestroyAllocator(m_allocator);
} }
mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool, bool throw_on_fail) mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(const memory_allocation_request& request)
{ {
VmaAllocation vma_alloc; VmaAllocation vma_alloc;
VkMemoryRequirements mem_req = {}; VkMemoryRequirements mem_req = {};
@ -233,11 +233,11 @@ namespace vk
auto do_vma_alloc = [&]() -> std::tuple<VkResult, u32> auto do_vma_alloc = [&]() -> std::tuple<VkResult, u32>
{ {
for (const auto& memory_type_index : memory_type) for (const auto& memory_type_index : *request.memory_type)
{ {
mem_req.memoryTypeBits = 1u << memory_type_index; mem_req.memoryTypeBits = 1u << memory_type_index;
mem_req.size = ::align2(block_sz, alignment); mem_req.size = ::align2(request.size, request.alignment);
mem_req.alignment = alignment; mem_req.alignment = request.alignment;
create_info.memoryTypeBits = 1u << memory_type_index; create_info.memoryTypeBits = 1u << memory_type_index;
create_info.flags = m_allocation_flags; create_info.flags = m_allocation_flags;
@ -256,26 +256,29 @@ namespace vk
const auto [status, type] = do_vma_alloc(); const auto [status, type] = do_vma_alloc();
if (status == VK_SUCCESS) if (status == VK_SUCCESS)
{ {
vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool); vmm_notify_memory_allocated(vma_alloc, type, request.size, request.pool);
return vma_alloc; return vma_alloc;
} }
} }
const auto severity = (throw_on_fail) ? rsx::problem_severity::fatal : rsx::problem_severity::severe; if (request.recover_vmem_on_fail)
if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
vmm_handle_memory_pressure(severity))
{ {
// Out of memory. Try again. const auto severity = (request.throw_on_fail) ? rsx::problem_severity::fatal : rsx::problem_severity::severe;
const auto [status, type] = do_vma_alloc(); if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
if (status == VK_SUCCESS) vmm_handle_memory_pressure(severity))
{ {
rsx_log.warning("Renderer ran out of video memory but successfully recovered."); // Out of memory. Try again.
vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool); const auto [status, type] = do_vma_alloc();
return vma_alloc; if (status == VK_SUCCESS)
{
rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
vmm_notify_memory_allocated(vma_alloc, type, request.size, request.pool);
return vma_alloc;
}
} }
} }
if (!throw_on_fail) if (!request.throw_on_fail)
{ {
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
} }
@ -361,18 +364,18 @@ namespace vk
m_allocation_flags = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT; m_allocation_flags = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT;
} }
mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool, bool throw_on_fail) mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(const memory_allocation_request& request)
{ {
VkResult error_code = VK_ERROR_UNKNOWN; VkResult error_code = VK_ERROR_UNKNOWN;
VkDeviceMemory memory; VkDeviceMemory memory;
VkMemoryAllocateInfo info = {}; VkMemoryAllocateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
info.allocationSize = block_sz; info.allocationSize = request.size;
auto do_vk_alloc = [&]() -> std::tuple<VkResult, u32> auto do_vk_alloc = [&]() -> std::tuple<VkResult, u32>
{ {
for (const auto& memory_type_index : memory_type) for (const auto& memory_type_index : *request.memory_type)
{ {
info.memoryTypeIndex = memory_type_index; info.memoryTypeIndex = memory_type_index;
error_code = vkAllocateMemory(m_device, &info, nullptr, &memory); error_code = vkAllocateMemory(m_device, &info, nullptr, &memory);
@ -389,26 +392,29 @@ namespace vk
const auto [status, type] = do_vk_alloc(); const auto [status, type] = do_vk_alloc();
if (status == VK_SUCCESS) if (status == VK_SUCCESS)
{ {
vmm_notify_memory_allocated(memory, type, block_sz, pool); vmm_notify_memory_allocated(memory, type, request.size, request.pool);
return memory; return memory;
} }
} }
const auto severity = (throw_on_fail) ? rsx::problem_severity::fatal : rsx::problem_severity::severe; if (request.recover_vmem_on_fail)
if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
vmm_handle_memory_pressure(severity))
{ {
// Out of memory. Try again. const auto severity = (request.throw_on_fail) ? rsx::problem_severity::fatal : rsx::problem_severity::severe;
const auto [status, type] = do_vk_alloc(); if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
if (status == VK_SUCCESS) vmm_handle_memory_pressure(severity))
{ {
rsx_log.warning("Renderer ran out of video memory but successfully recovered."); // Out of memory. Try again.
vmm_notify_memory_allocated(memory, type, block_sz, pool); const auto [status, type] = do_vk_alloc();
return memory; if (status == VK_SUCCESS)
{
rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
vmm_notify_memory_allocated(memory, type, request.size, request.pool);
return memory;
}
} }
} }
if (!throw_on_fail) if (!request.throw_on_fail)
{ {
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
} }
@ -455,11 +461,11 @@ namespace vk
return g_render_device->get_allocator(); return g_render_device->get_allocator();
} }
memory_block::memory_block(VkDevice dev, u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool, bool nullable) memory_block::memory_block(VkDevice dev, const memory_allocation_request& alloc_request)
: m_device(dev), m_size(block_sz) : m_device(dev), m_size(alloc_request.size)
{ {
m_mem_allocator = get_current_mem_allocator(); m_mem_allocator = get_current_mem_allocator();
m_mem_handle = m_mem_allocator->alloc(block_sz, alignment, memory_type, pool, !nullable); m_mem_handle = m_mem_allocator->alloc(alloc_request);
} }
memory_block::~memory_block() memory_block::~memory_block()

View File

@ -66,6 +66,16 @@ namespace vk
u64 size; u64 size;
}; };
// Bundles the parameters of a single device memory allocation so they can be
// passed to memory_block and to the allocators' alloc() as one object.
// Call sites fill it with designated initializers, so the member order matters.
struct memory_allocation_request
{
// Requested size in bytes. Used as the allocation size and reported to
// vmm_notify_memory_allocated on success.
u64 size = 0;
// Requested alignment. The VMA allocator rounds size up to it with align2
// and passes it on in the memory requirements.
// NOTE(review): the visible part of mem_allocator_vk::alloc does not read it - confirm.
u64 alignment = 1;
// Candidate memory types. The allocators dereference this and iterate its
// memory type indices without a null check, so it must be set by the caller.
const memory_type_info* memory_type = nullptr;
// Pool passed to vmm_notify_memory_allocated for a successful allocation.
vmm_allocation_pool pool = VMM_ALLOCATION_POOL_UNDEFINED;
// When false, alloc() returns VK_NULL_HANDLE if the allocation fails.
// Also selects the severity handed to vmm_handle_memory_pressure
// (fatal when true, severe when false).
// NOTE(review): the failure path for 'true' is outside the visible code - presumably it throws.
bool throw_on_fail = true;
// When true, a VK_ERROR_OUT_OF_DEVICE_MEMORY failure triggers
// vmm_handle_memory_pressure and one retry of the allocation.
// When false, that recovery step is skipped entirely.
bool recover_vmem_on_fail = true;
};
class mem_allocator_base class mem_allocator_base
{ {
public: public:
@ -76,7 +86,7 @@ namespace vk
virtual void destroy() = 0; virtual void destroy() = 0;
virtual mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool, bool throw_on_fail) = 0; virtual mem_handle_t alloc(const memory_allocation_request& request) = 0;
virtual void free(mem_handle_t mem_handle) = 0; virtual void free(mem_handle_t mem_handle) = 0;
virtual void* map(mem_handle_t mem_handle, u64 offset, u64 size) = 0; virtual void* map(mem_handle_t mem_handle, u64 offset, u64 size) = 0;
virtual void unmap(mem_handle_t mem_handle) = 0; virtual void unmap(mem_handle_t mem_handle) = 0;
@ -104,7 +114,7 @@ namespace vk
void destroy() override; void destroy() override;
mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool, bool throw_on_fail) override; mem_handle_t alloc(const memory_allocation_request& request) override;
void free(mem_handle_t mem_handle) override; void free(mem_handle_t mem_handle) override;
void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override; void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override;
@ -134,7 +144,7 @@ namespace vk
void destroy() override {} void destroy() override {}
mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool, bool throw_on_fail) override; mem_handle_t alloc(const memory_allocation_request& request) override;
void free(mem_handle_t mem_handle) override; void free(mem_handle_t mem_handle) override;
void* map(mem_handle_t mem_handle, u64 offset, u64 size) override; void* map(mem_handle_t mem_handle, u64 offset, u64 size) override;
@ -147,7 +157,7 @@ namespace vk
struct memory_block struct memory_block
{ {
memory_block(VkDevice dev, u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool, bool nullable = false); memory_block(VkDevice dev, const memory_allocation_request& alloc_request);
virtual ~memory_block(); virtual ~memory_block();
virtual VkDeviceMemory get_vk_device_memory(); virtual VkDeviceMemory get_vk_device_memory();

View File

@ -114,45 +114,60 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
case CELL_CAMERA_RAW8: // The game seems to expect BGGR case CELL_CAMERA_RAW8: // The game seems to expect BGGR
{ {
// Let's use a very simple algorithm to convert the image to raw BGGR // Let's use a very simple algorithm to convert the image to raw BGGR
const auto convert_to_bggr = [&image_buffer, &image, width, height](u32 y_begin, u32 y_end) const auto convert_to_bggr = [this, &image_buffer, &image, width, height](u32 y_begin, u32 y_end)
{ {
u8* dst = &image_buffer.data[image_buffer.width * y_begin]; u8* dst = &image_buffer.data[image_buffer.width * y_begin];
for (u32 y = y_begin; y < height && y < y_end; y++) for (u32 y = y_begin; y < height && y < y_end; y++)
{ {
const u8* src = image.constScanLine(y); const u8* src = image.constScanLine(y);
const u8* srcu = image.constScanLine(std::max<s32>(0, y - 1));
const u8* srcd = image.constScanLine(std::min(height - 1, y + 1));
const bool is_top_pixel = (y % 2) == 0; const bool is_top_pixel = (y % 2) == 0;
// We apply gaussian blur to get better demosaicing results later when debayering again
const auto blurred = [&](s32 x, s32 c)
{
const s32 i = x * 4 + c;
const s32 il = std::max(0, x - 1) * 4 + c;
const s32 ir = std::min<s32>(width - 1, x + 1) * 4 + c;
const s32 sum =
srcu[i] +
src[il] + 4 * src[i] + src[ir] +
srcd[i];
return static_cast<u8>(std::clamp((sum + 4) / 8, 0, 255));
};
// Split loops (roughly twice the performance by removing one condition) // Split loops (roughly twice the performance by removing one condition)
if (is_top_pixel) if (is_top_pixel)
{ {
for (u32 x = 0; x < width; x++, dst++, src += 4) for (u32 x = 0; x < width; x++, dst++)
{ {
const bool is_left_pixel = (x % 2) == 0; const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel) if (is_left_pixel)
{ {
*dst = src[2]; // Blue *dst = blurred(x, 2); // Blue
} }
else else
{ {
*dst = src[1]; // Green *dst = blurred(x, 1); // Green
} }
} }
} }
else else
{ {
for (u32 x = 0; x < width; x++, dst++, src += 4) for (u32 x = 0; x < width; x++, dst++)
{ {
const bool is_left_pixel = (x % 2) == 0; const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel) if (is_left_pixel)
{ {
*dst = src[1]; // Green *dst = blurred(x, 1); // Green
} }
else else
{ {
*dst = src[0]; // Red *dst = blurred(x, 0); // Red
} }
} }
} }
@ -182,13 +197,13 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
// Simple RGB to Y0_U_Y1_V conversion from stackoverflow. // Simple RGB to Y0_U_Y1_V conversion from stackoverflow.
const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end) const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end)
{ {
constexpr int yuv_bytes_per_pixel = 2; constexpr s32 yuv_bytes_per_pixel = 2;
const int yuv_pitch = image_buffer.width * yuv_bytes_per_pixel; const s32 yuv_pitch = image_buffer.width * yuv_bytes_per_pixel;
const int y0_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 0 : 3; const s32 y0_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 0 : 3;
const int u_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 1 : 2; const s32 u_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 1 : 2;
const int y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1; const s32 y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1;
const int v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0; const s32 v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0;
for (u32 y = y_begin; y < height && y < y_end; y++) for (u32 y = y_begin; y < height && y < y_end; y++)
{ {
@ -197,19 +212,19 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
for (u32 x = 0; x < width - 1; x += 2, src += 8) for (u32 x = 0; x < width - 1; x += 2, src += 8)
{ {
const float r1 = src[0]; const f32 r1 = src[0];
const float g1 = src[1]; const f32 g1 = src[1];
const float b1 = src[2]; const f32 b1 = src[2];
const float r2 = src[4]; const f32 r2 = src[4];
const float g2 = src[5]; const f32 g2 = src[5];
const float b2 = src[6]; const f32 b2 = src[6];
const int y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f; const s32 y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f;
const int u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f; const s32 u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f;
const int v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f; const s32 v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f;
const int y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f; const s32 y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f;
const int yuv_index = x * yuv_bytes_per_pixel; const s32 yuv_index = x * yuv_bytes_per_pixel;
yuv_row_ptr[yuv_index + y0_offset] = static_cast<u8>(std::clamp(y0, 0, 255)); yuv_row_ptr[yuv_index + y0_offset] = static_cast<u8>(std::clamp(y0, 0, 255));
yuv_row_ptr[yuv_index + u_offset] = static_cast<u8>(std::clamp( u, 0, 255)); yuv_row_ptr[yuv_index + u_offset] = static_cast<u8>(std::clamp( u, 0, 255));
yuv_row_ptr[yuv_index + y1_offset] = static_cast<u8>(std::clamp(y1, 0, 255)); yuv_row_ptr[yuv_index + y1_offset] = static_cast<u8>(std::clamp(y1, 0, 255));