cellGem: improve bayer demosaicing
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run

This commit is contained in:
Megamouse 2025-12-15 13:34:06 +01:00
parent 12a3818fcf
commit cf87f24587
2 changed files with 142 additions and 50 deletions

View File

@ -719,43 +719,120 @@ namespace gem
constexpr u32 in_pitch = 640; constexpr u32 in_pitch = 640;
constexpr u32 out_pitch = 640 * 4; constexpr u32 out_pitch = 640 * 4;
for (u32 y = 0; y < 480 - 1; y += 2) // Hamilton-Adams style (edge-directed) demosaicing
for (s32 y = 0; y < 480; y++)
{ {
const u8* src0 = src + y * in_pitch; const bool is_even_y = (y % 2) == 0;
const u8* src1 = src0 + in_pitch; const u8* srcc = src + y * in_pitch;
const u8* srcu = src + std::max(0, y - 1) * in_pitch;
const u8* srcd = src + std::min(480 - 1, y + 1) * in_pitch;
u8* dst0 = dst + y * out_pitch; u8* dst0 = dst + y * out_pitch;
u8* dst1 = dst0 + out_pitch;
for (u32 x = 0; x < 640 - 1; x += 2, src0 += 2, src1 += 2, dst0 += 8, dst1 += 8) // Split loops (roughly twice the performance by removing one condition)
if (is_even_y)
{ {
u8 b = src0[0]; for (s32 x = 0; x < 640; x++, dst0 += 4)
u8 g0 = src0[1];
u8 g1 = src1[0];
u8 r = src1[1];
if constexpr (use_gain)
{ {
b = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f)); const bool is_even_x = (x % 2) == 0;
g0 = static_cast<u8>(std::clamp(g0 * gain_g, 0.0f, 255.0f)); const int xl = std::max(0, x - 1);
g1 = static_cast<u8>(std::clamp(g1 * gain_g, 0.0f, 255.0f)); const int xr = std::min(640 - 1, x + 1);
r = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
u8 r, b, g;
if (is_even_x)
{
// Blue pixel
const u8 up = srcu[x];
const u8 down = srcd[x];
const u8 left = srcc[xl];
const u8 right = srcc[xr];
const int dh = std::abs(int(left) - int(right));
const int dv = std::abs(int(up) - int(down));
r = (srcu[xl] + srcu[xr] + srcd[xl] + srcd[xr]) / 4;
if (dh < dv)
g = (left + right) / 2;
else if (dv < dh)
g = (up + down) / 2;
else
g = (up + down + left + right) / 4;
b = srcc[x];
}
else
{
// Green (on blue row)
r = (srcu[x] + srcd[x]) / 2;
g = srcc[x];
b = (srcc[xl] + srcc[xr]) / 2;
}
// Write the pixel as R, G, B, A. With use_gain each channel is scaled by its own
// gain factor and clamped to 0..255 before being stored in that channel's slot,
// so the channel order matches the non-gain branch.
if constexpr (use_gain)
{
dst0[0] = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
dst0[1] = static_cast<u8>(std::clamp(g * gain_g, 0.0f, 255.0f));
dst0[2] = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f));
}
else
{
dst0[0] = r;
dst0[1] = g;
dst0[2] = b;
}
dst0[3] = alpha;
} }
}
else
{
for (s32 x = 0; x < 640; x++, dst0 += 4)
{
const bool is_even_x = (x % 2) == 0;
const int xl = std::max(0, x - 1);
const int xr = std::min(640 - 1, x + 1);
const u8 top[4] = { r, g0, b, alpha }; u8 r, b, g;
const u8 bottom[4] = { r, g1, b, alpha };
// Top-Left if (is_even_x)
std::memcpy(dst0, top, 4); {
// Green (on red row)
r = (srcc[xl] + srcc[xr]) / 2;
g = srcc[x];
b = (srcu[x] + srcd[x]) / 2;
}
else
{
// Red pixel
const u8 up = srcu[x];
const u8 down = srcd[x];
const u8 left = srcc[xl];
const u8 right = srcc[xr];
const int dh = std::abs(int(left) - int(right));
const int dv = std::abs(int(up) - int(down));
// Top-Right Pixel r = srcc[x];
std::memcpy(dst0 + 4, top, 4); if (dh < dv)
g = (left + right) / 2;
else if (dv < dh)
g = (up + down) / 2;
else
g = (up + down + left + right) / 4;
b = (srcu[xl] + srcu[xr] + srcd[xl] + srcd[xr]) / 4;
}
// Write the pixel as R, G, B, A; the gain branch uses the same channel order as the non-gain branch.
// Bottom-Left Pixel if constexpr (use_gain)
std::memcpy(dst1, bottom, 4); {
dst0[0] = static_cast<u8>(std::clamp(r * gain_r, 0.0f, 255.0f));
// Bottom-Right Pixel dst0[1] = static_cast<u8>(std::clamp(g * gain_g, 0.0f, 255.0f));
std::memcpy(dst1 + 4, bottom, 4); dst0[2] = static_cast<u8>(std::clamp(b * gain_b, 0.0f, 255.0f));
}
else
{
dst0[0] = r;
dst0[1] = g;
dst0[2] = b;
}
dst0[3] = alpha;
}
} }
} }
} }

View File

@ -114,45 +114,60 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
case CELL_CAMERA_RAW8: // The game seems to expect BGGR case CELL_CAMERA_RAW8: // The game seems to expect BGGR
{ {
// Let's use a very simple algorithm to convert the image to raw BGGR // Let's use a very simple algorithm to convert the image to raw BGGR
const auto convert_to_bggr = [&image_buffer, &image, width, height](u32 y_begin, u32 y_end) const auto convert_to_bggr = [this, &image_buffer, &image, width, height](u32 y_begin, u32 y_end)
{ {
u8* dst = &image_buffer.data[image_buffer.width * y_begin]; u8* dst = &image_buffer.data[image_buffer.width * y_begin];
for (u32 y = y_begin; y < height && y < y_end; y++) for (u32 y = y_begin; y < height && y < y_end; y++)
{ {
const u8* src = image.constScanLine(y); const u8* src = image.constScanLine(y);
const u8* srcu = image.constScanLine(std::max<s32>(0, y - 1));
const u8* srcd = image.constScanLine(std::min(height - 1, y + 1));
const bool is_top_pixel = (y % 2) == 0; const bool is_top_pixel = (y % 2) == 0;
// We apply a small Gaussian-like blur (centre weight 4, the four direct neighbours weight 1, divided by 8) to get better demosaicing results later when debayering again
const auto blurred = [&](s32 x, s32 c)
{
const s32 i = x * 4 + c;
const s32 il = std::max(0, x - 1) * 4 + c;
const s32 ir = std::min<s32>(width - 1, x + 1) * 4 + c;
const s32 sum =
srcu[i] +
src[il] + 4 * src[i] + src[ir] +
srcd[i];
return static_cast<u8>(std::clamp((sum + 4) / 8, 0, 255));
};
// Split loops (roughly twice the performance by removing one condition) // Split loops (roughly twice the performance by removing one condition)
if (is_top_pixel) if (is_top_pixel)
{ {
for (u32 x = 0; x < width; x++, dst++, src += 4) for (u32 x = 0; x < width; x++, dst++)
{ {
const bool is_left_pixel = (x % 2) == 0; const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel) if (is_left_pixel)
{ {
*dst = src[2]; // Blue *dst = blurred(x, 2); // Blue
} }
else else
{ {
*dst = src[1]; // Green *dst = blurred(x, 1); // Green
} }
} }
} }
else else
{ {
for (u32 x = 0; x < width; x++, dst++, src += 4) for (u32 x = 0; x < width; x++, dst++)
{ {
const bool is_left_pixel = (x % 2) == 0; const bool is_left_pixel = (x % 2) == 0;
if (is_left_pixel) if (is_left_pixel)
{ {
*dst = src[1]; // Green *dst = blurred(x, 1); // Green
} }
else else
{ {
*dst = src[0]; // Red *dst = blurred(x, 0); // Red
} }
} }
} }
@ -182,13 +197,13 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
// Simple RGB to Y0_U_Y1_V conversion from stackoverflow. // Simple RGB to Y0_U_Y1_V conversion from stackoverflow.
const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end) const auto convert_to_yuv422 = [&image_buffer, &image, width, height, format = m_format](u32 y_begin, u32 y_end)
{ {
constexpr int yuv_bytes_per_pixel = 2; constexpr s32 yuv_bytes_per_pixel = 2;
const int yuv_pitch = image_buffer.width * yuv_bytes_per_pixel; const s32 yuv_pitch = image_buffer.width * yuv_bytes_per_pixel;
const int y0_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 0 : 3; const s32 y0_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 0 : 3;
const int u_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 1 : 2; const s32 u_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 1 : 2;
const int y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1; const s32 y1_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 2 : 1;
const int v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0; const s32 v_offset = (format == CELL_CAMERA_Y0_U_Y1_V) ? 3 : 0;
for (u32 y = y_begin; y < height && y < y_end; y++) for (u32 y = y_begin; y < height && y < y_end; y++)
{ {
@ -197,19 +212,19 @@ bool qt_camera_video_sink::present(const QVideoFrame& frame)
for (u32 x = 0; x < width - 1; x += 2, src += 8) for (u32 x = 0; x < width - 1; x += 2, src += 8)
{ {
const float r1 = src[0]; const f32 r1 = src[0];
const float g1 = src[1]; const f32 g1 = src[1];
const float b1 = src[2]; const f32 b1 = src[2];
const float r2 = src[4]; const f32 r2 = src[4];
const float g2 = src[5]; const f32 g2 = src[5];
const float b2 = src[6]; const f32 b2 = src[6];
const int y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f; const s32 y0 = (0.257f * r1) + (0.504f * g1) + (0.098f * b1) + 16.0f;
const int u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f; const s32 u = -(0.148f * r1) - (0.291f * g1) + (0.439f * b1) + 128.0f;
const int v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f; const s32 v = (0.439f * r1) - (0.368f * g1) - (0.071f * b1) + 128.0f;
const int y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f; const s32 y1 = (0.257f * r2) + (0.504f * g2) + (0.098f * b2) + 16.0f;
const int yuv_index = x * yuv_bytes_per_pixel; const s32 yuv_index = x * yuv_bytes_per_pixel;
yuv_row_ptr[yuv_index + y0_offset] = static_cast<u8>(std::clamp(y0, 0, 255)); yuv_row_ptr[yuv_index + y0_offset] = static_cast<u8>(std::clamp(y0, 0, 255));
yuv_row_ptr[yuv_index + u_offset] = static_cast<u8>(std::clamp( u, 0, 255)); yuv_row_ptr[yuv_index + u_offset] = static_cast<u8>(std::clamp( u, 0, 255));
yuv_row_ptr[yuv_index + y1_offset] = static_cast<u8>(std::clamp(y1, 0, 255)); yuv_row_ptr[yuv_index + y1_offset] = static_cast<u8>(std::clamp(y1, 0, 255));