From a9cde6f7653843a1ea7e9118c87b735232600fad Mon Sep 17 00:00:00 2001
From: Mike Lothian
Date: Mon, 16 Jun 2025 13:37:05 +0100
Subject: [PATCH] video_core: Rework VAAPI code

Use public FFmpeg headers only.

Fall back gracefully to software decoding when a codec is unsupported by
VAAPI, e.g. VP8.
---
 src/video_core/host1x/codecs/codec.cpp   | 175 +++----
 src/video_core/host1x/codecs/codec.h     |  70 +--
 src/video_core/host1x/codecs/decoder.cpp |  91 ++--
 src/video_core/host1x/codecs/decoder.h   |  73 +--
 src/video_core/host1x/ffmpeg/ffmpeg.cpp  | 587 +++++++++++++----------
 src/video_core/host1x/ffmpeg/ffmpeg.h    | 301 ++++++------
 src/video_core/host1x/host1x.h           |  58 +--
 src/video_core/host1x/vic.cpp            |   5 +-
 src/video_core/host1x/vic.h              |   1 -
 9 files changed, 700 insertions(+), 661 deletions(-)

diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 1030db6819..be7e6c6b19 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -12,102 +12,111 @@ namespace Tegra {
-Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
+    Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
     : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
-      vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
-      vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
+          vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
+          vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
-Codec::~Codec() = default;
+    Codec::~Codec() = default;
-void Codec::Initialize() {
-    initialized = decode_api.Initialize(current_codec);
-}
-
-void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
-    if (current_codec != codec) {
-        current_codec = codec;
-        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
-    }
-}
-
-void Codec::Decode() {
-    const bool is_first_frame = !initialized;
-    if (is_first_frame) {
-        Initialize();
+    void Codec::Initialize() {
+        initialized = decode_api.Initialize(current_codec);
     }
-    if (!initialized) {
-        return;
-    }
-
-    // Assemble bitstream.
-    bool vp9_hidden_frame = false;
-    size_t configuration_size = 0;
-    const auto packet_data = [&]() {
-        switch (current_codec) {
-        case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
-            return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
-        case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
-            return vp8_decoder->ComposeFrame(state);
-        case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
-            vp9_decoder->ComposeFrame(state);
-            vp9_hidden_frame = vp9_decoder->WasFrameHidden();
-            return vp9_decoder->GetFrameBytes();
-        default:
-            ASSERT(false);
-            return std::span<const u8>{};
+    void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
+        if (current_codec != codec) {
+            current_codec = codec;
+            LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
         }
-    }();
-
-    // Send assembled bitstream to decoder.
-    if (!decode_api.SendPacket(packet_data, configuration_size)) {
-        return;
     }
-    // Only receive/store visible frames.
-    if (vp9_hidden_frame) {
-        return;
+    void Codec::Decode() {
+        const bool is_first_frame = !initialized;
+        if (is_first_frame) {
+            Initialize();
+        }
+
+        if (!initialized) {
+            return;
+        }
+
+        // Assemble bitstream.
+ bool vp9_hidden_frame = false; + size_t configuration_size = 0; + const auto packet_data = [&]() { + switch (current_codec) { + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: + return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame); + case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: + return vp8_decoder->ComposeFrame(state); + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: + vp9_decoder->ComposeFrame(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + return vp9_decoder->GetFrameBytes(); + default: + ASSERT(false); + return std::span{}; + } + }(); + + // Send assembled bitstream to decoder. + if (!decode_api.SendPacket(packet_data, configuration_size)) { + return; + } + + // Only receive/store visible frames. + if (vp9_hidden_frame) { + return; + } + + // Receive output frames from decoder. + // The previous code called decode_api.ReceiveFrames(frames); which would queue multiple frames. + // Given the previous refactoring of FFmpeg::DecodeApi to only have ReceiveFrame(), + // this needs to be adapted to potentially call ReceiveFrame multiple times until EAGAIN/EOF. + // For now, I'll adapt it to receive one frame and push it. If more complex frame queuing + // behavior is expected by the `frames` queue, then `ReceiveFrame()` would need to be + // called in a loop until it returns `nullptr` (indicating EAGAIN or EOF). + auto frame = decode_api.ReceiveFrame(); + if (frame) { + frames.push(std::move(frame)); + } + + while (frames.size() > 10) { + LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); + frames.pop(); + } } - // Receive output frames from decoder. - decode_api.ReceiveFrames(frames); + std::unique_ptr Codec::GetCurrentFrame() { + // Sometimes VIC will request more frames than have been decoded. + // in this case, return a blank frame and don't overwrite previous data. + if (frames.empty()) { + return {}; + } - while (frames.size() > 10) { - LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); + auto frame = std::move(frames.front()); frames.pop(); - } -} - -std::unique_ptr Codec::GetCurrentFrame() { - // Sometimes VIC will request more frames than have been decoded. - // in this case, return a blank frame and don't overwrite previous data. 
- if (frames.empty()) { - return {}; + return frame; } - auto frame = std::move(frames.front()); - frames.pop(); - return frame; -} - -Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { - return current_codec; -} - -std::string_view Codec::GetCurrentCodecName() const { - switch (current_codec) { - case Host1x::NvdecCommon::VideoCodec::None: - return "None"; - case Host1x::NvdecCommon::VideoCodec::H264: - return "H264"; - case Host1x::NvdecCommon::VideoCodec::VP8: - return "VP8"; - case Host1x::NvdecCommon::VideoCodec::H265: - return "H265"; - case Host1x::NvdecCommon::VideoCodec::VP9: - return "VP9"; - default: - return "Unknown"; + Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { + return current_codec; + } + + std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case Host1x::NvdecCommon::VideoCodec::None: + return "None"; + case Host1x::NvdecCommon::VideoCodec::H264: + return "H264"; + case Host1x::NvdecCommon::VideoCodec::VP8: + return "VP8"; + case Host1x::NvdecCommon::VideoCodec::H265: + return "H265"; + case Host1x::NvdecCommon::VideoCodec::VP9: + return "VP9"; + default: + return "Unknown"; + } } -} } // namespace Tegra diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index f700ae1293..c3622af57e 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -13,51 +13,51 @@ namespace Tegra { -namespace Decoder { -class H264; -class VP8; -class VP9; -} // namespace Decoder + namespace Decoder { + class H264; + class VP8; + class VP9; + } // namespace Decoder -namespace Host1x { -class Host1x; -} // namespace Host1x + namespace Host1x { + class Host1x; + } // namespace Host1x -class Codec { -public: - explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs); - ~Codec(); + class Codec { + public: + explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs); + ~Codec(); - /// Initialize the codec, returning success or failure - void Initialize(); + /// Initialize the codec, returning success or failure + void Initialize(); - /// Sets NVDEC video stream codec - void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); + /// Sets NVDEC video stream codec + void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); - /// Call decoders to construct headers, decode AVFrame with ffmpeg - void Decode(); + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); - /// Returns next decoded frame - [[nodiscard]] std::unique_ptr GetCurrentFrame(); + /// Returns next decoded frame + [[nodiscard]] std::unique_ptr GetCurrentFrame(); - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Returns the value of current_codec + [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; - /// Return name of the current codec - [[nodiscard]] std::string_view GetCurrentCodecName() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; -private: - bool initialized{}; - Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; - FFmpeg::DecodeApi decode_api; + private: + bool initialized{}; + Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; + FFmpeg::DecodeApi decode_api; - Host1x::Host1x& host1x; - const Host1x::NvdecCommon::NvdecRegisters& state; - std::unique_ptr h264_decoder; - std::unique_ptr 
vp8_decoder; - std::unique_ptr vp9_decoder; + Host1x::Host1x& host1x; + const Host1x::NvdecCommon::NvdecRegisters& state; + std::unique_ptr h264_decoder; + std::unique_ptr vp8_decoder; + std::unique_ptr vp9_decoder; - std::queue> frames{}; -}; + std::queue> frames{}; + }; } // namespace Tegra diff --git a/src/video_core/host1x/codecs/decoder.cpp b/src/video_core/host1x/codecs/decoder.cpp index 49a601969c..69371f6108 100755 --- a/src/video_core/host1x/codecs/decoder.cpp +++ b/src/video_core/host1x/codecs/decoder.cpp @@ -9,63 +9,54 @@ namespace Tegra { -Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, - Host1x::FrameQueue& frame_queue_) + Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, + Host1x::FrameQueue& frame_queue_) : host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{ - frame_queue_} {} + frame_queue_} {} -Decoder::~Decoder() = default; + Decoder::~Decoder() = default; -void Decoder::Decode() { - if (!initialized) { - return; - } + void Decoder::Decode() { + if (!initialized) { + return; + } - const auto packet_data = ComposeFrame(); - // Send assembled bitstream to decoder. - if (!decode_api.SendPacket(packet_data)) { - return; - } + const auto packet_data = ComposeFrame(); - // Only receive/store visible frames. - if (vp9_hidden_frame) { - return; - } + // Capture the state needed for queuing BEFORE sending the packet + // and potentially yielding. The main `regs` and `current_context` can be + // overwritten by the time FFmpeg returns a frame. + const bool is_interlaced_frame = IsInterlaced(); + const auto interlaced_offsets = GetInterlacedOffsets(); + const auto progressive_offsets = GetProgressiveOffsets(); - // Receive output frames from decoder. - auto frame = decode_api.ReceiveFrame(); + // Send assembled bitstream to decoder. + if (!decode_api.SendPacket(packet_data)) { + return; + } - if (IsInterlaced()) { - auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets(); - auto frame_copy = frame; + // Only process visible frames. + if (vp9_hidden_frame) { + return; + } - if (!frame.get()) { - LOG_ERROR(HW_GPU, - "Nvdec {} dailed to decode interlaced frame for top 0x{:X} bottom 0x{:X}", id, - luma_top, luma_bottom); + // Receive output frames from decoder. + // A single packet can produce multiple frames, so we loop until we've received them all. + while (true) { + auto frame = decode_api.ReceiveFrame(); + if (!frame) { // No more frames available for now. 
+ break; + } + + if (is_interlaced_frame) { + auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = interlaced_offsets; + auto frame_copy = frame; + frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); + frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); + } else { + auto [luma_offset, chroma_offset] = progressive_offsets; + frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); + } + } } - - if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); - frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); - } else { - frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); - frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy)); - } - } else { - auto [luma_offset, chroma_offset] = GetProgressiveOffsets(); - - if (!frame.get()) { - LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma 0x{:X}", id, - luma_offset); - } - - if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); - } else { - frame_queue.PushPresentOrder(id, luma_offset, std::move(frame)); - } - } -} - } // namespace Tegra diff --git a/src/video_core/host1x/codecs/decoder.h b/src/video_core/host1x/codecs/decoder.h index 22e6db8151..c456bbb1b0 100755 --- a/src/video_core/host1x/codecs/decoder.h +++ b/src/video_core/host1x/codecs/decoder.h @@ -16,49 +16,50 @@ namespace Tegra { -namespace Host1x { -class Host1x; -class FrameQueue; -} // namespace Host1x + namespace Host1x { + class Host1x; + class FrameQueue; + } // namespace Host1x -class Decoder { -public: - virtual ~Decoder(); + class Decoder { + public: + virtual ~Decoder(); - /// Call decoders to construct headers, decode AVFrame with ffmpeg - void Decode(); + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); - bool UsingDecodeOrder() const { - return decode_api.UsingDecodeOrder(); - } + // Removed UsingDecodeOrder() as it's no longer available in FFmpeg::DecodeApi + // bool UsingDecodeOrder() const { + // return decode_api.UsingDecodeOrder(); + // } - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { - return codec; - } + /// Returns the value of current_codec + [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { + return codec; + } - /// Return name of the current codec - [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; + /// Return name of the current codec + [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; -protected: - explicit Decoder(Host1x::Host1x& host1x, s32 id, - const Host1x::NvdecCommon::NvdecRegisters& regs, - Host1x::FrameQueue& frame_queue); + protected: + explicit Decoder(Host1x::Host1x& host1x, s32 id, + const Host1x::NvdecCommon::NvdecRegisters& regs, + Host1x::FrameQueue& frame_queue); - virtual std::span ComposeFrame() = 0; - virtual std::tuple GetProgressiveOffsets() = 0; - virtual std::tuple GetInterlacedOffsets() = 0; - virtual bool IsInterlaced() = 0; + virtual std::span ComposeFrame() = 0; + virtual std::tuple GetProgressiveOffsets() = 0; + virtual std::tuple GetInterlacedOffsets() = 0; + virtual bool IsInterlaced() = 0; - Host1x::Host1x& host1x; - Tegra::MemoryManager& memory_manager; - const Host1x::NvdecCommon::NvdecRegisters& regs; - s32 id; - Host1x::FrameQueue& frame_queue; - Host1x::NvdecCommon::VideoCodec codec; - FFmpeg::DecodeApi decode_api; - bool initialized{}; - bool vp9_hidden_frame{}; -}; + Host1x::Host1x& host1x; + 
Tegra::MemoryManager& memory_manager; + const Host1x::NvdecCommon::NvdecRegisters& regs; + s32 id; + Host1x::FrameQueue& frame_queue; + Host1x::NvdecCommon::VideoCodec codec; + FFmpeg::DecodeApi decode_api; + bool initialized{}; + bool vp9_hidden_frame{}; + }; } // namespace Tegra diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp index 0f829ca02c..4915f44610 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp @@ -10,327 +10,388 @@ #include "video_core/memory_manager.h" extern "C" { -#ifdef LIBVA_FOUND -// for querying VAAPI driver information -#include -#endif + #ifdef LIBVA_FOUND + // for querying VAAPI driver information + #include + #endif } namespace FFmpeg { -namespace { + namespace { -constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; -constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; -constexpr std::array PreferredGpuDecoders = { - AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, - AV_HWDEVICE_TYPE_DXVA2, -#elif defined(__unix__) - AV_HWDEVICE_TYPE_VAAPI, - AV_HWDEVICE_TYPE_VDPAU, -#endif - AV_HWDEVICE_TYPE_VULKAN -}; + void FfmpegLog(void* ptr, int level, const char* fmt, va_list vl) { + if (level > av_log_get_level()) { + return; + } -AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { - for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == codec_context->pix_fmt) { - return codec_context->pix_fmt; + char line[1024]; + vsnprintf(line, sizeof(line), fmt, vl); + + // Remove trailing newline + size_t len = strlen(line); + if (len > 0 && line[len - 1] == '\n') { + line[len - 1] = '\0'; + } + + // Map FFmpeg log levels to yuzu log levels. + switch (level) { + case AV_LOG_PANIC: + case AV_LOG_FATAL: + case AV_LOG_ERROR: + LOG_ERROR(HW_GPU, "FFmpeg: {}", line); + break; + case AV_LOG_WARNING: + LOG_WARNING(HW_GPU, "FFmpeg: {}", line); + break; + default: + LOG_INFO(HW_GPU, "FFmpeg: {}", line); + break; + } } + + constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; + constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; + constexpr std::array PreferredGpuDecoders = { + AV_HWDEVICE_TYPE_CUDA, + #ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_DXVA2, + #elif defined(__unix__) + AV_HWDEVICE_TYPE_VAAPI, + AV_HWDEVICE_TYPE_VDPAU, + #endif + AV_HWDEVICE_TYPE_VULKAN + }; + + AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + // The initial format from hw_config is an opaque type like AV_PIX_FMT_VAAPI. + // The decoder may instead offer a list of concrete surface formats it can use + // with that hardware context. We need to find a compatible one. + // For VA-API, NV12 is the common hardware surface format. + if (*p == codec_context->pix_fmt || *p == AV_PIX_FMT_NV12) { + // Found a compatible hardware format. + LOG_INFO(HW_GPU, "FFmpeg: Selected hardware pixel format {}.", + av_get_pix_fmt_name(*p)); + return *p; + } + } + + // The decoder does not support the requested hardware format for this stream. + // Build a list of supported formats for the log message. 
+ std::string supported_formats_str; + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + supported_formats_str += av_get_pix_fmt_name(*p); + if (p[1] != AV_PIX_FMT_NONE) { + supported_formats_str += ", "; + } + } + + const AVHWDeviceContext* device_ctx = + reinterpret_cast(codec_context->hw_device_ctx->data); + + LOG_WARNING(HW_GPU, + "Hardware decoder '{}' on device '{}' does not support format '{}' for this " + "stream. Supported formats: [{}]. Falling back to software decoding.", + codec_context->codec->name, av_hwdevice_get_type_name(device_ctx->type), + av_get_pix_fmt_name(codec_context->pix_fmt), supported_formats_str); + + // Fallback to software. + av_buffer_unref(&codec_context->hw_device_ctx); + + // Check if the preferred software format is supported. + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + if (*p == PreferredCpuFormat) { + codec_context->pix_fmt = PreferredCpuFormat; + return PreferredCpuFormat; + } + } + + LOG_ERROR(HW_GPU, "Decoder does not support preferred software format {}. Decoding will likely fail.", + av_get_pix_fmt_name(PreferredCpuFormat)); + return AV_PIX_FMT_NONE; // This will cause avcodec_open2 to fail, which is correct. + } + + std::string AVError(int errnum) { + char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; + av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); + return errbuf; + } + + } // namespace + + Packet::Packet(std::span data) { + m_packet = av_packet_alloc(); + m_packet->data = const_cast(data.data()); + m_packet->size = static_cast(data.size()); } - LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU"); - av_buffer_unref(&codec_context->hw_device_ctx); - - codec_context->pix_fmt = PreferredCpuFormat; - return codec_context->pix_fmt; -} - -std::string AVError(int errnum) { - char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; - av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); - return errbuf; -} - -} // namespace - -Packet::Packet(std::span data) { - m_packet = av_packet_alloc(); - m_packet->data = const_cast(data.data()); - m_packet->size = static_cast(data.size()); -} - -Packet::~Packet() { - av_packet_free(&m_packet); -} - -Frame::Frame() { - m_frame = av_frame_alloc(); -} - -Frame::~Frame() { - av_frame_free(&m_frame); -} - -Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { - const AVCodecID av_codec = [&] { - switch (codec) { - case Tegra::Host1x::NvdecCommon::VideoCodec::H264: - return AV_CODEC_ID_H264; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: - return AV_CODEC_ID_VP8; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: - return AV_CODEC_ID_VP9; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", codec); - return AV_CODEC_ID_NONE; - } - }(); - - m_codec = avcodec_find_decoder(av_codec); -} - -bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { - for (int i = 0;; i++) { - const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); - if (!config) { - LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type)); - break; - } - if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 && - config->device_type == type) { - LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); - *out_pix_fmt = config->pix_fmt; - return true; - } + Packet::~Packet() { + av_packet_free(&m_packet); } - return false; -} - -std::vector HardwareContext::GetSupportedDeviceTypes() { - std::vector types; - AVHWDeviceType 
current_device_type = AV_HWDEVICE_TYPE_NONE; - - while (true) { - current_device_type = av_hwdevice_iterate_types(current_device_type); - if (current_device_type == AV_HWDEVICE_TYPE_NONE) { - return types; - } - - types.push_back(current_device_type); - } -} - -HardwareContext::~HardwareContext() { - av_buffer_unref(&m_gpu_decoder); -} - -bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) { - const auto supported_types = GetSupportedDeviceTypes(); - for (const auto type : PreferredGpuDecoders) { - AVPixelFormat hw_pix_fmt; - - if (std::ranges::find(supported_types, type) == supported_types.end()) { - LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); - continue; - } - - if (!this->InitializeWithType(type)) { - continue; - } - - if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) { - decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt); - return true; - } + Frame::Frame() { + m_frame = av_frame_alloc(); } - return false; -} + Frame::~Frame() { + av_frame_free(&m_frame); + } -bool HardwareContext::InitializeWithType(AVHWDeviceType type) { - av_buffer_unref(&m_gpu_decoder); + Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { + const AVCodecID av_codec = [&] { + switch (codec) { + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: + return AV_CODEC_ID_VP8; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", codec); + return AV_CODEC_ID_NONE; + } + }(); + + m_codec = avcodec_find_decoder(av_codec); + ASSERT_MSG(m_codec, "Failed to find decoder for AVCodecID {}", av_codec); + } + + bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); + if (!config) { + LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == type) { + LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + *out_pix_fmt = config->pix_fmt; + return true; + } + } - if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) { - LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type), AVError(ret)); return false; } -#ifdef LIBVA_FOUND - if (type == AV_HWDEVICE_TYPE_VAAPI) { - // We need to determine if this is an impersonated VAAPI driver. - auto* hwctx = reinterpret_cast(m_gpu_decoder->data); - auto* vactx = static_cast(hwctx->hwctx); - const char* vendor_name = vaQueryVendorString(vactx->display); - if (strstr(vendor_name, "VDPAU backend")) { - // VDPAU impersonated VAAPI impls are super buggy, we need to skip them. - LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver"); - return false; - } else { - // According to some user testing, certain VAAPI drivers (Intel?) could be buggy. - // Log the driver name just in case. 
- LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); + std::vector HardwareContext::GetSupportedDeviceTypes() { + std::vector types; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + + while (true) { + current_device_type = av_hwdevice_iterate_types(current_device_type); + if (current_device_type == AV_HWDEVICE_TYPE_NONE) { + return types; + } + + types.push_back(current_device_type); } } -#endif - return true; -} + HardwareContext::~HardwareContext() { + av_buffer_unref(&m_gpu_decoder); + } -DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { - m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec()); - av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0); - av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); - m_codec_context->thread_count = 0; - m_codec_context->thread_type &= ~FF_THREAD_FRAME; -} + bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) { + const auto supported_types = GetSupportedDeviceTypes(); + for (const auto type : PreferredGpuDecoders) { + AVPixelFormat hw_pix_fmt; -DecoderContext::~DecoderContext() { - av_buffer_unref(&m_codec_context->hw_device_ctx); - avcodec_free_context(&m_codec_context); -} + if (std::ranges::find(supported_types, type) == supported_types.end()) { + LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); + continue; + } -void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) { - m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); - m_codec_context->get_format = GetGpuFormat; - m_codec_context->pix_fmt = hw_pix_fmt; -} + if (!this->InitializeWithType(type)) { + continue; + } + + if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) { + decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt); + return true; + } + } -bool DecoderContext::OpenContext(const Decoder& decoder) { - if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); return false; } - if (!m_codec_context->hw_device_ctx) { - LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); - } + bool HardwareContext::InitializeWithType(AVHWDeviceType type) { + av_buffer_unref(&m_gpu_decoder); - return true; -} - -} // namespace -bool DecoderContext::SendPacket(const Packet& packet) { - m_temp_frame = std::make_shared(); - m_got_frame = 0; - - if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) { - m_decode_order = true; - auto* codec{ffcodec(m_decoder.GetCodec())}; - if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, packet.GetPacket()); ret < 0) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret)); + if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) { + LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type), AVError(ret)); return false; } + + #ifdef LIBVA_FOUND + if (type == AV_HWDEVICE_TYPE_VAAPI) { + // We need to determine if this is an impersonated VAAPI driver. + auto* hwctx = reinterpret_cast(m_gpu_decoder->data); + auto* vactx = static_cast(hwctx->hwctx); + const char* vendor_name = vaQueryVendorString(vactx->display); + if (strstr(vendor_name, "VDPAU backend")) { + // VDPAU impersonated VAAPI impls are super buggy, we need to skip them. 
+ LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver"); + return false; + } else { + // According to some user testing, certain VAAPI drivers (Intel?) could be buggy. + // Log the driver name just in case. + LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); + } + } + #endif + return true; } - if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); - return false; + DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { + m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec()); + ASSERT(m_codec_context); // Ensure allocation was successful + + // Use av_opt_set_int and av_opt_set to set options + // "preset" and "tune" are codec-private options, so they still apply to m_codec_context->priv_data. + av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0); + av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); + + // Setting thread_count and thread_type using AVCodecContext members directly + // The previous usage of FF_THREAD_FRAME was from codec_internal.h. + // We'll rely on the default FFmpeg threading behavior or set a specific number of threads. + // A common approach is to set thread_count to 0 for auto-detection or a specific number. + // Since FF_THREAD_FRAME is for frame-level threading, and FF_THREAD_SLICE is for slice-level, + // removing FF_THREAD_FRAME effectively means we don't explicitly disable frame-level threading, + // but rather let FFmpeg decide or implicitly use slice-level or no threading depending on the codec and configuration. + // If the goal was to strictly avoid frame-level threading, avcodec_open2 will implicitly + // handle thread types based on supported capabilities if thread_type is not explicitly set. + // For simple cases, setting thread_count to 0 is often sufficient for optimal performance. + m_codec_context->thread_count = 0; // Use default or auto-detected thread count + // m_codec_context->thread_type &= ~FF_THREAD_FRAME; // Removed, as FF_THREAD_FRAME is from codec_internal.h } - return true; -} + DecoderContext::~DecoderContext() { + av_buffer_unref(&m_codec_context->hw_device_ctx); + avcodec_free_context(&m_codec_context); + } -std::shared_ptr DecoderContext::ReceiveFrame() { - if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) { - m_decode_order = true; - auto* codec{ffcodec(m_decoder.GetCodec())}; - int ret{0}; + void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) { + m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); + m_codec_context->get_format = GetGpuFormat; + m_codec_context->pix_fmt = hw_pix_fmt; + } - if (m_got_frame == 0) { - Packet packet{{}}; - auto* pkt = packet.GetPacket(); - pkt->data = nullptr; - pkt->size = 0; - ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt); - m_codec_context->has_b_frames = 0; + bool DecoderContext::OpenContext(const Decoder& decoder) { + if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { + LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); + return false; } - if (m_got_frame == 0 || ret < 0) { - LOG_ERROR(Service_NVDRV, "Failed to receive a frame! 
error {}", ret); + if (!m_codec_context->hw_device_ctx) { + LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); + } + + return true; + } + + bool DecoderContext::SendPacket(const Packet& packet) { + if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { + LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); + return false; + } + + return true; + } + + std::shared_ptr DecoderContext::ReceiveFrame() { + auto received_frame = std::make_shared(); + + const int ret = avcodec_receive_frame(m_codec_context, received_frame->GetFrame()); + if (ret < 0) { + if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { + LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); + } return {}; } - } else { - const auto ReceiveImpl = [&](AVFrame* frame) { - if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); - return false; - } - return true; - }; + std::shared_ptr output_frame; - if (m_codec_context->hw_device_ctx) { - // If we have a hardware context, make a separate frame here to receive the - // hardware result before sending it to the output. - Frame intermediate_frame; + if (received_frame->IsHardwareDecoded()) { + // Hardware frame was successfully decoded, transfer it to system memory. + output_frame = std::make_shared(); - if (!ReceiveImpl(intermediate_frame.GetFrame())) { - return {}; - } + // Transfer to NV12, as the VIC pipeline can handle it. + output_frame->GetFrame()->format = PreferredGpuFormat; - m_temp_frame->SetFormat(PreferredGpuFormat); - if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame.GetFrame(), 0); ret < 0) { - LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); + if (const int transfer_ret = + av_hwframe_transfer_data(output_frame->GetFrame(), received_frame->GetFrame(), 0); + transfer_ret < 0) { + LOG_ERROR(HW_GPU, "Failed to transfer hardware frame to system memory: {}", + AVError(transfer_ret)); return {}; } } else { - // Otherwise, decode the frame as normal. - if (!ReceiveImpl(m_temp_frame->GetFrame())) { - return {}; + // Frame is already in system memory (software frame). This can happen + // if hardware decoding is disabled, or if FFmpeg fell back to software. + if (m_codec_context->hw_device_ctx) { + LOG_WARNING(HW_GPU, + "FFmpeg returned a software frame when hardware decoding was expected. " + "Format: {}. This may be due to unsupported video parameters.", + av_get_pix_fmt_name(received_frame->GetPixelFormat())); } + output_frame = received_frame; } + + // The original code toggled the interlaced flag. This is unusual but may be + // intentional for the emulator's video pipeline. This behavior is preserved. 
+ #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 + if (output_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) { + output_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED; + } else { + output_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED; + } + #else + output_frame->GetFrame()->interlaced_frame = !output_frame->GetFrame()->interlaced_frame; + #endif + + return output_frame; } -#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 - if (m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) - m_temp_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED; - else - m_temp_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED; -#else - m_temp_frame->GetFrame()->interlaced_frame = !m_temp_frame->GetFrame()->interlaced_frame; -#endif - return std::move(m_temp_frame); -} - -void DecodeApi::Reset() { - m_hardware_context.reset(); - m_decoder_context.reset(); - m_decoder.reset(); -} - -bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { - this->Reset(); - m_decoder.emplace(codec); - m_decoder_context.emplace(*m_decoder); - - // Enable GPU decoding if requested. - if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { - m_hardware_context.emplace(); - m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); + void DecodeApi::Reset() { + m_hardware_context.reset(); + m_decoder_context.reset(); + m_decoder.reset(); } - // Open the decoder context. - if (!m_decoder_context->OpenContext(*m_decoder)) { + bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { + av_log_set_callback(FfmpegLog); + av_log_set_level(AV_LOG_DEBUG); + this->Reset(); - return false; + m_decoder.emplace(codec); + m_decoder_context.emplace(*m_decoder); + + // Enable GPU decoding if requested. + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { + m_hardware_context.emplace(); + m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); + } + + // Open the decoder context. + if (!m_decoder_context->OpenContext(*m_decoder)) { + this->Reset(); + return false; + } + + return true; } - return true; -} + bool DecodeApi::SendPacket(std::span packet_data) { + FFmpeg::Packet packet(packet_data); + return m_decoder_context->SendPacket(packet); + } -bool DecodeApi::SendPacket(std::span packet_data) { - FFmpeg::Packet packet(packet_data); - return m_decoder_context->SendPacket(packet); -} - -std::shared_ptr DecodeApi::ReceiveFrame() { - // Receive raw frame from decoder. - return m_decoder_context->ReceiveFrame(); -} + std::shared_ptr DecodeApi::ReceiveFrame() { + // Receive raw frame from decoder. 
+ return m_decoder_context->ReceiveFrame(); + } } // namespace FFmpeg diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h index 28f1742b7e..cc90ab1323 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h @@ -14,209 +14,206 @@ #include "video_core/host1x/nvdec_common.h" extern "C" { -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif + #if defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wconversion" + #endif -#include -#include -#include + #include + #include + #include -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic pop -#endif + #if defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic pop + #endif } namespace Tegra { -class MemoryManager; + class MemoryManager; } namespace FFmpeg { -class Packet; -class Frame; -class Decoder; -class HardwareContext; -class DecoderContext; -class DeinterlaceFilter; + class Packet; + class Frame; + class Decoder; + class HardwareContext; + class DecoderContext; + class DeinterlaceFilter; -// Wraps an AVPacket, a container for compressed bitstream data. -class Packet { -public: - YUZU_NON_COPYABLE(Packet); - YUZU_NON_MOVEABLE(Packet); + // Wraps an AVPacket, a container for compressed bitstream data. + class Packet { + public: + YUZU_NON_COPYABLE(Packet); + YUZU_NON_MOVEABLE(Packet); - explicit Packet(std::span data); - ~Packet(); + explicit Packet(std::span data); + ~Packet(); - AVPacket* GetPacket() const { - return m_packet; - } + AVPacket* GetPacket() const { + return m_packet; + } -private: - AVPacket* m_packet{}; -}; + private: + AVPacket* m_packet{}; + }; -// Wraps an AVFrame, a container for audio and video stream data. -class Frame { -public: - YUZU_NON_COPYABLE(Frame); - YUZU_NON_MOVEABLE(Frame); + // Wraps an AVFrame, a container for audio and video stream data. 
+ class Frame { + public: + YUZU_NON_COPYABLE(Frame); + YUZU_NON_MOVEABLE(Frame); - explicit Frame(); - ~Frame(); + explicit Frame(); + ~Frame(); - int GetWidth() const { - return m_frame->width; - } + int GetWidth() const { + return m_frame->width; + } - int GetHeight() const { - return m_frame->height; - } + int GetHeight() const { + return m_frame->height; + } - AVPixelFormat GetPixelFormat() const { - return static_cast(m_frame->format); - } + AVPixelFormat GetPixelFormat() const { + return static_cast(m_frame->format); + } - int GetStride(int plane) const { - return m_frame->linesize[plane]; - } + int GetStride(int plane) const { + return m_frame->linesize[plane]; + } - int* GetStrides() const { - return m_frame->linesize; - } + int* GetStrides() const { + return m_frame->linesize; + } - u8* GetData(int plane) const { - return m_frame->data[plane]; - } + u8* GetData(int plane) const { + return m_frame->data[plane]; + } - const u8* GetPlane(int plane) const { - return m_frame->data[plane]; - } + const u8* GetPlane(int plane) const { + return m_frame->data[plane]; + } - u8** GetPlanes() const { - return m_frame->data; - } + u8** GetPlanes() const { + return m_frame->data; + } - void SetFormat(int format) { - m_frame->format = format; - } + void SetFormat(int format) { + m_frame->format = format; + } - bool IsInterlaced() const { -#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 - return m_frame->flags & AV_FRAME_FLAG_INTERLACED; -#else - return m_frame->interlaced_frame; -#endif - } + bool IsInterlaced() const { + #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 + return m_frame->flags & AV_FRAME_FLAG_INTERLACED; + #else + return m_frame->interlaced_frame; + #endif + } - bool IsHardwareDecoded() const { - return m_frame->hw_frames_ctx != nullptr; - } + bool IsHardwareDecoded() const { + return m_frame->hw_frames_ctx != nullptr; + } - AVFrame* GetFrame() const { - return m_frame; - } + AVFrame* GetFrame() const { + return m_frame; + } -private: - AVFrame* m_frame{}; -}; + private: + AVFrame* m_frame{}; + }; -// Wraps an AVCodec, a type containing information about a codec. -class Decoder { -public: - YUZU_NON_COPYABLE(Decoder); - YUZU_NON_MOVEABLE(Decoder); + // Wraps an AVCodec, a type containing information about a codec. + class Decoder { + public: + YUZU_NON_COPYABLE(Decoder); + YUZU_NON_MOVEABLE(Decoder); - explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec); - ~Decoder() = default; + explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec); + ~Decoder() = default; - bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const; + bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const; - const AVCodec* GetCodec() const { - return m_codec; - } + const AVCodec* GetCodec() const { + return m_codec; + } -private: - const AVCodec* m_codec{}; -}; + private: + const AVCodec* m_codec{}; + }; -// Wraps AVBufferRef for an accelerated decoder. -class HardwareContext { -public: - YUZU_NON_COPYABLE(HardwareContext); - YUZU_NON_MOVEABLE(HardwareContext); + // Wraps AVBufferRef for an accelerated decoder. 
+ class HardwareContext { + public: + YUZU_NON_COPYABLE(HardwareContext); + YUZU_NON_MOVEABLE(HardwareContext); - static std::vector GetSupportedDeviceTypes(); + static std::vector GetSupportedDeviceTypes(); - explicit HardwareContext() = default; - ~HardwareContext(); + explicit HardwareContext() = default; + ~HardwareContext(); - bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder); + bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder); - AVBufferRef* GetBufferRef() const { - return m_gpu_decoder; - } + AVBufferRef* GetBufferRef() const { + return m_gpu_decoder; + } -private: - bool InitializeWithType(AVHWDeviceType type); + private: + bool InitializeWithType(AVHWDeviceType type); - AVBufferRef* m_gpu_decoder{}; -}; + AVBufferRef* m_gpu_decoder{}; + }; -// Wraps an AVCodecContext. -class DecoderContext { -public: - YUZU_NON_COPYABLE(DecoderContext); - YUZU_NON_MOVEABLE(DecoderContext); + // Wraps an AVCodecContext. + class DecoderContext { + public: + YUZU_NON_COPYABLE(DecoderContext); + YUZU_NON_MOVEABLE(DecoderContext); - explicit DecoderContext(const Decoder& decoder); - ~DecoderContext(); + explicit DecoderContext(const Decoder& decoder); + ~DecoderContext(); - void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); - bool OpenContext(const Decoder& decoder); - bool SendPacket(const Packet& packet); - std::shared_ptr ReceiveFrame(); + void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); + bool OpenContext(const Decoder& decoder); + bool SendPacket(const Packet& packet); + std::shared_ptr ReceiveFrame(); - AVCodecContext* GetCodecContext() const { - return m_codec_context; - } + AVCodecContext* GetCodecContext() const { + return m_codec_context; + } - bool UsingDecodeOrder() const { - return m_decode_order; - } + // Removed UsingDecodeOrder() as m_decode_order is no longer a direct member + // and its original purpose was tied to FF_THREAD_FRAME. -private: - const Decoder& m_decoder; - AVCodecContext* m_codec_context{}; - s32 m_got_frame{}; - std::shared_ptr m_temp_frame{}; - bool m_decode_order{}; -}; + private: + const Decoder& m_decoder; + AVCodecContext* m_codec_context{}; + s32 m_got_frame{}; // This member is no longer used, can be removed. + std::shared_ptr m_temp_frame{}; // This member is no longer used, can be removed. + // bool m_decode_order{}; // Removed due to removal of FF_THREAD_FRAME + }; -class DecodeApi { -public: - YUZU_NON_COPYABLE(DecodeApi); - YUZU_NON_MOVEABLE(DecodeApi); + class DecodeApi { + public: + YUZU_NON_COPYABLE(DecodeApi); + YUZU_NON_MOVEABLE(DecodeApi); - DecodeApi() = default; - ~DecodeApi() = default; + DecodeApi() = default; + ~DecodeApi() = default; - bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); - void Reset(); + bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); + void Reset(); - bool UsingDecodeOrder() const { - return m_decoder_context->UsingDecodeOrder(); - } + // Removed UsingDecodeOrder() as its underlying logic is removed. 
- bool SendPacket(std::span packet_data); - std::shared_ptr ReceiveFrame(); + bool SendPacket(std::span packet_data); + std::shared_ptr ReceiveFrame(); -private: - std::optional m_decoder; - std::optional m_decoder_context; - std::optional m_hardware_context; -}; + private: + std::optional m_decoder; + std::optional m_decoder_context; + std::optional m_hardware_context; + }; } // namespace FFmpeg diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 6de360d363..e258c215d5 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -40,29 +40,6 @@ public: m_decode_order.erase(fd); } - s32 VicFindNvdecFdFromOffset(u64 search_offset) { - std::scoped_lock l{m_mutex}; - // Vic does not know which nvdec is producing frames for it, so search all the fds here for - // the given offset. - for (auto& map : m_presentation_order) { - for (auto& [offset, _] : map.second) { - if (offset == search_offset) { - return map.first; - } - } - } - - for (auto& map : m_decode_order) { - for (auto& [offset, _] : map.second) { - if (offset == search_offset) { - return map.first; - } - } - } - - return -1; - } - void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr&& frame) { std::scoped_lock l{m_mutex}; auto map = m_presentation_order.find(fd); @@ -78,23 +55,29 @@ public: if (map == m_decode_order.end()) { return; } - map->second.insert_or_assign(offset, std::move(frame)); + map->second.emplace(offset, std::move(frame)); + m_frame_available_cv.notify_all(); } - std::shared_ptr GetFrame(s32 fd, u64 offset) { - if (fd == -1) { - return {}; - } + std::shared_ptr GetFrame(u64 offset) { + std::unique_lock l{m_mutex}; - std::scoped_lock l{m_mutex}; - auto present_map = m_presentation_order.find(fd); - if (present_map != m_presentation_order.end() && present_map->second.size() > 0) { - return GetPresentOrderLocked(fd); - } - - auto decode_map = m_decode_order.find(fd); - if (decode_map != m_decode_order.end() && decode_map->second.size() > 0) { - return GetDecodeOrderLocked(fd, offset); + // Wait for the frame to become available, with a timeout to prevent deadlocks. + if (m_frame_available_cv.wait_for(l, std::chrono::milliseconds(250), [&] { + for (const auto& [fd, map] : m_decode_order) { + if (map.contains(offset)) { + return true; + } + } + return false; + })) { + // Search all decoders for the frame with the matching offset. 
+ for (auto& [decoder_id, frame_map] : m_decode_order) { + auto node = frame_map.extract(offset); + if (!node.empty()) { + return std::move(node.mapped()); + } + } } return {}; @@ -128,6 +111,7 @@ private: std::mutex m_mutex{}; std::unordered_map>> m_presentation_order; std::unordered_map> m_decode_order; + std::condition_variable m_frame_available_cv; }; enum class ChannelType : u32 { diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 3ad56bb80c..7bf27f0c60 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -136,11 +136,8 @@ void Vic::Execute() { } auto luma_offset{regs.surfaces[i][SurfaceIndex::Current].luma.Address()}; - if (nvdec_id == -1) { - nvdec_id = frame_queue.VicFindNvdecFdFromOffset(luma_offset); - } - auto frame = frame_queue.GetFrame(nvdec_id, luma_offset); + auto frame = frame_queue.GetFrame(luma_offset); if (!frame.get()) { LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset 0x{:X}", id, luma_offset); continue; diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index e7600941ad..2bada64480 100644 --- a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h @@ -630,7 +630,6 @@ private: void WriteABGR(const OutputSurfaceConfig& output_surface_config); s32 id; - s32 nvdec_id{-1}; u32 syncpoint; VicRegisters regs{};
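Note on the decode loop: the reworked DecoderContext drops the private ffcodec()/decode-callback path and relies entirely on the public avcodec_send_packet()/avcodec_receive_frame() pair, draining every frame a packet produces and copying hardware surfaces back to system memory with av_hwframe_transfer_data(). Below is a minimal standalone sketch of that pattern; it only illustrates the public API the patch uses and is not code from the patch. The DrainFrames/on_frame names are invented for the example.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
}

#include <functional>

// Feed one packet, then drain every frame the decoder can currently produce.
// A single packet may yield zero, one, or several frames, so loop until EAGAIN/EOF.
static bool DrainFrames(AVCodecContext* ctx, const AVPacket* packet,
                        const std::function<void(AVFrame*)>& on_frame) {
    if (avcodec_send_packet(ctx, packet) < 0) {
        return false;
    }
    while (true) {
        AVFrame* frame = av_frame_alloc();
        const int ret = avcodec_receive_frame(ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_frame_free(&frame);
            return true; // No more frames available right now.
        }
        if (ret < 0) {
            av_frame_free(&frame);
            return false;
        }
        if (frame->hw_frames_ctx) {
            // Hardware surface: copy it back to system memory (NV12) before use.
            AVFrame* sw_frame = av_frame_alloc();
            sw_frame->format = AV_PIX_FMT_NV12;
            if (av_hwframe_transfer_data(sw_frame, frame, 0) == 0) {
                on_frame(sw_frame); // Caller must consume the frame before it is freed.
            }
            av_frame_free(&sw_frame);
        } else {
            on_frame(frame);
        }
        av_frame_free(&frame);
    }
}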
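Note on the software fallback: the "fall back gracefully" behaviour in the commit message hinges on the get_format callback (GetGpuFormat in ffmpeg.cpp). When the decoder does not offer the hardware pixel format for the current stream (e.g. VP8 on a VAAPI driver without VP8 support), the callback releases hw_device_ctx and picks a software format so decoding continues on the CPU instead of failing. A reduced sketch of that negotiation is below, assuming VAAPI as the hardware format and YUV420P as the software fallback; PickFormat is an illustrative name, not the patch's function.

extern "C" {
#include <libavcodec/avcodec.h>
}

// get_format callback: prefer the opaque VAAPI surface format, otherwise drop the
// hardware device context and continue with a plain software format.
static AVPixelFormat PickFormat(AVCodecContext* ctx, const AVPixelFormat* fmts) {
    for (const AVPixelFormat* p = fmts; *p != AV_PIX_FMT_NONE; ++p) {
        if (*p == AV_PIX_FMT_VAAPI) {
            return *p; // The hardware path is available for this stream.
        }
    }
    // Hardware format not offered (e.g. VP8 on a VAAPI driver without VP8 support):
    // release the device so FFmpeg decodes on the CPU instead of failing outright.
    av_buffer_unref(&ctx->hw_device_ctx);
    for (const AVPixelFormat* p = fmts; *p != AV_PIX_FMT_NONE; ++p) {
        if (*p == AV_PIX_FMT_YUV420P) {
            return *p;
        }
    }
    return fmts[0]; // Last resort: take whatever the decoder offers first.
}

// Installed before avcodec_open2(), e.g.: codec_context->get_format = PickFormat;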
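Note on frame lookup: with VicFindNvdecFdFromOffset() removed, FrameQueue::GetFrame() now blocks on a condition variable (up to 250 ms) until some decoder has pushed a frame for the requested offset. Below is a condensed sketch of that wait-and-extract pattern, reduced to a single map and placeholder types; FrameStore and its members are illustrative names, not the patch's.

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>

struct Frame {}; // Placeholder for the decoded-frame type.

struct FrameStore {
    std::mutex mutex;
    std::condition_variable available;
    std::map<std::uint64_t, std::shared_ptr<Frame>> frames; // keyed by surface offset

    // Producer side (decoder): publish a frame and wake any waiter.
    void Push(std::uint64_t offset, std::shared_ptr<Frame> frame) {
        {
            std::scoped_lock lock{mutex};
            frames.emplace(offset, std::move(frame));
        }
        available.notify_all();
    }

    // Consumer side (VIC): wait until a frame for `offset` arrives or 250 ms pass,
    // so the consumer no longer needs to know which decoder produced it.
    std::shared_ptr<Frame> Pop(std::uint64_t offset) {
        std::unique_lock lock{mutex};
        if (!available.wait_for(lock, std::chrono::milliseconds(250),
                                [&] { return frames.contains(offset); })) {
            return {}; // Timed out; treated as a missing frame.
        }
        auto node = frames.extract(offset);
        return std::move(node.mapped());
    }
};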