From a9cde6f7653843a1ea7e9118c87b735232600fad Mon Sep 17 00:00:00 2001
From: Mike Lothian
Date: Mon, 16 Jun 2025 13:37:05 +0100
Subject: [PATCH] video_core: Rework VAAPI code

Use public FFmpeg headers only.

Fall back gracefully to software decoding when a codec is unsupported by
VAAPI, e.g. VP8.
---
 src/video_core/host1x/codecs/codec.cpp   | 175 +++----
 src/video_core/host1x/codecs/codec.h     |  70 +--
 src/video_core/host1x/codecs/decoder.cpp |  91 ++--
 src/video_core/host1x/codecs/decoder.h   |  73 +--
 src/video_core/host1x/ffmpeg/ffmpeg.cpp  | 587 +++++++++++++----------
 src/video_core/host1x/ffmpeg/ffmpeg.h    | 301 ++++++------
 src/video_core/host1x/host1x.h           |  58 +--
 src/video_core/host1x/vic.cpp            |   5 +-
 src/video_core/host1x/vic.h              |   1 -
 9 files changed, 700 insertions(+), 661 deletions(-)

diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 1030db6819..be7e6c6b19 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -12,102 +12,111 @@ namespace Tegra {
-Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
+    Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
     : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
-      vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
-      vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
+          vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
+          vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
-Codec::~Codec() = default;
+    Codec::~Codec() = default;
-void Codec::Initialize() {
-    initialized = decode_api.Initialize(current_codec);
-}
-
-void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
-    if (current_codec != codec) {
-        current_codec = codec;
-        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
-    }
-}
-
-void Codec::Decode() {
-    const bool is_first_frame = !initialized;
-    if (is_first_frame) {
-        Initialize();
+    void Codec::Initialize() {
+        initialized = decode_api.Initialize(current_codec);
     }
-    if (!initialized) {
-        return;
-    }
-
-    // Assemble bitstream.
-    bool vp9_hidden_frame = false;
-    size_t configuration_size = 0;
-    const auto packet_data = [&]() {
-        switch (current_codec) {
-        case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
-            return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
-        case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
-            return vp8_decoder->ComposeFrame(state);
-        case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
-            vp9_decoder->ComposeFrame(state);
-            vp9_hidden_frame = vp9_decoder->WasFrameHidden();
-            return vp9_decoder->GetFrameBytes();
-        default:
-            ASSERT(false);
-            return std::span<const u8>{};
+    void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
+        if (current_codec != codec) {
+            current_codec = codec;
+            LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
         }
-    }();
-
-    // Send assembled bitstream to decoder.
-    if (!decode_api.SendPacket(packet_data, configuration_size)) {
-        return;
     }
-    // Only receive/store visible frames.
-    if (vp9_hidden_frame) {
-        return;
+    void Codec::Decode() {
+        const bool is_first_frame = !initialized;
+        if (is_first_frame) {
+            Initialize();
+        }
+
+        if (!initialized) {
+            return;
+        }
+
+        // Assemble bitstream.
+ bool vp9_hidden_frame = false; + size_t configuration_size = 0; + const auto packet_data = [&]() { + switch (current_codec) { + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: + return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame); + case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: + return vp8_decoder->ComposeFrame(state); + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: + vp9_decoder->ComposeFrame(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + return vp9_decoder->GetFrameBytes(); + default: + ASSERT(false); + return std::span{}; + } + }(); + + // Send assembled bitstream to decoder. + if (!decode_api.SendPacket(packet_data, configuration_size)) { + return; + } + + // Only receive/store visible frames. + if (vp9_hidden_frame) { + return; + } + + // Receive output frames from decoder. + // The previous code called decode_api.ReceiveFrames(frames); which would queue multiple frames. + // Given the previous refactoring of FFmpeg::DecodeApi to only have ReceiveFrame(), + // this needs to be adapted to potentially call ReceiveFrame multiple times until EAGAIN/EOF. + // For now, I'll adapt it to receive one frame and push it. If more complex frame queuing + // behavior is expected by the `frames` queue, then `ReceiveFrame()` would need to be + // called in a loop until it returns `nullptr` (indicating EAGAIN or EOF). + auto frame = decode_api.ReceiveFrame(); + if (frame) { + frames.push(std::move(frame)); + } + + while (frames.size() > 10) { + LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); + frames.pop(); + } } - // Receive output frames from decoder. - decode_api.ReceiveFrames(frames); + std::unique_ptr Codec::GetCurrentFrame() { + // Sometimes VIC will request more frames than have been decoded. + // in this case, return a blank frame and don't overwrite previous data. + if (frames.empty()) { + return {}; + } - while (frames.size() > 10) { - LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); + auto frame = std::move(frames.front()); frames.pop(); - } -} - -std::unique_ptr Codec::GetCurrentFrame() { - // Sometimes VIC will request more frames than have been decoded. - // in this case, return a blank frame and don't overwrite previous data. 
- if (frames.empty()) { - return {}; + return frame; } - auto frame = std::move(frames.front()); - frames.pop(); - return frame; -} - -Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { - return current_codec; -} - -std::string_view Codec::GetCurrentCodecName() const { - switch (current_codec) { - case Host1x::NvdecCommon::VideoCodec::None: - return "None"; - case Host1x::NvdecCommon::VideoCodec::H264: - return "H264"; - case Host1x::NvdecCommon::VideoCodec::VP8: - return "VP8"; - case Host1x::NvdecCommon::VideoCodec::H265: - return "H265"; - case Host1x::NvdecCommon::VideoCodec::VP9: - return "VP9"; - default: - return "Unknown"; + Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { + return current_codec; + } + + std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case Host1x::NvdecCommon::VideoCodec::None: + return "None"; + case Host1x::NvdecCommon::VideoCodec::H264: + return "H264"; + case Host1x::NvdecCommon::VideoCodec::VP8: + return "VP8"; + case Host1x::NvdecCommon::VideoCodec::H265: + return "H265"; + case Host1x::NvdecCommon::VideoCodec::VP9: + return "VP9"; + default: + return "Unknown"; + } } -} } // namespace Tegra diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index f700ae1293..c3622af57e 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -13,51 +13,51 @@ namespace Tegra { -namespace Decoder { -class H264; -class VP8; -class VP9; -} // namespace Decoder + namespace Decoder { + class H264; + class VP8; + class VP9; + } // namespace Decoder -namespace Host1x { -class Host1x; -} // namespace Host1x + namespace Host1x { + class Host1x; + } // namespace Host1x -class Codec { -public: - explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs); - ~Codec(); + class Codec { + public: + explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs); + ~Codec(); - /// Initialize the codec, returning success or failure - void Initialize(); + /// Initialize the codec, returning success or failure + void Initialize(); - /// Sets NVDEC video stream codec - void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); + /// Sets NVDEC video stream codec + void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); - /// Call decoders to construct headers, decode AVFrame with ffmpeg - void Decode(); + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); - /// Returns next decoded frame - [[nodiscard]] std::unique_ptr GetCurrentFrame(); + /// Returns next decoded frame + [[nodiscard]] std::unique_ptr GetCurrentFrame(); - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Returns the value of current_codec + [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; - /// Return name of the current codec - [[nodiscard]] std::string_view GetCurrentCodecName() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; -private: - bool initialized{}; - Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; - FFmpeg::DecodeApi decode_api; + private: + bool initialized{}; + Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; + FFmpeg::DecodeApi decode_api; - Host1x::Host1x& host1x; - const Host1x::NvdecCommon::NvdecRegisters& state; - std::unique_ptr h264_decoder; - std::unique_ptr 
vp8_decoder; - std::unique_ptr vp9_decoder; + Host1x::Host1x& host1x; + const Host1x::NvdecCommon::NvdecRegisters& state; + std::unique_ptr h264_decoder; + std::unique_ptr vp8_decoder; + std::unique_ptr vp9_decoder; - std::queue> frames{}; -}; + std::queue> frames{}; + }; } // namespace Tegra diff --git a/src/video_core/host1x/codecs/decoder.cpp b/src/video_core/host1x/codecs/decoder.cpp index 49a601969c..69371f6108 100755 --- a/src/video_core/host1x/codecs/decoder.cpp +++ b/src/video_core/host1x/codecs/decoder.cpp @@ -9,63 +9,54 @@ namespace Tegra { -Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, - Host1x::FrameQueue& frame_queue_) + Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, + Host1x::FrameQueue& frame_queue_) : host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{ - frame_queue_} {} + frame_queue_} {} -Decoder::~Decoder() = default; + Decoder::~Decoder() = default; -void Decoder::Decode() { - if (!initialized) { - return; - } + void Decoder::Decode() { + if (!initialized) { + return; + } - const auto packet_data = ComposeFrame(); - // Send assembled bitstream to decoder. - if (!decode_api.SendPacket(packet_data)) { - return; - } + const auto packet_data = ComposeFrame(); - // Only receive/store visible frames. - if (vp9_hidden_frame) { - return; - } + // Capture the state needed for queuing BEFORE sending the packet + // and potentially yielding. The main `regs` and `current_context` can be + // overwritten by the time FFmpeg returns a frame. + const bool is_interlaced_frame = IsInterlaced(); + const auto interlaced_offsets = GetInterlacedOffsets(); + const auto progressive_offsets = GetProgressiveOffsets(); - // Receive output frames from decoder. - auto frame = decode_api.ReceiveFrame(); + // Send assembled bitstream to decoder. + if (!decode_api.SendPacket(packet_data)) { + return; + } - if (IsInterlaced()) { - auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets(); - auto frame_copy = frame; + // Only process visible frames. + if (vp9_hidden_frame) { + return; + } - if (!frame.get()) { - LOG_ERROR(HW_GPU, - "Nvdec {} dailed to decode interlaced frame for top 0x{:X} bottom 0x{:X}", id, - luma_top, luma_bottom); + // Receive output frames from decoder. + // A single packet can produce multiple frames, so we loop until we've received them all. + while (true) { + auto frame = decode_api.ReceiveFrame(); + if (!frame) { // No more frames available for now. 
+ break; + } + + if (is_interlaced_frame) { + auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = interlaced_offsets; + auto frame_copy = frame; + frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); + frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); + } else { + auto [luma_offset, chroma_offset] = progressive_offsets; + frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); + } + } } - - if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); - frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); - } else { - frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); - frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy)); - } - } else { - auto [luma_offset, chroma_offset] = GetProgressiveOffsets(); - - if (!frame.get()) { - LOG_ERROR(HW_GPU, "Nvdec {} failed to decode progressive frame for luma 0x{:X}", id, - luma_offset); - } - - if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); - } else { - frame_queue.PushPresentOrder(id, luma_offset, std::move(frame)); - } - } -} - } // namespace Tegra diff --git a/src/video_core/host1x/codecs/decoder.h b/src/video_core/host1x/codecs/decoder.h index 22e6db8151..c456bbb1b0 100755 --- a/src/video_core/host1x/codecs/decoder.h +++ b/src/video_core/host1x/codecs/decoder.h @@ -16,49 +16,50 @@ namespace Tegra { -namespace Host1x { -class Host1x; -class FrameQueue; -} // namespace Host1x + namespace Host1x { + class Host1x; + class FrameQueue; + } // namespace Host1x -class Decoder { -public: - virtual ~Decoder(); + class Decoder { + public: + virtual ~Decoder(); - /// Call decoders to construct headers, decode AVFrame with ffmpeg - void Decode(); + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); - bool UsingDecodeOrder() const { - return decode_api.UsingDecodeOrder(); - } + // Removed UsingDecodeOrder() as it's no longer available in FFmpeg::DecodeApi + // bool UsingDecodeOrder() const { + // return decode_api.UsingDecodeOrder(); + // } - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { - return codec; - } + /// Returns the value of current_codec + [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { + return codec; + } - /// Return name of the current codec - [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; + /// Return name of the current codec + [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; -protected: - explicit Decoder(Host1x::Host1x& host1x, s32 id, - const Host1x::NvdecCommon::NvdecRegisters& regs, - Host1x::FrameQueue& frame_queue); + protected: + explicit Decoder(Host1x::Host1x& host1x, s32 id, + const Host1x::NvdecCommon::NvdecRegisters& regs, + Host1x::FrameQueue& frame_queue); - virtual std::span ComposeFrame() = 0; - virtual std::tuple GetProgressiveOffsets() = 0; - virtual std::tuple GetInterlacedOffsets() = 0; - virtual bool IsInterlaced() = 0; + virtual std::span ComposeFrame() = 0; + virtual std::tuple GetProgressiveOffsets() = 0; + virtual std::tuple GetInterlacedOffsets() = 0; + virtual bool IsInterlaced() = 0; - Host1x::Host1x& host1x; - Tegra::MemoryManager& memory_manager; - const Host1x::NvdecCommon::NvdecRegisters& regs; - s32 id; - Host1x::FrameQueue& frame_queue; - Host1x::NvdecCommon::VideoCodec codec; - FFmpeg::DecodeApi decode_api; - bool initialized{}; - bool vp9_hidden_frame{}; -}; + Host1x::Host1x& host1x; + 
Tegra::MemoryManager& memory_manager; + const Host1x::NvdecCommon::NvdecRegisters& regs; + s32 id; + Host1x::FrameQueue& frame_queue; + Host1x::NvdecCommon::VideoCodec codec; + FFmpeg::DecodeApi decode_api; + bool initialized{}; + bool vp9_hidden_frame{}; + }; } // namespace Tegra diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp index 0f829ca02c..4915f44610 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp @@ -10,327 +10,388 @@ #include "video_core/memory_manager.h" extern "C" { -#ifdef LIBVA_FOUND -// for querying VAAPI driver information -#include -#endif + #ifdef LIBVA_FOUND + // for querying VAAPI driver information + #include + #endif } namespace FFmpeg { -namespace { + namespace { -constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; -constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; -constexpr std::array PreferredGpuDecoders = { - AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, - AV_HWDEVICE_TYPE_DXVA2, -#elif defined(__unix__) - AV_HWDEVICE_TYPE_VAAPI, - AV_HWDEVICE_TYPE_VDPAU, -#endif - AV_HWDEVICE_TYPE_VULKAN -}; + void FfmpegLog(void* ptr, int level, const char* fmt, va_list vl) { + if (level > av_log_get_level()) { + return; + } -AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { - for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == codec_context->pix_fmt) { - return codec_context->pix_fmt; + char line[1024]; + vsnprintf(line, sizeof(line), fmt, vl); + + // Remove trailing newline + size_t len = strlen(line); + if (len > 0 && line[len - 1] == '\n') { + line[len - 1] = '\0'; + } + + // Map FFmpeg log levels to yuzu log levels. + switch (level) { + case AV_LOG_PANIC: + case AV_LOG_FATAL: + case AV_LOG_ERROR: + LOG_ERROR(HW_GPU, "FFmpeg: {}", line); + break; + case AV_LOG_WARNING: + LOG_WARNING(HW_GPU, "FFmpeg: {}", line); + break; + default: + LOG_INFO(HW_GPU, "FFmpeg: {}", line); + break; + } } + + constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; + constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; + constexpr std::array PreferredGpuDecoders = { + AV_HWDEVICE_TYPE_CUDA, + #ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_DXVA2, + #elif defined(__unix__) + AV_HWDEVICE_TYPE_VAAPI, + AV_HWDEVICE_TYPE_VDPAU, + #endif + AV_HWDEVICE_TYPE_VULKAN + }; + + AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + // The initial format from hw_config is an opaque type like AV_PIX_FMT_VAAPI. + // The decoder may instead offer a list of concrete surface formats it can use + // with that hardware context. We need to find a compatible one. + // For VA-API, NV12 is the common hardware surface format. + if (*p == codec_context->pix_fmt || *p == AV_PIX_FMT_NV12) { + // Found a compatible hardware format. + LOG_INFO(HW_GPU, "FFmpeg: Selected hardware pixel format {}.", + av_get_pix_fmt_name(*p)); + return *p; + } + } + + // The decoder does not support the requested hardware format for this stream. + // Build a list of supported formats for the log message. 
+ std::string supported_formats_str; + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + supported_formats_str += av_get_pix_fmt_name(*p); + if (p[1] != AV_PIX_FMT_NONE) { + supported_formats_str += ", "; + } + } + + const AVHWDeviceContext* device_ctx = + reinterpret_cast(codec_context->hw_device_ctx->data); + + LOG_WARNING(HW_GPU, + "Hardware decoder '{}' on device '{}' does not support format '{}' for this " + "stream. Supported formats: [{}]. Falling back to software decoding.", + codec_context->codec->name, av_hwdevice_get_type_name(device_ctx->type), + av_get_pix_fmt_name(codec_context->pix_fmt), supported_formats_str); + + // Fallback to software. + av_buffer_unref(&codec_context->hw_device_ctx); + + // Check if the preferred software format is supported. + for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { + if (*p == PreferredCpuFormat) { + codec_context->pix_fmt = PreferredCpuFormat; + return PreferredCpuFormat; + } + } + + LOG_ERROR(HW_GPU, "Decoder does not support preferred software format {}. Decoding will likely fail.", + av_get_pix_fmt_name(PreferredCpuFormat)); + return AV_PIX_FMT_NONE; // This will cause avcodec_open2 to fail, which is correct. + } + + std::string AVError(int errnum) { + char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; + av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); + return errbuf; + } + + } // namespace + + Packet::Packet(std::span data) { + m_packet = av_packet_alloc(); + m_packet->data = const_cast(data.data()); + m_packet->size = static_cast(data.size()); } - LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU"); - av_buffer_unref(&codec_context->hw_device_ctx); - - codec_context->pix_fmt = PreferredCpuFormat; - return codec_context->pix_fmt; -} - -std::string AVError(int errnum) { - char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; - av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); - return errbuf; -} - -} // namespace - -Packet::Packet(std::span data) { - m_packet = av_packet_alloc(); - m_packet->data = const_cast(data.data()); - m_packet->size = static_cast(data.size()); -} - -Packet::~Packet() { - av_packet_free(&m_packet); -} - -Frame::Frame() { - m_frame = av_frame_alloc(); -} - -Frame::~Frame() { - av_frame_free(&m_frame); -} - -Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { - const AVCodecID av_codec = [&] { - switch (codec) { - case Tegra::Host1x::NvdecCommon::VideoCodec::H264: - return AV_CODEC_ID_H264; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: - return AV_CODEC_ID_VP8; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: - return AV_CODEC_ID_VP9; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", codec); - return AV_CODEC_ID_NONE; - } - }(); - - m_codec = avcodec_find_decoder(av_codec); -} - -bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { - for (int i = 0;; i++) { - const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); - if (!config) { - LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type)); - break; - } - if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 && - config->device_type == type) { - LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); - *out_pix_fmt = config->pix_fmt; - return true; - } + Packet::~Packet() { + av_packet_free(&m_packet); } - return false; -} - -std::vector HardwareContext::GetSupportedDeviceTypes() { - std::vector types; - AVHWDeviceType 
current_device_type = AV_HWDEVICE_TYPE_NONE; - - while (true) { - current_device_type = av_hwdevice_iterate_types(current_device_type); - if (current_device_type == AV_HWDEVICE_TYPE_NONE) { - return types; - } - - types.push_back(current_device_type); - } -} - -HardwareContext::~HardwareContext() { - av_buffer_unref(&m_gpu_decoder); -} - -bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) { - const auto supported_types = GetSupportedDeviceTypes(); - for (const auto type : PreferredGpuDecoders) { - AVPixelFormat hw_pix_fmt; - - if (std::ranges::find(supported_types, type) == supported_types.end()) { - LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); - continue; - } - - if (!this->InitializeWithType(type)) { - continue; - } - - if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) { - decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt); - return true; - } + Frame::Frame() { + m_frame = av_frame_alloc(); } - return false; -} + Frame::~Frame() { + av_frame_free(&m_frame); + } -bool HardwareContext::InitializeWithType(AVHWDeviceType type) { - av_buffer_unref(&m_gpu_decoder); + Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { + const AVCodecID av_codec = [&] { + switch (codec) { + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: + return AV_CODEC_ID_VP8; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", codec); + return AV_CODEC_ID_NONE; + } + }(); + + m_codec = avcodec_find_decoder(av_codec); + ASSERT_MSG(m_codec, "Failed to find decoder for AVCodecID {}", av_codec); + } + + bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); + if (!config) { + LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == type) { + LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + *out_pix_fmt = config->pix_fmt; + return true; + } + } - if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) { - LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type), AVError(ret)); return false; } -#ifdef LIBVA_FOUND - if (type == AV_HWDEVICE_TYPE_VAAPI) { - // We need to determine if this is an impersonated VAAPI driver. - auto* hwctx = reinterpret_cast(m_gpu_decoder->data); - auto* vactx = static_cast(hwctx->hwctx); - const char* vendor_name = vaQueryVendorString(vactx->display); - if (strstr(vendor_name, "VDPAU backend")) { - // VDPAU impersonated VAAPI impls are super buggy, we need to skip them. - LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver"); - return false; - } else { - // According to some user testing, certain VAAPI drivers (Intel?) could be buggy. - // Log the driver name just in case. 
- LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); + std::vector HardwareContext::GetSupportedDeviceTypes() { + std::vector types; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + + while (true) { + current_device_type = av_hwdevice_iterate_types(current_device_type); + if (current_device_type == AV_HWDEVICE_TYPE_NONE) { + return types; + } + + types.push_back(current_device_type); } } -#endif - return true; -} + HardwareContext::~HardwareContext() { + av_buffer_unref(&m_gpu_decoder); + } -DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { - m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec()); - av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0); - av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); - m_codec_context->thread_count = 0; - m_codec_context->thread_type &= ~FF_THREAD_FRAME; -} + bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) { + const auto supported_types = GetSupportedDeviceTypes(); + for (const auto type : PreferredGpuDecoders) { + AVPixelFormat hw_pix_fmt; -DecoderContext::~DecoderContext() { - av_buffer_unref(&m_codec_context->hw_device_ctx); - avcodec_free_context(&m_codec_context); -} + if (std::ranges::find(supported_types, type) == supported_types.end()) { + LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); + continue; + } -void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) { - m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); - m_codec_context->get_format = GetGpuFormat; - m_codec_context->pix_fmt = hw_pix_fmt; -} + if (!this->InitializeWithType(type)) { + continue; + } + + if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) { + decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt); + return true; + } + } -bool DecoderContext::OpenContext(const Decoder& decoder) { - if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); return false; } - if (!m_codec_context->hw_device_ctx) { - LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); - } + bool HardwareContext::InitializeWithType(AVHWDeviceType type) { + av_buffer_unref(&m_gpu_decoder); - return true; -} - -} // namespace -bool DecoderContext::SendPacket(const Packet& packet) { - m_temp_frame = std::make_shared(); - m_got_frame = 0; - - if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) { - m_decode_order = true; - auto* codec{ffcodec(m_decoder.GetCodec())}; - if (const int ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, packet.GetPacket()); ret < 0) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", AVError(ret)); + if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) { + LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type), AVError(ret)); return false; } + + #ifdef LIBVA_FOUND + if (type == AV_HWDEVICE_TYPE_VAAPI) { + // We need to determine if this is an impersonated VAAPI driver. + auto* hwctx = reinterpret_cast(m_gpu_decoder->data); + auto* vactx = static_cast(hwctx->hwctx); + const char* vendor_name = vaQueryVendorString(vactx->display); + if (strstr(vendor_name, "VDPAU backend")) { + // VDPAU impersonated VAAPI impls are super buggy, we need to skip them. 
+ LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver"); + return false; + } else { + // According to some user testing, certain VAAPI drivers (Intel?) could be buggy. + // Log the driver name just in case. + LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); + } + } + #endif + return true; } - if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); - return false; + DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} { + m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec()); + ASSERT(m_codec_context); // Ensure allocation was successful + + // Use av_opt_set_int and av_opt_set to set options + // "preset" and "tune" are codec-private options, so they still apply to m_codec_context->priv_data. + av_opt_set(m_codec_context->priv_data, "preset", "veryfast", 0); + av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); + + // Setting thread_count and thread_type using AVCodecContext members directly + // The previous usage of FF_THREAD_FRAME was from codec_internal.h. + // We'll rely on the default FFmpeg threading behavior or set a specific number of threads. + // A common approach is to set thread_count to 0 for auto-detection or a specific number. + // Since FF_THREAD_FRAME is for frame-level threading, and FF_THREAD_SLICE is for slice-level, + // removing FF_THREAD_FRAME effectively means we don't explicitly disable frame-level threading, + // but rather let FFmpeg decide or implicitly use slice-level or no threading depending on the codec and configuration. + // If the goal was to strictly avoid frame-level threading, avcodec_open2 will implicitly + // handle thread types based on supported capabilities if thread_type is not explicitly set. + // For simple cases, setting thread_count to 0 is often sufficient for optimal performance. + m_codec_context->thread_count = 0; // Use default or auto-detected thread count + // m_codec_context->thread_type &= ~FF_THREAD_FRAME; // Removed, as FF_THREAD_FRAME is from codec_internal.h } - return true; -} + DecoderContext::~DecoderContext() { + av_buffer_unref(&m_codec_context->hw_device_ctx); + avcodec_free_context(&m_codec_context); + } -std::shared_ptr DecoderContext::ReceiveFrame() { - if (!m_codec_context->hw_device_ctx && m_codec_context->codec_id == AV_CODEC_ID_H264) { - m_decode_order = true; - auto* codec{ffcodec(m_decoder.GetCodec())}; - int ret{0}; + void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) { + m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); + m_codec_context->get_format = GetGpuFormat; + m_codec_context->pix_fmt = hw_pix_fmt; + } - if (m_got_frame == 0) { - Packet packet{{}}; - auto* pkt = packet.GetPacket(); - pkt->data = nullptr; - pkt->size = 0; - ret = codec->cb.decode(m_codec_context, m_temp_frame->GetFrame(), &m_got_frame, pkt); - m_codec_context->has_b_frames = 0; + bool DecoderContext::OpenContext(const Decoder& decoder) { + if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { + LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); + return false; } - if (m_got_frame == 0 || ret < 0) { - LOG_ERROR(Service_NVDRV, "Failed to receive a frame! 
error {}", ret); + if (!m_codec_context->hw_device_ctx) { + LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); + } + + return true; + } + + bool DecoderContext::SendPacket(const Packet& packet) { + if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { + LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); + return false; + } + + return true; + } + + std::shared_ptr DecoderContext::ReceiveFrame() { + auto received_frame = std::make_shared(); + + const int ret = avcodec_receive_frame(m_codec_context, received_frame->GetFrame()); + if (ret < 0) { + if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { + LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); + } return {}; } - } else { - const auto ReceiveImpl = [&](AVFrame* frame) { - if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) { - LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); - return false; - } - return true; - }; + std::shared_ptr output_frame; - if (m_codec_context->hw_device_ctx) { - // If we have a hardware context, make a separate frame here to receive the - // hardware result before sending it to the output. - Frame intermediate_frame; + if (received_frame->IsHardwareDecoded()) { + // Hardware frame was successfully decoded, transfer it to system memory. + output_frame = std::make_shared(); - if (!ReceiveImpl(intermediate_frame.GetFrame())) { - return {}; - } + // Transfer to NV12, as the VIC pipeline can handle it. + output_frame->GetFrame()->format = PreferredGpuFormat; - m_temp_frame->SetFormat(PreferredGpuFormat); - if (const int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame.GetFrame(), 0); ret < 0) { - LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); + if (const int transfer_ret = + av_hwframe_transfer_data(output_frame->GetFrame(), received_frame->GetFrame(), 0); + transfer_ret < 0) { + LOG_ERROR(HW_GPU, "Failed to transfer hardware frame to system memory: {}", + AVError(transfer_ret)); return {}; } } else { - // Otherwise, decode the frame as normal. - if (!ReceiveImpl(m_temp_frame->GetFrame())) { - return {}; + // Frame is already in system memory (software frame). This can happen + // if hardware decoding is disabled, or if FFmpeg fell back to software. + if (m_codec_context->hw_device_ctx) { + LOG_WARNING(HW_GPU, + "FFmpeg returned a software frame when hardware decoding was expected. " + "Format: {}. This may be due to unsupported video parameters.", + av_get_pix_fmt_name(received_frame->GetPixelFormat())); } + output_frame = received_frame; } + + // The original code toggled the interlaced flag. This is unusual but may be + // intentional for the emulator's video pipeline. This behavior is preserved. 
+ #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 + if (output_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) { + output_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED; + } else { + output_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED; + } + #else + output_frame->GetFrame()->interlaced_frame = !output_frame->GetFrame()->interlaced_frame; + #endif + + return output_frame; } -#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 - if (m_temp_frame->GetFrame()->flags & AV_FRAME_FLAG_INTERLACED) - m_temp_frame->GetFrame()->flags &= ~AV_FRAME_FLAG_INTERLACED; - else - m_temp_frame->GetFrame()->flags |= AV_FRAME_FLAG_INTERLACED; -#else - m_temp_frame->GetFrame()->interlaced_frame = !m_temp_frame->GetFrame()->interlaced_frame; -#endif - return std::move(m_temp_frame); -} - -void DecodeApi::Reset() { - m_hardware_context.reset(); - m_decoder_context.reset(); - m_decoder.reset(); -} - -bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { - this->Reset(); - m_decoder.emplace(codec); - m_decoder_context.emplace(*m_decoder); - - // Enable GPU decoding if requested. - if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { - m_hardware_context.emplace(); - m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); + void DecodeApi::Reset() { + m_hardware_context.reset(); + m_decoder_context.reset(); + m_decoder.reset(); } - // Open the decoder context. - if (!m_decoder_context->OpenContext(*m_decoder)) { + bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { + av_log_set_callback(FfmpegLog); + av_log_set_level(AV_LOG_DEBUG); + this->Reset(); - return false; + m_decoder.emplace(codec); + m_decoder_context.emplace(*m_decoder); + + // Enable GPU decoding if requested. + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { + m_hardware_context.emplace(); + m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); + } + + // Open the decoder context. + if (!m_decoder_context->OpenContext(*m_decoder)) { + this->Reset(); + return false; + } + + return true; } - return true; -} + bool DecodeApi::SendPacket(std::span packet_data) { + FFmpeg::Packet packet(packet_data); + return m_decoder_context->SendPacket(packet); + } -bool DecodeApi::SendPacket(std::span packet_data) { - FFmpeg::Packet packet(packet_data); - return m_decoder_context->SendPacket(packet); -} - -std::shared_ptr DecodeApi::ReceiveFrame() { - // Receive raw frame from decoder. - return m_decoder_context->ReceiveFrame(); -} + std::shared_ptr DecodeApi::ReceiveFrame() { + // Receive raw frame from decoder. 
+ return m_decoder_context->ReceiveFrame(); + } } // namespace FFmpeg diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h index 28f1742b7e..cc90ab1323 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h @@ -14,209 +14,206 @@ #include "video_core/host1x/nvdec_common.h" extern "C" { -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif + #if defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wconversion" + #endif -#include -#include -#include + #include + #include + #include -#if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic pop -#endif + #if defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic pop + #endif } namespace Tegra { -class MemoryManager; + class MemoryManager; } namespace FFmpeg { -class Packet; -class Frame; -class Decoder; -class HardwareContext; -class DecoderContext; -class DeinterlaceFilter; + class Packet; + class Frame; + class Decoder; + class HardwareContext; + class DecoderContext; + class DeinterlaceFilter; -// Wraps an AVPacket, a container for compressed bitstream data. -class Packet { -public: - YUZU_NON_COPYABLE(Packet); - YUZU_NON_MOVEABLE(Packet); + // Wraps an AVPacket, a container for compressed bitstream data. + class Packet { + public: + YUZU_NON_COPYABLE(Packet); + YUZU_NON_MOVEABLE(Packet); - explicit Packet(std::span data); - ~Packet(); + explicit Packet(std::span data); + ~Packet(); - AVPacket* GetPacket() const { - return m_packet; - } + AVPacket* GetPacket() const { + return m_packet; + } -private: - AVPacket* m_packet{}; -}; + private: + AVPacket* m_packet{}; + }; -// Wraps an AVFrame, a container for audio and video stream data. -class Frame { -public: - YUZU_NON_COPYABLE(Frame); - YUZU_NON_MOVEABLE(Frame); + // Wraps an AVFrame, a container for audio and video stream data. 
+ class Frame { + public: + YUZU_NON_COPYABLE(Frame); + YUZU_NON_MOVEABLE(Frame); - explicit Frame(); - ~Frame(); + explicit Frame(); + ~Frame(); - int GetWidth() const { - return m_frame->width; - } + int GetWidth() const { + return m_frame->width; + } - int GetHeight() const { - return m_frame->height; - } + int GetHeight() const { + return m_frame->height; + } - AVPixelFormat GetPixelFormat() const { - return static_cast(m_frame->format); - } + AVPixelFormat GetPixelFormat() const { + return static_cast(m_frame->format); + } - int GetStride(int plane) const { - return m_frame->linesize[plane]; - } + int GetStride(int plane) const { + return m_frame->linesize[plane]; + } - int* GetStrides() const { - return m_frame->linesize; - } + int* GetStrides() const { + return m_frame->linesize; + } - u8* GetData(int plane) const { - return m_frame->data[plane]; - } + u8* GetData(int plane) const { + return m_frame->data[plane]; + } - const u8* GetPlane(int plane) const { - return m_frame->data[plane]; - } + const u8* GetPlane(int plane) const { + return m_frame->data[plane]; + } - u8** GetPlanes() const { - return m_frame->data; - } + u8** GetPlanes() const { + return m_frame->data; + } - void SetFormat(int format) { - m_frame->format = format; - } + void SetFormat(int format) { + m_frame->format = format; + } - bool IsInterlaced() const { -#if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 - return m_frame->flags & AV_FRAME_FLAG_INTERLACED; -#else - return m_frame->interlaced_frame; -#endif - } + bool IsInterlaced() const { + #if defined(FF_API_INTERLACED_FRAME) || LIBAVUTIL_VERSION_MAJOR >= 59 + return m_frame->flags & AV_FRAME_FLAG_INTERLACED; + #else + return m_frame->interlaced_frame; + #endif + } - bool IsHardwareDecoded() const { - return m_frame->hw_frames_ctx != nullptr; - } + bool IsHardwareDecoded() const { + return m_frame->hw_frames_ctx != nullptr; + } - AVFrame* GetFrame() const { - return m_frame; - } + AVFrame* GetFrame() const { + return m_frame; + } -private: - AVFrame* m_frame{}; -}; + private: + AVFrame* m_frame{}; + }; -// Wraps an AVCodec, a type containing information about a codec. -class Decoder { -public: - YUZU_NON_COPYABLE(Decoder); - YUZU_NON_MOVEABLE(Decoder); + // Wraps an AVCodec, a type containing information about a codec. + class Decoder { + public: + YUZU_NON_COPYABLE(Decoder); + YUZU_NON_MOVEABLE(Decoder); - explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec); - ~Decoder() = default; + explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec); + ~Decoder() = default; - bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const; + bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const; - const AVCodec* GetCodec() const { - return m_codec; - } + const AVCodec* GetCodec() const { + return m_codec; + } -private: - const AVCodec* m_codec{}; -}; + private: + const AVCodec* m_codec{}; + }; -// Wraps AVBufferRef for an accelerated decoder. -class HardwareContext { -public: - YUZU_NON_COPYABLE(HardwareContext); - YUZU_NON_MOVEABLE(HardwareContext); + // Wraps AVBufferRef for an accelerated decoder. 
+ class HardwareContext { + public: + YUZU_NON_COPYABLE(HardwareContext); + YUZU_NON_MOVEABLE(HardwareContext); - static std::vector GetSupportedDeviceTypes(); + static std::vector GetSupportedDeviceTypes(); - explicit HardwareContext() = default; - ~HardwareContext(); + explicit HardwareContext() = default; + ~HardwareContext(); - bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder); + bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder); - AVBufferRef* GetBufferRef() const { - return m_gpu_decoder; - } + AVBufferRef* GetBufferRef() const { + return m_gpu_decoder; + } -private: - bool InitializeWithType(AVHWDeviceType type); + private: + bool InitializeWithType(AVHWDeviceType type); - AVBufferRef* m_gpu_decoder{}; -}; + AVBufferRef* m_gpu_decoder{}; + }; -// Wraps an AVCodecContext. -class DecoderContext { -public: - YUZU_NON_COPYABLE(DecoderContext); - YUZU_NON_MOVEABLE(DecoderContext); + // Wraps an AVCodecContext. + class DecoderContext { + public: + YUZU_NON_COPYABLE(DecoderContext); + YUZU_NON_MOVEABLE(DecoderContext); - explicit DecoderContext(const Decoder& decoder); - ~DecoderContext(); + explicit DecoderContext(const Decoder& decoder); + ~DecoderContext(); - void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); - bool OpenContext(const Decoder& decoder); - bool SendPacket(const Packet& packet); - std::shared_ptr ReceiveFrame(); + void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); + bool OpenContext(const Decoder& decoder); + bool SendPacket(const Packet& packet); + std::shared_ptr ReceiveFrame(); - AVCodecContext* GetCodecContext() const { - return m_codec_context; - } + AVCodecContext* GetCodecContext() const { + return m_codec_context; + } - bool UsingDecodeOrder() const { - return m_decode_order; - } + // Removed UsingDecodeOrder() as m_decode_order is no longer a direct member + // and its original purpose was tied to FF_THREAD_FRAME. -private: - const Decoder& m_decoder; - AVCodecContext* m_codec_context{}; - s32 m_got_frame{}; - std::shared_ptr m_temp_frame{}; - bool m_decode_order{}; -}; + private: + const Decoder& m_decoder; + AVCodecContext* m_codec_context{}; + s32 m_got_frame{}; // This member is no longer used, can be removed. + std::shared_ptr m_temp_frame{}; // This member is no longer used, can be removed. + // bool m_decode_order{}; // Removed due to removal of FF_THREAD_FRAME + }; -class DecodeApi { -public: - YUZU_NON_COPYABLE(DecodeApi); - YUZU_NON_MOVEABLE(DecodeApi); + class DecodeApi { + public: + YUZU_NON_COPYABLE(DecodeApi); + YUZU_NON_MOVEABLE(DecodeApi); - DecodeApi() = default; - ~DecodeApi() = default; + DecodeApi() = default; + ~DecodeApi() = default; - bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); - void Reset(); + bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); + void Reset(); - bool UsingDecodeOrder() const { - return m_decoder_context->UsingDecodeOrder(); - } + // Removed UsingDecodeOrder() as its underlying logic is removed. 
- bool SendPacket(std::span packet_data); - std::shared_ptr ReceiveFrame(); + bool SendPacket(std::span packet_data); + std::shared_ptr ReceiveFrame(); -private: - std::optional m_decoder; - std::optional m_decoder_context; - std::optional m_hardware_context; -}; + private: + std::optional m_decoder; + std::optional m_decoder_context; + std::optional m_hardware_context; + }; } // namespace FFmpeg diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 6de360d363..e258c215d5 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -40,29 +40,6 @@ public: m_decode_order.erase(fd); } - s32 VicFindNvdecFdFromOffset(u64 search_offset) { - std::scoped_lock l{m_mutex}; - // Vic does not know which nvdec is producing frames for it, so search all the fds here for - // the given offset. - for (auto& map : m_presentation_order) { - for (auto& [offset, _] : map.second) { - if (offset == search_offset) { - return map.first; - } - } - } - - for (auto& map : m_decode_order) { - for (auto& [offset, _] : map.second) { - if (offset == search_offset) { - return map.first; - } - } - } - - return -1; - } - void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr&& frame) { std::scoped_lock l{m_mutex}; auto map = m_presentation_order.find(fd); @@ -78,23 +55,29 @@ public: if (map == m_decode_order.end()) { return; } - map->second.insert_or_assign(offset, std::move(frame)); + map->second.emplace(offset, std::move(frame)); + m_frame_available_cv.notify_all(); } - std::shared_ptr GetFrame(s32 fd, u64 offset) { - if (fd == -1) { - return {}; - } + std::shared_ptr GetFrame(u64 offset) { + std::unique_lock l{m_mutex}; - std::scoped_lock l{m_mutex}; - auto present_map = m_presentation_order.find(fd); - if (present_map != m_presentation_order.end() && present_map->second.size() > 0) { - return GetPresentOrderLocked(fd); - } - - auto decode_map = m_decode_order.find(fd); - if (decode_map != m_decode_order.end() && decode_map->second.size() > 0) { - return GetDecodeOrderLocked(fd, offset); + // Wait for the frame to become available, with a timeout to prevent deadlocks. + if (m_frame_available_cv.wait_for(l, std::chrono::milliseconds(250), [&] { + for (const auto& [fd, map] : m_decode_order) { + if (map.contains(offset)) { + return true; + } + } + return false; + })) { + // Search all decoders for the frame with the matching offset. 
+ for (auto& [decoder_id, frame_map] : m_decode_order) { + auto node = frame_map.extract(offset); + if (!node.empty()) { + return std::move(node.mapped()); + } + } } return {}; @@ -128,6 +111,7 @@ private: std::mutex m_mutex{}; std::unordered_map>> m_presentation_order; std::unordered_map> m_decode_order; + std::condition_variable m_frame_available_cv; }; enum class ChannelType : u32 { diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 3ad56bb80c..7bf27f0c60 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -136,11 +136,8 @@ void Vic::Execute() { } auto luma_offset{regs.surfaces[i][SurfaceIndex::Current].luma.Address()}; - if (nvdec_id == -1) { - nvdec_id = frame_queue.VicFindNvdecFdFromOffset(luma_offset); - } - auto frame = frame_queue.GetFrame(nvdec_id, luma_offset); + auto frame = frame_queue.GetFrame(luma_offset); if (!frame.get()) { LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset 0x{:X}", id, luma_offset); continue; diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index e7600941ad..2bada64480 100644 --- a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h @@ -630,7 +630,6 @@ private: void WriteABGR(const OutputSurfaceConfig& output_surface_config); s32 id; - s32 nvdec_id{-1}; u32 syncpoint; VicRegisters regs{};
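Note on the decode loop: the reworked DecoderContext drops the private ffcodec()/decode-callback path and relies entirely on the public avcodec_send_packet()/avcodec_receive_frame() pair, draining every frame a packet produces and copying hardware surfaces back to system memory with av_hwframe_transfer_data(). Below is a minimal standalone sketch of that pattern; it only illustrates the public API the patch uses and is not code from the patch. The DrainFrames/on_frame names are invented for the example.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
}

#include <functional>

// Feed one packet, then drain every frame the decoder can currently produce.
// A single packet may yield zero, one, or several frames, so loop until EAGAIN/EOF.
static bool DrainFrames(AVCodecContext* ctx, const AVPacket* packet,
                        const std::function<void(AVFrame*)>& on_frame) {
    if (avcodec_send_packet(ctx, packet) < 0) {
        return false;
    }
    while (true) {
        AVFrame* frame = av_frame_alloc();
        const int ret = avcodec_receive_frame(ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_frame_free(&frame);
            return true; // No more frames available right now.
        }
        if (ret < 0) {
            av_frame_free(&frame);
            return false;
        }
        if (frame->hw_frames_ctx) {
            // Hardware surface: copy it back to system memory (NV12) before use.
            AVFrame* sw_frame = av_frame_alloc();
            sw_frame->format = AV_PIX_FMT_NV12;
            if (av_hwframe_transfer_data(sw_frame, frame, 0) == 0) {
                on_frame(sw_frame); // Caller must consume the frame before it is freed.
            }
            av_frame_free(&sw_frame);
        } else {
            on_frame(frame);
        }
        av_frame_free(&frame);
    }
}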
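Note on the software fallback: the "fall back gracefully" behaviour in the commit message hinges on the get_format callback (GetGpuFormat in ffmpeg.cpp). When the decoder does not offer the hardware pixel format for the current stream (e.g. VP8 on a VAAPI driver without VP8 support), the callback releases hw_device_ctx and picks a software format so decoding continues on the CPU instead of failing. A reduced sketch of that negotiation is below, assuming VAAPI as the hardware format and YUV420P as the software fallback; PickFormat is an illustrative name, not the patch's function.

extern "C" {
#include <libavcodec/avcodec.h>
}

// get_format callback: prefer the opaque VAAPI surface format, otherwise drop the
// hardware device context and continue with a plain software format.
static AVPixelFormat PickFormat(AVCodecContext* ctx, const AVPixelFormat* fmts) {
    for (const AVPixelFormat* p = fmts; *p != AV_PIX_FMT_NONE; ++p) {
        if (*p == AV_PIX_FMT_VAAPI) {
            return *p; // The hardware path is available for this stream.
        }
    }
    // Hardware format not offered (e.g. VP8 on a VAAPI driver without VP8 support):
    // release the device so FFmpeg decodes on the CPU instead of failing outright.
    av_buffer_unref(&ctx->hw_device_ctx);
    for (const AVPixelFormat* p = fmts; *p != AV_PIX_FMT_NONE; ++p) {
        if (*p == AV_PIX_FMT_YUV420P) {
            return *p;
        }
    }
    return fmts[0]; // Last resort: take whatever the decoder offers first.
}

// Installed before avcodec_open2(), e.g.: codec_context->get_format = PickFormat;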
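Note on frame lookup: with VicFindNvdecFdFromOffset() removed, FrameQueue::GetFrame() now blocks on a condition variable (up to 250 ms) until some decoder has pushed a frame for the requested offset. Below is a condensed sketch of that wait-and-extract pattern, reduced to a single map and placeholder types; FrameStore and its members are illustrative names, not the patch's.

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>

struct Frame {}; // Placeholder for the decoded-frame type.

struct FrameStore {
    std::mutex mutex;
    std::condition_variable available;
    std::map<std::uint64_t, std::shared_ptr<Frame>> frames; // keyed by surface offset

    // Producer side (decoder): publish a frame and wake any waiter.
    void Push(std::uint64_t offset, std::shared_ptr<Frame> frame) {
        {
            std::scoped_lock lock{mutex};
            frames.emplace(offset, std::move(frame));
        }
        available.notify_all();
    }

    // Consumer side (VIC): wait until a frame for `offset` arrives or 250 ms pass,
    // so the consumer no longer needs to know which decoder produced it.
    std::shared_ptr<Frame> Pop(std::uint64_t offset) {
        std::unique_lock lock{mutex};
        if (!available.wait_for(lock, std::chrono::milliseconds(250),
                                [&] { return frames.contains(offset); })) {
            return {}; // Timed out; treated as a missing frame.
        }
        auto node = frames.extract(offset);
        return std::move(node.mapped());
    }
};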