From e393910f2126f7a6c34b72812fec91254422b8b8 Mon Sep 17 00:00:00 2001
From: k4yt3x
Date: Sun, 10 Nov 2024 00:00:00 +0000
Subject: [PATCH] feat(encoder): added auto selection of the most suitable output pix_fmt

Signed-off-by: k4yt3x
---
Review notes:
- get_encoder_default_pix_fmt() in src/avutils.cpp was revised: spdlog {}
  placeholders instead of printf-style %s, early return when no format is
  found, alpha detection via AV_PIX_FMT_FLAG_ALPHA. Hunk sizes and the
  diffstat were updated to match; the blob hashes on the "index" lines were
  not regenerated.
- This copy of the patch has lost all text that was inside angle brackets
  (e-mail addresses, #include targets, the static_cast type argument).
  Restore it from the original commit before applying.

 CHANGELOG.md                 |  4 ++
 include/libvideo2x/avutils.h |  3 ++
 src/avutils.cpp              | 81 ++++++++++++++++++++++++++++++++++++
 src/encoder.cpp              | 18 ++++----
 src/libvideo2x.cpp           |  2 +-
 5 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f09f5dd..4d908c72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Automatic selection of the most suitable pixel format for the output video.
+
 ### Fixed
 
 - Timestamp errors processing frames with PTS equal to 0 (#1222).
diff --git a/include/libvideo2x/avutils.h b/include/libvideo2x/avutils.h
index 2135e2f1..59cb46cf 100644
--- a/include/libvideo2x/avutils.h
+++ b/include/libvideo2x/avutils.h
@@ -7,4 +7,7 @@ extern "C" {
 
 int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
 
+enum AVPixelFormat
+get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
+
 #endif  // AVUTILS_H
diff --git a/src/avutils.cpp b/src/avutils.cpp
index b036f0b4..798ae229 100644
--- a/src/avutils.cpp
+++ b/src/avutils.cpp
@@ -1,5 +1,10 @@
 #include "avutils.h"
 
+extern "C" {
+#include
+#include
+}
+
 #include
 
 int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
@@ -48,3 +53,79 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
     // Estimate and return the total number of frames
     return static_cast(duration_secs * fps);
 }
+
+// Select the encoder pixel format that best matches `target_pix_fmt`.
+//
+// Returns `target_pix_fmt` when the encoder lists it or reports no restriction
+// (yuv420p if no target was given), otherwise the listed format that
+// av_find_best_pix_fmt_of_2 ranks best, or the first listed format when
+// `target_pix_fmt` is AV_PIX_FMT_NONE. Returns AV_PIX_FMT_NONE if the query
+// fails or no listed format can be selected.
+enum AVPixelFormat
+get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt) {
+    // Retrieve the list of supported pixel formats
+    const enum AVPixelFormat *supported_pix_fmts = nullptr;
+    const int ret = avcodec_get_supported_config(
+        nullptr,
+        encoder,
+        AV_CODEC_CONFIG_PIX_FORMAT,
+        0,
+        reinterpret_cast<const void **>(&supported_pix_fmts),
+        nullptr
+    );
+    if (ret < 0) {
+        char errbuf[AV_ERROR_MAX_STRING_SIZE];
+        av_strerror(ret, errbuf, sizeof(errbuf));
+        spdlog::error("Failed to get supported pixel formats: {}", errbuf);
+        return AV_PIX_FMT_NONE;
+    }
+
+    // A null list is treated as "every pixel format is accepted"
+    if (supported_pix_fmts == nullptr) {
+        if (target_pix_fmt == AV_PIX_FMT_NONE) {
+            spdlog::warn("Encoder supports all pixel formats; defaulting to yuv420p");
+            return AV_PIX_FMT_YUV420P;
+        }
+        spdlog::warn("Encoder supports all pixel formats; defaulting to the decoder's format");
+        return target_pix_fmt;
+    }
+
+    // Without a target there is nothing to match against; take the first listed format
+    if (target_pix_fmt == AV_PIX_FMT_NONE) {
+        if (*supported_pix_fmts == AV_PIX_FMT_NONE) {
+            spdlog::error("No suitable pixel format found for encoder");
+        }
+        return *supported_pix_fmts;
+    }
+
+    // Determine if the target pixel format has an alpha channel
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(target_pix_fmt);
+    const int has_alpha =
+        (desc != nullptr && (desc->flags & AV_PIX_FMT_FLAG_ALPHA) != 0) ? 1 : 0;
+
+    // Iterate over supported pixel formats to find the best match
+    enum AVPixelFormat best_pix_fmt = AV_PIX_FMT_NONE;
+    for (const enum AVPixelFormat *p = supported_pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
+        if (*p == target_pix_fmt) {
+            // Exact match; nothing to convert and nothing to warn about
+            return target_pix_fmt;
+        }
+        best_pix_fmt =
+            av_find_best_pix_fmt_of_2(best_pix_fmt, *p, target_pix_fmt, has_alpha, nullptr);
+    }
+
+    // Return before building the warning below: there is no name to print for "none"
+    if (best_pix_fmt == AV_PIX_FMT_NONE) {
+        spdlog::error("No suitable pixel format found for encoder");
+        return AV_PIX_FMT_NONE;
+    }
+
+    // spdlog formats with {} placeholders; printf-style %s is emitted verbatim
+    const char *target_name = av_get_pix_fmt_name(target_pix_fmt);
+    spdlog::warn(
+        "Incompatible pixel format '{}' for encoder '{}'; auto-selecting format '{}'",
+        target_name != nullptr ? target_name : "unknown",
+        encoder->name,
+        av_get_pix_fmt_name(best_pix_fmt)
+    );
+    return best_pix_fmt;
+}
diff --git a/src/encoder.cpp b/src/encoder.cpp
index 6826c6b3..78d4f9d1 100644
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@@ -5,19 +5,15 @@
 #include
 #include
 
+extern "C" {
+#include
+}
+
 #include
 
+#include "avutils.h"
 #include "conversions.h"
 
-static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
-    const enum AVPixelFormat *p = encoder->pix_fmts;
-    if (!p) {
-        spdlog::error("No pixel formats supported by encoder");
-        return AV_PIX_FMT_NONE;
-    }
-    return *p;
-}
-
 int init_encoder(
     AVBufferRef *hw_ctx,
     std::filesystem::path out_fpath,
@@ -86,12 +82,12 @@ int init_encoder(
         // Use the specified pixel format
         codec_ctx->pix_fmt = encoder_config->pix_fmt;
     } else {
-        // Fall back to the default pixel format
-        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder);
+        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
         if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
             spdlog::error("Could not get the default pixel format for the encoder");
             return AVERROR(EINVAL);
         }
+        spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(codec_ctx->pix_fmt));
     }
 
     // Set the output video's time base
diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp
index e56552b3..812b8ac7 100644
--- a/src/libvideo2x.cpp
+++ b/src/libvideo2x.cpp
@@ -344,7 +344,7 @@ extern "C" int process_video(
         cleanup();
         return -1;
     }
-    spdlog::info("Output video dimensions: {}x{}", output_width, output_height);
+    spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);
 
     // Initialize output encoder
     encoder_config->out_width = output_width;