Add validation for num_channels

NicolasHug · NicolasHug · commit 2d76a7b619f6 · 2025-05-22T11:31:32.000+01:00
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -55,20 +55,6 @@ void validateSampleRate(const AVCodec& avCodec, int sampleRate) {
       supportedRates.str());
 }
 
-void print_supported_channel_layouts(const AVCodec *codec) {
-    if (!codec->ch_layouts) {
-        printf("No specific channel layouts supported by this encoder.\n");
-        return;
-    }
-    const AVChannelLayout *layout = codec->ch_layouts;
-    while (layout->order != AV_CHANNEL_ORDER_UNSPEC) {
-        char layout_name[256];
-        av_channel_layout_describe(layout, layout_name, sizeof(layout_name));
-        printf("Supported channel layout: %s\n", layout_name);
-        layout++;
-    }
-}
-
 static const std::vector<AVSampleFormat> preferredFormatsOrder = {
     AV_SAMPLE_FMT_FLTP,
     AV_SAMPLE_FMT_FLT,
@@ -173,13 +159,12 @@ AudioEncoder::AudioEncoder(
 void AudioEncoder::initializeEncoder(
     int sampleRate,
     std::optional<int64_t> bitRate,
-    [[maybe_unused]] std::optional<int64_t> numChannels) {
+    std::optional<int64_t> numChannels) {
   // We use the AVFormatContext's default codec for that
   // specific format/container.
   const AVCodec* avCodec =
       avcodec_find_encoder(avFormatContext_->oformat->audio_codec);
   TORCH_CHECK(avCodec != nullptr, "Codec not found");
-  print_supported_channel_layouts(avCodec);
 
   AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
   TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");
@@ -193,6 +178,7 @@ void AudioEncoder::initializeEncoder(
   avCodecContext_->bit_rate = bitRate.value_or(0);
 
   desiredNumChannels_ = static_cast<int>(numChannels.value_or(wf_.sizes()[0]));
+  validateNumChannels(*avCodec, desiredNumChannels_);
 
   setDefaultChannelLayout(avCodecContext_, desiredNumChannels_);
 
diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp
@@ -100,22 +100,56 @@ void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels) {
 #endif
 }
 
-// void setChannelLayout(
-//     UniqueAVFrame& dstAVFrame,
-//     const UniqueAVCodecContext& avCodecContext) {
-// #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
-//   auto status = av_channel_layout_copy(
-//       &dstAVFrame->ch_layout, &avCodecContext->ch_layout);
-//   TORCH_CHECK(
-//       status == AVSUCCESS,
-//       "Couldn't copy channel layout to avFrame: ",
-//       getFFMPEGErrorStringFromErrorCode(status));
-// #else
-//   dstAVFrame->channel_layout = avCodecContext->channel_layout;
-//   dstAVFrame->channels = avCodecContext->channels;
-
-// #endif
-// }
+void validateNumChannels(const AVCodec& avCodec, int numChannels) {
+#if LIBAVFILTER_VERSION_MAJOR > 8 // FFmpeg > 5
+  if (avCodec.ch_layouts == nullptr) {
+    // If we can't validate, we must assume it'll be fine. If not, FFmpeg will
+    // eventually report an error.
+    return;
+  }
+  for (auto i = 0; avCodec.ch_layouts[i].order != AV_CHANNEL_ORDER_UNSPEC;
+       ++i) {
+    if (numChannels == avCodec.ch_layouts[i].nb_channels) {
+      return;
+    }
+  }
+  std::stringstream supportedNumChannels;
+  for (auto i = 0; avCodec.ch_layouts[i].order != AV_CHANNEL_ORDER_UNSPEC;
+       ++i) {
+    if (i > 0) {
+      supportedNumChannels << ", ";
+    }
+    supportedNumChannels << avCodec.ch_layouts[i].nb_channels;
+  }
+#else
+  if (avCodec.channel_layouts == nullptr) {
+    // Codec lists no supported layouts: can't validate, so assume it's fine.
+    return;
+  }
+  for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
+    if (numChannels ==
+        av_get_channel_layout_nb_channels(avCodec.channel_layouts[i])) {
+      return;
+    }
+  }
+  std::stringstream supportedNumChannels;
+  for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
+    if (i > 0) {
+      supportedNumChannels << ", ";
+    }
+    supportedNumChannels << av_get_channel_layout_nb_channels(
+        avCodec.channel_layouts[i]);
+  }
+#endif
+  TORCH_CHECK(
+      false,
+      "Desired number of channels (",
+      numChannels,
+      ") is not supported by the ",
+      "encoder. Supported number of channels are: ",
+      supportedNumChannels.str(),
+      ".");
+}
 
 namespace {
 #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
diff --git a/src/torchcodec/_core/FFMPEGCommon.h b/src/torchcodec/_core/FFMPEGCommon.h
@@ -153,9 +153,7 @@ void setDefaultChannelLayout(
 
 void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels);
 
-// void setChannelLayout(
-//     UniqueAVFrame& dstAVFrame,
-//     const UniqueAVCodecContext& avCodecContext);
+void validateNumChannels(const AVCodec& avCodec, int numChannels);
 
 void setChannelLayout(
     UniqueAVFrame& dstAVFrame,
diff --git a/src/torchcodec/encoders/_audio_encoder.py b/src/torchcodec/encoders/_audio_encoder.py
@@ -31,23 +31,27 @@ def to_file(
         dest: Union[str, Path],
         *,
         bit_rate: Optional[int] = None,
+        num_channels: Optional[int] = None,
     ) -> None:
         _core.encode_audio_to_file(
             wf=self._samples,
             sample_rate=self._sample_rate,
             filename=dest,
             bit_rate=bit_rate,
+            num_channels=num_channels,
         )
 
     def to_tensor(
         self,
         format: str,
         *,
         bit_rate: Optional[int] = None,
+        num_channels: Optional[int] = None,
     ) -> Tensor:
         return _core.encode_audio_to_tensor(
             wf=self._samples,
             sample_rate=self._sample_rate,
             format=format,
             bit_rate=bit_rate,
+            num_channels=num_channels,
         )
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -6,6 +6,7 @@
 
 import io
 import os
+import re
 from functools import partial
 
 os.environ["TORCH_LOGS"] = "output_code"
@@ -1158,10 +1159,19 @@ def test_bad_input(self, tmp_path):
                 wf=torch.rand(10, 20), sample_rate=10, filename="doesnt_matter"
             )
 
-        encode_audio_to_file(
-            wf=torch.rand(2, 10), sample_rate=16_000, filename="ok.mp3", num_channels=8
-        )
-
+        for num_channels in (0, 3):
+            with pytest.raises(
+                RuntimeError,
+                match=re.escape(
+                    f"Desired number of channels ({num_channels}) is not supported"
+                ),
+            ):
+                encode_audio_to_file(
+                    wf=torch.rand(2, 10),
+                    sample_rate=16_000,
+                    filename="ok.mp3",
+                    num_channels=num_channels,
+                )
 
     @pytest.mark.parametrize(
         "encode_method", (encode_audio_to_file, encode_audio_to_tensor)
@@ -1335,7 +1345,7 @@ def test_contiguity(self):
     def test_num_channels(
         self, num_channels_input, num_channels_output, encode_method, tmp_path
     ):
-        # We just check that the num_channels parmameter is respected.
+        # We just check that the num_channels parameter is respected.
         # Correctness is checked in other tests (like test_against_cli())
 
         sample_rate = 16_000