@@ -288,10 +288,13 @@ void AudioEncoder::encode() {
288
288
// encoded frame would contain more samples than necessary and our results
289
289
// wouldn't match the ffmpeg CLI.
290
290
avFrame->nb_samples = numSamplesToEncode;
291
- encodeInnerLoop (autoAVPacket, avFrame);
292
291
293
- avFrame->pts += static_cast <int64_t >(numSamplesToEncode);
292
+ UniqueAVFrame convertedAVFrame = maybeConvertAVFrame (avFrame);
293
+ encodeInnerLoop (autoAVPacket, convertedAVFrame);
294
+
294
295
numEncodedSamples += numSamplesToEncode;
296
+ // TODO-ENCODING set frame pts correctly, and test against it.
297
+ // avFrame->pts += static_cast<int64_t>(numSamplesToEncode);
295
298
}
296
299
TORCH_CHECK (numEncodedSamples == numSamples, " Hmmmmmm something went wrong." );
297
300
@@ -304,42 +307,43 @@ void AudioEncoder::encode() {
304
307
getFFMPEGErrorStringFromErrorCode (status));
305
308
}
306
309
307
- void AudioEncoder::encodeInnerLoop (
308
- AutoAVPacket& autoAVPacket,
309
- const UniqueAVFrame& srcAVFrame) {
310
- bool mustConvert =
311
- (srcAVFrame != nullptr &&
312
- (avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP ||
313
- getNumChannels (srcAVFrame) != outNumChannels_));
314
-
315
- UniqueAVFrame convertedAVFrame;
316
- if (mustConvert) {
317
- if (!swrContext_) {
318
- swrContext_.reset (createSwrContext (
319
- AV_SAMPLE_FMT_FLTP,
320
- avCodecContext_->sample_fmt ,
321
- srcAVFrame->sample_rate , // No sample rate conversion
322
- srcAVFrame->sample_rate ,
323
- srcAVFrame,
324
- outNumChannels_));
325
- }
326
- convertedAVFrame = convertAudioAVFrameSamples (
327
- swrContext_,
328
- srcAVFrame,
310
// Returns a frame whose sample format and channel count match what the
// encoder is configured for.
//
// - If avFrame's format already equals avCodecContext_->sample_fmt and its
//   channel count equals outNumChannels_, the result is an av_frame_clone()
//   of avFrame.
// - Otherwise the frame is run through convertAudioAVFrameSamples() using
//   swrContext_, which is created on first use from this frame's
//   format and sample rate. The input and output sample rates passed are the
//   same, so no resampling is requested.
//
// The converted frame must have the same nb_samples as the input; this is
// enforced with TORCH_CHECK.
//
// NOTE(review): swrContext_ is only built when it is null, so later frames
// presumably must share the first converted frame's format, sample rate and
// channel layout - confirm against callers.
// NOTE(review): av_frame_clone() is documented by FFmpeg to return NULL on
// failure; the result is not checked here - confirm whether callers handle a
// null UniqueAVFrame.
+ UniqueAVFrame AudioEncoder::maybeConvertAVFrame (const UniqueAVFrame& avFrame) {
311
+ if (static_cast <AVSampleFormat>(avFrame->format ) ==
312
+ avCodecContext_->sample_fmt &&
313
+ getNumChannels (avFrame) == outNumChannels_) {
314
+ // Note: the clone references the same underlying data, it's a cheap copy.
315
+ return UniqueAVFrame (av_frame_clone (avFrame.get ()));
316
+ }
317
+
318
// Create the conversion context only once; it is reused on later calls.
+ if (!swrContext_) {
319
+ swrContext_.reset (createSwrContext (
320
+ static_cast <AVSampleFormat>(avFrame->format ),
329
321
avCodecContext_->sample_fmt ,
330
- srcAVFrame->sample_rate , // No sample rate conversion
331
- outNumChannels_);
332
- TORCH_CHECK (
333
- convertedAVFrame->nb_samples == srcAVFrame->nb_samples ,
334
- " convertedAVFrame->nb_samples=" ,
335
- convertedAVFrame->nb_samples ,
336
- " differs from " ,
337
- " srcAVFrame->nb_samples=" ,
338
- srcAVFrame->nb_samples ,
339
- " This is unexpected, please report on the TorchCodec bug tracker." );
322
+ avFrame->sample_rate , // No sample rate conversion
323
+ avFrame->sample_rate ,
324
+ avFrame,
325
+ outNumChannels_));
340
326
}
341
- const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
327
// Convert to the encoder's sample format and outNumChannels_ channels.
+ UniqueAVFrame convertedAVFrame = convertAudioAVFrameSamples (
328
+ swrContext_,
329
+ avFrame,
330
+ avCodecContext_->sample_fmt ,
331
+ avFrame->sample_rate , // No sample rate conversion
332
+ outNumChannels_);
333
// The sample rate is unchanged, so the sample count must be preserved.
+ TORCH_CHECK (
334
+ convertedAVFrame->nb_samples == avFrame->nb_samples ,
335
+ " convertedAVFrame->nb_samples=" ,
336
+ convertedAVFrame->nb_samples ,
337
+ " differs from " ,
338
+ " avFrame->nb_samples=" ,
339
+ avFrame->nb_samples ,
340
+ " This is unexpected, please report on the TorchCodec bug tracker." );
341
+ return convertedAVFrame;
342
+ }
342
343
344
+ void AudioEncoder::encodeInnerLoop (
345
+ AutoAVPacket& autoAVPacket,
346
+ const UniqueAVFrame& avFrame) {
343
347
auto status = avcodec_send_frame (avCodecContext_.get (), avFrame.get ());
344
348
TORCH_CHECK (
345
349
status == AVSUCCESS,
0 commit comments