diff --git a/README.md b/README.md index 4277eec52..214bf5640 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ vp9_cfm, vp9_frame, vpx_ccr, [wasm](doc/formats.md#wasm), -wav, +[wav](doc/formats.md#wav), webp, [xml](doc/formats.md#xml), yaml, diff --git a/doc/formats.md b/doc/formats.md index e0138c63a..beb3937d1 100644 --- a/doc/formats.md +++ b/doc/formats.md @@ -130,7 +130,7 @@ |`vp9_frame` |VP9 frame || |`vpx_ccr` |VPX Codec Configuration Record || |[`wasm`](#wasm) |WebAssembly Binary Format || -|`wav` |WAV file |`id3v2` `id3v1` `id3v11` `dolby_metadata`| +|[`wav`](#wav) |WAV file |`id3v2` `id3v1` `id3v11` `dolby_metadata`| |`webp` |WebP image |`exif` `vp8_frame` `icc_profile` `xml`| |[`xml`](#xml) |Extensible Markup Language || |`yaml` |YAML Ain't Markup Language || @@ -616,7 +616,7 @@ RIFF / WAV / Broadcast Wave Format (BWF) chunks: - https://tech.ebu.ch/publications/tech3285s5 - https://tech.ebu.ch/files/live/sites/tech/files/shared/tech/tech3285s6.pdf - https://github.com/DolbyLaboratories/dbmd-atmos-parser -- https://github.com/MediaArea/MediaInfoLib/tree/Source/MediaInfo/Audio/File_DolbyAudioMetadata.cpp +- https://github.com/MediaArea/MediaInfoLib/blob/master/Source/MediaInfo/Audio/File_DolbyAudioMetadata.cpp ## fit Garmin Flexible and Interoperable Data Transfer. @@ -1470,6 +1470,49 @@ $ fq '.sections | {import: map(select(.id == "import_section").content.im.x[].nm ### References - https://webassembly.github.io/spec/core/ +## wav +WAV file. + +WAVE audio file format. + +Also includes support for [Audio Definition Model](https://adm.ebu.io/background/what_is_the_adm.html) and 3D Audio. 
+ +RIFF / WAV / Broadcast Wave Format (BWF) chunks: + +- `RIFF`: primary container chunk specifying the file type and containing sub-chunks (e.g., fmt, data) +- `fmt`: describes format / stream encoding in data chunk +- `data`: indicates size and contains encoded raw sound data +- `bext`: broadcast extension chunk, containing broadcast-specific metadata such as description, originator, creation date, time reference, and more +- `LIST`: organizes additional metadata in sub-chunks, often used to include information like artist, genre, or title in INFO or other standardized formats +- `smpl`: sample metadata chunk, containing looping and sampling information, such as start and end points for loops, sample rate, and MIDI pitch +- `fact`: contains metadata on the original uncompressed data, such as the number of samples, typically used in non-PCM (compressed) formats to aid in playback and synchronization +- `chna`: track UIDs of Audio Definition Model +- `axml`: XML metadata, e.g. for Audio Definition Model ambisonics and elements as in [EBUCore spec](https://tech.ebu.ch/docs/tech/tech3293.pdf) +- `dbmd`: Dolby specific metadata like loudness and binaural settings, see also [`dolby_metadata` format](#dolby_metadata) + + +### Examples +Decode ADM configuration from `chna` and `axml` chunks: +```bash +$ fq -d wav '.chunks[] | select(.id | IN("chna", "axml")) | tovalue' amd-bwf.wav + +# Extract ADM chunk objects definitions xml content +$ fq -r -d wav '.chunks[] | select(.id | IN("axml")) | .xml | tovalue' amd-bwf.wav | tee axml-content.xml +``` + +### Authors +- [@wader](https://github.com/wader), original author +- [@johnnymarnell](https://johnnymarnell.github.io), ADM support + +### References +- http://soundfile.sapp.org/doc/WaveFormat/ +- https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/wavdec.c +- https://tech.ebu.ch/docs/tech/tech3285.pdf +- http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html +- https://adm.ebu.io/background/what_is_the_adm.html +- 
https://tech.ebu.ch/docs/tech/tech3285s7.pdf +- https://tech.ebu.ch/docs/tech/tech3285s5.pdf + ## xml Extensible Markup Language. diff --git a/format/riff/adm.go b/format/riff/adm.go deleted file mode 100644 index b8955263c..000000000 --- a/format/riff/adm.go +++ /dev/null @@ -1,30 +0,0 @@ -package riff - -// Audio Definition Model -// https://adm.ebu.io/background/what_is_the_adm.html -// https://tech.ebu.ch/publications/tech3285s7 -// https://tech.ebu.ch/publications/tech3285s5 - -import ( - "github.com/wader/fq/pkg/decode" -) - -func chnaDecode(d *decode.D) { - d.FieldU16("num_tracks") - d.FieldU16("num_uids") - d.FieldArray("audio_ids", func(d *decode.D) { - for !d.End() { - d.FieldStruct("audio_id", func(d *decode.D) { - d.FieldU16("track_index") - d.FieldUTF8("uid", 12) - d.FieldUTF8("track_format_id_reference", 14) - d.FieldUTF8("pack_format_id_reference", 11) - d.FieldRawLen("padding", 8) - }) - } - }) -} - -func axmlDecode(d *decode.D) { - d.FieldUTF8("xml", int(d.BitsLeft())/8) -} diff --git a/format/riff/adm.md b/format/riff/adm.md deleted file mode 100644 index 1ae565e20..000000000 --- a/format/riff/adm.md +++ /dev/null @@ -1,22 +0,0 @@ -[Audio Definition Model](https://adm.ebu.io/background/what_is_the_adm.html) including 3D Audio. - -RIFF / WAV / Broadcast Wave Format (BWF) chunks: -- `` Chunk, Track UIDs of Audio Definition Model -- `` Chunk, BWF XML Metadata, e.g. 
for Audio Definition Model ambisonics and elements - -### Examples -Decode ADM configuration from `` and `` chunks: -```bash -$ fq -d wav '.chunks[] | select(.id | IN("chna", "axml")) | tovalue' amd-bwf.wav - -# Extract ADM chunk objects definitions xml content -$ fq -r -d wav '.chunks[] | select(.id | IN("axml")) | .xml | tovalue' amd-bwf.wav | tee axml-content.xml -``` - -### Authors -- [@johnnymarnell](https://johnnymarnell.github.io), original author - -### References -- https://adm.ebu.io/background/what_is_the_adm.html -- https://tech.ebu.ch/publications/tech3285s7 -- https://tech.ebu.ch/publications/tech3285s5 \ No newline at end of file diff --git a/format/riff/aiff.go b/format/riff/aiff.go index d02346176..6efcd68c7 100644 --- a/format/riff/aiff.go +++ b/format/riff/aiff.go @@ -54,7 +54,7 @@ func aiffDecode(d *decode.D) any { } return id, size }, - func(d *decode.D, id string, path path, size int64) (bool, any) { + func(d *decode.D, id string, path path) (bool, any) { switch id { case "FORM": riffType = d.FieldUTF8("format", 4, d.StrAssert(aiffRiffType)) diff --git a/format/riff/avi.go b/format/riff/avi.go index f2c57406b..d35ae3a85 100644 --- a/format/riff/avi.go +++ b/format/riff/avi.go @@ -238,7 +238,7 @@ func aviDecodeEx(d *decode.D, ai format.AVI_In, extendedChunk bool) { size := d.FieldU32("size") return id, int64(size) }, - func(d *decode.D, id string, path path, size int64) (bool, any) { + func(d *decode.D, id string, path path) (bool, any) { switch id { case "RIFF": foundRiffType = d.FieldUTF8("type", 4, d.StrAssert(requiredRiffType)) diff --git a/format/riff/common.go b/format/riff/common.go index bfea3a850..05b66ae3e 100644 --- a/format/riff/common.go +++ b/format/riff/common.go @@ -19,11 +19,11 @@ func (p path) topData() any { return p[len(p)-1].data } -func riffDecode(d *decode.D, path path, headFn func(d *decode.D, path path) (string, int64), chunkFn func(d *decode.D, id string, path path, size int64) (bool, any)) { +func riffDecode(d 
*decode.D, path path, headFn func(d *decode.D, path path) (string, int64), chunkFn func(d *decode.D, id string, path path) (bool, any)) { id, size := headFn(d, path) d.FramedFn(size*8, func(d *decode.D) { - hasChildren, data := chunkFn(d, id, path, size) + hasChildren, data := chunkFn(d, id, path) if hasChildren { np := append(path, pathEntry{id: id, data: data}) d.FieldArray("chunks", func(d *decode.D) { @@ -58,6 +58,12 @@ var chunkIDDescriptions = scalar.StrMapDescription{ "dmlh": "Extended AVI header", + "data": "Raw sound encoded data", + "bext": "Broadcast extension, e.g. creator, date, etc.", + "smpl": "Sample metadata, e.g. loop points", + "fact": "Original info used for compression, e.g. sample length", + + // BWF ADM master and Dolby Metadata "chna": "Track UIDs of Audio Definition Model", "axml": "Audio Definition Model ambisonics and elements", "dbmd": "Dolby Metadata, e.g. Atmos, AC3, Dolby Digital [Plus]", diff --git a/format/riff/dolby.go b/format/riff/dolby.go index ce4d90cb7..32f5a30da 100644 --- a/format/riff/dolby.go +++ b/format/riff/dolby.go @@ -12,7 +12,7 @@ import ( "github.com/wader/fq/pkg/scalar" ) -func tmp_dbmdDecode(d *decode.D, size int64) any { +func old_dbmdDecode(d *decode.D) any { version := d.U32() major := (version >> 24) & 0xFF minor := (version >> 16) & 0xFF @@ -35,32 +35,32 @@ func tmp_dbmdDecode(d *decode.D, size int64) any { } segmentSize := d.FieldU16("metadata_segment_size") - bitsLeft := d.BitsLeft() + // bitsLeft := d.BitsLeft() switch segmentID { case 1: - parseDolbyE(d) + tmp_parseDolbyE(d) case 3: - parseDolbyDigital(d) + tmp_parseDolbyDigital(d) case 7: - parseDolbyDigitalPlus(d) + tmp_parseDolbyDigitalPlus(d) case 8: - parseAudioInfo(d) + tmp_parseAudioInfo(d) case 9: - parseDolbyAtmos(d) + tmp_parseDolbyAtmos(d) case 10: - parseDolbyAtmosSupplemental(d) + tmp_parseDolbyAtmosSupplemental(d) default: d.FieldRawLen("unknown_segment_raw", int64(segmentSize*8)) } - bytesRemaining := (bitsLeft-d.BitsLeft())/8 - 
int64(segmentSize) - if bytesRemaining < 0 { - d.Fatalf("Read too many bytes for segment %d, read %d over, expected %d", segmentID, -bytesRemaining, segmentSize) - } else if bytesRemaining > 0 { - d.FieldValueUint("SKIPPED_BYTES", uint64(bytesRemaining)) - d.SeekRel((int64(segmentSize) - bytesRemaining) * 8) - } + // bytesRemaining := (bitsLeft-d.BitsLeft())/8 - int64(segmentSize) + // if bytesRemaining < 0 { + // d.Fatalf("Read too many bytes for segment %d, read %d over, expected %d", segmentID, -bytesRemaining, segmentSize) + // } else if bytesRemaining > 0 { + // d.FieldValueUint("SKIPPED_BYTES", uint64(bytesRemaining)) + // d.SeekRel((int64(segmentSize) - bytesRemaining) * 8) + // } d.FieldU8("metadata_segment_checksum") }) @@ -224,7 +224,7 @@ func tmp_parseAudioInfo(d *decode.D) { d.FieldUTF8("segment_modified_date", 32) } -func tmp_parseDolbyAtmos(d *decode.D, size uint64) { +func tmp_parseDolbyAtmos(d *decode.D) { d.FieldValueStr("metadata_segment_type", "dolby_atmos") // d.SeekRel(32 * 8) @@ -248,7 +248,7 @@ func tmp_parseDolbyAtmos(d *decode.D, size uint64) { d.SeekRel(80 * 8) } -func tmp_parseDolbyAtmosSupplemental(d *decode.D, size uint64) { +func tmp_parseDolbyAtmosSupplemental(d *decode.D) { d.FieldValueStr("metadata_segment_type", "dolby_atmos_supplemental") sync := d.FieldU32LE("dasms_sync") diff --git a/format/riff/dolby_metadata.go b/format/riff/dolby_metadata.go index 4679e3f06..56a521139 100644 --- a/format/riff/dolby_metadata.go +++ b/format/riff/dolby_metadata.go @@ -42,7 +42,9 @@ func dbmdDecode(d *decode.D) any { for !seenEnd { d.FieldStruct("metadata_segment", func(d *decode.D) { segmentID := d.FieldU8("id", metadataSegmentTypeMap) - if segmentID == 0 { + + // TODO(jmarnell): This will always make an empty end segment, I think it would be better to omit it + if segmentID == metadataSegmentTypeEnd { seenEnd = true return } @@ -50,11 +52,11 @@ func dbmdDecode(d *decode.D) any { segmentSize := d.FieldU16("size") switch segmentID { - case 
metadataSegmentTypeDolbyEMetadata: + case metadataSegmentTypeDolbyE: parseDolbyE(d) - case metadataSegmentTypeDolbyEDigitaletadata: + case metadataSegmentTypeDolbyDigital: parseDolbyDigital(d) - case metadataSegmentTypeDolbyDigitalPlusMetadata: + case metadataSegmentTypeDolbyDigitalPlus: parseDolbyDigitalPlus(d) case metadataSegmentTypeAudioInfo: parseAudioInfo(d) @@ -66,6 +68,7 @@ func dbmdDecode(d *decode.D) any { d.FieldRawLen("unknown", int64(segmentSize*8)) } + // TODO: use this to validate parsing d.FieldU8("checksum", scalar.UintHex) }) } @@ -138,31 +141,31 @@ var trimConfigName = scalar.UintMapDescription{ } const ( - metadataSegmentTypeEnd = 0 - metadataSegmentTypeDolbyEMetadata = 1 - metadataSegmentTypeDolbyReserved2 = 2 - metadataSegmentTypeDolbyEDigitaletadata = 3 - metadataSegmentTypeDolbyReserved4 = 4 - metadataSegmentTypeDolbyReserved5 = 5 - metadataSegmentTypeDolbyReserved6 = 6 - metadataSegmentTypeDolbyDigitalPlusMetadata = 7 - metadataSegmentTypeAudioInfo = 8 - metadataSegmentTypeDolbyAtmos = 9 - metadataSegmentTypeDolbyAtmosSupplemental = 10 + metadataSegmentTypeEnd = 0 + metadataSegmentTypeDolbyE = 1 + metadataSegmentTypeDolbyReserved2 = 2 + metadataSegmentTypeDolbyDigital = 3 + metadataSegmentTypeDolbyReserved4 = 4 + metadataSegmentTypeDolbyReserved5 = 5 + metadataSegmentTypeDolbyReserved6 = 6 + metadataSegmentTypeDolbyDigitalPlus = 7 + metadataSegmentTypeAudioInfo = 8 + metadataSegmentTypeDolbyAtmos = 9 + metadataSegmentTypeDolbyAtmosSupplemental = 10 ) var metadataSegmentTypeMap = scalar.UintMapSymStr{ - metadataSegmentTypeEnd: "end", - metadataSegmentTypeDolbyEMetadata: "dolby_e_metadata", - metadataSegmentTypeDolbyReserved2: "reserved2", - metadataSegmentTypeDolbyEDigitaletadata: "dolby_e_digitale_tadata", - metadataSegmentTypeDolbyReserved4: "reserved4", - metadataSegmentTypeDolbyReserved5: "reserved5", - metadataSegmentTypeDolbyReserved6: "reserved6", - metadataSegmentTypeDolbyDigitalPlusMetadata: "dolby_digital_plus_metadata", - 
metadataSegmentTypeAudioInfo: "audio_info", - metadataSegmentTypeDolbyAtmos: "dolby_atmos", - metadataSegmentTypeDolbyAtmosSupplemental: "dolby_atmos_supplemental", + metadataSegmentTypeEnd: "end", + metadataSegmentTypeDolbyE: "dolby_e_metadata", + metadataSegmentTypeDolbyReserved2: "reserved2", + metadataSegmentTypeDolbyDigital: "dolby_digital_metadata", + metadataSegmentTypeDolbyReserved4: "reserved4", + metadataSegmentTypeDolbyReserved5: "reserved5", + metadataSegmentTypeDolbyReserved6: "reserved6", + metadataSegmentTypeDolbyDigitalPlus: "dolby_digital_plus_metadata", + metadataSegmentTypeAudioInfo: "audio_info", + metadataSegmentTypeDolbyAtmos: "dolby_atmos", + metadataSegmentTypeDolbyAtmosSupplemental: "dolby_atmos_supplemental", } func parseDolbyE(d *decode.D) { @@ -283,27 +286,19 @@ func parseDolbyAtmosSupplemental(d *decode.D) { i := 0 d.FieldStructNArray("trim_configs", "trim_config", 9, func(d *decode.D) { - d.FieldRawLen("reserved", 7) - d.FieldU1("type", scalar.UintMapSymStr{ + d.FieldRawLen("reserved0", 7) + trimType := d.FieldU1("type", scalar.UintMapSymStr{ 0: "manual", 1: "automatic", }) d.FieldValueStr("config_name", trimConfigName[uint64(i)]) - // TODO: this is null separted list of def strings? - d.FieldUTF8("raw", 14) - // str := d.UTF8(14) - // bytes := []byte(str) - // var nonZeroBytes []string - // for _, b := range bytes { - // if b != 0 { - // nonZeroBytes = append(nonZeroBytes, fmt.Sprintf("%d", b)) - // } - // } - // TODO(jmarnell): I think the +3dB trim settings are here. 
- // Would like this at least as an array of numbers, instead of this CSV string - // d.FieldValueStr("trim_defs", strings.Join(nonZeroBytes, ", ")) - + if trimType == 1 { + d.FieldUTF8("reserved1", 14) + } else { + // TODO: Reference MediaInfo's logic and Dolby pdf's + d.FieldUTF8("manual_trim_raw_config", 14) + } i++ }) diff --git a/format/riff/dolby_metadata.md b/format/riff/dolby_metadata.md index 19386780f..dd74b09ca 100644 --- a/format/riff/dolby_metadata.md +++ b/format/riff/dolby_metadata.md @@ -20,4 +20,4 @@ RIFF / WAV / Broadcast Wave Format (BWF) chunks: - https://tech.ebu.ch/publications/tech3285s5 - https://tech.ebu.ch/files/live/sites/tech/files/shared/tech/tech3285s6.pdf - https://github.com/DolbyLaboratories/dbmd-atmos-parser -- https://github.com/MediaArea/MediaInfoLib/tree/Source/MediaInfo/Audio/File_DolbyAudioMetadata.cpp +- https://github.com/MediaArea/MediaInfoLib/blob/master/Source/MediaInfo/Audio/File_DolbyAudioMetadata.cpp diff --git a/format/riff/wav.go b/format/riff/wav.go index aacc11833..5fb9dbf1a 100644 --- a/format/riff/wav.go +++ b/format/riff/wav.go @@ -8,12 +8,17 @@ package riff // TODO: default little endian import ( + "embed" + "github.com/wader/fq/format" "github.com/wader/fq/pkg/decode" "github.com/wader/fq/pkg/interp" "github.com/wader/fq/pkg/scalar" ) +//go:embed wav.md +var wavFS embed.FS + var wavHeaderGroup decode.Group var wavFooterGroup decode.Group var wavDolbyMetadataGroup decode.Group @@ -32,6 +37,7 @@ func init() { {Groups: []*decode.Group{format.Dolby_Metadata}, Out: &wavDolbyMetadataGroup}, }, }) + interp.RegisterFS(wavFS) } const ( @@ -78,7 +84,7 @@ func wavDecode(d *decode.D) any { return id, size }, - func(d *decode.D, id string, path path, size int64) (bool, any) { + func(d *decode.D, id string, path path) (bool, any) { switch id { case "RIFF": riffType = d.FieldUTF8("format", 4, d.StrAssert(wavRiffType)) @@ -161,13 +167,26 @@ func wavDecode(d *decode.D) any { return false, nil case "chna": - chnaDecode(d) + 
d.FieldU16("num_tracks") + d.FieldU16("num_uids") + d.FieldArray("audio_ids", func(d *decode.D) { + for !d.End() { + d.FieldStruct("audio_id", func(d *decode.D) { + d.FieldU16("track_index") + d.FieldUTF8("uid", 12) + d.FieldUTF8("track_format_id_reference", 14) + d.FieldUTF8("pack_format_id_reference", 11) + d.FieldRawLen("padding", 8) + }) + } + }) return false, nil case "axml": - axmlDecode(d) + d.FieldUTF8("xml", int(d.BitsLeft())/8) return false, nil case "dbmd": - d.Format(&wavDolbyMetadataGroup, nil) + // TEMP TEMP TEMP: delete old dolby.go and bring uncomment + old_dbmdDecode(d) // d.Format(&wavDolbyMetadataGroup, nil) return false, nil default: diff --git a/format/riff/wav.md b/format/riff/wav.md new file mode 100644 index 000000000..a9cb2658a --- /dev/null +++ b/format/riff/wav.md @@ -0,0 +1,39 @@ +WAVE audio file format. + +Also includes support for [Audio Definition Model](https://adm.ebu.io/background/what_is_the_adm.html) and 3D Audio. + +RIFF / WAV / Broadcast Wave Format (BWF) chunks: + +- `RIFF`: primary container chunk specifying the file type and containing sub-chunks (e.g., fmt, data) +- `fmt`: describes format / stream encoding in data chunk +- `data`: indicates size and contains encoded raw sound data +- `bext`: broadcast extension chunk, containing broadcast-specific metadata such as description, originator, creation date, time reference, and more +- `LIST`: organizes additional metadata in sub-chunks, often used to include information like artist, genre, or title in INFO or other standardized formats +- `smpl`: sample metadata chunk, containing looping and sampling information, such as start and end points for loops, sample rate, and MIDI pitch +- `fact`: contains metadata on the original uncompressed data, such as the number of samples, typically used in non-PCM (compressed) formats to aid in playback and synchronization +- `chna`: track UIDs of Audio Definition Model +- `axml`: XML metadata, e.g. 
for Audio Definition Model ambisonics and elements as in [EBUCore spec](https://tech.ebu.ch/docs/tech/tech3293.pdf) +- `dbmd`: Dolby specific metadata like loudness and binaural settings, see also [`dolby_metadata` format](#dolby_metadata) + + +### Examples +Decode ADM configuration from `chna` and `axml` chunks: +```bash +$ fq -d wav '.chunks[] | select(.id | IN("chna", "axml")) | tovalue' amd-bwf.wav + +# Extract ADM chunk objects definitions xml content +$ fq -r -d wav '.chunks[] | select(.id | IN("axml")) | .xml | tovalue' amd-bwf.wav | tee axml-content.xml +``` + +### Authors +- [@wader](https://github.com/wader), original author +- [@johnnymarnell](https://johnnymarnell.github.io), ADM support + +### References +- http://soundfile.sapp.org/doc/WaveFormat/ +- https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/wavdec.c +- https://tech.ebu.ch/docs/tech/tech3285.pdf +- http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html +- https://adm.ebu.io/background/what_is_the_adm.html +- https://tech.ebu.ch/docs/tech/tech3285s7.pdf +- https://tech.ebu.ch/docs/tech/tech3285s5.pdf diff --git a/format/riff/webp.go b/format/riff/webp.go index de69d6238..ebc0bbc59 100644 --- a/format/riff/webp.go +++ b/format/riff/webp.go @@ -44,7 +44,7 @@ func webpDecode(d *decode.D) any { size := d.FieldU32("size") return id, int64(size) }, - func(d *decode.D, id string, path path, size int64) (bool, any) { + func(d *decode.D, id string, path path) (bool, any) { switch id { case "RIFF": riffType = d.FieldUTF8("format", 4, d.StrAssert(webpRiffType))