210210define ('EBML_ID_CLUSTERREFERENCEVIRTUAL ' , 0x7D ); // [FD] -- Relative position of the data that should be in position of the virtual block.
211211
212212
213+ /**
214+ * Matroska constants
215+ */
216+ define ('MATROSKA_DEFAULT_TIMECODESCALE ' , 1000000 );
217+
213218/**
214219* @tutorial http://www.matroska.org/technical/specs/index.html
215220*
@@ -241,6 +246,7 @@ class getid3_matroska extends getid3_handler
241246 private $ EBMLbuffer_length = 0 ;
242247 private $ current_offset = 0 ;
243248 private $ unuseful_elements = array (EBML_ID_CRC32 , EBML_ID_VOID );
249+ private $ parse_first_cluster = false ;
244250
245251 /**
246252 * @return bool
@@ -256,14 +262,25 @@ public function Analyze()
256262 $ this ->error ('EBML parser: ' .$ e ->getMessage ());
257263 }
258264
259- // calculate playtime
260- if (isset ($ info ['matroska ' ]['info ' ]) && is_array ($ info ['matroska ' ]['info ' ])) {
261- foreach ($ info ['matroska ' ]['info ' ] as $ key => $ infoarray ) {
262- if (isset ($ infoarray ['Duration ' ])) {
263- // TimecodeScale is how many nanoseconds each Duration unit is
264- $ info ['playtime_seconds ' ] = $ infoarray ['Duration ' ] * ((isset ($ infoarray ['TimecodeScale ' ]) ? $ infoarray ['TimecodeScale ' ] : 1000000 ) / 1000000000 );
265- break ;
266- }
265+ $ this ->calculatePlaytimeFromMetadata ($ info );
266+
267+ // If there was no duration metadata, this might be an incomplete file or a streaming file
268+ // We need Cluster information so we can use their timecodes to estimate playtime.
269+ if (!isset ($ info ['playtime_seconds ' ]) && !$ this ->parse_whole_file ) {
270+ // If we have not yet scanned the entire file, scan the start and end for Clusters,
271+ $ this ->scanStartEndForClusters ($ info );
272+ }
273+
274+ if (isset ($ info ['matroska ' ]['cluster ' ]) && is_array ($ info ['matroska ' ]['cluster ' ])) {
275+ if (!isset ($ info ['playtime_seconds ' ]) && !empty ($ info ['matroska ' ]['cluster ' ])) {
276+ // estimate playtime using clusters if not yet known
277+ $ this ->calculatePlaytimeFromClusters ($ info );
278+ }
279+
280+ // Remove cluster information from output if hide_clusters is true
281+ // These could have been set during scanStartEndForClusters()
282+ if ($ this ->hide_clusters ) {
283+ unset($ info ['matroska ' ]['cluster ' ]);
267284 }
268285 }
269286
@@ -1246,8 +1263,13 @@ private function parseEBML(&$info) {
12461263 }
12471264 $ this ->current_offset = $ subelement ['end ' ];
12481265 }
1249- if (!$ this ->hide_clusters ) {
1250- $ info ['matroska ' ]['cluster ' ][] = $ cluster_entry ;
1266+ // Always store clusters internally (for duration calculation)
1267+ // They will be removed from output later if hide_clusters is true
1268+ $ info ['matroska ' ]['cluster ' ][] = $ cluster_entry ;
1269+
1270+ // Stop parsing after finding first cluster
1271+ if ($ this ->parse_first_cluster ) {
1272+ return ;
12511273 }
12521274
12531275 // check to see if all the data we need exists already, if so, break out of the loop
@@ -1919,4 +1941,114 @@ private static function getDefaultStreamInfo($streams)
19191941 return $ info ;
19201942 }
19211943
/**
 * Derive playtime_seconds from the Duration stored in the parsed Info element(s).
 *
 * The first Info entry that carries a Duration wins. Its TimecodeScale (nanoseconds per
 * Duration unit) is used when present, otherwise MATROSKA_DEFAULT_TIMECODESCALE.
 *
 * @param array $info getID3 info array; playtime_seconds is written into it on success
 *
 * @return bool True if playtime_seconds was set from metadata, false otherwise
 */
private function calculatePlaytimeFromMetadata(&$info) {
	if (!isset($info['matroska']['info']) || !is_array($info['matroska']['info'])) {
		return false;
	}

	foreach ($info['matroska']['info'] as $segmentInfo) {
		if (!isset($segmentInfo['Duration'])) {
			continue;
		}

		// TimecodeScale is how many nanoseconds each Duration unit is
		if (isset($segmentInfo['TimecodeScale'])) {
			$nanosecondsPerUnit = $segmentInfo['TimecodeScale'];
		} else {
			$nanosecondsPerUnit = MATROSKA_DEFAULT_TIMECODESCALE;
		}

		$info['playtime_seconds'] = $segmentInfo['Duration'] * ($nanosecondsPerUnit / 1000000000);
		return true;
	}

	return false;
}
1961+
/**
 * Discard any buffered EBML data and point the parser at a new file offset.
 *
 * @param int $offset Offset the parser should continue from
 *
 * @return void
 */
private function resetParserBuffer($offset) {
	// drop the buffered bytes together with their bookkeeping
	$this->EBMLbuffer        = '';
	$this->EBMLbuffer_length = 0;
	$this->EBMLbuffer_offset = 0;

	// next read starts here
	$this->current_offset = $offset;
}
1973+
/**
 * Scan the start and the end of the file for Cluster information when Duration is missing.
 *
 * Only use this if no Duration was found in the Info element and we are not in
 * parse_whole_file mode. The first pass starts at avdataoffset and stops after the first
 * Cluster; the second pass (only attempted if the first pass produced at least one Cluster)
 * parses from at most 1MB before avdataend. Parser exceptions are reported through
 * $this->error() and do not abort the scan.
 *
 * Both parse_whole_file and parse_first_cluster are restored to the values they had on entry.
 *
 * @param array $info getID3 info array; parsed clusters are appended to $info['matroska']['cluster']
 *
 * @return void
 */
private function scanStartEndForClusters(&$info) {
	// remember the current settings so they can be put back afterwards
	$previous_parse_whole_file    = $this->parse_whole_file;
	$previous_parse_first_cluster = $this->parse_first_cluster;

	// we need to temporarily override parse_whole_file to be able to scan clusters
	$this->parse_whole_file    = true;
	$this->parse_first_cluster = true;

	// Scan start of file, stopping at the first cluster
	$this->resetParserBuffer($info['avdataoffset']);
	try {
		$this->parseEBML($info);
	} catch (Exception $e) {
		$this->error('EBML parser (start of file): '.$e->getMessage());
	}
	$this->parse_first_cluster = false;

	// Scan end of file for last cluster.
	// empty() also covers the case where the first pass found no cluster at all and the
	// key was never created, which would otherwise raise an undefined-index notice.
	if (!empty($info['matroska']['cluster']) && is_array($info['matroska']['cluster'])) {
		// maximum 1MB scan window before EOF, but never start before the data itself begins
		$scan_start = max($info['avdataoffset'], $info['avdataend'] - (1024 * 1024));
		$this->resetParserBuffer($scan_start);
		try {
			$this->parseEBML($info);
		} catch (Exception $e) {
			$this->error('EBML parser (end of file): '.$e->getMessage());
		}
	}

	$this->parse_whole_file    = $previous_parse_whole_file;
	$this->parse_first_cluster = $previous_parse_first_cluster;
}
2007+
/**
 * Look up the TimecodeScale recorded in the parsed Info element(s).
 *
 * The first Info entry that has a TimecodeScale supplies the value; if none does,
 * MATROSKA_DEFAULT_TIMECODESCALE is returned.
 *
 * @param array $info getID3 info array (read only here)
 *
 * @return int TimecodeScale value
 */
private function getTimecodeScale(&$info) {
	if (!isset($info['matroska']['info']) || !is_array($info['matroska']['info'])) {
		return MATROSKA_DEFAULT_TIMECODESCALE;
	}

	foreach ($info['matroska']['info'] as $segmentInfo) {
		if (isset($segmentInfo['TimecodeScale'])) {
			return $segmentInfo['TimecodeScale'];
		}
	}

	return MATROSKA_DEFAULT_TIMECODESCALE;
}
2027+
/**
 * Estimate playtime_seconds from the ClusterTimecode values of the stored clusters.
 *
 * The estimate is the span between the lowest and the highest ClusterTimecode, scaled by
 * getTimecodeScale(). Nothing is written unless the highest timecode is strictly greater
 * than the lowest one.
 *
 * @param array $info getID3 info array; playtime_seconds is written into it on success
 *
 * @return void
 */
private function calculatePlaytimeFromClusters(&$info) {
	if (!isset($info['matroska']['cluster']) || !is_array($info['matroska']['cluster'])) {
		return;
	}

	$earliest = null;
	$latest   = null;
	foreach ($info['matroska']['cluster'] as $clusterEntry) {
		if (!isset($clusterEntry['ClusterTimecode'])) {
			// clusters without a timecode cannot contribute to the estimate
			continue;
		}
		$timecode = $clusterEntry['ClusterTimecode'];
		if ($earliest === null || $timecode < $earliest) {
			$earliest = $timecode;
		}
		if ($latest === null || $timecode > $latest) {
			$latest = $timecode;
		}
	}

	if ($latest === null || $earliest === null || !($latest > $earliest)) {
		return;
	}

	$info['playtime_seconds'] = ($latest - $earliest) * ($this->getTimecodeScale($info) / 1000000000);
}
19222054}
0 commit comments