From f14ce7db5a55f4a4b8de95248eb62e0a2ab38d1b Mon Sep 17 00:00:00 2001 From: David Scotson Date: Sat, 21 Mar 2020 12:57:56 +0000 Subject: [PATCH 1/3] fixes #222 and #280 uses Tim Hunt's regex fix for #333, but moves it so it's done in a different place to split on, rather than remove, special comments --- src/CSS.php | 87 ++++++++++++++++++++++++++++++++--------------------- src/JS.php | 67 +++++++++++++++++++++++++++-------------- 2 files changed, 97 insertions(+), 57 deletions(-) diff --git a/src/CSS.php b/src/CSS.php index 89fcf1bb..ba0788d5 100644 --- a/src/CSS.php +++ b/src/CSS.php @@ -295,30 +295,60 @@ protected function importFiles($source, $content) */ public function execute($path = null, $parents = array()) { - $content = ''; + $preservecommentpattern = '/( + # optional newline + \n? + # start comment + \/\* + # comment content + (?: + # either starts with an ! + ! + | + # or, after some number of characters which do not end the comment + (?:(?!\*\/).)*? + # there is either a @license or @preserve tag + @(?:license|preserve) + ) + # then match to the end of the comment + .*?\*\/\n? + )/ixs'; // loop CSS data (raw data and files) foreach ($this->data as $source => $css) { - /* - * Let's first take out strings & comments, since we can't just - * remove whitespace anywhere. If whitespace occurs inside a string, - * we should leave it alone. E.g.: - * p { content: "a test" } - */ - $this->extractStrings(); - $this->stripComments(); - $this->extractCalcs(); - $css = $this->replace($css); - - $css = $this->stripWhitespace($css); - $css = $this->shortenColors($css); - $css = $this->shortenZeroes($css); - $css = $this->shortenFontWeights($css); - $css = $this->stripEmptyTags($css); - - // restore the string we've extracted earlier - $css = $this->restoreExtractedData($css); - + // Split JS on special comments. 
+ $chunks = preg_split($preservecommentpattern, $css, -1, PREG_SPLIT_DELIM_CAPTURE ); + $processed = []; + for ($i = 0; $i < count($chunks); $i += 2) { + $code = $chunks[$i]; + $comment = ''; + if (isset($chunks[$i + 1])) { + $comment = $chunks[$i + 1]; + } + /* + * Let's first take out strings & other comments, since we can't just + * remove whitespace anywhere. If whitespace occurs inside a string, + * we should leave it alone. E.g.: + * p { content: "a test" } + */ + $this->extractStrings(); + $this->stripComments(); + $this->extractCalcs(); + $code = $this->replace($code); + + $code = $this->stripWhitespace($code); + $code = $this->shortenColors($code); + $code = $this->shortenZeroes($code); + $code = $this->shortenFontWeights($code); + $code = $this->stripEmptyTags($code); + + // restore the string we've extracted earlier + $code = $this->restoreExtractedData($code); + + $processed[] = $code; + $processed[] = $comment; + } + $css = implode($processed); $source = is_int($source) ? '' : $source; $parents = $source ? 
array_merge($parents, array($source)) : $parents; $css = $this->combineImports($source, $css, $parents); @@ -335,9 +365,9 @@ public function execute($path = null, $parents = array()) $css = $this->move($converter, $css); // combine css - $content .= $css; + $content[] = $css; } - + $content = implode($content); $content = $this->moveImportsToTop($content); return $content; @@ -627,17 +657,6 @@ protected function stripEmptyTags($content) */ protected function stripComments() { - // PHP only supports $this inside anonymous functions since 5.4 - $minifier = $this; - $callback = function ($match) use ($minifier) { - $count = count($minifier->extracted); - $placeholder = '/*'.$count.'*/'; - $minifier->extracted[$placeholder] = $match[0]; - - return $placeholder; - }; - $this->registerPattern('/\n?\/\*(!|.*?@license|.*?@preserve).*?\*\/\n?/s', $callback); - $this->registerPattern('/\/\*.*?\*\//s', ''); } diff --git a/src/JS.php b/src/JS.php index 92389cdd..5355ead8 100644 --- a/src/JS.php +++ b/src/JS.php @@ -146,8 +146,6 @@ public function __construct() */ public function execute($path = null) { - $content = ''; - /* * Let's first take out strings, comments and regular expressions. * All of these can contain JS code-like characters, and we should make @@ -163,23 +161,56 @@ public function execute($path = null) $this->stripComments(); $this->extractRegex(); + $preservecommentpattern = '/( + # optional newline + \n? + # start comment + \/\* + # comment content + (?: + # either starts with an ! + ! + | + # or, after some number of characters which do not end the comment + (?:(?!\*\/).)*? + # there is either a @license or @preserve tag + @(?:license|preserve) + ) + # then match to the end of the comment + .*?\*\/\n? 
+ )/ixs'; // loop files foreach ($this->data as $source => $js) { - // take out strings, comments & regex (for which we've registered - // the regexes just a few lines earlier) - $js = $this->replace($js); - $js = $this->propertyNotation($js); - $js = $this->shortenBools($js); - $js = $this->stripWhitespace($js); + // Split JS on special comments. + $chunks = preg_split($preservecommentpattern, $js, -1, PREG_SPLIT_DELIM_CAPTURE ); + $processed = []; + for ($i = 0; $i < count($chunks); $i += 2) { + $code = $chunks[$i]; + $comment = ''; + if (isset($chunks[$i + 1])) { + $comment = $chunks[$i + 1]; + } + + // take out strings, other comments & regex (for which we've registered + // the regexes just a few lines earlier) + $code = $this->replace($code); - // combine js: separating the scripts by a ; - $content .= $js.";"; - } + $code = $this->propertyNotation($code); + $code = $this->shortenBools($code); + $code = $this->stripWhitespace($code); + $processed[] = $code; + $processed[] = $comment; + } + $file = implode($processed); + $file = preg_replace('/;$/s', '', $file); + + $files[] = $file; + } + $content = implode(';', $files); // clean up leftover `;`s from the combination of multiple scripts $content = ltrim($content, ';'); - $content = (string) substr($content, 0, -1); /* * Earlier, we extracted strings & regular expressions and replaced them @@ -195,17 +226,7 @@ public function execute($path = null) */ protected function stripComments() { - // PHP only supports $this inside anonymous functions since 5.4 - $minifier = $this; - $callback = function ($match) use ($minifier) { - $count = count($minifier->extracted); - $placeholder = '/*'.$count.'*/'; - $minifier->extracted[$placeholder] = $match[0]; - - return $placeholder; - }; // multi-line comments - $this->registerPattern('/\n?\/\*(!|.*?@license|.*?@preserve).*?\*\/\n?/s', $callback); $this->registerPattern('/\/\*.*?\*\//s', ''); // single-line comments @@ -432,7 +453,7 @@ protected function 
stripWhitespace($content) * script: ASI will kick in here & we're all about minifying. * Semicolons at beginning of the file don't make any sense either. */ - $content = preg_replace('/;(\}|$)/s', '\\1', $content); + $content = preg_replace('/;(\})/s', '\\1', $content); $content = ltrim($content, ';'); // get rid of remaining whitespace af beginning/end From 279046f2b0fbc70d8a159534646c425971e7b162 Mon Sep 17 00:00:00 2001 From: David Scotson Date: Sat, 21 Mar 2020 18:25:35 +0000 Subject: [PATCH 2/3] speed/memory optimisations --- src/JS.php | 1 + src/Minify.php | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/JS.php b/src/JS.php index 5355ead8..a0196580 100644 --- a/src/JS.php +++ b/src/JS.php @@ -204,6 +204,7 @@ public function execute($path = null) $processed[] = $comment; } $file = implode($processed); + // ASI at end of file, will be added back if concatenated later. $file = preg_replace('/;$/s', '', $file); $files[] = $file; diff --git a/src/Minify.php b/src/Minify.php index 3f40bc15..dbc9e391 100644 --- a/src/Minify.php +++ b/src/Minify.php @@ -105,7 +105,7 @@ public function add($data /* $data = null, ... */) * @param string|string[] $data * * @return static - * + * * @throws IOException */ public function addFile($data /* $data = null, ... 
*/) @@ -268,7 +268,7 @@ protected function registerPattern($pattern, $replacement = '') */ protected function replace($content) { - $processed = ''; + $processed = []; $positions = array_fill(0, count($this->patterns), -1); $matches = array(); @@ -307,7 +307,7 @@ protected function replace($content) // no more matches to find: everything's been processed, break out if (!$matches) { - $processed .= $content; + $processed[] = $content; break; } @@ -317,6 +317,7 @@ protected function replace($content) $discardLength = min($positions); $firstPattern = array_search($discardLength, $positions); $match = $matches[$firstPattern][0][0]; + $matchlen = strlen($match); // execute the pattern that matches earliest in the content string list($pattern, $replacement) = $this->patterns[$firstPattern]; @@ -325,22 +326,22 @@ protected function replace($content) // figure out which part of the string was unmatched; that's the // part we'll execute the patterns on again next $content = (string) substr($content, $discardLength); - $unmatched = (string) substr($content, strpos($content, $match) + strlen($match)); + $unmatched = (string) substr($content, strpos($content, $match) + $matchlen); // move the replaced part to $processed and prepare $content to // again match batch of patterns against - $processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched)); + $processed[] = substr($replacement, 0, strlen($replacement) - strlen($unmatched)); $content = $unmatched; // first match has been replaced & that content is to be left alone, // the next matches will start after this replacement, so we should // fix their offsets foreach ($positions as $i => $position) { - $positions[$i] -= $discardLength + strlen($match); + $positions[$i] -= $discardLength + $matchlen; } } - return $processed; + return implode($processed); } /** From 969f084873b08988df5e7efca844998f563296f3 Mon Sep 17 00:00:00 2001 From: David Scotson Date: Sun, 22 Mar 2020 12:57:32 +0000 Subject: [PATCH 3/3] 
tweak for #301, unnecessary regex? It's not clear if both of these are necessary: the second one (with the `m` modifier) should match everything the first one matches, plus matches ending at internal line breaks, so the first one seems redundant. The unit test committed with it, plus all the others, still pass, and there's no diff in the output on a long test file. --- src/CSS.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CSS.php b/src/CSS.php index ba0788d5..196f039e 100644 --- a/src/CSS.php +++ b/src/CSS.php @@ -727,7 +727,6 @@ protected function extractCalcs() return $placeholder.$rest; }; - $this->registerPattern('/calc(\(.+?)(?=$|;|}|calc\()/', $callback); $this->registerPattern('/calc(\(.+?)(?=$|;|}|calc\()/m', $callback); }