Skip to content

Commit ff4ec66

Browse files
Improve regex detection
Fixes #195
1 parent 6e9d575 commit ff4ec66

File tree

3 files changed

+101
-13
lines changed

3 files changed

+101
-13
lines changed

src/JS.php

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ class JS extends Minify
110110
*/
111111
protected $operatorsAfter = array();
112112

113+
/**
114+
* @var array
115+
*/
116+
protected $nestedExtracted = array();
117+
113118
/**
114119
* {@inheritdoc}
115120
*/
@@ -216,30 +221,101 @@ protected function extractRegex()
216221
$minifier = $this;
217222
$callback = function ($match) use ($minifier) {
218223
$count = count($minifier->extracted);
219-
$placeholder = '/'.$count.'/';
220-
$minifier->extracted[$placeholder] = $match[0];
224+
$placeholder = '"'.$count.'"';
225+
$minifier->extracted[$placeholder] = $match['regex'];
226+
227+
// because we're also trying to find regular expressions that follow
228+
// if/when/for statements, we should also make sure that the content
229+
// within these statements is also minified...
230+
// e.g. `if("some string"/* or comment */)` should become
231+
// `if("some string")`
232+
if (isset($match['before'])) {
233+
$other = new static();
234+
$other->extractStrings('\'"`', "$count-");
235+
$other->stripComments();
236+
$match['before'] = $other->replace($match['before']);
237+
$this->nestedExtracted += $other->extracted;
238+
}
221239

222-
return $placeholder;
240+
return (isset($match['before']) ? $match['before'] : '').
241+
$placeholder.
242+
(isset($match['after']) ? $match['after'] : '');
223243
};
224244

225-
$pattern = '\/.*?(?<!\\\\)(\\\\\\\\)*\/[gimy]*(?![0-9a-zA-Z\/])';
245+
$pattern = '(?P<regex>\/.*?(?<!\\\\)(\\\\\\\\)*\/[gimy]*)(?![0-9a-zA-Z\/])';
226246

227247
// a regular expression can only be followed by a few operators or some
228248
// of the RegExp methods (a `\` followed by a variable or value is
229249
// likely part of a division, not a regex)
230-
$keywords = $this->getKeywordsForRegex($this->keywordsReserved, '/');
231-
$before = '([=:,;\)\}\(\{]|^|'.implode('|', $keywords).')\s*';
232-
$after = '[\.,;\)\}]';
233-
$methods = '\.(exec|test|match|search|replace|split)\(';
234-
$this->registerPattern('/'.$before.'\K'.$pattern.'(?=\s*('.$after.'|'.$methods.'))/', $callback);
250+
$keywords = array('do', 'in', 'new', 'else', 'throw', 'yield', 'delete', 'return', 'typeof');
251+
$before = '(?P<before>[=:,;\}\(\{&\|]|^|'.implode('|', $keywords).')';
252+
$propertiesAndMethods = array(
253+
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Properties
254+
'prototype',
255+
'length',
256+
'lastIndex',
257+
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Properties_2
258+
'constructor',
259+
'flags',
260+
'global',
261+
'ignoreCase',
262+
'multiline',
263+
'source',
264+
'sticky',
265+
'unicode',
266+
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Methods_2
267+
'compile(',
268+
'exec(',
269+
'test(',
270+
'match',
271+
'replace(',
272+
'search(',
273+
'split(',
274+
'toSource(',
275+
'toString(',
276+
);
277+
$delimiters = array_fill(0, count($propertiesAndMethods), '/');
278+
$propertiesAndMethods = array_map('preg_quote', $propertiesAndMethods, $delimiters);
279+
$after = '(?P<after>[\.,;\)\}&\|+]|$|\.('.implode('|', $propertiesAndMethods).'))';
280+
$this->registerPattern('/'.$before.'\s*'.$pattern.'\s*'.$after.'/', $callback);
281+
282+
// we didn't check for regular expressions after `)`, because that is
283+
// more often than not not a character where a regex can follow (e.g.
284+
// (1+2)/3/4 -> /3/ could be considered a regex, but it's not)
285+
// however, after single-line if/while/for, there could very well be a
286+
// regex after `)` (e.g. if(true)/regex/)
287+
// there is one problem, though: it's (near) impossible to check for
288+
// when the if/while/for statement is closed (same amount of closing
289+
// brackets as there were opened), so I'll ignore single-line statements
290+
// with nested brackets followed by a regex for now...
291+
$before = '(?P<before>\b(if|while|for)\s*\((?P<code>[^\(]+?)\))';
292+
$this->registerPattern('/'.$before.'\s*'.$pattern.'\s*'.$after.'/', $callback);
235293

236294
// 1 more edge case: a regex can be followed by a lot more operators or
237295
// keywords if there's a newline (ASI) in between, where the operator
238296
// actually starts a new statement
239297
// (https://github.com/matthiasmullie/minify/issues/56)
240298
$operators = $this->getOperatorsForRegex($this->operatorsBefore, '/');
241299
$operators += $this->getOperatorsForRegex($this->keywordsReserved, '/');
242-
$this->registerPattern('/'.$pattern.'\s*\n(?=\s*('.implode('|', $operators).'))/', $callback);
300+
$after = '(?P<after>\n\s*('.implode('|', $operators).'))';
301+
$this->registerPattern('/'.$pattern.'\s*'.$after.'/', $callback);
302+
}
303+
304+
/**
305+
* In addition to the regular restore routine, we also need to restore a few
306+
* more things that have been extracted as part of the regex extraction...
307+
*
308+
* {@inheritdoc}
309+
*/
310+
protected function restoreExtractedData($content)
311+
{
312+
// restore regular extracted stuff
313+
$content = parent::restoreExtractedData($content);
314+
315+
// restore nested stuff from within regex extraction
316+
$content = strtr($content, $this->nestedExtracted);
317+
318+
return $content;
243319
}
244320

245321
/**

src/Minify.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,12 +324,13 @@ protected function replacePattern($pattern, $replacement, $content)
324324
* via restoreStrings().
325325
*
326326
* @param string[optional] $chars
327+
* @param string[optional] $placeholderPrefix
327328
*/
328-
protected function extractStrings($chars = '\'"')
329+
protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
329330
{
330331
// PHP only supports $this inside anonymous functions since 5.4
331332
$minifier = $this;
332-
$callback = function ($match) use ($minifier) {
333+
$callback = function ($match) use ($minifier, $placeholderPrefix) {
333334
// check the second index here, because the first always contains a quote
334335
if ($match[2] === '') {
335336
/*
@@ -342,7 +343,7 @@ protected function extractStrings($chars = '\'"')
342343
}
343344

344345
$count = count($minifier->extracted);
345-
$placeholder = $match[1].$count.$match[1];
346+
$placeholder = $match[1].$placeholderPrefix.$count.$match[1];
346347
$minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];
347348

348349
return $placeholder;

tests/js/JSTest.php

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,17 @@ function someOtherFunction() {
11001100
$the_portfolio.data(\'carouseling\',!0);$active_carousel_group.children().each(function(){$(this).css({\'width\':$(this).innerWidth()+1,\'position\':\'absolute\',\'left\':($(this).innerWidth()*($(this).data(\'position\')-1))})})}',
11011101
);
11021102

1103+
$tests[] = array(
1104+
'if("some string" /*or comment*/)/regex/',
1105+
'if("some string")/regex/',
1106+
);
1107+
1108+
// https://github.com/matthiasmullie/minify/issues/195
1109+
$tests[] = array(
1110+
'"function"!=typeof/./&&"object"!=typeof Int8Array',
1111+
'"function"!=typeof/./&&"object"!=typeof Int8Array',
1112+
);
1113+
11031114
// known minified files to help doublecheck changes in places not yet
11041115
// anticipated in these tests
11051116
$files = glob(__DIR__.'/sample/minified/*.js');

0 commit comments

Comments
 (0)