Skip to content

Commit

Permalink
Improve regex detection
Browse files Browse the repository at this point in the history
Fixes #195
  • Loading branch information
matthiasmullie committed Sep 13, 2017
1 parent 6e9d575 commit ff4ec66
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 13 deletions.
96 changes: 86 additions & 10 deletions src/JS.php
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ class JS extends Minify
*/
protected $operatorsAfter = array();

/**
 * Extracted data collected from the nested minifier instances that are
 * created during regex extraction (their `extracted` arrays are merged into
 * this one). It is used as the replacement map when the content is restored.
 *
 * @var array
 */
protected $nestedExtracted = array();

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -216,30 +221,101 @@ protected function extractRegex()
$minifier = $this;
$callback = function ($match) use ($minifier) {
$count = count($minifier->extracted);
$placeholder = '/'.$count.'/';
$minifier->extracted[$placeholder] = $match[0];
$placeholder = '"'.$count.'"';
$minifier->extracted[$placeholder] = $match['regex'];

// because we're also trying to find regular expressions that follow
// if/while/for statements, the content within those statements must be
// minified as well...
// e.g. `if("some string"/* or comment */)` should become
// `if("some string")`
if (isset($match['before'])) {
$other = new static();
$other->extractStrings('\'"`', "$count-");
$other->stripComments();
$match['before'] = $other->replace($match['before']);
$this->nestedExtracted += $other->extracted;
}

return $placeholder;
return (isset($match['before']) ? $match['before'] : '').
$placeholder.
(isset($match['after']) ? $match['after'] : '');
};

$pattern = '\/.*?(?<!\\\\)(\\\\\\\\)*\/[gimy]*(?![0-9a-zA-Z\/])';
$pattern = '(?P<regex>\/.*?(?<!\\\\)(\\\\\\\\)*\/[gimy]*)(?![0-9a-zA-Z\/])';

// a regular expression can only be followed by a few operators or some
// of the RegExp methods (a `/` followed by a variable or value is
// likely part of a division, not a regex)
$keywords = $this->getKeywordsForRegex($this->keywordsReserved, '/');
$before = '([=:,;\)\}\(\{]|^|'.implode('|', $keywords).')\s*';
$after = '[\.,;\)\}]';
$methods = '\.(exec|test|match|search|replace|split)\(';
$this->registerPattern('/'.$before.'\K'.$pattern.'(?=\s*('.$after.'|'.$methods.'))/', $callback);
$keywords = array('do', 'in', 'new', 'else', 'throw', 'yield', 'delete', 'return', 'typeof');
$before = '(?P<before>[=:,;\}\(\{&\|]|^|'.implode('|', $keywords).')';
$propertiesAndMethods = array(
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Properties
'prototype',
'length',
'lastIndex',
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Properties_2
'constructor',
'flags',
'global',
'ignoreCase',
'multiline',
'source',
'sticky',
'unicode',
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Methods_2
'compile(',
'exec(',
'test(',
'match',
'replace(',
'search(',
'split(',
'toSource(',
'toString(',
);
$delimiters = array_fill(0, count($propertiesAndMethods), '/');
$propertiesAndMethods = array_map('preg_quote', $propertiesAndMethods, $delimiters);
$after = '(?P<after>[\.,;\)\}&\|+]|$|\.('.implode('|', $propertiesAndMethods).'))';
$this->registerPattern('/'.$before.'\s*'.$pattern.'\s*'.$after.'/', $callback);

// we didn't check for regular expressions after `)`, because a regex
// usually cannot follow that character (e.g.
// (1+2)/3/4 -> /3/ could be considered a regex, but it's not)
// however, after single-line if/while/for, there could very well be a
// regex after `)` (e.g. if(true)/regex/)
// there is one problem, though: it's (near) impossible to check for
// when the if/while/for statement is closed (same amount of closing
// brackets as there were opened), so I'll ignore single-line statements
// with nested brackets followed by a regex for now...
$before = '(?P<before>\b(if|while|for)\s*\((?P<code>[^\(]+?)\))';
$this->registerPattern('/'.$before.'\s*'.$pattern.'\s*'.$after.'/', $callback);

// 1 more edge case: a regex can be followed by a lot more operators or
// keywords if there's a newline (ASI) in between, where the operator
// actually starts a new statement
// (https://github.com/matthiasmullie/minify/issues/56)
$operators = $this->getOperatorsForRegex($this->operatorsBefore, '/');
$operators += $this->getOperatorsForRegex($this->keywordsReserved, '/');
$this->registerPattern('/'.$pattern.'\s*\n(?=\s*('.implode('|', $operators).'))/', $callback);
$after = '(?P<after>\n\s*('.implode('|', $operators).'))';
$this->registerPattern('/'.$pattern.'\s*'.$after.'/', $callback);
}

/**
 * Restores everything that was swapped out for placeholders: first the
 * regular extracted data (handled by the parent implementation), then the
 * data that was extracted by the nested minifiers during regex extraction.
 *
 * {@inheritdoc}
 */
protected function restoreExtractedData($content)
{
    // the parent puts the regular extracted data back; its result is then
    // run through the map of nested placeholders gathered in extractRegex()
    return strtr(
        parent::restoreExtractedData($content),
        $this->nestedExtracted
    );
}

/**
Expand Down
7 changes: 4 additions & 3 deletions src/Minify.php
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,13 @@ protected function replacePattern($pattern, $replacement, $content)
* via restoreStrings().
*
* @param string[optional] $chars
* @param string[optional] $placeholderPrefix
*/
protected function extractStrings($chars = '\'"')
protected function extractStrings($chars = '\'"', $placeholderPrefix = '')
{
// PHP only supports $this inside anonymous functions since 5.4
$minifier = $this;
$callback = function ($match) use ($minifier) {
$callback = function ($match) use ($minifier, $placeholderPrefix) {
// check the second index here, because the first always contains a quote
if ($match[2] === '') {
/*
Expand All @@ -342,7 +343,7 @@ protected function extractStrings($chars = '\'"')
}

$count = count($minifier->extracted);
$placeholder = $match[1].$count.$match[1];
$placeholder = $match[1].$placeholderPrefix.$count.$match[1];
$minifier->extracted[$placeholder] = $match[1].$match[2].$match[1];

return $placeholder;
Expand Down
11 changes: 11 additions & 0 deletions tests/js/JSTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,17 @@ function someOtherFunction() {
$the_portfolio.data(\'carouseling\',!0);$active_carousel_group.children().each(function(){$(this).css({\'width\':$(this).innerWidth()+1,\'position\':\'absolute\',\'left\':($(this).innerWidth()*($(this).data(\'position\')-1))})})}',
);

$tests[] = array(
'if("some string" /*or comment*/)/regex/',
'if("some string")/regex/',
);

// https://github.com/matthiasmullie/minify/issues/195
$tests[] = array(
'"function"!=typeof/./&&"object"!=typeof Int8Array',
'"function"!=typeof/./&&"object"!=typeof Int8Array',
);

// known minified files to help doublecheck changes in places not yet
// anticipated in these tests
$files = glob(__DIR__.'/sample/minified/*.js');
Expand Down

0 comments on commit ff4ec66

Please sign in to comment.