Skip to content

Commit 8ae3796

Browse files
authored
Fix RegexOptions.Compiled|IgnoreCase perf when dynamic code isn't supported (#107874)
If a regex is created with both RegexOptions.Compiled and RegexOptions.IgnoreCase, and its pattern begins with a reasonably small number of alternating strings, it now uses `SearchValues<string>` to find the next possible match location. However, the `SearchValues<string>` instance was not created during construction when RegexOptions.Compiled was specified, so it was missing whenever the interpreter ended up being used instead. In particular, if the implementation falls back to the interpreter because compilation is unavailable (dynamic code isn't supported), it doesn't use any optimization to find the next starting location. That's a regression from the earlier behavior, which would at least use a vectorized search for one character class derived from the set of starting strings. This change fixes the regression by always creating the `SearchValues<string>`. Doing so adds some construction overhead when using RegexOptions.Compiled, but it's typically just a few percentage points, and it applies only in the cases where this `SearchValues<string>` optimization kicks in. At the moment, giving the constructor perfect knowledge of whether the creation can be skipped would be too invasive a change. This overhead does not apply to the source generator.
1 parent f5acabe commit 8ae3796

File tree

1 file changed

+4
-16
lines changed

1 file changed

+4
-16
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs

+4-16
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
8484
bool dfa = (options & RegexOptions.NonBacktracking) != 0;
8585
bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
8686
bool interpreter = !compiled && !dfa;
87-
bool usesRfoTryFind = !compiled;
8887

8988
// For interpreter, we want to employ optimizations, but we don't want to make construction significantly
9089
// more expensive; someone who wants to pay to do more work can specify Compiled. So for the interpreter
@@ -149,10 +148,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
149148
LeadingPrefixes = caseInsensitivePrefixes;
150149
FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight;
151150
#if SYSTEM_TEXT_REGULAREXPRESSIONS
152-
if (usesRfoTryFind)
153-
{
154-
LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase);
155-
}
151+
LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase);
156152
#endif
157153
return;
158154
}
@@ -165,10 +161,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
165161
// LeadingPrefixes = caseSensitivePrefixes;
166162
// FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight;
167163
#if SYSTEM_TEXT_REGULAREXPRESSIONS
168-
// if (usesRfoTryFind)
169-
// {
170-
// LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal);
171-
// }
164+
// LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal);
172165
#endif
173166
// return;
174167
//}
@@ -699,14 +692,9 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
699692
case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
700693
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
701694
{
702-
if (LeadingStrings is not SearchValues<string> searchValues)
703-
{
704-
// This should be exceedingly rare and only happen if a Compiled regex selected this
705-
// option but then failed to compile (e.g. due to too deep stacks) and fell back to the interpreter.
706-
return true;
707-
}
695+
Debug.Assert(LeadingStrings is not null);
708696

709-
int i = textSpan.Slice(pos).IndexOfAny(searchValues);
697+
int i = textSpan.Slice(pos).IndexOfAny(LeadingStrings);
710698
if (i >= 0)
711699
{
712700
pos += i;

0 commit comments

Comments
 (0)