Skip to content

Commit 938090a

Browse files
committedOct 16, 2020
regnodes.h: Add two convenience bit masks
These categorize the many types of EXACT nodes, so that code can refer to a particular subset of such nodes without having to list all of them out. This simplifies some 'if' statements, and makes updating things easier.
1 parent a234542 commit 938090a

File tree

5 files changed

+63
-3
lines changed

5 files changed

+63
-3
lines changed
 

‎globvar.sym

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ PL_c9_utf8_dfa_tab
1212
PL_charclass
1313
PL_check
1414
PL_core_reg_engine
15+
PL_EXACTFish_bitmask
16+
PL_EXACT_REQ8_bitmask
1517
PL_extended_utf8_dfa_tab
1618
PL_fold
1719
PL_fold_latin1

‎pod/perldebguts.pod

-1
Original file line numberDiff line numberDiff line change
@@ -700,7 +700,6 @@ will be lost.
700700
EXACTFLU8 str Like EXACTFU, but use /il, UTF-8, (string
701701
is folded, and everything in it is above
702702
255
703-
704703
EXACT_REQ8 str Like EXACT, but only UTF-8 encoded targets
705704
can match
706705
LEXACT_REQ8 len:str 1 Like LEXACT, but only UTF-8 encoded targets

‎regcomp.sym

+3-2
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ BRANCH BRANCH, node 0 V ; Match this alternative, or the next...
129129

130130
#*Literals
131131
# NOTE: the relative ordering of these types is important; do not change it
132+
# By convention, folding nodes begin with EXACTF; a digit 8 is in the name if
133+
# and only if it requires a UTF-8 target string in order to successfully
134+
# match.
132135

133136
EXACT EXACT, str ; Match this string (flags field is the length).
134137

@@ -158,8 +161,6 @@ EXACTFUP EXACT, str ; Like EXACT, but match using /iu rules; (string n
158161
# representable in the UTF-8 target string.
159162

160163
EXACTFLU8 EXACT, str ; Like EXACTFU, but use /il, UTF-8, (string is folded, and everything in it is above 255
161-
162-
163164
EXACT_REQ8 EXACT, str ; Like EXACT, but only UTF-8 encoded targets can match
164165
LEXACT_REQ8 EXACT, len:str 1 ; Like LEXACT, but only UTF-8 encoded targets can match
165166
EXACTFU_REQ8 EXACT, str ; Like EXACTFU, but only UTF-8 encoded targets can match

‎regen/regcomp.pl

+44
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,49 @@ sub process_flags {
244244
EOP
245245
}
246246

247+
# Print to $out the C definitions that classify the EXACTish regnodes: the
# isEXACTFish() and isEXACT_REQ8() macros, plus the two U32 bit masks
# (PL_EXACTFish_bitmask and PL_EXACT_REQ8_bitmask) that those macros test.
#
#   $out    the file handle the generated text is printed to
#
# Bit N of each mask describes the node whose id is EXACT + N.  Whether a bit
# is set is decided purely from the node's name, following the naming
# conventions in regcomp.sym: a name beginning with "EXACTF" sets the bit in
# the folding mask, and a digit 8 anywhere in the name sets the bit in the
# requires-UTF-8 mask.
#
# Dies if the node named EXACT is not the first node of type EXACT that is
# encountered, if any such node has a lower id than EXACT, or if the nodes
# span more than 32 bit positions (the masks are emitted as U32).
sub print_process_EXACTish {
    my ($out)= @_;

    my @folded;
    my @req8;

    my $base;
    for my $node (@ops) {
        next unless $node->{type} eq 'EXACT';
        my $name = $node->{name};
        $base = $node->{id} if $name eq 'EXACT';

        # Bit positions are relative to EXACT itself, so it has to be seen
        # before any other node of its type; without it there is no origin
        # to subtract and the index would be meaningless.
        die "Node $name of type EXACT seen before the EXACT node itself"
            unless defined $base;

        my $index = $node->{id} - $base;
        die "Node $name has a lower id than EXACT" if $index < 0;

        # This depends entirely on naming conventions in regcomp.sym
        $folded[$index] = $name =~ /^EXACTF/ ? 1 : 0;
        $req8[$index]   = $name =~ /8/       ? 1 : 0;
    }

    die "Can't cope with > 32 EXACTish nodes" if @folded > 32;

    # Element 0 is the least significant bit, hence the reverse.  Any gap
    # left by non-contiguous node ids is emitted as an explicit 0 so that it
    # still occupies its bit position instead of vanishing from the string
    # and shifting all the higher bits down.
    my $exactf = sprintf "%X",
                 oct("0b" . join("", reverse map { $_ ? 1 : 0 } @folded));
    my $req8   = sprintf "%X",
                 oct("0b" . join("", reverse map { $_ ? 1 : 0 } @req8));

    print $out <<EOP;

/* Is 'op', known to be of type EXACT, folding? */
#define isEXACTFish(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))

/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
#define isEXACT_REQ8(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))

#ifndef DOINIT
EXTCONST U32 PL_EXACTFish_bitmask;
EXTCONST U32 PL_EXACT_REQ8_bitmask;
#else
EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf;
EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8;
#endif /* DOINIT */
EOP
}
289+
247290
sub read_definition {
248291
my ( $file )= @_;
249292
my ( $seen_sep, $pod_comment )= "";
@@ -752,6 +795,7 @@ sub do_perldebguts {
752795
print_reg_extflags_name($out);
753796
print_reg_intflags_name($out);
754797
print_process_flags($out);
798+
print_process_EXACTish($out);
755799
read_only_bottom_close_and_rename($out);
756800

757801
do_perldebguts();

‎regnodes.h

+14
Original file line numberDiff line numberDiff line change
@@ -1753,4 +1753,18 @@ EXTCONST U8 PL_simple_bitmask[] = {
17531753
};
17541754
#endif /* DOINIT */
17551755

1756+
/* Is 'op', known to be of type EXACT, folding? */
1757+
#define isEXACTFish(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))
1758+
1759+
/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
1760+
#define isEXACT_REQ8(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))
1761+
1762+
#ifndef DOINIT
1763+
EXTCONST U32 PL_EXACTFish_bitmask;
1764+
EXTCONST U32 PL_EXACT_REQ8_bitmask;
1765+
#else
1766+
EXTCONST U32 PL_EXACTFish_bitmask = 0x33F8;
1767+
EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x1E00;
1768+
#endif /* DOINIT */
1769+
17561770
/* ex: set ro: */

0 commit comments

Comments
 (0)