-
Notifications
You must be signed in to change notification settings - Fork 7
/
utf-8-test.pl
executable file
·84 lines (73 loc) · 1.48 KB
/
utf-8-test.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use File::Slurper 'read_binary';
use Text::LineNumber;
# Build "utf-8-test-data.c" from "UTF-8-test.txt"; both files are looked
# up in the directory containing this script.  Each quoted test string
# found in the input becomes a C string constant named "kuhn_<id>", where
# <id> is the test number with its dots replaced by underscores.  When a
# test line also carries a "U+XXXX" or "U-XXXX" code point, a matching
# "uint32_t expect_<id>" constant is written as well.
my $output = "$Bin/utf-8-test-data.c";
open my $out, ">", $output
    or die "Cannot open '$output' for writing: $!";

# The input is read as raw bytes, with no decoding applied.
my $input_file = "$Bin/UTF-8-test.txt";
my $input = read_binary ($input_file);

# The introductory example has no test number, so it gets the fixed name
# "kuhn_1".  It goes through hexify like every other string so that the
# generated C source contains only escaped bytes.
if ($input =~ /You should see the Greek word 'kosme':\s*"(\S+)"/) {
    my $kosme = hexify ($1);
    print $out "const char * kuhn_1 = \"$kosme\";\n";
}
else {
    die "Cannot find the 'kosme' example in '$input_file'";
}

# Escaped test string for each id, and the expected code point (a string
# of hexadecimal digits) for the ids which have one.
my %ids;
my %expect;

# First pass: a test number "N.N.N" at the start of a line, an optional
# code point written "U+XXXX" or "U-XXXX" (possibly in parentheses), then
# ':' or '=', then the quoted test string, which must be followed by '|'
# or the end of the line.
my $id_re = qr!^([0-9]+\.[0-9]+\.[0-9]+)!m;
while ($input =~ m!
    $id_re
    \s+
    [^U]*
    (?:\(?U[-\+]([0-9A-F]+)\)?)?
    .*?
    [:=]
    \s+
    "((?:\x00|[^"])+)"
    \s*
    (\||$)
    !xgm) {
    # Copy the captures straight away; the substitution below resets them.
    my ($id, $expect, $bytes) = ($1, $2, $3);
    $id =~ s!\.!_!g;
    # The first string seen for an id wins.
    next if exists $ids{$id};
    $ids{$id} = hexify ($bytes);
    # $expect is undefined when the line carries no code point.
    if (defined $expect) {
        $expect{$id} = $expect;
    }
}

# Second pass: a looser pattern for the tests numbered "3.N.N", picking
# up any of them which the first pass did not match.
while ($input =~ m!
    (3\.(?:[0-9]+)\.(?:[0-9]+))
    [^=:]+
    [=:]
    \s*
    "([^"]+)"
    !xg) {
    my ($id, $bytes) = ($1, $2);
    $id =~ s!\.!_!g;
    next if exists $ids{$id};
    $ids{$id} = hexify ($bytes);
}

# Write the constants in string-sorted order of id so that the output is
# the same from run to run.
for my $id (sort keys %ids) {
    print $out "const char * kuhn_$id = \"$ids{$id}\";\n";
    if (exists $expect{$id}) {
        print $out "uint32_t expect_$id = 0x$expect{$id};\n";
    }
}
close $out or die "Cannot close '$output': $!";
exit;
# Turn a string of bytes into text which is safe to put between the
# double quotes of a C string literal.
#
# Argument: $bytes, the string to convert.  The caller's copy is not
# modified.
#
# Returns: a copy of $bytes in which control characters, the space, DEL,
# bytes from 0x80 to 0xFF, '"' and '\' are replaced by "\xHH" escapes
# with two upper-case hexadecimal digits.  All other characters are kept
# as they are, except as described in the comment in the body.
sub hexify
{
    my ($bytes) = @_;
    # A hexadecimal escape in C has no fixed length: it swallows every
    # hexadecimal digit which follows it, so "\xE4" followed by a literal
    # "B" would be read as the single escape "\xE4B".  To prevent that,
    # each match takes the character to be escaped together with any run
    # of hexadecimal digits directly after it, and escapes all of them.
    $bytes =~ s{([\x00-\x20"\\\x7F-\xFF][0-9A-Fa-f]*)}
               {join ('', map { sprintf ("\\x%02X", ord ($_)) } split (//, $1))}ge;
    return $bytes;
}