-
Notifications
You must be signed in to change notification settings - Fork 5
/
PrimitiveXML.pm
119 lines (101 loc) · 3.05 KB
/
PrimitiveXML.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package PrimitiveXML;
use strict;
use warnings;
use XML::Bare;
use Scalar::Util qw(openhandle);
use HTML::Entities;
my $e2c = {
amp => '&',
'lt' => '<',
'gt' => '>',
quot => q{"},
apos => q{'},
};
sub new {
my ($class) = shift @_;
my ($method, $input) = @_;
my $in;
if ($method eq 'location') {
open $in, '<', $input or die "cannot open input for method '$method'";
}
if ($method eq 'handle') {
$in = openhandle($input) or die "cannot open input for method '$method'";
}
else {
die "unsupported input method '$method'";
}
my $infotxt = do {local $/ = "</siteinfo>\n"; <$in>;};
my $info = XML::Bare->new(text => $infotxt)->parse()->{mediawiki}->{siteinfo};
my %self = (
page => {},
list => [],
reader => $in,
base => $info->{base}->{value},
sitename => $info->{sitename}->{value},
_namespaces => {
map {
$_->{value} // "" => $_->{key}->{value}
} @{ $info->{namespaces}->{namespace} }
},
);
$self{nsre} = join( q{|}, map { quotemeta($_) } keys %{$self{_namespaces}} );
return bless \%self, $class;
}
sub next {
my ($self) = @_;
my $reader = $self->{reader};
if (!eof($reader) && !@{$self->{list}}) {
local $/ = "</revision>\n";
my $c = 0;
while (($c < 50) && (my $line = <$reader>)) {
push @{$self->{list}}, $line;
$c++;
}
}
my $elt = shift @{$self->{list}};
return if not $elt;
#print STDERR "$elt\n";
substr($elt, 0, 10) = '' if substr($elt, 0, 9) eq ' </page>';
my $r;
if (substr($elt,0,14) eq ' <revision>') {
$r = (XML::Bare->new(text => $elt))[1]->{revision};
}
elsif (substr($elt,0,8) eq ' <page>') {
my $p = XML::Bare->new(text => $elt)->parse;
my $value = $p->{page}->{title}->{value} // "";
_decode_entities($value, $e2c);
my ($ns, $title);
if ($value =~ m/^($self->{nsre}):(.+)/) {
($ns, $title) = ($1, $2);
}
else {
($ns, $title) = ('Main', $value);
}
my $id = $p->{page}->{id}->{value};
my %h = (title => $title, namespace => $ns, nsid => $self->{_namespaces}{$ns} || 0, id => $id);
$self->{page} = \%h;
$r = $p->{page}->{revision};
}
else {
return;
}
my $c = $r->{comment}->{value} // "";
my $t = $r->{text}->{value} // "";
my $u = $r->{contributor}->{username}->{value} // "";
_decode_entities($c, $e2c);
_decode_entities($t, $e2c);
_decode_entities($u, $e2c);
my %data = (
%{ $self->{page} },
revision_id => $r->{id}->{value},
comment => $c,
text => $t,
timestamp => $r->{timestamp}->{value},
userid => $r->{contributor}->{id}->{value},
username => $u,
ip => $r->{contributor}->{ip}->{value},
);
$data{minor} = 1 if exists $r->{minor};
return \%data;
}
1;