Skip to content

Commit e1e3578

Browse files
committed
Implement ordinal and ratio deltas, rename jaccard
1 parent 08b1385 commit e1e3578

File tree

4 files changed

+91
-49
lines changed

4 files changed

+91
-49
lines changed

lib/Statistics/Krippendorff.pm

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,15 @@ sub delta_nominal($, $s1, $s2) { $s1 eq $s2 ? 0 : 1 }
8181

8282
sub delta_interval($, $v0, $v1) { ($v0 - $v1) ** 2 }
8383

84-
sub jaccard_index($, $s1, $s2) {
84+
sub delta_ordinal($self, $v0, $v1) {
85+
my ($from, $to) = sort { $a <=> $b } $v0, $v1;
86+
(sum(map $self->frequency($_), $from .. $to)
87+
- ($self->frequency($from) + $self->frequency($to))/ 2) ** 2
88+
}
89+
90+
sub delta_ratio($self, $v0, $v1) { (($v0 - $v1) / ($v0 + $v1)) ** 2}
91+
92+
sub delta_jaccard($, $s1, $s2) {
8593
my @s1 = split /,/, $s1;
8694
my @s2 = split /,/, $s2;
8795

@@ -226,13 +234,18 @@ Used for nominal data, i.e. labels with no ordering.
226234
Used for numeric values that are ordered, but can't be used in mathematical
227235
operations, for example number of stars in a movie rating system (we don't say
228236
that the distance from one star to two stars is the same as the distance from
229-
three starts to four stars).
237+
three starts to four stars). See the implementation on why C<$self> is needed
238+
as a parameter to delta.
230239
231240
=head4 delta_interval
232241
233242
Used for numeric values that can be used in mathematical operations.
234243
235-
=head4 jaccard_index
244+
=head4 delta_ratio
245+
246+
Used for non-negative numeric values (think degrees Kelvin).
247+
248+
=head4 delta_jaccard
236249
237250
This can be used when coders can specify more than one value. Join the values
238251
with commas; Jaccard index then uses the formula C<intersection_size /

t/01-deltas.t

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,20 @@ my $sk = 'Statistics::Krippendorff'->new(units => []);
1212
is $sk->delta_nominal('a', 'a'), 0, 'nominal a-a';
1313
is $sk->delta_nominal('a', 'b'), 1, 'nominal a-b';
1414

15-
is $sk->jaccard_index('a', 'a'), 0, 'a a';
16-
is $sk->jaccard_index('a', 'b'), 1, 'a b';
17-
18-
is $sk->jaccard_index('a', 'a,b'), 0.5, 'a ab';
19-
is $sk->jaccard_index('a', 'b,a'), 0.5, 'a ba';
20-
is $sk->jaccard_index('a,b', 'a'), 0.5, 'ab a';
21-
is $sk->jaccard_index('b,a', 'a'), 0.5, 'ba a';
22-
23-
is $sk->jaccard_index('a,b', 'c,d'), 1, 'ab cd';
24-
is $sk->jaccard_index('a,b', 'a,b'), 0, 'ab ab';
25-
is $sk->jaccard_index('a,b', 'b,a'), 0, 'ab ba';
26-
is $sk->jaccard_index('a,b', 'a,c'), 2/3, 'ab ac';
27-
is $sk->jaccard_index('a,b', 'c,a'), 2/3, 'ab ca';
28-
is $sk->jaccard_index('b,a', 'c,a'), 2/3, 'ba ca';
29-
is $sk->jaccard_index('b,a', 'a,c'), 2/3, 'ba ac';
30-
31-
is $sk->jaccard_index('a,b,c', 'b,c,d,e'), 3/5, 'abc bcde';
15+
is $sk->delta_jaccard('a', 'a'), 0, 'a a';
16+
is $sk->delta_jaccard('a', 'b'), 1, 'a b';
17+
18+
is $sk->delta_jaccard('a', 'a,b'), 0.5, 'a ab';
19+
is $sk->delta_jaccard('a', 'b,a'), 0.5, 'a ba';
20+
is $sk->delta_jaccard('a,b', 'a'), 0.5, 'ab a';
21+
is $sk->delta_jaccard('b,a', 'a'), 0.5, 'ba a';
22+
23+
is $sk->delta_jaccard('a,b', 'c,d'), 1, 'ab cd';
24+
is $sk->delta_jaccard('a,b', 'a,b'), 0, 'ab ab';
25+
is $sk->delta_jaccard('a,b', 'b,a'), 0, 'ab ba';
26+
is $sk->delta_jaccard('a,b', 'a,c'), 2/3, 'ab ac';
27+
is $sk->delta_jaccard('a,b', 'c,a'), 2/3, 'ab ca';
28+
is $sk->delta_jaccard('b,a', 'c,a'), 2/3, 'ba ca';
29+
is $sk->delta_jaccard('b,a', 'a,c'), 2/3, 'ba ac';
30+
31+
is $sk->delta_jaccard('a,b,c', 'b,c,d,e'), 3/5, 'abc bcde';

t/02-example.t

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,66 @@ use strict;
55
use Statistics::Krippendorff;
66

77
use Test2::V0;
8-
plan 3;
8+
use Test2::Tools::Subtest qw{ subtest_buffered };
9+
plan 2;
910

10-
{ my @units = ({B=>2, C=>2}, {B=>1, C=>1}, {B=>3, C=>3}, {A=>3, B=>3, C=>4},
11-
{A=>4, B=>4, C=>4}, {A=>1, B=>3}, {A=>2, C=>2}, {A=>1, C=>1},
12-
{A=>1, C=>1}, {A=>3, C=>3}, {A=>3, C=>3}, {A=>3, C=>4});
13-
my $sk = 'Statistics::Krippendorff'->new(
14-
units => \@units,
11+
subtest_buffered wikipedia => sub {
12+
plan 3;
13+
my @units_h = ({B=>2, C=>2}, {B=>1, C=>1}, {B=>3, C=>3}, {A=>3, B=>3, C=>4},
14+
{A=>4, B=>4, C=>4}, {A=>1, B=>3}, {A=>2, C=>2}, {A=>1, C=>1},
15+
{A=>1, C=>1}, {A=>3, C=>3}, {A=>3, C=>3}, {A=>3, C=>4});
16+
my $sk1 = 'Statistics::Krippendorff'->new(
17+
units => \@units_h,
1518
delta => \&Statistics::Krippendorff::delta_nominal);
1619

17-
is $sk->alpha, float(0.691, precision => 3),
18-
'Wikipedia example hashes';
19-
}
20-
21-
{ my @units = ([undef, 2, 2],
22-
[undef, 1, 1],
23-
[undef, 3, 3],
24-
[3, 3, 4],
25-
[4, 4, 4],
26-
[1, 3],
27-
[2, undef, 2],
28-
[1, undef, 1],
29-
[1, undef, 1],
30-
[3, undef, 3],
31-
[3, undef, 3],
32-
[3, undef, 4]);
20+
is $sk1->alpha, float(0.691, precision => 3),
21+
'Hash units, default delta';
3322

23+
my @units_a = ([undef, 2, 2],
24+
[undef, 1, 1],
25+
[undef, 3, 3],
26+
[3, 3, 4],
27+
[4, 4, 4],
28+
[1, 3],
29+
[2, undef, 2],
30+
[1, undef, 1],
31+
[1, undef, 1],
32+
[3, undef, 3],
33+
[3, undef, 3],
34+
[3, undef, 4]);
35+
36+
my $sk2 = 'Statistics::Krippendorff'->new(units => \@units_a);
37+
is $sk2->alpha, float(0.691, precision => 3),
38+
'Array units, default delta';
39+
40+
$sk2->delta(\&Statistics::Krippendorff::delta_interval);
41+
is $sk2->alpha, float(0.811, precision => 3),
42+
'Array units, interval delta';
43+
};
44+
45+
subtest_buffered krippendorf_1980 => sub {
46+
plan 4;
47+
48+
my @units = ([1,1,undef,1],[2,2,3,2],[3,3,3,3],[3,3,3,3],[2,2,2,2],
49+
[1,2,3,4],[4,4,4,4],[1,1,2,1],[2,2,2,2],[undef,5,5,5],
50+
[undef,undef,1,1]);
3451
my $sk = 'Statistics::Krippendorff'->new(units => \@units);
35-
is $sk->alpha, float(0.691, precision => 3),
36-
'Wikipedia example array, default delta';
52+
53+
is $sk->alpha, float(0.743, precision => 3),
54+
'nominal';
3755

3856
$sk->delta(\&Statistics::Krippendorff::delta_interval);
39-
is $sk->alpha, float(0.811, precision => 3),
40-
'Wikipedia example interval';
41-
}
57+
is $sk->alpha, float(0.849, precision => 3),
58+
'interval';
59+
60+
$sk->delta(\&Statistics::Krippendorff::delta_ordinal);
61+
is $sk->alpha, float(0.815, precision => 3),
62+
'oridnal';
63+
64+
$sk->delta(\&Statistics::Krippendorff::delta_ratio);
65+
is $sk->alpha, float(0.797, precision => 3),
66+
'ratio';
67+
68+
69+
70+
};

t/04-jaccard.t

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ use Test2::V0;
99
my $sk1 = 'Statistics::Krippendorff'->new(
1010
units => [['a,b', 'b,a'], ['b,c', 'c,b'], ['e,f', 'f,e'],
1111
['x', 'b,a'],['a', 'a,b']],
12-
delta => \&Statistics::Krippendorff::jaccard_index
12+
delta => \&Statistics::Krippendorff::delta_jaccard
1313
);
1414

1515
my $alpha1 = $sk1->alpha;
1616

1717
my $sk2 = 'Statistics::Krippendorff'->new(
1818
units => [['a,b', 'a,b'], ['b,c', 'b,c'], ['e,f', 'e,f'],
1919
['b,a', 'x'], ['a', 'b,a']],
20-
delta => \&Statistics::Krippendorff::jaccard_index
20+
delta => \&Statistics::Krippendorff::delta_jaccard
2121
);
2222
is $sk2->alpha, float($alpha1, precision => 8),
2323
q(Order inside values doesn't matter);

0 commit comments

Comments
 (0)