Skip to content

Commit a615ed3

Browse files
committed
Switch from 'error_logger' to 'logger'
Sync with OpenRiak 3.2. wday-contrib 2325
2 parents 8ec8734 + 14b3608 commit a615ed3

File tree

10 files changed

+95
-65
lines changed

10 files changed

+95
-65
lines changed

.github/workflows/erlang.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,18 @@ jobs:
1111
fail-fast: false
1212
matrix:
1313
os: [ubuntu-latest, macos-latest]
14-
otp: ['24']
15-
rebar: ['3.22']
14+
otp: ['24', '26']
15+
rebar: ['3.22', '3.24']
16+
exclude:
17+
- otp: '24'
18+
os: macos-latest
19+
- rebar: '3.24'
20+
otp: '24'
21+
- rebar: '3.22'
22+
otp: '26'
1623

1724
steps:
18-
- uses: actions/checkout@v2
25+
- uses: actions/checkout@v4
1926
- uses: erlef/setup-beam@v1
2027
with:
2128
otp-version: ${{ matrix.otp }}

README.md

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# HyperLogLog for Erlang
22

3+
[![Hyper OpenRiak Status](https://github.com/OpenRiak/hyper/actions/workflows/erlang.yml/badge.svg)](https://github.com/OpenRiak/hyper/actions/workflows/erlang.yml)
4+
35
This is an implementation of the HyperLogLog algorithm in
46
Erlang. Using HyperLogLog you can estimate the cardinality of very
57
large data sets using constant memory. The relative error is `1.04 *
@@ -35,9 +37,7 @@ for lower cardinalities.
3537

3638
The errors introduced by estimations can be seen in this example:
3739
```erlang
38-
3> random:seed(1,2,3).
39-
undefined
40-
4> Run = fun (P, Card) -> hyper:card(lists:foldl(fun (_, H) -> Int = random:uniform(10000000000000), hyper:insert(<<Int:64/integer>>, H) end, hyper:new(P), lists:seq(1, Card))) end.
40+
4> Run = fun(P, Card) -> hyper:card(lists:foldl(fun (_, H) -> Int = rand:uniform(10000000000000), hyper:insert(<<Int:64/integer>>, H) end, hyper:new(P), lists:seq(1, Card))) end.
4141
#Fun<erl_eval.12.80484245>
4242
5> Run(12, 10000).
4343
9992.846462080579
@@ -105,15 +105,17 @@ registers has a value other than 0.
105105

106106
* `hyper_binary_rle`: Dud
107107

108-
You can also implement your own backend. In `hyper_test` theres a
108+
You can also implement your own backend. `hyper_test` contains a
109109
bunch of tests run for all backends, including some PropEr tests. The
110110
test suite will ensure your backend gives correct estimates and
111111
correctly encodes/decodes the serialized filters.
112112

113+
## Reports
113114

115+
### Backend Performance
114116

115117
```
116-
$ make perf_report
118+
$ rebar3 as test do shell --eval 'hyper:perf_report().' --eval 'halt().'
117119
...
118120

119121
module P card fill bytes insert us union ms card ms json ms
@@ -167,4 +169,16 @@ hyper_binary 15 100000 0.95 24576 0.79 11.74 2.59
167169
hyper_binary 15 1000000 1.00 24576 0.55 13.88 2.64 5.11
168170
```
169171

172+
### Observed Error Rates
173+
174+
> Requires [R](https://cran.r-project.org) with the
175+
> [`ggplot2`](https://cran.r-project.org/web/packages/ggplot2) package.
176+
177+
```
178+
$ rebar3 as test do shell --eval 'hyper:estimate_report().' --eval 'halt().'
179+
$ bin/plot.R
180+
$ open hyper.png
181+
```
182+
183+
170184
[paper by Google]: http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//pubs/archive/40671.pdf

bin/plot.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/Rscript --vanilla
1+
#!/usr/bin/env -S Rscript --vanilla
# `env -S` splits the rest of the shebang into separate arguments. Without it,
# Linux hands "Rscript --vanilla" to env as a single program name and the
# script fails to start.
22

33
require("ggplot2")
44
require("scales")

rebar.config

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
{dialyzer, [
4242
{warnings, [
4343
error_handling,
44-
race_conditions,
4544
unknown,
4645
unmatched_returns
4746
]}
@@ -97,10 +96,12 @@
9796
]}
9897
]},
9998
{deps, [
100-
proper,
99+
{proper,
100+
{git, "https://github.com/OpenRiak/proper.git",
101+
{branch, "openriak-3.2"}}},
101102
{basho_stats,
102-
{git, "https://github.com/basho/basho_stats.git",
103-
{branch, "develop"}}}
103+
{git, "https://github.com/OpenRiak/basho_stats.git",
104+
{branch, "wday-develop-3.2"}}}
104105
]}
105106
]},
106107
{gha, [

src/hyper.app.src

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{application, hyper, [
22
{description, "HyperLogLog"},
3-
{vsn, "1.1.2"},
3+
{vsn, "1.2.0"},
44
{registered, []},
55
{applications, [
66
kernel,

src/hyper.erl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,12 +270,11 @@ median(Ns) ->
270270
%% ===================================================================
271271
%% Test Reports
272272
%% ===================================================================
273-
%%
274-
%% These take a VERY long time to run, making them just about unusable.
275-
%% TODO: They SHOULD be restructured to be more efficient.
276-
%% Possibilities include QuickCheck and/or parallel EUnit execution.
277-
%%
278273

274+
%% Shell:
275+
%% $ rebar3 as test do shell --eval 'hyper:estimate_report().' --eval 'halt().'
276+
%% $ bin/plot.R
277+
%% $ open hyper.png
279278
estimate_report() ->
280279
Ps = lists:seq(11, 16),
281280
Cardinalities = [100, 1000, 10000, 100000, 1000000],
@@ -316,6 +315,8 @@ run_report(P, Card, Repetitions) ->
316315
P95 = basho_stats_histogram:quantile(0.95, Hist),
317316
{Card, median(Estimations), P05, P95}.
318317

318+
%% Shell:
319+
%% $ rebar3 as test do shell --eval 'hyper:perf_report().' --eval 'halt().'
319320
perf_report() ->
320321
Ps = [15],
321322
Cards = [1, 100, 500, 1000, 2500, 5000, 10000,

src/hyper_binary.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
%% inserts to perform in the future.
66

77
-module(hyper_binary).
8-
98
-behaviour(hyper_register).
109

1110
-export([new/1,
@@ -279,6 +278,8 @@ merge_buf(B, [{Index, Value} | Rest], PrevIndex, Acc) ->
279278

280279
-ifdef(TEST).
281280

281+
-include_lib("eunit/include/eunit.hrl").
282+
282283
merge_test() ->
283284
P = 4, M = m(P),
284285
Tmp1 = [{1, 1},

src/hyper_binary_rle.erl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
reduce_precision/2, bytes/1]).
77
-export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2]).
88

9+
-include_lib("kernel/include/logger.hrl").
910
-ifdef(TEST).
1011
-include_lib("eunit/include/eunit.hrl").
1112
-endif.
@@ -156,9 +157,12 @@ rle_insert(B, I, Index, Value) ->
156157
ChunkStart = I,
157158
ChunkEnd = ChunkStart + Repeats - 1,
158159

159-
%% error_logger:info_msg("~p repeated ~p times, I: ~p, Index: ~p, Value: ~p~n"
160-
%% "chunk start: ~p, chunk end: ~p~n",
161-
%% [RepeatValue, Repeats, I, Index, Value, ChunkStart, ChunkEnd]),
160+
?LOG_DEBUG(#{
161+
message => lists:flatten(io_lib:format(
162+
"~0tp repeated ~0tp times", [RepeatValue, Repeats])),
163+
'I' => I, index => Index, value => Value,
164+
chunk_start => ChunkStart, chunk_end => ChunkEnd
165+
}),
162166

163167
if
164168
%% Found the chunk where index and value belongs, doing
@@ -173,7 +177,7 @@ rle_insert(B, I, Index, Value) ->
173177
RepeatValue =/= Value ->
174178
LeftFill = Index - ChunkStart,
175179
RightFill = ChunkEnd - Index,
176-
%% error_logger:info_msg("splitting chunk~n"),
180+
?LOG_DEBUG("splitting chunk"),
177181

178182
Left = if LeftFill =:= 0 -> <<>>;
179183
true -> <<?REPEAT(LeftFill), RepeatValue:?VALUE>>
@@ -184,11 +188,11 @@ rle_insert(B, I, Index, Value) ->
184188
[Left, <<?REPEAT(1), ?VALUE(Value)>>, Right, Rest];
185189

186190
ChunkEnd < Index ->
187-
%% error_logger:info_msg("chunk end < index (~p < ~p) recursing~n",
188-
%% [ChunkEnd, Index]),
191+
?LOG_DEBUG("chunk end < index (~0tp < ~0tp) recursing",
192+
[ChunkEnd, Index]),
189193

190194
[<<?REPEAT(Repeats), ?VALUE(RepeatValue)>> |
191-
rle_insert(Rest, ChunkEnd+1, Index, Value)]
195+
rle_insert(Rest, ChunkEnd+1, Index, Value)]
192196
end.
193197

194198
take_repeat(<<>>) ->

src/hyper_gb.erl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ do_decode_registers(<<Value:8/integer, Rest/binary>>, I) ->
136136
%%
137137
-ifdef(TEST).
138138

139+
-include_lib("eunit/include/eunit.hrl").
140+
139141
sum_test() ->
140142
T = set(3, 5, set(1, 1, new(4))),
141143

test/hyper_test.erl

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-module(hyper_test).
22

3+
-include_lib("kernel/include/logger.hrl").
34
-include_lib("proper/include/proper.hrl").
45
-include_lib("eunit/include/eunit.hrl").
56

@@ -237,7 +238,7 @@ many_union_t() ->
237238
true ->
238239
ok;
239240
false ->
240-
error_logger:info_msg("too high error, expected ~.2f%, actual ~.2f%~n"
241+
?LOG_INFO("too high error, expected ~.2f%, actual ~.2f%~n"
241242
"~p, p = ~p, card = ~p",
242243
[Error, Delta / (Card * NumSets), Mod, P, Card]),
243244
?assert(false)
@@ -374,45 +375,44 @@ gen_getset(Size, P) ->
374375

375376
prop_set() ->
376377
?FORALL(
377-
{Mod, P}, {oneof(backends()), choose(4, 16)},
378-
?FORALL(
379-
Values, gen_getset(P),
380-
begin
381-
R = lists:foldl(
382-
fun ({Index, ZeroCount}, Register) ->
383-
Mod:set(Index, ZeroCount, Register)
378+
{Mod, P}, {oneof(backends()), choose(4, 16)},
379+
?FORALL(
380+
Values, gen_getset(P),
381+
begin
382+
R = lists:foldl(
383+
fun({Index, ZeroCount}, Register) ->
384+
Mod:set(Index, ZeroCount, Register)
384385
end, Mod:new(P), Values),
385-
Max = lists:foldl(fun ({I, V}, Acc) ->
386-
case dict:find(I, Acc) of
387-
{ok, OtherV} when OtherV >= V ->
388-
Acc;
389-
_ ->
390-
dict:store(I, V, Acc)
391-
end
392-
end, dict:new(), Values),
393-
Expected = lists:map(fun (I) ->
394-
case dict:find(I, Max) of
395-
{ok, V} ->
396-
<<V:8/integer>>;
397-
error ->
398-
<<0>>
399-
end
400-
end, lists:seq(0, trunc(math:pow(2, P)) - 1)),
401-
402-
case Mod:encode_registers(Mod:compact(R))
403-
=:= iolist_to_binary(Expected) of
404-
true ->
405-
true;
406-
false ->
407-
%% error_logger:info_msg("values~n~p~n"
408-
%% "encoded~n~p~n"
409-
%% "expected~n~p~n",
410-
%% [Values,
411-
%% Mod:encode_registers(R),
412-
%% iolist_to_binary(Expected)]),
386+
Max = lists:foldl(fun({I, V}, Acc) ->
387+
case dict:find(I, Acc) of
388+
{ok, OtherV} when OtherV >= V ->
389+
Acc;
390+
_ ->
391+
dict:store(I, V, Acc)
392+
end
393+
end, dict:new(), Values),
394+
Expected = lists:map(fun(I) ->
395+
case dict:find(I, Max) of
396+
{ok, V} ->
397+
<<V:8/integer>>;
398+
error ->
399+
<<0>>
400+
end
401+
end, lists:seq(0, trunc(math:pow(2, P)) - 1)),
402+
403+
case Mod:encode_registers(Mod:compact(R))
404+
=:= iolist_to_binary(Expected) of
405+
true ->
406+
true;
407+
false ->
408+
?LOG_ERROR(#{
409+
values => Values,
410+
encoded => Mod:encode_registers(R),
411+
expected => iolist_to_binary(Expected)
412+
}),
413413
false
414-
end
415-
end)).
414+
end
415+
end)).
416416

417417
prop_serialize() ->
418418
?FORALL(

0 commit comments

Comments
 (0)