Switch from 'error_logger' to 'logger'

tburghart · tburghart · commit a615ed31abc3 · 2025-02-21T15:51:20.000Z
Sync with OpenRiak 3.2.

wday-contrib 2325
diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml
@@ -11,11 +11,18 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest]
-        otp: ['24']
-        rebar: ['3.22']
+        otp: ['24', '26']
+        rebar: ['3.22', '3.24']
+        exclude:
+          - otp: '24'
+            os: macos-latest
+          - rebar: '3.24'
+            otp: '24'
+          - rebar: '3.22'
+            otp: '26'
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: erlef/setup-beam@v1
         with:
           otp-version: ${{ matrix.otp }}
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # HyperLogLog for Erlang
 
+[![Hyper OpenRiak Status](https://github.com/OpenRiak/hyper/actions/workflows/erlang.yml/badge.svg)](https://github.com/OpenRiak/hyper/actions/workflows/erlang.yml)
+
 This is an implementation of the HyperLogLog algorithm in
 Erlang. Using HyperLogLog you can estimate the cardinality of very
 large data sets using constant memory. The relative error is `1.04 *
@@ -35,9 +37,7 @@ for lower cardinalities.
 
 The errors introduced by estimations can be seen in this example:
 ```erlang
-3> random:seed(1,2,3).
-undefined
-4> Run = fun (P, Card) -> hyper:card(lists:foldl(fun (_, H) -> Int = random:uniform(10000000000000), hyper:insert(<<Int:64/integer>>, H) end, hyper:new(P), lists:seq(1, Card))) end.
+4> Run = fun(P, Card) -> hyper:card(lists:foldl(fun (_, H) -> Int = rand:uniform(10000000000000), hyper:insert(<<Int:64/integer>>, H) end, hyper:new(P), lists:seq(1, Card))) end.
 #Fun<erl_eval.12.80484245>
 5> Run(12, 10000).
 9992.846462080579
@@ -105,15 +105,17 @@ registers has a value other than 0.
 
  * `hyper_binary_rle`: Dud
 
-You can also implement your own backend. In `hyper_test` theres a
+You can also implement your own backend. In `hyper_test` there are a
 bunch of tests run for all backends, including some PropEr tests. The
 test suite will ensure your backend gives correct estimates and
 correctly encodes/decodes the serialized filters.
 
+## Reports
 
+### Backend Performance
 
 ```
-$ make perf_report
+$ rebar3 as test do shell --eval 'hyper:perf_report().' --eval 'halt().'
 ...
 
 module       P        card   fill      bytes  insert us   union ms    card ms    json ms
@@ -167,4 +169,16 @@ hyper_binary 15     100000   0.95      24576       0.79      11.74       2.59
 hyper_binary 15    1000000   1.00      24576       0.55      13.88       2.64       5.11
 ```
 
+### Observed Error Rates
+
+> Requires [R](https://cran.r-project.org) with the
+> [`ggplot2`](https://cran.r-project.org/web/packages/ggplot2) package.
+
+```
+$ rebar3 as test do shell --eval 'hyper:estimate_report().' --eval 'halt().'
+$ bin/plot.R
+$ open hyper.png
+```
+
+
 [paper by Google]: http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//pubs/archive/40671.pdf
diff --git a/bin/plot.R b/bin/plot.R
@@ -1,4 +1,4 @@
-#!/usr/bin/Rscript --vanilla
+#!/usr/bin/env Rscript --vanilla
 
 require("ggplot2")
 require("scales")
diff --git a/rebar.config b/rebar.config
@@ -41,7 +41,6 @@
         {dialyzer, [
             {warnings, [
                 error_handling,
-                race_conditions,
                 unknown,
                 unmatched_returns
             ]}
@@ -97,10 +96,12 @@
             ]}
         ]},
         {deps, [
-            proper,
+            {proper,
+                {git, "https://github.com/OpenRiak/proper.git",
+                    {branch, "openriak-3.2"}}},
             {basho_stats,
-                {git, "https://github.com/basho/basho_stats.git",
-                    {branch, "develop"}}}
+                {git, "https://github.com/OpenRiak/basho_stats.git",
+                    {branch, "wday-develop-3.2"}}}
         ]}
     ]},
     {gha, [
diff --git a/src/hyper.app.src b/src/hyper.app.src
@@ -1,6 +1,6 @@
 {application, hyper, [
     {description, "HyperLogLog"},
-    {vsn, "1.1.2"},
+    {vsn, "1.2.0"},
     {registered, []},
     {applications, [
         kernel,
diff --git a/src/hyper.erl b/src/hyper.erl
@@ -270,12 +270,11 @@ median(Ns) ->
 %% ===================================================================
 %% Test Reports
 %% ===================================================================
-%%
-%% These take a VERY long time to run, making them just about unusable.
-%% TODO: They SHOULD be restructured to be more efficient.
-%% Possibilities include QuickCheck and/or parallel EUnit execution.
-%%
 
+%% Shell:
+%% $ rebar3 as test do shell --eval 'hyper:estimate_report().' --eval 'halt().'
+%% $ bin/plot.R
+%% $ open hyper.png
 estimate_report() ->
     Ps            = lists:seq(11, 16),
     Cardinalities = [100, 1000, 10000, 100000, 1000000],
@@ -316,6 +315,8 @@ run_report(P, Card, Repetitions) ->
     P95 = basho_stats_histogram:quantile(0.95, Hist),
     {Card, median(Estimations), P05, P95}.
 
+%% Shell:
+%% $ rebar3 as test do shell --eval 'hyper:perf_report().' --eval 'halt().'
 perf_report() ->
     Ps      = [15],
     Cards   = [1, 100, 500, 1000, 2500, 5000, 10000,
diff --git a/src/hyper_binary.erl b/src/hyper_binary.erl
@@ -5,7 +5,6 @@
 %% inserts to perform in the future.
 
 -module(hyper_binary).
-
 -behaviour(hyper_register).
 
 -export([new/1,
@@ -279,6 +278,8 @@ merge_buf(B, [{Index, Value} | Rest], PrevIndex, Acc) ->
 
 -ifdef(TEST).
 
+-include_lib("eunit/include/eunit.hrl").
+
 merge_test() ->
     P = 4, M = m(P),
     Tmp1 = [{1, 1},
diff --git a/src/hyper_binary_rle.erl b/src/hyper_binary_rle.erl
@@ -6,6 +6,7 @@
          reduce_precision/2, bytes/1]).
 -export([register_sum/1, zero_count/1, encode_registers/1, decode_registers/2]).
 
+-include_lib("kernel/include/logger.hrl").
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 -endif.
@@ -156,9 +157,12 @@ rle_insert(B, I, Index, Value) ->
     ChunkStart = I,
     ChunkEnd = ChunkStart + Repeats - 1,
 
-    %% error_logger:info_msg("~p repeated ~p times, I: ~p, Index: ~p, Value: ~p~n"
-    %%                       "chunk start: ~p, chunk end: ~p~n",
-    %%                       [RepeatValue, Repeats, I, Index, Value, ChunkStart, ChunkEnd]),
+     ?LOG_DEBUG(#{
+         message => lists:flatten(io_lib:format(
+             "~0tp repeated ~0tp times", [RepeatValue, Repeats])),
+         'I' => I, index => Index, value => Value,
+         chunk_start => ChunkStart, chunk_end => ChunkEnd
+     }),
 
     if
         %% Found the chunk where index and value belongs, doing
@@ -173,7 +177,7 @@ rle_insert(B, I, Index, Value) ->
         RepeatValue =/= Value ->
             LeftFill = Index - ChunkStart,
             RightFill = ChunkEnd - Index,
-            %% error_logger:info_msg("splitting chunk~n"),
+            ?LOG_DEBUG("splitting chunk"),
 
             Left = if LeftFill =:= 0 -> <<>>;
                       true -> <<?REPEAT(LeftFill), RepeatValue:?VALUE>>
@@ -184,11 +188,11 @@ rle_insert(B, I, Index, Value) ->
             [Left, <<?REPEAT(1), ?VALUE(Value)>>, Right, Rest];
 
         ChunkEnd < Index ->
-            %% error_logger:info_msg("chunk end < index (~p < ~p) recursing~n",
-            %%                       [ChunkEnd, Index]),
+            ?LOG_DEBUG("chunk end < index (~0tp < ~0tp) recursing",
+                [ChunkEnd, Index]),
 
             [<<?REPEAT(Repeats), ?VALUE(RepeatValue)>> |
-             rle_insert(Rest, ChunkEnd+1, Index, Value)]
+                rle_insert(Rest, ChunkEnd+1, Index, Value)]
     end.
 
 take_repeat(<<>>) ->
diff --git a/src/hyper_gb.erl b/src/hyper_gb.erl
@@ -136,6 +136,8 @@ do_decode_registers(<<Value:8/integer, Rest/binary>>, I) ->
 %%
 -ifdef(TEST).
 
+-include_lib("eunit/include/eunit.hrl").
+
 sum_test() ->
     T = set(3, 5, set(1, 1, new(4))),
 
diff --git a/test/hyper_test.erl b/test/hyper_test.erl
@@ -1,5 +1,6 @@
 -module(hyper_test).
 
+-include_lib("kernel/include/logger.hrl").
 -include_lib("proper/include/proper.hrl").
 -include_lib("eunit/include/eunit.hrl").
 
@@ -237,7 +238,7 @@ many_union_t() ->
              true ->
                  ok;
              false ->
-                 error_logger:info_msg("too high error, expected ~.2f%, actual ~.2f%~n"
+                 ?LOG_INFO("too high error, expected ~.2f%, actual ~.2f%~n"
                                        "~p, p = ~p, card = ~p",
                                        [Error, Delta / (Card * NumSets), Mod, P, Card]),
                  ?assert(false)
@@ -374,45 +375,44 @@ gen_getset(Size, P) ->
 
 prop_set() ->
     ?FORALL(
-       {Mod, P}, {oneof(backends()), choose(4, 16)},
-       ?FORALL(
-          Values, gen_getset(P),
-          begin
-              R = lists:foldl(
-                    fun ({Index, ZeroCount}, Register) ->
-                            Mod:set(Index, ZeroCount, Register)
+        {Mod, P}, {oneof(backends()), choose(4, 16)},
+        ?FORALL(
+            Values, gen_getset(P),
+            begin
+                R = lists:foldl(
+                    fun({Index, ZeroCount}, Register) ->
+                        Mod:set(Index, ZeroCount, Register)
                     end, Mod:new(P), Values),
-              Max = lists:foldl(fun ({I, V}, Acc) ->
-                                        case dict:find(I, Acc) of
-                                            {ok, OtherV} when OtherV >= V ->
-                                                Acc;
-                                            _ ->
-                                                dict:store(I, V, Acc)
-                                        end
-                                end, dict:new(), Values),
-              Expected = lists:map(fun (I) ->
-                                           case dict:find(I, Max) of
-                                               {ok, V} ->
-                                                   <<V:8/integer>>;
-                                               error ->
-                                                   <<0>>
-                                           end
-                                   end, lists:seq(0, trunc(math:pow(2, P)) - 1)),
-
-              case Mod:encode_registers(Mod:compact(R))
-                  =:= iolist_to_binary(Expected) of
-                  true ->
-                      true;
-                  false ->
-                      %% error_logger:info_msg("values~n~p~n"
-                      %%                       "encoded~n~p~n"
-                      %%                       "expected~n~p~n",
-                      %%                       [Values,
-                      %%                        Mod:encode_registers(R),
-                      %%                        iolist_to_binary(Expected)]),
+                Max = lists:foldl(fun({I, V}, Acc) ->
+                    case dict:find(I, Acc) of
+                        {ok, OtherV} when OtherV >= V ->
+                            Acc;
+                        _ ->
+                            dict:store(I, V, Acc)
+                    end
+                end, dict:new(), Values),
+                Expected = lists:map(fun(I) ->
+                    case dict:find(I, Max) of
+                        {ok, V} ->
+                            <<V:8/integer>>;
+                        error ->
+                            <<0>>
+                    end
+                end, lists:seq(0, trunc(math:pow(2, P)) - 1)),
+
+                case Mod:encode_registers(Mod:compact(R))
+                    =:= iolist_to_binary(Expected) of
+                    true ->
+                        true;
+                    false ->
+                        ?LOG_ERROR(#{
+                            values => Values,
+                            encoded => Mod:encode_registers(R),
+                            expected => iolist_to_binary(Expected)
+                        }),
                         false
-              end
-          end)).
+                end
+            end)).
 
 prop_serialize() ->
     ?FORALL(

Rendered hunk (same change as the bin/plot.R diff above):
@@ -1,4 +1,4 @@
-#!/usr/bin/Rscript --vanilla
+#!/usr/bin/env Rscript --vanilla
 
 require("ggplot2")
 require("scales")