11#include <stdlib.h>
22#include <stdbool.h>
3+ #include <stdio.h>
34#include <string.h>
45#include <math.h>
56
@@ -99,14 +100,59 @@ double td_total_count(td_histogram_t *h) {
99100 return h -> merged_count + h -> unmerged_count ;
100101}
101102
103+ double td_quantile_of (td_histogram_t * h , double val ) {
104+ merge (h );
105+ if (h -> merged_nodes == 0 ) {
106+ return NAN ;
107+ }
108+ /* if (h->merged_nodes == 1) { */
109+ /* if (h->nodes[0].mean > val) { */
110+ /* return 1; */
111+ /* } else if (h->nodes[0].mean < val) { */
112+ /* return 0; */
113+ /* } */
114+ /* return 0.5; */
115+ /* } */
116+ double k = 0 ;
117+ int i = 0 ;
118+ node_t * n = NULL ;
119+ for (i = 0 ; i < h -> merged_nodes ; i ++ ) {
120+ n = & h -> nodes [i ];
121+ if (n -> mean >= val ) {
122+ break ;
123+ }
124+ k += n -> count ;
125+ }
126+ if (val == n -> mean ) {
127+ // technically this needs to find all of the nodes which contain this value and sum their weight
128+ double count_at_value = n -> count ;
129+ for (i += 1 ; i < h -> merged_nodes && h -> nodes [i ].mean == n -> mean ; i ++ ) {
130+ count_at_value += h -> nodes [i ].count ;
131+ }
132+ return (k + (count_at_value /2 )) / h -> merged_count ;
133+ } else if (val > n -> mean ) { // past the largest
134+ return 1 ;
135+ } else if (i == 0 ) {
136+ return 0 ;
137+ }
138+ // we want to figure out where along the line from the prev node to this node, the value falls
139+ node_t * nr = n ;
140+ node_t * nl = n - 1 ;
141+ k -= (nl -> count /2 );
142+ // we say that at zero we're at nl->mean
143+ // and at (nl->count/2 + nr->count/2) we're at nr
144+ double m = (nr -> mean - nl -> mean ) / (nl -> count /2 + nr -> count /2 );
145+ double x = (val - nl -> mean ) / m ;
146+ printf ("hi %f %f %f %f\n" , m , x , k , h -> merged_count );
147+ return (k + x ) / h -> merged_count ;
148+ }
149+
150+
102151double td_value_at (td_histogram_t * h , double q ) {
103152 merge (h );
104153 if (q < 0 || q > 1 || h -> merged_nodes == 0 ) {
105154 return NAN ;
106155 }
107- if (h -> merged_nodes == 1 ) {
108- return h -> nodes [0 ].mean ;
109- }
110156 // if left of the first node, use the first node
111157 // if right of the last node, use the last node, use it
112158 double goal = q * h -> merged_count ;
@@ -118,7 +164,7 @@ double td_value_at(td_histogram_t *h, double q) {
118164 if (k + n -> count > goal ) {
119165 break ;
120166 }
121- k += h -> nodes [ i ]. count ;
167+ k += n -> count ;
122168 }
123169 double delta_k = goal - k - (n -> count /2 );
124170 if (is_very_small (delta_k )) {
@@ -143,7 +189,7 @@ double td_value_at(td_histogram_t *h, double q) {
143189 double x = goal - k ;
144190 // we have two points (0, nl->mean), (nr->count, nr->mean)
145191 // and we want x
146- double m = (nr -> mean - nl -> mean ) / (nr -> count );
192+ double m = (nr -> mean - nl -> mean ) / (nl -> count / 2 + nr -> count / 2 );
147193 return m * x + nl -> mean ;
148194}
149195
0 commit comments