Skip to content

Commit fbb5580

Browse files
committed
not sure where these changes came from , danglers
1 parent 41b92bd commit fbb5580

File tree

1 file changed

+73
-9
lines changed

1 file changed

+73
-9
lines changed

src/clustering/layer.rs

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ use crate::cards::street::Street;
33
use crate::clustering::abstraction::Abstraction;
44
use crate::clustering::consumer::Consumer;
55
use crate::clustering::histogram::Histogram;
6-
use crate::clustering::metric::Metric;
6+
use crate::clustering::metric::Metric as _;
77
use crate::clustering::producer::Producer;
88
use crate::clustering::progress::Progress;
9-
use crate::clustering::projection::Projection;
9+
use crate::clustering::projection::Projection as _;
1010
use crate::clustering::xor::Pair;
1111
use rand::distributions::Distribution;
1212
use rand::distributions::WeightedIndex;
@@ -16,6 +16,57 @@ use std::collections::BTreeMap;
1616
use std::io::Read;
1717
use std::sync::Arc;
1818

19+
/// A centroid represented by two histograms.
/// `prev` is the one handed out for reads by `histogram()`;
/// `next` is the one `absorb()` accumulates into.
/// NOTE(review): presumably `prev` is the previous k-means iteration's centroid and
/// `next` the one being built for the following iteration — confirm against the caller.
struct Centroid {
20+
prev: Histogram,
21+
next: Histogram,
22+
}
23+
impl Centroid {
24+
/// Folds `h` into `next` by calling `absorb` on it; `prev` is not touched.
fn absorb(&mut self, h: &Histogram) {
25+
self.next.absorb(h);
26+
}
27+
/// Borrows `prev` (not `next`), so anything absorbed since construction is not visible here.
fn histogram(&self) -> &Histogram {
28+
&self.prev
29+
}
30+
}
31+
32+
/// File-local trait with two `f32`-valued queries: one over a pair of `Abstraction`s and
/// one over a pair of `Histogram`s. No implementation is visible in this view.
/// NOTE(review): this shares its name with `crate::clustering::metric::Metric`, which the
/// file imports anonymously (`as _`) — presumably to avoid the name clash; confirm.
trait Metric {
33+
// impl for Map<Pair, f32>
34+
/// NOTE(review): presumably the stored pairwise distance between `a` and `b` — confirm.
fn distance(&self, a: &Abstraction, b: &Abstraction) -> f32;
35+
/// NOTE(review): presumably the Wasserstein (earth mover's) distance between `a` and `b` — confirm.
fn wasserstein(&self, a: &Histogram, b: &Histogram) -> f32;
36+
}
37+
/// File-local trait mapping an `Observation` to either an owned `Histogram` or an owned
/// `Abstraction`. No implementation is visible in this view.
trait Clustering {
38+
// impl for Map<Observation, Abstraction>
39+
/// Returns a newly owned `Histogram` for `hi`.
/// NOTE(review): the name `hi` suggests an observation on the higher layer — confirm.
fn create_histogram(&self, hi: &Observation) -> Histogram;
40+
/// Returns the `Abstraction` by value for `lo`.
/// NOTE(review): the name `lo` suggests an observation on the lower layer — confirm.
fn lookup_abstraction(&self, lo: &Observation) -> Abstraction;
41+
}
42+
/// File-local trait that borrows the `Histogram` associated with an `Observation`.
/// No implementation is visible in this view.
trait ObsProjection {
43+
// impl for Map<Observation, Histogram>
44+
/// Returns a reference whose lifetime is tied to `self`; the signature has no `Option`,
/// so how a missing key is handled is up to the implementor.
fn lookup_histogram(&self, o: &Observation) -> &Histogram;
45+
}
46+
/// File-local trait that borrows the `Histogram` associated with an `Abstraction`.
/// No implementation is visible in this view.
trait AbsProjection {
47+
// impl for Map<Abstraction, Centroid>
48+
/// Returns a reference whose lifetime is tied to `self`.
/// NOTE(review): given the intended `Map<Abstraction, Centroid>` implementor, this
/// presumably forwards to `Centroid::histogram` — confirm once implemented.
fn lookup_histogram(&self, a: &Abstraction) -> &Histogram;
49+
// fn lookup_centroid(&self, a: &Abstraction) -> &Centroid;
50+
}
51+
52+
// horizontal scaling across threads for k-means initialization and clustering
53+
// observation_abstraction: BTreeMap<Observation, Abstraction>
54+
// observation_distribution: BTreeMap<Observation, Histogram>
55+
// abstraction_distribution: BTreeMap<Abstraction, Histogram>
56+
//
57+
// INITIALIZATION:
58+
// each shard needs:
59+
// - Arc<Vec<Histogram>> a readonly view of all N Histograms
60+
// - Arc<Vec<Observation>> a readonly view of all N Observations
61+
// - Fn(Observation) -> Histogram Histogram from readonly Observation
62+
// - Fn(Histogram, Histogram) -> Abstraction Abstraction from two Histograms
63+
//
64+
// CLUSTERING:
65+
// each shard needs:
66+
// - Fn(Observation) -> Histogram Histogram from Observation; self.projection
67+
// - Fn(Abstraction) -> &mut Histogram Histogram from nearest neighbor Abstraction; absorb()
68+
// - Fn(Observation) -> &mut Abstraction nearest neighbor Abstraction; assign()
69+
1970
/// KMeans hierarchical clustering. Every Observation is to be clustered with "similar" observations. River cards are the base case, where the similarity metric is defined by equity. For each higher layer, we compare distributions of next-layer outcomes. Distances are measured by EMD and unsupervised kmeans clustering is used to cluster similar distributions. Potential-aware imperfect recall!
2071
pub struct Layer {
2172
street: Street,
@@ -25,6 +76,18 @@ pub struct Layer {
2576
}
2677

2778
impl Layer {
79+
/// Builds a `Layer` by starting from `Self::outer()` and applying three rounds of
/// `.inner().await.upload()`; the value of the final `upload()` is returned.
/// NOTE(review): presumably each round clusters and persists one street above the
/// River base layer — confirm against `inner` and `upload`.
pub async fn hierarchical() -> Self {
80+
Self::outer()
81+
.inner()
82+
.await
83+
.upload()
84+
.inner()
85+
.await
86+
.upload()
87+
.inner()
88+
.await
89+
.upload()
90+
}
2891
/// async equity calculations to create initial River layer.
2992
pub fn outer() -> Self {
3093
Self {
@@ -86,8 +149,8 @@ impl Layer {
86149
for (j, (y, _)) in self.kmeans.iter().enumerate() {
87150
if i > j {
88151
let index = Pair::from((x, y));
89-
let ref x = self.kmeans.get(x).expect("in centroids").0;
90-
let ref y = self.kmeans.get(y).expect("in centroids").0;
152+
let ref x = self.kmeans.get(x).expect("in centroids").0; // Centroid::prev()
153+
let ref y = self.kmeans.get(y).expect("in centroids").0; // Centroid::prev()
91154
let distance = self.metric.emd(x, y) + self.metric.emd(y, x);
92155
let distance = distance / 2.0;
93156
metric.insert(index, distance);
@@ -145,7 +208,7 @@ impl Layer {
145208
.get(choice)
146209
.cloned()
147210
.cloned()
148-
.expect("shared index with lowers");
211+
.expect("shared index with outer layer");
149212
kmeans.push(sample);
150213
progress.tick();
151214
}
@@ -230,6 +293,7 @@ impl Layer {
230293
.expect("kabstractions was initialized with neighbor")
231294
.1
232295
.absorb(children);
296+
// Centroid::absorb
233297
}
234298

235299
/// forget the old centroids and clear the new ones
@@ -321,8 +385,8 @@ impl Layer {
321385
let ref mut reader = std::io::BufReader::with_capacity(BUFFER, file);
322386
let ref mut buffer = [0u8; 16];
323387
while reader.read_exact(buffer).is_ok() {
324-
let obs_u64 = u64::from_le_bytes(buffer[0..8].try_into().unwrap());
325-
let abs_u64 = u64::from_le_bytes(buffer[8..16].try_into().unwrap());
388+
let obs_u64 = u64::from_le_bytes(buffer[00..08].try_into().unwrap());
389+
let abs_u64 = u64::from_le_bytes(buffer[08..16].try_into().unwrap());
326390
let observation = Observation::from(obs_u64 as i64);
327391
let abstraction = Abstraction::from(abs_u64 as i64);
328392
map.insert(observation, abstraction);
@@ -337,8 +401,8 @@ impl Layer {
337401
let ref mut reader = std::io::BufReader::with_capacity(BUFFER, file);
338402
let ref mut buffer = [0u8; 12];
339403
while reader.read_exact(buffer).is_ok() {
340-
let pair_u64 = u64::from_le_bytes(buffer[0..08].try_into().unwrap());
341-
let dist_f64 = f64::from_le_bytes(buffer[8..16].try_into().unwrap());
404+
let pair_u64 = u64::from_le_bytes(buffer[00..08].try_into().unwrap());
405+
let dist_f64 = f64::from_le_bytes(buffer[08..16].try_into().unwrap());
342406
let pair = Pair::from(pair_u64 as i64);
343407
let distance = dist_f64 as f32;
344408
map.insert(pair, distance);

0 commit comments

Comments
 (0)