diff --git a/CHANGELOG.md b/CHANGELOG.md index 71e3698c..52bc4a03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Added the ability to use a Pitman-Yor prior process +- Users now specify the prior process in the codebook +- `StateAlpha` and `ViewAlphas` transitions are now `StatePriorProcessParams` and `ViewPriorProcessParams` ## [python-0.7.1] - 2024-02-27 diff --git a/book/src/pcc/img/crp.png b/book/src/pcc/img/crp.png new file mode 100644 index 00000000..20419171 Binary files /dev/null and b/book/src/pcc/img/crp.png differ diff --git a/book/src/pcc/img/pyp.png b/book/src/pcc/img/pyp.png new file mode 100644 index 00000000..74560845 Binary files /dev/null and b/book/src/pcc/img/pyp.png differ diff --git a/book/src/pcc/prior-processes.md b/book/src/pcc/prior-processes.md index 8563e4d5..3a6fb8a7 100644 --- a/book/src/pcc/prior-processes.md +++ b/book/src/pcc/prior-processes.md @@ -7,6 +7,15 @@ In Lace (and in Bayesian nonparametrics) we put a prior on the number of paramet The Dirichlet process more heavily penalizes new categories with an exponential fall off while the Pitman-Yor process has a power law fall off in the number for categories. When d = 0, Pitman-Yor is equivalent to the Dirichlet process. -While Pitman-Yor may fit the data better it will create more parameters, which will cause model training to take longer. +![Dirichlet Process](img/crp.png) + +**Figure**: Category ID (y-axis) by instance number (x-axis) for Dirichlet process draws for various values of alpha. + +Pitman-Yor may fit the data better, but it does so by creating more parameters, which will cause model training to take longer. + +![Pitman-Yor Process](img/pyp.png) + +**Figure**: Category ID (y-axis) by instance number (x-axis) for Pitman-Yor process draws for various values of alpha and d. 
+ For those looking for a good introduction to prior process, [this slide deck](https://www.gatsby.ucl.ac.uk/~ywteh/teaching/probmodels/lecture5bnp.pdf) from Yee Whye Teh is a good resource. diff --git a/book/src/workflow/codebook.md b/book/src/workflow/codebook.md index 98205e0b..2788c219 100644 --- a/book/src/workflow/codebook.md +++ b/book/src/workflow/codebook.md @@ -49,7 +49,7 @@ let df = CsvReader::from_path(paths.data) .unwrap(); // Create the default codebook -let codebook = Codebook::from_df(&df, None, None, false).unwrap(); +let codebook = Codebook::from_df(&df, None, None, None, false).unwrap(); ``` diff --git a/book/src/workflow/model.md b/book/src/workflow/model.md index b35d68a3..e13299f5 100644 --- a/book/src/workflow/model.md +++ b/book/src/workflow/model.md @@ -125,7 +125,7 @@ let df = CsvReader::from_path(paths.data) .unwrap(); // Create the default codebook -let codebook = Codebook::from_df(&df, None, None, false).unwrap(); +let codebook = Codebook::from_df(&df, None, None, None, false).unwrap(); // Build an rng let rng = Xoshiro256Plus::from_entropy(); @@ -156,12 +156,12 @@ let run_config = EngineUpdateConfig::new() .n_iters(100) .transitions(vec![ StateTransition::ColumnAssignment(ColAssignAlg::Gibbs), - StateTransition::StateAlpha, + StateTransition::StatePriorProcessParams, StateTransition::RowAssignment(RowAssignAlg::Sams), StateTransition::ComponentParams, StateTransition::RowAssignment(RowAssignAlg::Slice), StateTransition::ComponentParams, - StateTransition::ViewAlphas, + StateTransition::ViewPriorProcessParams, StateTransition::FeaturePriors, ]); diff --git a/lace/Cargo.lock b/lace/Cargo.lock index fcabe039..d9daa165 100644 --- a/lace/Cargo.lock +++ b/lace/Cargo.lock @@ -1141,7 +1141,7 @@ dependencies = [ [[package]] name = "lace" -version = "0.7.0" +version = "0.8.0" dependencies = [ "approx", "clap", diff --git a/lace/Cargo.toml b/lace/Cargo.toml index 62cd52e0..f5bae231 100644 --- a/lace/Cargo.toml +++ b/lace/Cargo.toml @@ -1,6 +1,6 
@@ [package] name = "lace" -version = "0.7.0" +version = "0.8.0" authors = ["Promised AI"] build = "build.rs" edition = "2021"