Commit

first draft
sandreza committed Nov 6, 2024
1 parent 5adceba commit 93414f9
Showing 2 changed files with 167 additions and 0 deletions.
2 changes: 2 additions & 0 deletions examples/Project.toml
@@ -1,6 +1,8 @@
[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895"
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
165 changes: 165 additions & 0 deletions examples/statistical_model_prototype.jl
@@ -0,0 +1,165 @@
using Distributions
using ProgressBars # assumed source of the ProgressBar iterator used in the loops below

struct StatisticalModel{E, M, T}
    embedding::E
    invariant_measure::M
    transition_operator::T
end

struct DeltaFunction{W, C}
    weights::W
    means::C
end

struct GeneralGaussianMixture{W, M, C}
    weights::W
    means::M
    covariances::C
end
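# DeltaFunction represents the coarse-grained invariant measure as a sum of point masses:
# weight i sits at the cell center means[:, i]. GeneralGaussianMixture replaces each point
# mass with a Gaussian component carrying that cell's empirical covariance.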


function determine_statistical_model(trajectory, tree_type::Tree{Val{false}, S}; override=false) where {S}
    if tree_type.arguments isa NamedTuple
        if haskey(tree_type.arguments, :minimum_probability)
            minimum_probability = tree_type.arguments.minimum_probability
        else
            @warn "no minimum probability specified, using 0.01"
            minimum_probability = 0.01
        end
    elseif tree_type.arguments isa Number
        minimum_probability = tree_type.arguments
    else
        @warn "no minimum probability specified, using 0.01"
        minimum_probability = 0.01
    end
    Nmax = 100 * round(Int, 1 / minimum_probability)
    c_trajectory = copy(trajectory)
    if (size(trajectory)[2] > Nmax) && !override
        @warn "trajectory too long, truncating to roughly $Nmax for determining embedding"
        skip = round(Int, size(trajectory)[2] / Nmax)
        c_trajectory = copy(trajectory[:, 1:skip:end])
    end
    if (10 / size(trajectory)[2] > minimum_probability) && !override
        @warn "minimum probability too small, using 10x the reciprocal of the number of points"
        minimum_probability = 10 / size(trajectory)[2]
    end
    F, H, edge_information, parent_to_children, global_to_local, centers_list, CC, local_to_global = unstructured_tree(c_trajectory, minimum_probability)

    embedding = UnstructuredTree(global_to_local, centers_list, parent_to_children)

    # gather the cell centers into a matrix with one column per partition cell
    means = zeros(size(trajectory, 1), length(global_to_local))
    for key in keys(global_to_local)
        local_index = global_to_local[key]
        means[:, local_index] = CC[key]
    end
    # means = [means[:, i] for i in 1:size(means, 2)]
    @info "computing partition trajectory"
    probability_weights = zeros(size(means, 2))
    partitions = zeros(Int64, size(trajectory)[2])
    for (i, state) in ProgressBar(enumerate(eachcol(trajectory)))
        cell_index = embedding(state)
        partitions[i] = cell_index
        probability_weights[cell_index] += 1 / size(trajectory)[2]
    end
    probability_weights = probability_weights / sum(probability_weights)

    return embedding, probability_weights, means, partitions
end


# include the lorenz file here
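# A minimal stand-in for that file (assumption: `trajectory` is a 3 × N matrix of
# Lorenz-63 states stored as columns; the actual lorenz file may set this up differently).
function lorenz_trajectory(; N=10^6, dt=0.005, σ=10.0, ρ=28.0, β=8/3)
    trajectory = zeros(3, N)
    state = [1.0, 1.0, 1.0]
    for i in 1:N
        # crude forward-Euler step of the Lorenz-63 equations
        dx = [σ * (state[2] - state[1]),
              state[1] * (ρ - state[3]) - state[2],
              state[1] * state[2] - β * state[3]]
        state = state + dt * dx
        trajectory[:, i] = state
    end
    return trajectory
end
trajectory = lorenz_trajectory()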
method = Tree(false, 0.01)

embedding, probability_weights, means, partitions = determine_statistical_model(trajectory, method)

empirical_centers = zeros(size(trajectory)[1], maximum(partitions))
empirical_covariance = zeros(size(trajectory)[1], size(trajectory)[1], maximum(partitions))
empirical_count = zeros(Int64, maximum(partitions))
for (i, state) in ProgressBar(enumerate(eachcol(trajectory)))
    cell_index = embedding(state)
    partitions[i] = cell_index
    empirical_count[cell_index] += 1
    empirical_centers[:, cell_index] .+= state
    empirical_covariance[:, :, cell_index] .+= state * state'
end
# convert the accumulated sums into per-cell means and unbiased covariances
adj_empirical_centers = empirical_centers ./ reshape(empirical_count, 1, length(empirical_count))
adj_empirical_covariance = empirical_covariance ./ reshape(empirical_count .- 1, 1, 1, length(empirical_count))
for i in 1:length(empirical_count)
    adj_empirical_covariance[:, :, i] .-= (adj_empirical_centers[:, i] * adj_empirical_centers[:, i]') * empirical_count[i] / (empirical_count[i] - 1)
end
probability_weights = empirical_count / sum(empirical_count)

δmodel = DeltaFunction(probability_weights, adj_empirical_centers)
Σmodel = GeneralGaussianMixture(probability_weights, adj_empirical_centers, adj_empirical_covariance)

import Base: rand
import Statistics: mean, cov, std

function rand(δmodel::DeltaFunction)
    cell_index = rand(Categorical(δmodel.weights))
    return δmodel.means[:, cell_index]
end

function rand(Σmodel::GeneralGaussianMixture)
    cell_index = rand(Categorical(Σmodel.weights))
    return rand(MvNormal(Σmodel.means[:, cell_index], Σmodel.covariances[:, :, cell_index]))
end

function mean(δmodel::DeltaFunction)
    ensemble_mean = zeros(size(δmodel.means)[1])
    for i in 1:size(δmodel.means)[2]
        ensemble_mean .+= δmodel.weights[i] * δmodel.means[:, i]
    end
    return ensemble_mean
end

function mean(Σmodel::GeneralGaussianMixture)
    ensemble_mean = zeros(size(Σmodel.means)[1])
    for i in 1:size(Σmodel.means)[2]
        ensemble_mean .+= Σmodel.weights[i] * Σmodel.means[:, i]
    end
    return ensemble_mean
end

function std(δmodel::DeltaFunction)
    ensemble_std = zeros(size(δmodel.means)[1])
    ensemble_mean = mean(δmodel)
    for i in 1:size(δmodel.means)[2]
        ensemble_std .+= δmodel.weights[i] * (δmodel.means[:, i] .- ensemble_mean).^2
    end
    return sqrt.(ensemble_std)
end

function cov(δmodel::DeltaFunction)
    ensemble_cov = zeros(size(δmodel.means)[1], size(δmodel.means)[1])
    ensemble_mean = mean(δmodel)
    for i in 1:size(δmodel.means)[2]
        ensemble_cov .+= δmodel.weights[i] * (δmodel.means[:, i] .- ensemble_mean) * (δmodel.means[:, i] .- ensemble_mean)'
    end
    return ensemble_cov
end

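# Mixture covariance via the law of total covariance: Cov = Σᵢ wᵢ (Σᵢ + μᵢ μᵢᵀ) − μ μᵀ,
# where μ = Σᵢ wᵢ μᵢ is the mixture mean computed by mean(Σmodel).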
function cov(Σmodel::GeneralGaussianMixture)
    ensemble_cov = zeros(size(Σmodel.means)[1], size(Σmodel.means)[1])
    for i in 1:size(Σmodel.means)[2]
        ensemble_cov .+= Σmodel.weights[i] * (Σmodel.covariances[:, :, i] + Σmodel.means[:, i] * Σmodel.means[:, i]')
    end
    ensemble_mean = mean(Σmodel)
    ensemble_cov .-= ensemble_mean * ensemble_mean'
    return ensemble_cov
end

# sanity check: compare the stored statistics of one cell against statistics recomputed
# directly from the trajectory points assigned to that cell
indval = 2
ind1 = trajectory[:, partitions .== indval]
check1 = mean(ind1, dims=2)
adj_empirical_centers[:, indval] - check1          # should be ≈ 0
cov1 = cov(ind1')
adj_empirical_covariance[:, :, indval] - cov1      # should be ≈ 0

Nsamples = 100
samples = zeros(size(trajectory)[1], Nsamples)
for i in 1:Nsamples
    samples[:, i] = rand(Σmodel)
end
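
# sanity check on the sampler: as Nsamples grows, the sample statistics should approach
# the mixture statistics computed by mean(Σmodel) and cov(Σmodel)
sample_mean = vec(mean(samples, dims=2))
sample_cov = cov(samples')
sample_mean - mean(Σmodel)
sample_cov - cov(Σmodel)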
