diff --git a/core/base/dimensionReduction/DimensionReduction.cpp b/core/base/dimensionReduction/DimensionReduction.cpp index 8711a327af..ba342f1b36 100644 --- a/core/base/dimensionReduction/DimensionReduction.cpp +++ b/core/base/dimensionReduction/DimensionReduction.cpp @@ -81,7 +81,7 @@ int DimensionReduction::execute( ae_CUDA, ae_Deterministic, ae_Seed, NumberOfComponents, ae_Epochs, ae_LearningRate, ae_Optimizer, ae_Method, ae_Model, ae_Architecture, ae_Activation, ae_BatchSize, ae_BatchNormalization, ae_RegCoefficient, - IsInputImages); + IsInputImages, ae_PreOptimize, ae_PreOptimizeEpochs); tcdr.setDebugLevel(debugLevel_); tcdr.setThreadNumber(threadNumber_); @@ -89,15 +89,6 @@ int DimensionReduction::execute( for(int d = 0; d < NumberOfComponents; d++) outputEmbedding[d].resize(nRows); - if(ae_PreOptimize) { - DimensionReduction initDR; - initDR.setDebugLevel(debugLevel_); - initDR.setInputMethod(ae_PreOptimizeMethod); - std::vector> latentInitialization; - initDR.execute(latentInitialization, inputMatrix, nRows, nColumns); - tcdr.setLatentInitialization(latentInitialization); - } - tcdr.execute(outputEmbedding, inputMatrix, nRows); this->printMsg("Computed AE dimension reduction", 1.0, t.getElapsedTime(), diff --git a/core/base/dimensionReduction/DimensionReduction.h b/core/base/dimensionReduction/DimensionReduction.h index 0be5774ffb..33f30a493c 100644 --- a/core/base/dimensionReduction/DimensionReduction.h +++ b/core/base/dimensionReduction/DimensionReduction.h @@ -66,8 +66,9 @@ /// /// "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality /// Reduction" \n -/// Mattéo Clémot, Julie Digne, Julien Tierny, \n -/// arXiv preprint, 2025. +/// Mattéo Clémot, Julie Digne, Julien Tierny, \n +/// IEEE Transactions on Visualization and Computer Graphics. +/// Accepted, to be presented at IEEE VIS 2026. #pragma once @@ -365,7 +366,7 @@ namespace ttk { bool ae_BatchNormalization{true}; double ae_RegCoefficient{1e-2}; bool ae_PreOptimize{false}; - METHOD ae_PreOptimizeMethod{METHOD::PCA}; + int ae_PreOptimizeEpochs{1000}; // testing std::string ModulePath{"default"}; diff --git a/core/base/ripsPersistenceDiagram/FastRipsPersistenceDiagram2.h b/core/base/ripsPersistenceDiagram/FastRipsPersistenceDiagram2.h index cca42639e1..5542b32ec7 100644 --- a/core/base/ripsPersistenceDiagram/FastRipsPersistenceDiagram2.h +++ b/core/base/ripsPersistenceDiagram/FastRipsPersistenceDiagram2.h @@ -14,8 +14,9 @@ /// \b Related \b publication \n /// "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality /// Reduction" \n -/// Mattéo Clémot, Julie Digne, Julien Tierny, \n -/// arXiv preprint, 2025. +/// Mattéo Clémot, Julie Digne, Julien Tierny, \n +/// IEEE Transactions on Visualization and Computer Graphics. +/// Accepted, to be presented at IEEE VIS 2026. #pragma once @@ -116,4 +117,4 @@ namespace ttk::rpd { } // namespace ttk::rpd -#endif \ No newline at end of file +#endif diff --git a/core/base/ripsPersistenceDiagram/PairCellsWithOracle.cpp b/core/base/ripsPersistenceDiagram/PairCellsWithOracle.cpp index f1cae7ba05..e2e2708bce 100644 --- a/core/base/ripsPersistenceDiagram/PairCellsWithOracle.cpp +++ b/core/base/ripsPersistenceDiagram/PairCellsWithOracle.cpp @@ -29,6 +29,27 @@ ttk::rpd::PairCellsWithOracle::PairCellsWithOracle( } } +ttk::rpd::PairCellsWithOracle::PairCellsWithOracle( + float *data, + int n, + int dim, + MultidimensionalDiagram const &oracle, + bool parallelSort) + : n_(n), parallelSort_(parallelSort), oracle_(oracle) { + // inherited from Debug: prefix will be printed at the beginning of every msg + this->setDebugMsgPrefix("PairCellsWithOracle"); + + for(int i = 1; i < n_; ++i) { + for(int j = 0; j < i; ++j) { + double s = 0.; + for(int d = 0; d < dim; ++d) + s += (data[dim * i + d] - data[dim * j + d]) + * (data[dim * i + d] - data[dim * j + d]); + compressedDM_.push_back(sqrt(s)); + } + } +} + void ttk::rpd::PairCellsWithOracle::callOracle(const PointCloud &points, MultidimensionalDiagram &oracle, double threshold, diff --git a/core/base/ripsPersistenceDiagram/PairCellsWithOracle.h b/core/base/ripsPersistenceDiagram/PairCellsWithOracle.h index 6a9214db3c..494cb5475a 100644 --- a/core/base/ripsPersistenceDiagram/PairCellsWithOracle.h +++ b/core/base/ripsPersistenceDiagram/PairCellsWithOracle.h @@ -26,6 +26,11 @@ namespace ttk::rpd { MultidimensionalDiagram const &oracle, bool distanceMatrix = false, bool parallelSort = false); + PairCellsWithOracle(float *data, + int n, + int dim, + MultidimensionalDiagram const &oracle, + bool parallelSort = false); void run(); diff --git a/core/base/topologicalDimensionReduction/DimensionReductionModel.cpp b/core/base/topologicalDimensionReduction/DimensionReductionModel.cpp index 2b15982389..c666891e0b 100644 --- a/core/base/topologicalDimensionReduction/DimensionReductionModel.cpp +++ b/core/base/topologicalDimensionReduction/DimensionReductionModel.cpp @@ -1,4 +1,4 @@ -#include "DimensionReductionModel.h" +#include #include #ifdef TTK_ENABLE_TORCH diff --git a/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.cpp b/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.cpp index fad3db302c..fa2c2cac34 100644 --- a/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.cpp +++ b/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.cpp @@ -19,12 +19,15 @@ ttk::TopologicalDimensionReduction::TopologicalDimensionReduction( int batchSize, bool batchNormalization, double regCoefficient, - bool inputIsImages) + bool inputIsImages, + bool preOptimize, + int preOptimizeEpochs) : NumberOfComponents(numberOfComponents), Epochs(epochs), LearningRate(learningRate), Optimizer(optimizer), Method(method), ModelType(modelType), InputIsImages(inputIsImages), Architecture(architecture), Activation(activation), BatchSize(batchSize), - BatchNormalization(batchNormalization), RegCoefficient(regCoefficient) { + BatchNormalization(batchNormalization), RegCoefficient(regCoefficient), + PreOptimize(preOptimize), PreOptimizeEpochs(preOptimizeEpochs) { // inherited from Debug: prefix will be printed at the beginning of every msg this->setDebugMsgPrefix("TopologicalDimensionReduction"); @@ -88,15 +91,18 @@ int ttk::TopologicalDimensionReduction::execute( printMsg("Initialization", 0., tm.getElapsedTime()); const int inputSize = n; - const int inputDimension = inputMatrix.size() / n; + const int inputRawDimension = inputMatrix.size() / n; + const int inputDimension + = inputRawDimension - PreOptimize * NumberOfComponents; if(!InputIsImages) this->printMsg("input dimension: " + std::to_string(inputDimension), 0.0, tm.getElapsedTime()); else - this->printMsg("input dimension: " + std::to_string(inputDimension) + " = " - + std::to_string((int)sqrt(inputDimension)) + " x " - + std::to_string((int)sqrt(inputDimension)) + " images", - .0, tm.getElapsedTime()); + this->printMsg( + "input dimension: " + std::to_string(inputDimension) + " = " + + std::to_string(static_cast(sqrt(inputDimension))) + " x " + + std::to_string(static_cast(sqrt(inputDimension))) + " images", + .0, tm.getElapsedTime()); this->printMsg("output dimension: " + std::to_string(NumberOfComponents), 0.0, tm.getElapsedTime()); this->printMsg( @@ -110,20 +116,22 @@ int ttk::TopologicalDimensionReduction::execute( return 1; initializeOptimizer(); - const torch::Tensor input + const torch::Tensor rawInput = torch::from_blob(const_cast(inputMatrix.data()), - {inputSize, inputDimension}, torch::kFloat64) + {inputSize, inputRawDimension}, torch::kFloat64) .to(torch::kFloat32) .to(device); + const torch::Tensor input + = rawInput.index({Slice(), Slice(None, inputDimension)}); rpd::PointCloud points(inputSize, std::vector(inputDimension)); for(int i = 0; i < inputSize; ++i) { for(int j = 0; j < inputDimension; ++j) - points[i][j] = inputMatrix[inputDimension * i + j]; + points[i][j] = inputMatrix[inputRawDimension * i + j]; } - if(latentInitialization_.numel()) { - preOptimize(input, latentInitialization_); + if(PreOptimize) { + preOptimize(input, rawInput.index({Slice(), Slice(inputDimension, None)})); initializeOptimizer(); } @@ -148,18 +156,6 @@ int ttk::TopologicalDimensionReduction::execute( return 0; } -void ttk::TopologicalDimensionReduction::setLatentInitialization( - std::vector> const &latentInitialization) { - std::vector tensors; - for(auto const &column : latentInitialization) - tensors.push_back(torch::from_blob(const_cast(column.data()), - {static_cast(column.size())}, - torch::kFloat64) - .to(torch::kFloat32) - .to(device)); - latentInitialization_ = torch::stack(tensors).transpose(0, 1); -} - void ttk::TopologicalDimensionReduction::optimizeSimple( const torch::Tensor &input) const { int epoch = 0; @@ -180,7 +176,7 @@ void ttk::TopologicalDimensionReduction::optimizeSimple( loss.backward(); // IO - printLoss(epoch, loss.item()); + printLoss(epoch, Epochs, loss.item()); return loss; }; @@ -207,7 +203,7 @@ void ttk::TopologicalDimensionReduction::optimize( loss.backward(); // IO - printLoss(epoch, loss.item()); + printLoss(epoch, Epochs, loss.item()); return loss; }; @@ -232,21 +228,24 @@ void ttk::TopologicalDimensionReduction::preOptimize( loss.backward(); // IO - printLoss(epoch, loss.item()); + printLoss(epoch, PreOptimizeEpochs, loss.item()); return loss; }; - for(; epoch < Epochs; ++epoch) + for(; epoch < PreOptimizeEpochs; ++epoch) torchOptimizer->step(closure); } void ttk::TopologicalDimensionReduction::printLoss(int epoch, + int maxEpoch, double loss) const { - if(epoch % std::max(1, Epochs / 10) == 0) + if(epoch % std::max(1, maxEpoch / 10) == 0) printMsg( - "Loss at epoch " + std::to_string(epoch) + " : " + std::to_string(loss), - double(epoch) / Epochs, -1, -1, debug::LineMode::REPLACE); + "Loss at epoch " + std::to_string(epoch) + ": " + std::to_string(loss), + static_cast(epoch) / maxEpoch, -1, -1, debug::LineMode::REPLACE); + else if(epoch == maxEpoch - 1) + printMsg("Final loss value: " + std::to_string(loss), 1.); } #endif diff --git a/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.h b/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.h index e34775bd6e..5d44b70aa4 100644 --- a/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.h +++ b/core/base/topologicalDimensionReduction/TopologicalDimensionReduction.h @@ -32,7 +32,8 @@ /// "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality /// Reduction" \n /// Mattéo Clémot, Julie Digne, Julien Tierny, \n -/// arXiv preprint, 2025. +/// IEEE Transactions on Visualization and Computer Graphics. +/// Accepted, to be presented at IEEE VIS 2026. /// /// \sa DimensionReduction.cpp %for a usage example. @@ -90,7 +91,9 @@ namespace ttk { int batchSize, bool batchNormalization, double regCoefficient, - bool inputIsImages); + bool inputIsImages, + bool preOptimize, + int preOptimizeEpochs); /** * @brief Computes the projection with an AutoEncoder @@ -107,9 +110,6 @@ namespace ttk { const std::vector &inputMatrix, size_t n); - void setLatentInitialization( - std::vector> const &latentInitialization); - protected: const int NumberOfComponents; const int Epochs; @@ -123,13 +123,14 @@ namespace ttk { const int BatchSize; const bool BatchNormalization; const double RegCoefficient; + const bool PreOptimize; + const int PreOptimizeEpochs; private: torch::DeviceType device{torch::kCPU}; std::unique_ptr model{nullptr}; std::unique_ptr torchOptimizer{nullptr}; std::unique_ptr topologicalLossContainer{nullptr}; - torch::Tensor latentInitialization_{}; int initializeModel(int inputSize, int inputDimension); void initializeOptimizer(); @@ -140,7 +141,7 @@ namespace ttk { void optimize(const torch::Tensor &input) const; void optimizeSimple(const torch::Tensor &input) const; - inline void printLoss(int epoch, double loss) const; + inline void printLoss(int epoch, int maxEpoch, double loss) const; #endif diff --git a/core/base/topologicalDimensionReduction/TopologicalLoss.cpp b/core/base/topologicalDimensionReduction/TopologicalLoss.cpp index a44ed0b0cd..136cf4bebf 100644 --- a/core/base/topologicalDimensionReduction/TopologicalLoss.cpp +++ b/core/base/topologicalDimensionReduction/TopologicalLoss.cpp @@ -32,11 +32,12 @@ torch::Tensor ttk::TopologicalLoss::computeLoss(const torch::Tensor &latent) { } void ttk::TopologicalLoss::precomputeInputPersistence() { - if(regul_ == REGUL::TOPOAE) { + if(regul_ == REGUL::TOPOAE || regul_ == REGUL::W_DIM1) { rpd::EdgeSets3 inputCritical; ripser::ripser(points_, inputCritical, rpd::inf, 0, false); inputCriticalPairIndices = {pairsToTorch(inputCritical[0])}; - } else if(regul_ == REGUL::TOPOAE_DIM1) { + } + if(regul_ == REGUL::TOPOAE_DIM1) { rpd::EdgeSets3 inputCritical; ripser::ripser(points_, inputCritical, rpd::inf, 1, false); for(int i = 0; i <= 2; ++i) @@ -45,12 +46,6 @@ void ttk::TopologicalLoss::precomputeInputPersistence() { ripser::ripser(points_, inputPD, rpd::inf, 1, false); auction = std::make_unique< PersistenceDiagramWarmRestartAuction>(inputPD[1]); -#ifdef TTK_W1REG_WITH_TOPOAE0 - // we add topoAE0 loss - rpd::EdgeSets3 inputCritical; - ripser::ripser(points_, inputCritical, rpd::inf, 0, false); - inputCriticalPairIndices[0] = pairsToTorch(inputCritical[0]); -#endif } else if(regul_ == REGUL::CASCADE || regul_ == REGUL::ASYMMETRIC_CASCADE) { // first compute the PD with Ripser rpd::PairCellsWithOracle::callOracle(points_, inputPD); @@ -116,14 +111,21 @@ void ttk::TopologicalLoss::computeLatentCascades( latent_.cpu().data_ptr(), latent_.size(0)) .computeRips0And1Persistence(latentCriticalAndCascades, false, false); else { - rpd::PairCells pc( - latent_.cpu().data_ptr(), latent_.size(0), latent_.size(1)); + rpd::MultidimensionalDiagram latentPD; + ripser::ripser(latent_.cpu().data_ptr(), latent_.size(0), + latent_.size(1), latentPD, rpd::inf, 1, false); + rpd::PairCellsWithOracle pc(latent_.cpu().data_ptr(), + latent_.size(0), latent_.size(1), latentPD, + false); pc.run(); pc.getCascades(latentCriticalAndCascades); } #else - rpd::PairCells pc( - latent_.cpu().data_ptr(), latent_.size(0), latent_.size(1)); + rpd::MultidimensionalDiagram latentPD; + ripser::ripser(latent_.cpu().data_ptr(), latent_.size(0), + latent_.size(1), latentPD, rpd::inf, 1, false); + rpd::PairCellsWithOracle pc(latent_.cpu().data_ptr(), latent_.size(0), + latent_.size(1), latentPD, false); pc.run(); pc.getCascades(latentCriticalAndCascades); #endif diff --git a/core/base/topologicalDimensionReduction/TopologicalLoss.h b/core/base/topologicalDimensionReduction/TopologicalLoss.h index ea64bed5ee..31e2efd9d8 100644 --- a/core/base/topologicalDimensionReduction/TopologicalLoss.h +++ b/core/base/topologicalDimensionReduction/TopologicalLoss.h @@ -20,7 +20,6 @@ #pragma once -#include #include #include #include diff --git a/core/vtk/ttkDimensionReduction/ttkDimensionReduction.cpp b/core/vtk/ttkDimensionReduction/ttkDimensionReduction.cpp index 984033edff..687aceb5d3 100644 --- a/core/vtk/ttkDimensionReduction/ttkDimensionReduction.cpp +++ b/core/vtk/ttkDimensionReduction/ttkDimensionReduction.cpp @@ -74,8 +74,35 @@ int ttkDimensionReduction::RequestData(vtkInformation *ttkNotUsed(request), arrays.reserve(ScalarFields.size()); for(const auto &s : ScalarFields) arrays.push_back(input->GetColumnByName(s.data())); + + if(Method == METHOD::AE && ae_PreOptimize) { + if(SelectInitializationFieldsWithRegexp) { + // select all input columns whose name is matching the regexp + InitializationFields.clear(); + const auto n = input->GetNumberOfColumns(); + for(int i = 0; i < n; ++i) { + const auto &name = input->GetColumnName(i); + if(std::regex_match(name, std::regex(InitializationRegexpString))) { + InitializationFields.emplace_back(name); + } + } + } + + const int numberOfInitializationColumns = InitializationFields.size(); + if(numberOfInitializationColumns != NumberOfComponents) { + this->printErr("The number of initialization columns (" + + std::to_string(numberOfInitializationColumns) + ")"); + this->printErr("must match the number of components (" + + std::to_string(NumberOfComponents) + ")"); + return 0; + } + + for(const auto &s : InitializationFields) + arrays.push_back(input->GetColumnByName(s.data())); + } + for(SimplexId i = 0; i < numberOfRows; ++i) { - for(auto arr : arrays) + for(const auto arr : arrays) inputData.push_back(arr->GetVariantValue(i).ToDouble()); } diff --git a/core/vtk/ttkDimensionReduction/ttkDimensionReduction.h b/core/vtk/ttkDimensionReduction/ttkDimensionReduction.h index 30b468cdef..0d918df9d3 100644 --- a/core/vtk/ttkDimensionReduction/ttkDimensionReduction.h +++ b/core/vtk/ttkDimensionReduction/ttkDimensionReduction.h @@ -71,8 +71,9 @@ /// /// "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality /// Reduction" \n -/// Mattéo Clémot, Julie Digne, Julien Tierny, \n -/// arXiv preprint, 2025. +/// Mattéo Clémot, Julie Digne, Julien Tierny, \n +/// IEEE Transactions on Visualization and Computer Graphics. +/// Accepted, to be presented at IEEE VIS 2026. #pragma once @@ -111,6 +112,22 @@ class TTKDIMENSIONREDUCTION_EXPORT ttkDimensionReduction vtkSetMacro(RegexpString, const std::string &); vtkGetMacro(RegexpString, std::string); + void SetInitializationFields(const std::string &s) { + InitializationFields.push_back(s); + Modified(); + } + + void ClearInitializationFields() { + InitializationFields.clear(); + Modified(); + } + + vtkSetMacro(SelectInitializationFieldsWithRegexp, bool); + vtkGetMacro(SelectInitializationFieldsWithRegexp, bool); + + vtkSetMacro(InitializationRegexpString, const std::string &); + vtkGetMacro(InitializationRegexpString, std::string); + vtkSetMacro(NumberOfComponents, int); vtkGetMacro(NumberOfComponents, int); @@ -310,8 +327,8 @@ class TTKDIMENSIONREDUCTION_EXPORT ttkDimensionReduction vtkSetMacro(ae_PreOptimize, bool); vtkGetMacro(ae_PreOptimize, bool); - ttkSetEnumMacro(ae_PreOptimizeMethod, METHOD); - vtkGetEnumMacro(ae_PreOptimizeMethod, METHOD); + vtkSetMacro(ae_PreOptimizeEpochs, int); + vtkGetMacro(ae_PreOptimizeEpochs, int); // testing vtkSetMacro(ModulePath, const std::string &); @@ -338,6 +355,10 @@ class TTKDIMENSIONREDUCTION_EXPORT ttkDimensionReduction std::string RegexpString{".*"}; std::vector ScalarFields{}; + bool SelectInitializationFieldsWithRegexp{false}; + std::string InitializationRegexpString{".*"}; + std::vector InitializationFields{}; + bool KeepAllDataArrays{true}; // mds && se diff --git a/paraview/xmls/DimensionReduction.xml b/paraview/xmls/DimensionReduction.xml index 46df85b5c3..c73b757aea 100644 --- a/paraview/xmls/DimensionReduction.xml +++ b/paraview/xmls/DimensionReduction.xml @@ -52,9 +52,10 @@ Hariprasad Kannan, Yuhei Umeda, 38th International Conference on Machine Learning, 2021. - "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality Reduction" - Mattéo Clémot, Julie Digne, Julien Tierny - arXiv preprint, 2025. + "Topological Autoencoders++: Fast and Accurate Cycle-Aware Dimensionality Reduction" + Mattéo Clémot, Julie Digne, Julien Tierny, + IEEE Transactions on Visualization and Computer Graphics. + Accepted, to be presented at IEEE VIS 2026. - + @@ -1023,23 +1024,32 @@ - - - - - - - - - - + + + + + + + + + + - Dimension reduction method used for initializing the autoencoder. + Select input scalar fields matching a regular expression. + + + + + + + + + + + + + + + + + Select the initialization fields. + + + + + + + + + This regexp will be used to filter the chosen fields for initialization. Only + matching ones will be selected. + + + - - + + + + + + + + + + +