The standalone module for dimension reduction.

atalon-lip6 · atalon-lip6 · commit 8ca9994f35ad · 2023-12-07T17:13:49.000+01:00
diff --git a/core/base/dimensionReduction/DimensionReduction.h b/core/base/dimensionReduction/DimensionReduction.h
@@ -209,6 +209,7 @@ namespace ttk {
         this->se_Affinity = "precomputed";
         this->mds_Dissimilarity = "precomputed";
         this->tsne_Metric = "precomputed";
+        this->tsne_Init = "random";
         this->iso_Metric = "precomputed";
       } else {
         this->se_Affinity = "nearest_neighbors";
@@ -258,7 +259,7 @@ namespace ttk {
     int tsne_MaxIterationProgress{300};
     float tsne_GradientThreshold{1e-7};
     std::string tsne_Metric{"euclidean"};
-    std::string tsne_Init{"random"};
+    std::string tsne_Init{"pca"};
     int tsne_Verbose{0};
     std::string tsne_Method{"barnes_hut"};
     float tsne_Angle{0.5};
diff --git a/core/base/topoMap/TopoMap.cpp b/core/base/topoMap/TopoMap.cpp
@@ -23,7 +23,7 @@ bool computeConvexHull_aux(const std::vector<double> &coords,
   }
 
   // Qhull gives us the coordinates of the points in the convex hull. Here we
-  // retrive the indices of this points in the list we provided. We will also
+  // retrieve the indices of these points in the list we provided. We will also
   // compute the barycenter of the points in the convex hull.
   for(const auto &u : qhull.vertexList()) {
     const orgQhull::QhullPoint &qhullPt = u.point();
diff --git a/paraview/xmls/DimensionReduction.xml b/paraview/xmls/DimensionReduction.xml
@@ -295,6 +295,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Kernel coefficient for rbf kernel. Only used if the affinity is set to rbf.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -348,6 +349,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Not used if the solver is set to dense.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -435,6 +437,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <IntRangeDomain name="range" min="0" max="100" />
         <Documentation>
+          Number of times the SMACOF algorithm will be run with different initializations. The final results will be the best output of the runs, determined by the run with the smallest final stress.
         </Documentation>
       </IntVectorProperty>
 
@@ -446,6 +449,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <IntRangeDomain name="range" min="0" max="100" />
         <Documentation>
+          Maximum number of iterations of the SMACOF algorithm for a single run.
         </Documentation>
       </IntVectorProperty>
 
@@ -468,6 +472,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Relative tolerance with respect to stress at which to declare convergence.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -477,8 +482,9 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         number_of_elements="1"
         default_values="30"
         panel_visibility="advanced">
-        <DoubleRangeDomain name="range" min="0.0" max="1.0" />
+        <DoubleRangeDomain name="range" min="5.0" max="50.0" />
         <Documentation>
+        The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -490,6 +496,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -501,6 +508,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If the learning rate is too high, the data may look like a ‘ball’ with any point approximately equidistant from its nearest neighbours. If the learning rate is too low, most points may look compressed in a dense cloud with few outliers. If the cost function gets stuck in a bad local minimum increasing the learning rate may help.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -512,6 +520,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <IntRangeDomain name="range" min="0" max="100" />
         <Documentation>
+          Maximum number of iterations for the optimization. Should be at least 250.
         </Documentation>
       </IntVectorProperty>
 
@@ -523,7 +532,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <IntRangeDomain name="range" min="0" max="100" />
         <Documentation>
-          Documentation.
+          Maximum number of iterations without progress before we abort the optimization, used after 250 initial iterations with early exaggeration. This value is rounded to the next multiple of 50.
         </Documentation>
       </IntVectorProperty>
 
@@ -535,6 +544,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          If the gradient norm is below this threshold, the optimization will be stopped.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -544,21 +554,23 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         number_of_elements="1"
         default_values="euclidean"
         panel_visibility="advanced">
-        <Documentation>
+      <Documentation>
+        The metric to use when calculating distance between instances in a feature array.
         </Documentation>
       </StringVectorProperty>
 
       <StringVectorProperty name="tsne_Init"
         label="Init"
         command="Settsne_Init"
         number_of_elements="1"
-        default_values="random"
+        default_values="pca"
         panel_visibility="advanced">
         <StringListDomain name="enum">
           <String value="random"/>
           <String value="pca"/>
         </StringListDomain>
         <Documentation>
+          Initialization of embedding. PCA initialization cannot be used with precomputed distances and is usually more globally stable than random initialization.
         </Documentation>
       </StringVectorProperty>
 
@@ -584,6 +596,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
           <String value="exact"/>
         </StringListDomain>
         <Documentation>
+          By default the gradient calculation algorithm uses the Barnes-Hut approximation, which runs in O(N log N) time. The exact method runs in O(N^2) time. The exact algorithm should be used when nearest-neighbor errors need to be better than 3%.
         </Documentation>
       </StringVectorProperty>
 
@@ -595,6 +608,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Only used for Barnes-Hut. This is the trade-off between speed and accuracy for Barnes-Hut t-SNE. An angle less than 0.2 has quickly increasing computation time and an angle greater than 0.8 has quickly increasing error.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -621,6 +635,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <DoubleRangeDomain name="range" min="0.0" max="1.0" />
         <Documentation>
+          Only used if the solver is set to arpack.
         </Documentation>
       </DoubleVectorProperty>
 
@@ -632,6 +647,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
         panel_visibility="advanced">
         <IntRangeDomain name="range" min="0" max="100" />
         <Documentation>
+          Only used if the solver is set to arpack.
         </Documentation>
       </IntVectorProperty>
 
@@ -647,6 +663,7 @@ IEEE Transactions on Visualization and Computer Graphics 27(2): 561-571, 2020.
           <String value="D"/>
         </StringListDomain>
         <Documentation>
+          Algorithm used to compute the shortest paths: choose between the Floyd-Warshall (FW) and Dijkstra (D) algorithms.
         </Documentation>
       </StringVectorProperty>
 
diff --git a/standalone/DimensionReduction/CMakeLists.txt b/standalone/DimensionReduction/CMakeLists.txt
@@ -0,0 +1,25 @@
+cmake_minimum_required(VERSION 3.21)
+
+project(ttkDimensionReductionCmd)
+
+if(TARGET ttkDimensionReduction)
+  add_executable(${PROJECT_NAME} main.cpp)
+  target_link_libraries(${PROJECT_NAME}
+    PRIVATE
+      ttkDimensionReduction
+      ttkDataSetToTable
+      VTK::IOXML
+      VTK::IOInfovis
+    )
+  set_target_properties(${PROJECT_NAME}
+    PROPERTIES
+      INSTALL_RPATH
+        "${CMAKE_INSTALL_RPATH}"
+    )
+  install(
+    TARGETS
+      ${PROJECT_NAME}
+    RUNTIME DESTINATION
+      ${TTK_INSTALL_BINARY_DIR}
+    )
+endif()
diff --git a/standalone/DimensionReduction/main.cpp b/standalone/DimensionReduction/main.cpp

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ bool computeConvexHull_aux(const std::vector<double> &coords,`
`23`	`23`	`}`
`24`	`24`
`25`	`25`	`// Qhull gives us the coordinates of the points in the convex hull. Here we`
`26`		`- // retrive the indices of this points in the list we provided. We will also`
	`26`	`+ // retrieve the indices of these points in the list we provided. We will also`
`27`	`27`	`// compute the barycenter of the points in the convex hull.`
`28`	`28`	`for(const auto &u : qhull.vertexList()) {`
`29`	`29`	`const orgQhull::QhullPoint &qhullPt = u.point();`