entropy renaming on README

dskoda · dskoda · commit 0058706458e9 · 2025-05-07T11:19:29.000-04:00
diff --git a/README.md b/README.md
@@ -61,13 +61,13 @@ To use the QUESTS package to create descriptors and compute entropies, you can u
 ```python
 from ase.io import read
 from quests.descriptor import get_descriptors
-from quests.entropy import perfect_entropy, diversity
+from quests.entropy import entropy, diversity
 
 dset = read("dataset.xyz", index=":")
 x = get_descriptors(dset, k=32, cutoff=5.0)
 h = 0.015
 batch_size = 10000
-H = perfect_entropy(x, h=h, batch_size=batch_size)
+H = entropy(x, h=h, batch_size=batch_size)
 D = diversity(x, h=h, batch_size=batch_size)
 ```
 
@@ -131,14 +131,14 @@ Note that this constraint requires the descriptors to be generated using the tra
 import torch
 from ase.io import read
 from quests.descriptor import get_descriptors
-from quests.gpu.entropy import perfect_entropy
+from quests.gpu.entropy import entropy
 
 dset = read("dataset.xyz", index=":")
 x = get_descriptors(dset, k=32, cutoff=5.0)
 x = torch.tensor(x, device="cuda")
 h = 0.015
 batch_size = 10000
-H = perfect_entropy(x, h=h, batch_size=batch_size)
+H = entropy(x, h=h, batch_size=batch_size)
 ```
 
 #### Computing overlap between datasets
diff --git a/quests/gpu/entropy.py b/quests/gpu/entropy.py
@@ -11,6 +11,33 @@
 
 
 def perfect_entropy(
+    x: np.ndarray,
+    h: Union[float, List[float]] = DEFAULT_BANDWIDTH,
+    batch_size: int = DEFAULT_BATCH,
+    device: str = "cpu"
+):
+    """Deprecated. Please use `entropy`.
+
+    Computes the perfect entropy of a dataset using a batch distance
+        calculation. This is necessary because the full distance matrix
+        often does not fit in the memory for a big dataset. This function
+        can be SLOW, despite the optimization of the computation, as it
+        does not approximate the results.
+
+    Arguments:
+        x (np.ndarray): an (N, d) matrix with the descriptors
+        h (float or list of floats): bandwidth (value / vector) for the Gaussian kernel
+        batch_size (int): maximum batch size to consider in a distance calculation.
+        device (str): passed through unchanged to `entropy` (default "cpu").
+
+    Returns:
+        entropy (float): entropy of the dataset given by `x`.
+            or (np.ndarray): if 'h' is a vector
+    """
+    return entropy(x, h, batch_size, device=device)
+
+
+def entropy(
     x: torch.tensor,
     h: float = DEFAULT_BANDWIDTH,
     batch_size: int = DEFAULT_BATCH,