lanl
diff --git a/‎CITATION.cff
Lines changed: 2 additions & 2 deletions b/‎CITATION.cff
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 35 additions & 34 deletions b/‎README.md
Lines changed: 35 additions & 34 deletions
diff --git a/‎TELF/applications/Bunny/auto_bunny.py
Lines changed: 20 additions & 2 deletions b/‎TELF/applications/Bunny/auto_bunny.py
Lines changed: 20 additions & 2 deletions
diff --git a/‎TELF/applications/Bunny/bunny.py
Lines changed: 28 additions & 12 deletions b/‎TELF/applications/Bunny/bunny.py
Lines changed: 28 additions & 12 deletions
@@ -1,4 +1,4 @@
-version: 0.0.38
+version: 0.0.39
 message: "If you use this software, please cite it as below."
 authors:
   - family-names: Eren
@@ -20,7 +20,7 @@ authors:
   - family-names: Alexandrov
     given-names: Boian
 title: "Tensor Extraction of Latent Features (T-ELF)"
-version: 0.0.38
+version: 0.0.39
 url: https://github.com/lanl/T-ELF
 doi: 10.5281/zenodo.10257897
 date-released: 2023-12-04
@@ -22,7 +22,7 @@ Central to T-ELF's core capabilities lie non-negative matrix and tensor factoriz
 
 <div align="center" style="font-size: 50px">
 <p align="center">
-  <img src="docs/capabilities.png">
+  <img src="docs/smart_tensors_image.png">
 </p>
 
 </div>
@@ -86,47 +86,48 @@ python post_install.py # use the following, for example, for GPU system: <python
 
 ### TELF.factorization
 
-|         **Method**        |      **Dense**     |     **Sparse**     |       **GPU**      |       **CPU**      | **Multiprocessing** |       **HPC**      |                          **Description**                         | **Example** | **Release Status** |
-|:-------------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:-------------------:|:------------------:|:----------------------------------------------------------------:|:-----------:|:------------------:|
-|            NMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |              NMF with Automatic Model Determination                              |   [Link](examples/NMFk/NMFk.ipynb)  | :white_check_mark: |
-|        Custom NMFk        | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                Use Custom NMF Functions with NMFk                                |   [Link](examples/NMFk/Custom_NMF_NMFk.ipynb)  | :white_check_mark: |
-|          TriNMFk          | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: |                    | NMF with Automatic Model Determination for Clusters and Patterns                 |   [Link](examples/TriNMFk/TriNMFk.ipynb)  | :white_check_mark: |
-|          RESCALk          | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |             RESCAL with Automatic Model Determination                            |   [Link](examples/RESCALk/RESCALk.ipynb)  | :white_check_mark: |
-|           RNMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         Recommender NMFk                                         |   [Link](examples/RNMFk/RNMFk.ipynb)  |       :white_check_mark:       |
-|           SymNMFk         | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         NMFk with Symmetric Clustering                           |   [Link](examples/SymNMFk/SymNMFk.ipynb)          |       :white_check_mark:       |
-|           WNMFk           | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         NMFk with weighting - used for recommendation system     |   [Link](examples/WNMFk/WNMFk.ipynb)          |       :white_check_mark:       |
-|           HNMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         Hierarchical NMFk                                        |   [Link](examples/HNMFk/HNMFk.ipynb)       |       :white_check_mark:       |
-|           BNMFk           | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                           Boolean NMFk                                           |   [Link](examples/BNMFk/BNMFk.ipynb) |       :white_check_mark:       |
-|           LMF             | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |                     |                    |                           Logistic Matrix Factorization                          |   [Link](examples/LMF/LMF.ipynb) |       :white_check_mark:       |
-|         SPLIT NMFk        |                    |                    |                    |                    |                     |                    |        Joint NMFk factorization of multiple data via SPLIT                       |             |       :soon:       |
-| SPLIT Transfer Classifier |                    |                    |                    |                    |                     |                    |      Supervised transfer learning method via SPLIT and NMFk                      |             |       :soon:       |
+|         **Method**        |      **Dense**     |     **Sparse**     |       **GPU**      |       **CPU**      | **Multiprocessing** |       **HPC**      |                          **Description**                         | **Example** |
+|:-------------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:-------------------:|:------------------:|:----------------------------------------------------------------:|:-----------:|
+|            NMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |              NMF with Automatic Model Determination                              |   [Link](examples/NMFk/NMFk.ipynb)  |
+|        Custom NMFk        | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                Use Custom NMF Functions with NMFk                                |   [Link](examples/NMFk/Custom_NMF_NMFk.ipynb)  |
+|          TriNMFk          | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: |                    | NMF with Automatic Model Determination for Clusters and Patterns                 |   [Link](examples/TriNMFk/TriNMFk.ipynb)  |
+|          RESCALk          | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |             RESCAL with Automatic Model Determination                            |   [Link](examples/RESCALk/RESCALk.ipynb)  |
+|           RNMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         Recommender NMFk                                         |   [Link](examples/RNMFk/RNMFk.ipynb)  |
+|           SymNMFk         | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         NMFk with Symmetric Clustering                           |   [Link](examples/SymNMFk/SymNMFk.ipynb)          |
+|           WNMFk           | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         NMFk with weighting - used for recommendation system     |   [Link](examples/WNMFk/WNMFk.ipynb)          |
+|           HNMFk           | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                         Hierarchical NMFk                                        |   [Link](examples/HNMFk/HNMFk.ipynb)       |
+|           BNMFk           | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |  :heavy_check_mark: | :heavy_check_mark: |                           Boolean NMFk                                           |   [Link](examples/BNMFk/BNMFk.ipynb) |
+|           LMF             | :heavy_check_mark: |                    | :heavy_check_mark: | :heavy_check_mark: |                     |                    |                           Logistic Matrix Factorization                          |   [Link](examples/LMF/LMF.ipynb) |
+|         SPLIT             | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |                     |        Joint NMFk factorization of multiple data via SPLIT                       | [Link](examples/SPLIT/00-SPLIT.ipynb) |
+| SPLITTransfer | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark:  |                    |      Supervised transfer learning method via SPLIT and NMFk                      | [Link](examples/SPLITTransfer/00-SPLITTransfer.ipynb) |
 
 ### TELF.pre_processing
 
-| **Method** | **Multiprocessing** |       **HPC**       |                           **Description**                          | **Example** | **Release Status** |
-|:----------:|:-------------------:|:-------------------:|:------------------------------------------------------------------:|:-----------:|:------------------:|
-|   Vulture  | :heavy_check_mark:  | :heavy_check_mark:  |         Advanced text processing tool for cleaning and NLP         |  [Link](examples/Vulture)  | :white_check_mark: |
-|   Beaver   | :heavy_check_mark:  | :heavy_check_mark:  |        Fast matrix and tensor building tool for text mining        |  [Link](examples/Beaver)  | :white_check_mark: |
-|  iPenguin  | :heavy_check_mark:  |                     |         Online information retrieval tool for Scopus, SemanticScholar, and OSTI         | [Link](examples/iPenguin) |       :white_check_mark:       |
-|    Orca    | :heavy_check_mark:  |                     | Duplicate author detector for text mining and information retrieval |   [Link](examples/Orca)          |       :white_check_mark:       |
+| **Method** | **Multiprocessing** |       **HPC**       |                           **Description**                          | **Example** |
+|:----------:|:-------------------:|:-------------------:|:------------------------------------------------------------------:|:-----------:|
+|   Vulture  | :heavy_check_mark:  | :heavy_check_mark:  |         Advanced text processing tool for cleaning and NLP         |  [Link](examples/Vulture)  |
+|   Beaver   | :heavy_check_mark:  | :heavy_check_mark:  |        Fast matrix and tensor building tool for text mining        |  [Link](examples/Beaver)  |
+|  iPenguin  | :heavy_check_mark:  |                     |         Online information retrieval tool for Scopus, SemanticScholar, and OSTI         | [Link](examples/iPenguin) |
+|    Orca    | :heavy_check_mark:  |                     | Duplicate author detector for text mining and information retrieval |   [Link](examples/Orca)          |
 
 ### TELF.post_processing
 
-| **Method** |                       **Description**                      | **Example** | **Release Status** |
-|:----------:|:----------------------------------------------------------:|:-----------:|:------------------:|
-|    Wolf    |              Graph centrality and ranking tool             |      [Link](examples/Wolf)       |       :white_check_mark:       |
-|   Peacock  | Data visualization and generation of actionable statistics |  [Link](examples/Peacock) |       :white_check_mark:       |
-|    SeaLion    |              Generic report generation tool            | [Link](examples/SeaLion) |       :white_check_mark:       |
-|    Fox    |              Report generation tool for text data            |             |       :soon:       |
+| **Method** |                       **Description**                      | **Example** |
+|:----------:|:----------------------------------------------------------:|:-----------:|
+|    Wolf    |              Graph centrality and ranking tool             |      [Link](examples/Wolf)       |
+|   Peacock  | Data visualization and generation of actionable statistics |  [Link](examples/Peacock) |
+|    SeaLion    |              Generic report generation tool            | [Link](examples/SeaLion) |
+|    Fox    |              Report generation tool for text data from NMFk using OpenAI            | [Link](examples/Fox)  |
+|    ArcticFox    |        Report generation tool for text data from HNMFk using local LLMs            | [Link](examples/ArcticFox)  |
 
 ### TELF.applications
 
-| **Method** |                            **Description**                           | **Example** | **Release Status** |
-|:----------:|:--------------------------------------------------------------------:|:-----------:|:------------------:|
-|   Cheetah  |                        Fast search by keywords and phrases                       |    [Link](examples/Cheetah)         |       :white_check_mark:      |
-|    Bunny   | Dataset generation tool for documents and their citations/references |  [Link](examples/Bunny)  |       :white_check_mark:       |
-|  Penguin   |         Text storage tool                                    | [Link](examples/Penguin) |       :white_check_mark:       |
-|    Termite   | Knowladge graph building tool |             |       :soon:       |
+| **Method** |                            **Description**                           | **Example** |
+|:----------:|:--------------------------------------------------------------------:|:-----------:|
+|   Cheetah  |                        Fast search by keywords and phrases                       |    [Link](examples/Cheetah)         |
+|    Bunny   | Dataset generation tool for documents and their citations/references |  [Link](examples/Bunny)  |
+|  Penguin   |         Text storage tool                                    | [Link](examples/Penguin) |
+|    Termite   | Knowledge graph building tool | :soon: |
 
 
 ## How to Cite T-ELF?
@@ -150,7 +151,7 @@ Eren, M., Solovyev, N., Barron, R., Bhattarai, M., Truong, D., Boureima, I., Ska
 ```
 
 ## Authors
-- [Maksim Ekin Eren](mailto:[email protected]): Advanced Research in Cyber Systems, Los Alamos National Laboratory ([Website](https://www.maksimeren.com/))
+- [Maksim Ekin Eren](mailto:[email protected]): Information Systems and Modeling Group, Los Alamos National Laboratory ([Website](https://www.maksimeren.com/))
 - [Nicholas Solovyev](mailto:[email protected]): Theoretical Division, Los Alamos National Laboratory
 - [Ryan Barron](mailto:[email protected]): Theoretical Division, Los Alamos National Laboratory
 - [Manish Bhattarai](mailto:[email protected]): Theoretical Division, Los Alamos National Laboratory
 
@@ -5,7 +5,7 @@
 import pandas as pd
 from dataclasses import dataclass, field
 
-from .bunny import Bunny
+from .bunny import Bunny, BunnyFilter
 from ..Cheetah import Cheetah
 from ...pre_processing.iPenguin.Scopus import Scopus
 from ...pre_processing.iPenguin.SemanticScholar import SemanticScholar
@@ -42,7 +42,16 @@ def __init__(self, core, s2_key=None, scopus_keys=None, output_dir=None, cache_d
         self.verbose = verbose
 
 
-    def run(self, steps, *, s2_key=None, scopus_keys=None, cheetah_index=None, max_papers=250000, checkpoint=True):
+    def run(self, 
+            steps, 
+            *, 
+            s2_key=None, 
+            scopus_keys=None, 
+            cheetah_index=None, 
+            max_papers=250000, 
+            checkpoint=True,
+            filter_type:str=None, # must be a key from Bunny.FILTERS
+            filter_value=None):
 
         # validate input
         if not isinstance(steps, (list, tuple)):
@@ -87,6 +96,15 @@ def run(self, steps, *, s2_key=None, scopus_keys=None, cheetah_index=None, max_p
                 return df
 
             df = self.__bunny_hop(df, modes, step_max_papers, hop_priority)
+            if filter_value and filter_type:
+                bunny = Bunny()
+                query = BunnyFilter(filter_type, filter_value)
+                subset_df = bunny.apply_filter(df, query, filter_in_core=True, do_author_match=False).reset_index(drop=True)
+                if len(subset_df) < 1:
+                    print("No papers for filter_value, using original df without filter.")
+                else:
+                    df = subset_df
+
             df = self.__vulture_clean(df, vulture_settings)
             df, cheetah_table = self.__cheetah_filter(df, cheetah_settings)
 
 
@@ -161,10 +161,17 @@ def __init__(self, s2_key=None, scopus_keys=None, penguin_settings=None, output_
         self.penguin_settings = penguin_settings
         self.enabled = self.s2_key is not None
 
-        # create a dictionary of supported filters and their callable load functions
-        filters = {f: f"_filter_{re.sub('-', '', f.lower())}" for f in Bunny.FILTERS}
-        self.filter_funcs = {k: getattr(self, v) for k,v in filters.items() if callable(getattr(self, v))}
-        
+        # Explicitly map supported filters to methods
+        self.filter_funcs = {
+            'AFFILCOUNTRY': lambda df, f, auth_map=None: self._filter_affil_generic(df, column_name='country', filter_value=f, auth_map=auth_map),
+            'AFFILORG': self._filter_affilorg,
+            'AF-ID': self._filter_afid,
+            'PUBYEAR': self._filter_pubyear,
+            'AU-ID': self._filter_auid,
+            'KEY': self._filter_key,
+            'DOI': lambda df, f, auth_map=None: set(df[df['doi'].str.lower() == f.lower()].index),
+        }
+
 
     def __init_lookup(self, series, priority, sep):
         lookup = [y for x in series for y in x.split(sep)]
@@ -619,31 +626,40 @@ def _filter_auid(self, df, f, auth_map=None):
         return pids
 
 
-    def _filter_affilcountry(self, df, f, auth_map): 
+    def _filter_affil_generic(self, df, column_name, filter_value, auth_map): 
         if 'affiliations' not in df:
             raise ValueError('"affiliations" not found in df')
 
-        country = f.lower()
+        filter_value = filter_value.lower()
         pids, aids = set(), set()
         aff_df = df.dropna(subset=['affiliations'])
-        affiliations = {k:v for k,v in zip(aff_df.index.to_list(), aff_df.affiliations.to_list())}
+        affiliations = {k: v for k, v in zip(aff_df.index.to_list(), aff_df.affiliations.to_list())}
+
         for idx, affiliation in affiliations.items():
             if isinstance(affiliation, str):
                 affiliation = ast.literal_eval(affiliation)
             for aff_id, aff in affiliation.items():
-                if aff['country'].lower() == country:
-                    pids.add(idx)
-                    aids |= set(aff['authors'])
-                    break
-        
+                try:
+                    if aff[column_name].lower() == filter_value:
+                        pids.add(idx)
+                        aids |= set(aff['authors'])
+                        break
+                except KeyError:
+                    print(f"Warning: '{column_name}' not found in affiliation {aff_id} for index {idx}.")
+                except Exception as e:
+                    print(f"Warning: error processing affiliation {aff_id} at index {idx} — {e}")
+
         if auth_map is not None:
             s2_aids = {auth_map[aid] for aid in aids if aid in auth_map}
             for idx, scopus_authors, s2_authors in zip(df.index.to_list(), df.author_ids.to_list(), df.s2_author_ids.to_list()):
                 if isinstance(scopus_authors, str) and set(scopus_authors.split(';')) & aids:
                     pids.add(idx)
                 if isinstance(s2_authors, str) and set(s2_authors.split(';')) & s2_aids:
                     pids.add(idx)    
+
         return pids
+
+
 
 
     def _filter_affilorg(self, df, f, auth_map):