Commit aa770a5 (1 parent: 9568e18)

first pass of pre-commit hooks
a few small updates;

216 files changed: +3900 -2560 lines

.github/ISSUE_TEMPLATE/feature_request.yml (-1)

```diff
@@ -39,4 +39,3 @@ body:
     attributes:
       label: Additional notes
       description: Any additional context, screenshots, etc. that may help with the discussion and implementation.
-
```

.gitignore (+12)

```diff
@@ -7,6 +7,18 @@ logs
 experimental
 lightning_logs
 
+TEMP/
+TESTFILE.py
+data_explore.py
+examples_folder_log.txt
+faenet_test.py
+lips_splits.py
+matsciml/datasets/materials_project/devset-full/
+msl-ptl2-venv/
+pyg-venv/
+replace_substring.py
+run_examples.sh
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
```

.pre-commit-config.yaml (+1)

```diff
@@ -42,6 +42,7 @@ repos:
     hooks:
       - id: isort
         name: isort (python)
+        args: ["--profile=black"]
  - repo: https://github.com/psf/black
    rev: 23.7.0
    hooks:
```
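Passing `--profile=black` makes isort's import sorting agree with Black's formatting, so the two hooks stop rewriting each other's output. For context, a minimal sketch of how this pair of hooks fits together in a `.pre-commit-config.yaml`; the Black repo URL and rev come from the diff above, while the isort repo URL and rev shown here are placeholder assumptions, not values from this commit:

```yaml
repos:
  - repo: https://github.com/PyCQA/isort  # assumed source repo for the isort hook
    rev: 5.12.0  # placeholder rev; pin to whatever the project actually uses
    hooks:
      - id: isort
        name: isort (python)
        args: ["--profile=black"]  # align import style with Black
  - repo: https://github.com/psf/black
    rev: 23.7.0
    hooks:
      - id: black
```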

CONTRIBUTING.md (+5-5)

```diff
@@ -75,7 +75,7 @@ are not needed as explicit arguments.
 If variables/features are required by the model, one can override the `read_batch` method. See the [MPNN](https://github.com/IntelLabs/matsciml/blob/main/matsciml/models/dgl/mpnn.py)
 wrapper to see how this pattern can be used to check for data within a batch.
 
-Aside from implementing the `_forward` method of the model itself, the constituent building blocks should be broken up into their own files, respective to what their functions are. For example, layer based classes and utilities should be placed into a `layers.py` file, and other helpful functions can be placed in a `helper.py` or `utils.py` file.
+Aside from implementing the `_forward` method of the model itself, the constituent building blocks should be broken up into their own files, respective to what their functions are. For example, layer based classes and utilities should be placed into a `layers.py` file, and other helpful functions can be placed in a `helper.py` or `utils.py` file.
 
 Completed models can be added to the list of imports in `./matsciml/models/<framework>/__init__.py`, where `<framework>` can be `dgl` or `pyg`.
 
@@ -108,7 +108,7 @@ class AmazingModel(AbstractPyGModel):
 ### DGL models
 
 DGL does not provide a class to inherit from for the message passing step, and instead, relies
-on users to define user-defined functions (`udf`), and extensive use of graph scopes.
+on users to define user-defined functions (`udf`), and extensive use of graph scopes.
 
 We recommend reviewing the [MPNN](https://github.com/IntelLabs/matsciml/blob/main/matsciml/models/dgl/mpnn.py) wrapper
 to see a simplified case, and the [MegNet](https://github.com/IntelLabs/matsciml/tree/main/matsciml/models/dgl/megnet) implementation
@@ -132,7 +132,7 @@ for this type of model.
 - Provide proper documentation on how to access, use, and understand the data.
 - Make sure to include data preprocessing scripts if applicable.
 
-Adding a dataset usually involves interacting with an external API to query and download data. If this is the case, a separate `{dataset}_api.py` and `dataset.py` file can be used to separate out the functionalities. In the API file, a default query can be used to save data to lmdb files, and do any initial preprocessing necessary to get the data into a usable format. Keeping track of material ID's and the status of queries.
+Adding a dataset usually involves interacting with an external API to query and download data. If this is the case, a separate `{dataset}_api.py` and `dataset.py` file can be used to separate out the functionalities. In the API file, a default query can be used to save data to lmdb files, and do any initial preprocessing necessary to get the data into a usable format. Keeping track of material ID's and the status of queries.
 
 The main dataset file should take care of all of the loading, processing and collating needed to prepare data for the training pipeline. This typically involves adding the necessary key-value pairs which are expected, such as `atomic_numbers`, `pc_features`, and `targets`.
 
@@ -144,7 +144,7 @@ The existing dataset's should be used as a template, and can be expanded upon de
 - Follow our testing framework and naming conventions.
 - Verify that all tests pass successfully before making a pull request.
 
-Tests for each new model and datasets should be added to their respective tests folder, and follow the conventions of the existing tests. Task specific tests may be added to the model folder itself. All relevant tests must pass in order for a pull request to be accepted and merged.
+Tests for each new model and datasets should be added to their respective tests folder, and follow the conventions of the existing tests. Task specific tests may be added to the model folder itself. All relevant tests must pass in order for a pull request to be accepted and merged.
 
 Model tests may be added [here](https://github.com/IntelLabs/matsciml/tree/main/matsciml/models/dgl/tests), and dataset tests may be added to their respective dataset folders when created.
 
@@ -164,4 +164,4 @@ __If it is your first pull request, please ensure you add your name to the [cont
 
 We appreciate your dedication to making our project better and look forward to your contributions! If you have any questions or need assistance, feel free to reach out through the issue tracker or discussions section.
 
-Thank you for being a part of our open-source community!
+Thank you for being a part of our open-source community!
```
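The `read_batch`/`_forward` split that CONTRIBUTING.md describes can be illustrated with a small, self-contained sketch. `AbstractModel`, `AmazingModel`, and the batch keys below are illustrative stand-ins that mimic the pattern, not matsciml's actual classes or API:

```python
from abc import ABC, abstractmethod


class AbstractModel(ABC):
    """Illustrative stand-in for matsciml's abstract model base classes."""

    def read_batch(self, batch: dict) -> dict:
        # Default behaviour: pull out only the inputs every model needs,
        # so task-specific keys are not needed as explicit arguments.
        return {"features": batch["features"]}

    @abstractmethod
    def _forward(self, features):
        """Subclasses implement the actual computation here."""

    def forward(self, batch: dict):
        # The framework calls forward(); model authors only write _forward().
        return self._forward(**self.read_batch(batch))


class AmazingModel(AbstractModel):
    def read_batch(self, batch: dict) -> dict:
        # Override read_batch to check for extra data within a batch.
        data = super().read_batch(batch)
        data["scale"] = batch.get("scale", 1.0)
        return data

    def _forward(self, features, scale=1.0):
        return scale * sum(features)
```

The point of the pattern is that a model never has to know how a batch is assembled; it only declares, via `read_batch`, which keys it consumes.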

LICENSE.md (+1-1)

```diff
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
```

README.md (+3-3)

````diff
@@ -97,9 +97,9 @@ For more advanced use cases:
 Checkout materials generation with CDVAE
 </summary>
 
-CDVAE [7] is a latent diffusion model that trains a VAE on the reconstruction
+CDVAE [7] is a latent diffusion model that trains a VAE on the reconstruction
 objective, adds Gaussian noise to the latent variable, and learns to predict
-the noise. The noised and generated features inlcude lattice parameters,
+the noise. The noised and generated features inlcude lattice parameters,
 atoms composition, and atom coordinates.
 The generation process is based on the annealed Langevin dynamics.
 
@@ -140,7 +140,7 @@ Multiple tasks trained using the same dataset
 python examples/tasks/multitask/single_data_multitask_example.py
 ```
 
-Utilizes Materials Project data to train property regression and material classification jointly
+Utilizes Materials Project data to train property regression and material classification jointly
 </details>
 
 <details>
````

Security.md (+1-2)

```diff
@@ -1,6 +1,5 @@
 # Security Policy
-Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation.
+Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation.
 
 ## Reporting a Vulnerability
 Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
-
```

docker/Dockerfile (+3-3)

```diff
@@ -7,7 +7,7 @@ FROM nvidia/cuda:$CUDA_VERSION
 
 ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
 # Avoids some interactive prompts during apt-get install
-ARG DEBIAN_FRONTEND=noninteractive
+ARG DEBIAN_FRONTEND=noninteractive
 
 # clean up and refresh apt-get index
 RUN apt-get update && \
@@ -33,7 +33,7 @@ RUN apt-get update --fix-missing && \
     sudo \
     software-properties-common \
     python3.9 \
-    python3-pip \
+    python3-pip \
     virtualenv && \
     apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
 
@@ -64,4 +64,4 @@ RUN pip install matminer
 RUN pip install p_tqdm
 RUN pip install -U pytorch-lightning==1.8.6
 RUN pip install -U torchmetrics==0.11.4
-RUN pip install -U pytest
+RUN pip install -U pytest
```

examples/datasets/carolina_db/single_task_devset.py (+8-5)

```diff
@@ -1,11 +1,12 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
+from matsciml.datasets.transforms import PointCloudToGraphTransform
 from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import PLEGNNBackbone
 from matsciml.models.base import ScalarRegressionTask
-from matsciml.datasets.transforms import PointCloudToGraphTransform
-
 
 # configure a simple model for testing
 model_args = {
@@ -57,9 +58,11 @@
     dset_kwargs={
         "transforms": [
             PointCloudToGraphTransform(
-                "dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
-            )
-        ]
+                "dgl",
+                cutoff_dist=20.0,
+                node_keys=["pos", "atomic_numbers"],
+            ),
+        ],
     },
 )
 
```

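This and the following example-script diffs all prepend `from __future__ import annotations` (PEP 563, postponed evaluation of annotations). Under this future import, annotations are stored as strings and never evaluated at runtime, so newer annotation syntax and not-yet-defined names stay harmless on older interpreters. A small illustrative sketch, where `Connection` is deliberately left undefined:

```python
from __future__ import annotations


def connect(host: str, retries: int | None = None) -> Connection:
    """'Connection' is never evaluated, so it need not be defined anywhere."""
    return (host, retries)


# With postponed evaluation, annotations are kept as plain strings
# instead of being evaluated at function-definition time:
assert connect.__annotations__["return"] == "Connection"
assert connect.__annotations__["retries"] == "int | None"
```

Without the future import, the `int | None` syntax and the undefined `Connection` name would both raise on Python versions older than 3.10.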
examples/datasets/materials_project/single_task_base.py (+3-1)

```diff
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
-from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.datasets.transforms import PointCloudToGraphTransform
+from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import GraphConvModel
 from matsciml.models.base import ScalarRegressionTask
 
```

examples/datasets/materials_project/single_task_devset.py (+3-2)

```diff
@@ -1,10 +1,11 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 
+from matsciml.datasets.transforms import PointCloudToGraphTransform
 from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import GraphConvModel
 from matsciml.models.base import ScalarRegressionTask
-from matsciml.datasets.transforms import PointCloudToGraphTransform
-
 
 # configure a simple model for testing
 model = GraphConvModel(100, 1, encoder_only=True)
```

examples/datasets/materials_project/single_task_egnn.py (+9-5)

```diff
@@ -1,10 +1,12 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
-from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.datasets.transforms import PointCloudToGraphTransform
+from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import PLEGNNBackbone
-from matsciml.models.base import ScalarRegressionTask, BinaryClassificationTask
+from matsciml.models.base import ScalarRegressionTask
 
 pl.seed_everything(21616)
 
@@ -56,9 +58,11 @@
     dset_kwargs={
         "transforms": [
             PointCloudToGraphTransform(
-                "dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
-            )
-        ]
+                "dgl",
+                cutoff_dist=20.0,
+                node_keys=["pos", "atomic_numbers"],
+            ),
+        ],
     },
     val_split=0.2,
     batch_size=16,
```

examples/datasets/materials_project/single_task_gala.py (+2-1)

```diff
@@ -1,11 +1,12 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
 from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import GalaPotential
 from matsciml.models.base import ScalarRegressionTask
 
-
 model_args = {
     "D_in": 100,
     "hidden_dim": 128,
```

examples/datasets/materials_project/single_task_symmetry.py (+9-5)

```diff
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
-from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.datasets.transforms import PointCloudToGraphTransform
+from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import GraphConvModel
 from matsciml.models.base import CrystalSymmetryClassificationTask
 
@@ -25,13 +27,15 @@
 # the base set is required because the devset does not contain symmetry labels
 dm = MatSciMLDataModule(
     dataset="MaterialsProjectDataset",
-    train_path='./mp-project/base/train',
+    train_path="./mp-project/base/train",
     dset_kwargs={
         "transforms": [
             PointCloudToGraphTransform(
-                "dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
-            )
-        ]
+                "dgl",
+                cutoff_dist=20.0,
+                node_keys=["pos", "atomic_numbers"],
+            ),
+        ],
     },
     val_split=0.2,
     batch_size=16,
```

examples/datasets/nomad/single_task_devset.py (+9-8)

```diff
@@ -1,13 +1,12 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
+from matsciml.datasets.transforms import PointCloudToGraphTransform
 from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import PLEGNNBackbone
-from matsciml.models.base import (
-    ScalarRegressionTask,
-)
-from matsciml.datasets.transforms import PointCloudToGraphTransform
-
+from matsciml.models.base import ScalarRegressionTask
 
 # configure a simple model for testing
 model_args = {
@@ -59,9 +58,11 @@
     dset_kwargs={
         "transforms": [
             PointCloudToGraphTransform(
-                "dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
-            )
-        ]
+                "dgl",
+                cutoff_dist=20.0,
+                node_keys=["pos", "atomic_numbers"],
+            ),
+        ],
     },
 )
 
```

examples/datasets/oqmd/single_task_devset.py (+8-5)

```diff
@@ -1,11 +1,12 @@
+from __future__ import annotations
+
 import pytorch_lightning as pl
 from torch.nn import LayerNorm, SiLU
 
+from matsciml.datasets.transforms import PointCloudToGraphTransform
 from matsciml.lightning.data_utils import MatSciMLDataModule
 from matsciml.models import PLEGNNBackbone
 from matsciml.models.base import ScalarRegressionTask
-from matsciml.datasets.transforms import PointCloudToGraphTransform
-
 
 # configure a simple model for testing
 model_args = {
@@ -56,9 +57,11 @@
     dset_kwargs={
         "transforms": [
             PointCloudToGraphTransform(
-                "dgl", cutoff_dist=20.0, node_keys=["pos", "atomic_numbers"]
-            )
-        ]
+                "dgl",
+                cutoff_dist=20.0,
+                node_keys=["pos", "atomic_numbers"],
+            ),
+        ],
     },
     num_workers=0,
 )
```
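Across all of these example scripts, the same `PointCloudToGraphTransform(...)` call is reflowed into Black's one-argument-per-line style with trailing commas, and transforms are passed to the data module as an ordered list. The mechanics of that pattern are easy to reproduce in a standalone sketch; `ToyDataModule` and `scale_positions` below are hypothetical names that mimic (not reproduce) the `dset_kwargs={"transforms": [...]}` pattern above:

```python
from dataclasses import dataclass, field
from typing import Callable


@dataclass
class ToyDataModule:
    """Hypothetical stand-in for a data module that accepts transforms."""

    transforms: list = field(default_factory=list)

    def process(self, sample: dict) -> dict:
        # Apply each transform in the order it was listed.
        for transform in self.transforms:
            sample = transform(sample)
        return sample


def scale_positions(factor: float) -> Callable:
    # Hypothetical transform: rescale the "pos" entry of a sample.
    def _apply(sample: dict) -> dict:
        sample["pos"] = [factor * x for x in sample["pos"]]
        return sample

    return _apply


dm = ToyDataModule(
    transforms=[
        scale_positions(2.0),  # trailing comma keeps Black's vertical layout
    ],
)
```

The trailing commas are not cosmetic only: Black's "magic trailing comma" keeps such calls exploded one-per-line, which is why the reformatted diffs above all end each argument with a comma.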
