reorder arguments

kierandidi · kierandidi · commit 6b1912c839aa · 2024-03-25T09:53:20.000Z
diff --git a/proteinworkshop/config/dataset/pdb.yaml b/proteinworkshop/config/dataset/pdb.yaml
@@ -10,11 +10,6 @@ datamodule:
 
   pdb_dataset:
     _target_: "proteinworkshop.datasets.pdb_dataset.PDBData"
-    split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff"
-    split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
-    overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
-    split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]
-    train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits
     fraction: 1.0 # Fraction of dataset to use
     molecule_type: "protein" # Type of molecule for which to select
     experiment_types: ["diffraction", "NMR", "EM", "other"] # All experiment types
@@ -28,4 +23,10 @@ datamodule:
     remove_ligands: [] # Exclude specific ligands from any available protein-ligand complexes
     remove_non_standard_residues: True # Include only proteins containing standard amino acid residues
     remove_pdb_unavailable: True # Include only proteins that are available to download
+    train_val_test: [0.8, 0.1, 0.1] # Fractions of the dataset to assign to the train, val, and test splits
+    split_type: "sequence_similarity" # Split sequences by sequence-similarity clustering; other options are "random" and "time_cutoff"
+    split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
+    overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
+    split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]
+
 
diff --git a/proteinworkshop/datasets/pdb_dataset.py b/proteinworkshop/datasets/pdb_dataset.py
@@ -17,11 +17,6 @@
 class PDBData:
     def __init__(
         self,
-        split_type: Literal["sequence_similarity", "time_cutoff", "random"] = "random",
-        split_sequence_similiarity: Optional[int] = None,
-        overwrite_sequence_clusters: Optional[bool] = False,
-        split_time_frames: Optional[List[str]] = None,
-        train_val_test: List[float],
         fraction: float,
         min_length: int,
         max_length: int,
@@ -35,6 +30,12 @@ def __init__(
         remove_ligands: List[str],
         remove_non_standard_residues: bool,
         remove_pdb_unavailable: bool,
+        train_val_test: List[float],
+        split_type: Literal["sequence_similarity", "time_cutoff", "random"] = "random",
+        split_sequence_similiarity: Optional[int] = None,
+        overwrite_sequence_clusters: Optional[bool] = False,
+        split_time_frames: Optional[List[str]] = None,
+
     ):
         self.fraction = fraction
         self.molecule_type = molecule_type