Skip to content

Commit 6b1912c

Browse files
committed
reorder arguments
1 parent 48c64e8 commit 6b1912c

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

proteinworkshop/config/dataset/pdb.yaml

+6-5
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ datamodule:
1010

1111
pdb_dataset:
1212
_target_: "proteinworkshop.datasets.pdb_dataset.PDBData"
13-
split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff"
14-
split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
15-
overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
16-
split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]
17-
train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits
1813
fraction: 1.0 # Fraction of dataset to use
1914
molecule_type: "protein" # Type of molecule to select
2015
experiment_types: ["diffraction", "NMR", "EM", "other"] # All experiment types
@@ -28,4 +23,10 @@ datamodule:
2823
remove_ligands: [] # Exclude specific ligands from any available protein-ligand complexes
2924
remove_non_standard_residues: True # Include only proteins containing standard amino acid residues
3025
remove_pdb_unavailable: True # Include only proteins that are available to download
26+
train_val_test: [0.8, 0.1, 0.1] # Cross-validation ratios to use for train, val, and test splits
27+
split_type: "sequence_similarity" # Split sequences by sequence similarity clustering, other options are "random" and "time_cutoff"
28+
split_sequence_similiarity: 0.3 # Clustering at 30% sequence similarity (argument is ignored if split_type!="sequence_similarity")
29+
overwrite_sequence_clusters: False # Previous clusterings at same sequence similarity are reused and not overwritten
30+
split_time_frames: null # Time-cutoffs for train, val and test set (argument is ignored if split_type!="time_cutoff") - e.g., ["2020-01-01", "2021-01-01", "2023-03-01"]
31+
3132

proteinworkshop/datasets/pdb_dataset.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,6 @@
1717
class PDBData:
1818
def __init__(
1919
self,
20-
split_type: Literal["sequence_similarity", "time_cutoff", "random"] = "random",
21-
split_sequence_similiarity: Optional[int] = None,
22-
overwrite_sequence_clusters: Optional[bool] = False,
23-
split_time_frames: Optional[List[str]] = None,
24-
train_val_test: List[float],
2520
fraction: float,
2621
min_length: int,
2722
max_length: int,
@@ -35,6 +30,12 @@ def __init__(
3530
remove_ligands: List[str],
3631
remove_non_standard_residues: bool,
3732
remove_pdb_unavailable: bool,
33+
train_val_test: List[float],
34+
split_type: Literal["sequence_similarity", "time_cutoff", "random"] = "random",
35+
split_sequence_similiarity: Optional[int] = None,
36+
overwrite_sequence_clusters: Optional[bool] = False,
37+
split_time_frames: Optional[List[str]] = None,
38+
3839
):
3940
self.fraction = fraction
4041
self.molecule_type = molecule_type

0 commit comments

Comments
 (0)