-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdkit_utils.py
81 lines (66 loc) · 2.37 KB
/
rdkit_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# --- rdkit_utils.py ---
from rdkit import Chem
from rdkit.Chem import AllChem
from typing import Callable, Optional
from config_loader import Config, RDKitStep
import logging
import functools
logger = logging.getLogger(__name__)
class Hydrogenator:
    """Pipeline step that adds explicit hydrogens to a molecule."""

    def __call__(self, mol: Chem.Mol) -> Chem.Mol:
        """Return the result of ``Chem.AddHs`` for ``mol``.

        ``addCoords=True`` is passed so that the added hydrogens are given
        coordinates as well.
        """
        with_hydrogens = Chem.AddHs(mol, addCoords=True)
        return with_hydrogens
class Sanitizer:
    """Pipeline step that runs RDKit sanitization on a molecule."""

    def __call__(self, mol: Chem.Mol) -> Chem.Mol:
        """Sanitize ``mol`` and hand back the same object.

        The return value of ``Chem.SanitizeMol`` is not used; any exception
        it raises propagates to the caller.
        """
        sanitize = Chem.SanitizeMol
        sanitize(mol)
        return mol
class Kekulizer:
    """Pipeline step that kekulizes a molecule."""

    def __call__(self, mol: Chem.Mol) -> Chem.Mol:
        """Kekulize ``mol`` and hand back the same object.

        Any exception raised by ``Chem.Kekulize`` propagates to the caller.
        """
        kekulize = Chem.Kekulize
        kekulize(mol)
        return mol
class Embedder:
    """Pipeline step that generates 3D coordinates using ETKDGv2 parameters."""

    def __call__(self, mol: Chem.Mol) -> Chem.Mol:
        """Embed ``mol`` and return the same object.

        ``AllChem.EmbedMolecule`` reports failure by returning ``-1`` instead
        of raising. That status used to be dropped silently; it is now logged
        so a molecule without a conformer can be traced to this step. The
        molecule is still returned in that case, so existing callers see no
        new exception from this step.
        """
        conformer_id = AllChem.EmbedMolecule(mol, AllChem.ETKDGv2())
        if conformer_id < 0:
            # Resolves to the same logger object as the module-level ``logger``.
            logging.getLogger(__name__).warning(
                "EmbedMolecule failed (returned %s); "
                "molecule has no new 3D conformer",
                conformer_id,
            )
        return mol
class Optimizer:
    """Pipeline step that runs an MMFF geometry optimization."""

    def __call__(self, mol: Chem.Mol) -> Chem.Mol:
        """Optimize ``mol`` with MMFF and return the same object.

        ``AllChem.MMFFOptimizeMolecule`` returns a status code rather than
        raising when the optimization is incomplete: ``0`` means converged,
        ``1`` means more iterations are needed, and ``-1`` means the force
        field could not be set up. Non-zero codes used to be dropped
        silently; they are now logged. The molecule is returned either way,
        so existing callers see no new exception from this step.
        """
        status = AllChem.MMFFOptimizeMolecule(mol)
        if status != 0:
            # Resolves to the same logger object as the module-level ``logger``.
            step_logger = logging.getLogger(__name__)
            if status == -1:
                step_logger.warning(
                    "MMFFOptimizeMolecule could not set up the force field "
                    "(returned -1); geometry was not optimized"
                )
            else:
                step_logger.warning(
                    "MMFFOptimizeMolecule did not converge (returned %s)",
                    status,
                )
        return mol
# Registry used to turn configured step identifiers into callables: each
# RDKitStep member maps to one shared instance of the class implementing it.
RDKIT_STEPS: dict[RDKitStep, Callable[[Chem.Mol], Chem.Mol]] = {
    RDKitStep.HYDROGENATE: Hydrogenator(),
    RDKitStep.SANITIZE: Sanitizer(),
    RDKitStep.KEKULIZE: Kekulizer(),
    RDKitStep.EMBED: Embedder(),
    RDKitStep.OPTIMIZE: Optimizer(),
}
def compose(*functions):
    """Chain ``functions`` into one callable applied left to right.

    The returned callable passes its argument to the first function, feeds
    each result into the next function, and returns the last result. When no
    functions are given, the argument is returned unchanged.
    """

    def composed(mol):
        # Fold the value through every step in the order the steps were given.
        return functools.reduce(lambda value, step: step(value), functions, mol)

    return composed
class RDKitProcessingError(Exception):
    """Base error for failures raised while processing a molecule."""
class RDKitKekulizeError(RDKitProcessingError):
    """Processing error raised specifically for kekulization failures."""
class RDKitMoleculeProcessor:
    """Run the RDKit steps listed in the configuration over a molecule.

    The step list is read from ``config.rdkit_processing.steps`` once, at
    construction time, and composed into a single pipeline callable.
    """

    def __init__(self, config: Config):
        """Keep the ``rdkit_processing`` config section and build the pipeline.

        Raises:
            KeyError: If a configured step has no entry in ``RDKIT_STEPS``.
        """
        self.config = config.rdkit_processing
        self.pipeline = self._create_pipeline()

    def _create_pipeline(self) -> Callable[[Chem.Mol], Optional[Chem.Mol]]:
        """Compose the configured steps in the order they are listed."""
        steps = [RDKIT_STEPS[step] for step in self.config.steps]
        return compose(*steps)

    def process(self, mol: Optional[Chem.Mol]) -> Optional[Chem.Mol]:
        """Run the pipeline on ``mol``.

        Args:
            mol: Molecule to process, or ``None``.

        Returns:
            The pipeline result, or ``None`` when ``mol`` is ``None``.

        Raises:
            RDKitKekulizeError: If a step raises
                ``Chem.rdchem.KekulizeException``.
            RDKitProcessingError: If a step raises any other exception.
        """
        if mol is None:
            return None
        try:
            return self.pipeline(mol)
        except Chem.rdchem.KekulizeException as e:
            # ``from e`` records the RDKit exception as the explicit cause so
            # the original traceback stays attached to the wrapped error.
            raise RDKitKekulizeError(f"Kekulization error: {e}") from e
        except Exception as e:
            # Deliberately broad: every step failure is reported through the
            # module's own exception hierarchy.
            raise RDKitProcessingError(f"Error processing molecule: {e}") from e
def create_configurable_rdkit_processor(config: Config) -> RDKitMoleculeProcessor:
    """Build an ``RDKitMoleculeProcessor`` from ``config``."""
    processor = RDKitMoleculeProcessor(config)
    return processor
def process_molecule(mol_path: str, config: Config) -> Optional[Chem.Mol]:
    """Read a MOL file and run it through the configured pipeline.

    Args:
        mol_path: Path handed to ``Chem.MolFromMolFile``.
        config: Configuration used to build the processor.

    Returns:
        Whatever the processor's ``process`` method returns for the loaded
        molecule.
    """
    # The processor is built first, so any error raised while constructing it
    # occurs before the file is read.
    pipeline_runner = create_configurable_rdkit_processor(config)
    return pipeline_runner.process(Chem.MolFromMolFile(mol_path))