@@ -34,48 +34,121 @@ def get_heavy_atom_count(formula: str) -> int:
34
34
return heavy_atom_count
35
35
36
36
37
- def generate_structures_SURGE (molecular_formula : str ) -> Union [list , str ]:
38
- """Generate chemical structures using the surge tool based on the canonical.
39
-
40
- generation path method.
41
-
37
def get_surge_count(molecular_formula: str) -> int:
    """Get the number of structures generated by the surge tool.

    Runs ``surge -u <molecular_formula>`` and extracts the final count from
    the summary line surge prints, which has the form
    ``>Z generated X -> Y -> Z in N.NN sec``.

    Args:
        molecular_formula (str): Molecular formula provided by the user.

    Returns:
        int: The number of structures generated by the surge tool.

    Raises:
        Exception: If the formula has more than 10 heavy atoms, if the surge
            process cannot be started, if it produces no output at all, or
            if no summary line can be found in its output.
    """
    # Guard clause: refuse oversized formulas before spawning a process.
    if get_heavy_atom_count(molecular_formula) > 10:
        raise Exception(
            f"Molecular formula {molecular_formula} has more than 10 heavy atoms"
        )

    # Keep the try body minimal: only process start-up / communication can
    # fail here, and the original cause is chained for debugging.
    try:
        process = Popen(
            ["surge", "-u", molecular_formula], stdout=PIPE, stderr=PIPE
        )
        stdout, stderr = process.communicate()
    except (OSError, ValueError) as e:
        raise Exception(f"Error running surge: {e}") from e

    # errors="replace" so that stray bytes in the tool output cannot turn a
    # surge failure into an unrelated UnicodeDecodeError.
    stdout_text = stdout.decode("utf-8", errors="replace").strip()
    stderr_text = stderr.decode("utf-8", errors="replace").strip()

    # Surge writes its summary to stderr, but inspect both streams so the
    # count is still found when stdout happens to be non-empty as well.
    output = "\n".join(text for text in (stdout_text, stderr_text) if text)
    if not output:
        raise Exception(
            "Error running surge: No output from surge command. "
            f"Return code: {process.returncode}"
        )

    # Summary line pattern: ">Z generated X -> Y -> Z in N.NN sec";
    # the last number before "in" is the structure count.
    match = re.search(
        r">Z generated \d+ -> \d+ -> (\d+) in [\d\.]+ sec", output
    )
    if match is None:
        raise Exception(
            "Error running surge: Could not parse structure count from surge "
            f"output. Output was: '{output}'"
        )
    return int(match.group(1))
82
+
83
+
84
def generate_structures_SURGE(molecular_formula: str) -> Union[dict, str]:
    """Generate chemical structures using the surge tool.

    The total structure count is obtained from ``get_surge_count``; the
    structures themselves are produced by a second surge run with the filter
    settings listed below and SMILES output (``-S``). At most 1000 SMILES
    strings are returned.

    Args:
        molecular_formula (str): Molecular formula provided by the user.

    Returns:
        dict: Dictionary containing:
            - total_count: Count reported by ``get_surge_count``
            - generated_count: Number of structures actually returned
            - structures: List of SMILES strings (limited to 1000)
            - settings: Dictionary describing the surge settings used
            - formula: The input molecular formula
            - limit_applied: Whether the 1000-structure limit cut the results
        str: Error message if molecular formula contains more than 10 heavy atoms.

    Raises:
        Exception: If surge cannot be started, exits with a non-zero return
            code, or its structure count cannot be determined. The message
            starts with "Error running surge:".
    """
    if get_heavy_atom_count(molecular_formula) > 10:
        return "The molecular formula contains more heavy atoms than allowed (10 Heavy Atoms max)."

    # Maximum number of SMILES strings handed back to the caller.
    max_structures = 1000

    # Surge command settings
    surge_args = [
        "-P",  # Require planarity
        "-T",  # Disallow triple bonds
        "-B1,2,3,4,5,7,9",  # Avoid various substructures
        "-t0",  # Limit rings of length 3
        "-f0",  # Limit cycles of length 4
    ]

    settings_description = {
        "-P": "Require planarity",
        "-T": "Disallow triple bonds",
        "-B1,2,3,4,5,7,9": (
            "Avoid substructures: no triple bonds in small rings, "
            "Bredt's rule violations, cumulative double bonds, "
            "forbidden topologies"
        ),
        "-t0": "No rings of length 3 allowed",
        "-f0": "No cycles of length 4 allowed",
        "-S": "Output in SMILES format",
    }

    # First, get the total count. Failures from get_surge_count already carry
    # the "Error running surge:" prefix, so they are allowed to propagate
    # unchanged instead of being wrapped (and prefixed) a second time.
    total_count = get_surge_count(molecular_formula)

    # Then generate the structures. Only process start-up / communication is
    # guarded here; the original cause is chained for debugging.
    try:
        process = Popen(
            ["surge"] + surge_args + ["-S", molecular_formula],
            stdout=PIPE,
            stderr=PIPE,
        )
        stdout, stderr = process.communicate()
    except (OSError, ValueError) as e:
        raise Exception(f"Error running surge: {e}") from e

    if process.returncode != 0:
        # errors="replace" keeps the real surge error visible even if its
        # stderr contains bytes that are not valid UTF-8.
        raise Exception(
            f"Error running surge: {stderr.decode('utf-8', errors='replace')}"
        )

    # One SMILES string per non-blank output line.
    smiles = [
        line.strip()
        for line in stdout.decode("utf-8").splitlines()
        if line.strip()
    ]
    limited_smiles = smiles[:max_structures]

    return {
        "total_count": total_count,
        "generated_count": len(limited_smiles),
        "structures": limited_smiles,
        "settings": settings_description,
        "formula": molecular_formula,
        "limit_applied": len(smiles) > max_structures,
    }
0 commit comments