Skip to content

Commit 4b63f8f

Browse files
Merge pull request #597 from Steinbeck-Lab/development
fix: link documentation & CFF update
2 parents c2cd46b + 961d153 commit 4b63f8f

File tree

6 files changed

+364
-115
lines changed

6 files changed

+364
-115
lines changed

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ authors:
1717
given-names: "Kohulan"
1818
orcid: "https://orcid.org/0000-0003-1066-7792"
1919
title: "cheminformatics-microservice"
20-
version: v2.6.0
21-
doi: 10.5281/zenodo.13867839
22-
date-released: 2023-03-16
20+
version: v3.3.0
21+
doi: 10.5281/zenodo.15575699
22+
date-released: 2025-06-02
2323
url: "https://github.com/Steinbeck-Lab/cheminformatics-microservice"

app/routers/converters.py

Lines changed: 225 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,30 @@
99
from fastapi import Query
1010
from fastapi import status
1111
from fastapi import Request
12+
from fastapi import Body
1213
from fastapi.responses import Response
1314
from slowapi import Limiter, _rate_limit_exceeded_handler
1415
from slowapi.util import get_remote_address
1516
from slowapi.errors import RateLimitExceeded
1617
from rdkit import Chem
1718

19+
# Schema imports
20+
from app.schemas import HealthCheck
21+
from app.schemas.error import BadRequestModel
22+
from app.schemas.error import ErrorResponse
23+
from app.schemas.error import NotFoundModel
24+
from app.schemas.converters_schema import GenerateCanonicalResponse
25+
from app.schemas.converters_schema import GenerateCXSMILESResponse
26+
from app.schemas.converters_schema import GenerateFormatsResponse
27+
from app.schemas.converters_schema import GenerateInChIKeyResponse
28+
from app.schemas.converters_schema import GenerateInChIResponse
29+
from app.schemas.converters_schema import GenerateSELFIESResponse
30+
from app.schemas.converters_schema import GenerateSMILESResponse
31+
from app.schemas.converters_schema import ThreeDCoordinatesResponse
32+
from app.schemas.converters_schema import TwoDCoordinatesResponse
33+
from app.schemas.converters_schema import GenerateSMARTSResponse
34+
35+
# Module imports
1836
from app.modules.toolkits.cdk_wrapper import get_canonical_SMILES
1937
from app.modules.toolkits.cdk_wrapper import get_CDK_SDG_mol
2038
from app.modules.toolkits.cdk_wrapper import get_CXSMILES
@@ -27,20 +45,6 @@
2745
from app.modules.toolkits.rdkit_wrapper import get_2d_mol
2846
from app.modules.toolkits.rdkit_wrapper import get_3d_conformers
2947
from app.modules.toolkits.rdkit_wrapper import get_rdkit_CXSMILES
30-
from app.schemas import HealthCheck
31-
from app.schemas.converters_schema import GenerateCanonicalResponse
32-
from app.schemas.converters_schema import GenerateCXSMILESResponse
33-
from app.schemas.converters_schema import GenerateFormatsResponse
34-
from app.schemas.converters_schema import GenerateInChIKeyResponse
35-
from app.schemas.converters_schema import GenerateInChIResponse
36-
from app.schemas.converters_schema import GenerateSELFIESResponse
37-
from app.schemas.converters_schema import GenerateSMILESResponse
38-
from app.schemas.converters_schema import ThreeDCoordinatesResponse
39-
from app.schemas.converters_schema import TwoDCoordinatesResponse
40-
from app.schemas.converters_schema import GenerateSMARTSResponse
41-
from app.schemas.error import BadRequestModel
42-
from app.schemas.error import ErrorResponse
43-
from app.schemas.error import NotFoundModel
4448

4549
# Create the Limiter instance
4650
limiter = Limiter(key_func=get_remote_address)
@@ -745,3 +749,210 @@ async def smiles_to_smarts(
745749
smarts = Chem.MolToSmarts(mol)
746750
if smarts:
747751
return str(smarts)
752+
753+
754+
def _batch_text(result):
    """Return ``str(result)``, treating a missing (``None``) result as a failure.

    Without this guard ``str(None)`` would be reported to the client as the
    successful conversion output ``"None"``.
    """
    if result is None:
        raise ValueError("Conversion produced no result")
    return str(result)


def _batch_input_to_smiles(value: str, input_format: str):
    """Normalise one batch input to SMILES.

    Supported input formats (case-insensitive): "smiles", "iupac", "selfies"
    and "inchi".

    Raises:
        ValueError: if the format is unsupported or the conversion yields
            nothing.
    """
    fmt = input_format.lower()

    if fmt == "smiles":
        return value

    if fmt == "iupac":
        smiles = get_smiles_opsin(value)
        if not smiles:
            raise ValueError(f"Failed to convert IUPAC name '{value}' to SMILES")
        return smiles

    if fmt == "selfies":
        smiles = sf.decoder(value)
        if not smiles:
            raise ValueError(f"Failed to decode SELFIES '{value}' to SMILES")
        return smiles

    if fmt == "inchi":
        # RDKit is used for InChI parsing regardless of the selected toolkit.
        mol = Chem.inchi.MolFromInchi(value)
        if not mol:
            raise ValueError(f"Failed to convert InChI '{value}' to molecule")
        return Chem.MolToSmiles(mol)

    raise ValueError(f"Unsupported input format: {input_format}")


def _batch_smiles_to_output(smiles, output_format: str, toolkit: str):
    """Convert a SMILES string to ``output_format`` using ``toolkit``.

    Raises:
        ValueError: if the output format is unknown or the toolkit cannot
            produce it.
    """
    fmt = output_format.lower()

    if fmt == "smiles":
        return smiles

    if fmt == "selfies":
        # SELFIES encoding does not depend on the selected toolkit.
        return _batch_text(sf.encoder(smiles))

    if fmt == "canonicalsmiles":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            return _batch_text(get_canonical_SMILES(mol))
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return _batch_text(Chem.MolToSmiles(mol, kekuleSmiles=True))
        if toolkit == "openbabel":
            return get_ob_canonical_SMILES(smiles)

    elif fmt == "inchi":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            return _batch_text(get_InChI(mol))
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return _batch_text(Chem.inchi.MolToInchi(mol))
        if toolkit == "openbabel":
            return get_ob_InChI(smiles)

    elif fmt == "inchikey":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            return _batch_text(get_InChI(mol, InChIKey=True))
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return _batch_text(Chem.inchi.MolToInchiKey(mol))
        if toolkit == "openbabel":
            return get_ob_InChI(smiles, InChIKey=True)

    elif fmt == "cxsmiles":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            return _batch_text(get_CXSMILES(mol))
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return _batch_text(get_rdkit_CXSMILES(mol))
        raise ValueError(f"CXSMILES conversion not supported by toolkit: {toolkit}")

    elif fmt == "smarts":
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return _batch_text(Chem.MolToSmarts(mol))
        raise ValueError(f"SMARTS conversion not supported by toolkit: {toolkit}")

    elif fmt == "mol2d":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            # Drop the SDF record terminator so only the mol block is returned.
            return get_CDK_SDG_mol(mol).replace("$$$$\n", "")
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return get_2d_mol(mol)
        if toolkit == "openbabel":
            return get_ob_mol(smiles)

    elif fmt == "mol3d":
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return get_3d_conformers(mol, depict=False)
        if toolkit == "openbabel":
            return get_ob_mol(smiles, threeD=True)
        raise ValueError(
            f"3D coordinates generation not supported by toolkit: {toolkit}"
        )

    else:
        raise ValueError(f"Unsupported output format: {output_format}")

    # Only reachable when the format is known but the toolkit name is not one
    # of the handled values (e.g. the function is called without FastAPI's
    # Literal validation).
    raise ValueError(
        f"Output format '{output_format}' not supported by toolkit: {toolkit}"
    )


@router.post(
    "/batch",
    summary="Batch convert chemical structures to various formats",
    responses={
        200: {"description": "Successful response"},
        400: {"description": "Bad Request", "model": BadRequestModel},
        404: {"description": "Not Found", "model": NotFoundModel},
        422: {"description": "Unprocessable Entity", "model": ErrorResponse},
    },
)
@limiter.limit("10/minute")
async def batch_convert(
    request: Request,
    body: dict = Body(...),
    output_format: str = Query(
        default="smiles",
        description="Format to convert to (smiles, canonicalsmiles, inchi, inchikey, selfies, cxsmiles, smarts, mol2d, mol3d)",
    ),
    toolkit: Literal["cdk", "rdkit", "openbabel"] = Query(
        default="cdk",
        description="Cheminformatics toolkit to use for conversion",
    ),
):
    """Batch convert chemical structures to various formats.

    This endpoint accepts a list of inputs with different formats and converts them
    to the specified output format using the selected toolkit.

    Parameters:
    - **body**: required (dict): JSON object with a list of inputs to convert.
        - Structure:
        ```json
        {
            "inputs": [
                {
                    "value": "CN1C=NC2=C1C(=O)N(C)C(=O)N2C",
                    "input_format": "smiles"
                }
            ]
        }
        ```
        - Supported `input_format` values: "smiles", "iupac", "selfies", "inchi".
    - **output_format**: optional (str): Format to convert to.
        - Supported values: "smiles", "canonicalsmiles", "inchi", "inchikey", "selfies", "cxsmiles", "smarts", "mol2d", "mol3d".
    - **toolkit**: optional (str): Toolkit to use for conversion.
        - Supported values: "cdk" (default), "rdkit", "openbabel".

    Returns:
    - JSON object containing conversion results and summary.

    Raises:
    - HTTPException (422): If "inputs" is present but is not a list.

    Note:
    - Some conversion combinations may not be supported by all toolkits.
    - Failed conversions will be included in the response with error messages.
    - A missing "inputs" key yields an empty result list.
    """
    # Local import: HTTPException is not among the imports visible for this module.
    from fastapi import HTTPException

    inputs = body.get("inputs", [])
    if not isinstance(inputs, list):
        # Previously a non-list payload surfaced as an unhandled 500.
        raise HTTPException(
            status_code=422,
            detail="'inputs' must be a list of objects with 'value' and 'input_format'",
        )

    results = []
    success_count = 0
    failure_count = 0

    for input_item in inputs:
        # Read the echo fields up front so the error path never has to touch
        # a malformed (non-dict) item again.
        is_object = isinstance(input_item, dict)
        value = input_item.get("value", "") if is_object else ""
        input_format = input_item.get("input_format", "") if is_object else ""

        try:
            if not is_object:
                raise ValueError(
                    "Each input must be an object with 'value' and 'input_format'"
                )
            if not value or not input_format:
                raise ValueError("Missing required fields: value or input_format")
            if not isinstance(value, str) or not isinstance(input_format, str):
                raise ValueError("Fields 'value' and 'input_format' must be strings")

            # Every input goes through SMILES as the common intermediate.
            smiles = _batch_input_to_smiles(value, input_format)
            output_value = _batch_smiles_to_output(smiles, output_format, toolkit)

            if output_value is None or output_value == "":
                raise ValueError(
                    f"Conversion to '{output_format}' with toolkit '{toolkit}' "
                    "produced no output"
                )
        except Exception as e:
            # Per-item boundary: one bad structure must not abort the batch.
            results.append(
                {
                    "input": {"value": value, "input_format": input_format},
                    "output": "",
                    "success": False,
                    "error": str(e),
                }
            )
            failure_count += 1
        else:
            results.append(
                {
                    "input": {"value": value, "input_format": input_format},
                    "output": output_value,
                    "success": True,
                    "error": "",
                }
            )
            success_count += 1

    summary = {
        "total": len(inputs),
        "successful": success_count,
        "failed": failure_count,
    }

    return {
        "results": results,
        "summary": summary,
    }

0 commit comments

Comments
 (0)