|
9 | 9 | from fastapi import Query
|
10 | 10 | from fastapi import status
|
11 | 11 | from fastapi import Request
|
| 12 | +from fastapi import Body |
12 | 13 | from fastapi.responses import Response
|
13 | 14 | from slowapi import Limiter, _rate_limit_exceeded_handler
|
14 | 15 | from slowapi.util import get_remote_address
|
15 | 16 | from slowapi.errors import RateLimitExceeded
|
16 | 17 | from rdkit import Chem
|
17 | 18 |
|
| 19 | +# Schema imports |
| 20 | +from app.schemas import HealthCheck |
| 21 | +from app.schemas.error import BadRequestModel |
| 22 | +from app.schemas.error import ErrorResponse |
| 23 | +from app.schemas.error import NotFoundModel |
| 24 | +from app.schemas.converters_schema import GenerateCanonicalResponse |
| 25 | +from app.schemas.converters_schema import GenerateCXSMILESResponse |
| 26 | +from app.schemas.converters_schema import GenerateFormatsResponse |
| 27 | +from app.schemas.converters_schema import GenerateInChIKeyResponse |
| 28 | +from app.schemas.converters_schema import GenerateInChIResponse |
| 29 | +from app.schemas.converters_schema import GenerateSELFIESResponse |
| 30 | +from app.schemas.converters_schema import GenerateSMILESResponse |
| 31 | +from app.schemas.converters_schema import ThreeDCoordinatesResponse |
| 32 | +from app.schemas.converters_schema import TwoDCoordinatesResponse |
| 33 | +from app.schemas.converters_schema import GenerateSMARTSResponse |
| 34 | + |
| 35 | +# Module imports |
18 | 36 | from app.modules.toolkits.cdk_wrapper import get_canonical_SMILES
|
19 | 37 | from app.modules.toolkits.cdk_wrapper import get_CDK_SDG_mol
|
20 | 38 | from app.modules.toolkits.cdk_wrapper import get_CXSMILES
|
|
27 | 45 | from app.modules.toolkits.rdkit_wrapper import get_2d_mol
|
28 | 46 | from app.modules.toolkits.rdkit_wrapper import get_3d_conformers
|
29 | 47 | from app.modules.toolkits.rdkit_wrapper import get_rdkit_CXSMILES
|
30 |
| -from app.schemas import HealthCheck |
31 |
| -from app.schemas.converters_schema import GenerateCanonicalResponse |
32 |
| -from app.schemas.converters_schema import GenerateCXSMILESResponse |
33 |
| -from app.schemas.converters_schema import GenerateFormatsResponse |
34 |
| -from app.schemas.converters_schema import GenerateInChIKeyResponse |
35 |
| -from app.schemas.converters_schema import GenerateInChIResponse |
36 |
| -from app.schemas.converters_schema import GenerateSELFIESResponse |
37 |
| -from app.schemas.converters_schema import GenerateSMILESResponse |
38 |
| -from app.schemas.converters_schema import ThreeDCoordinatesResponse |
39 |
| -from app.schemas.converters_schema import TwoDCoordinatesResponse |
40 |
| -from app.schemas.converters_schema import GenerateSMARTSResponse |
41 |
| -from app.schemas.error import BadRequestModel |
42 |
| -from app.schemas.error import ErrorResponse |
43 |
| -from app.schemas.error import NotFoundModel |
44 | 48 |
|
45 | 49 | # Create the Limiter instance
|
46 | 50 | limiter = Limiter(key_func=get_remote_address)
|
@@ -745,3 +749,210 @@ async def smiles_to_smarts(
|
745 | 749 | smarts = Chem.MolToSmarts(mol)
|
746 | 750 | if smarts:
|
747 | 751 | return str(smarts)
|
| 752 | + |
| 753 | + |
def _batch_input_echo(input_item):
    """Return the ``{"value", "input_format"}`` echo used in a batch result entry.

    Entries that are not JSON objects cannot be queried with ``.get``; they are
    echoed back with empty strings so that building a failure record never raises.
    """
    if isinstance(input_item, dict):
        return {
            "value": input_item.get("value", ""),
            "input_format": input_item.get("input_format", ""),
        }
    return {"value": "", "input_format": ""}


def _batch_input_to_smiles(value: str, input_format: str):
    """Normalise one batch input to a SMILES string.

    Parameters:
    - **value**: the structure representation supplied by the client.
    - **input_format**: one of "smiles", "iupac", "selfies" or "inchi" (case-insensitive).

    Raises:
    - ValueError: if the format is unsupported or the conversion yields nothing.
    """
    source_format = input_format.lower()

    if source_format == "smiles":
        return value

    if source_format == "iupac":
        smiles = get_smiles_opsin(value)
        if not smiles:
            raise ValueError(f"Failed to convert IUPAC name '{value}' to SMILES")
        return smiles

    if source_format == "selfies":
        smiles = sf.decoder(value)
        if not smiles:
            raise ValueError(f"Failed to decode SELFIES '{value}' to SMILES")
        return smiles

    if source_format == "inchi":
        # Use RDKit to convert InChI to SMILES
        mol = Chem.inchi.MolFromInchi(value)
        if not mol:
            raise ValueError(f"Failed to convert InChI '{value}' to molecule")
        return Chem.MolToSmiles(mol)

    raise ValueError(f"Unsupported input format: {input_format}")


def _batch_smiles_to_output(smiles, output_format: str, toolkit: str):
    """Convert a SMILES string to ``output_format`` using ``toolkit``.

    Parameters:
    - **smiles**: SMILES string to convert.
    - **output_format**: target format name (case-insensitive).
    - **toolkit**: "cdk", "rdkit" or "openbabel".

    Returns:
    - The converted representation. An empty string is returned when the format
      is known but no branch matches the toolkit (unreachable through the
      endpoint, whose ``toolkit`` parameter is restricted to the three names).

    Raises:
    - ValueError: if the output format is unknown, or the format/toolkit
      combination is explicitly unsupported.
    """
    target = output_format.lower()

    if target == "smiles":
        return smiles

    if target == "canonicalsmiles":
        if toolkit == "cdk":
            return str(get_canonical_SMILES(parse_input(smiles, "cdk", False)))
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return str(Chem.MolToSmiles(mol, kekuleSmiles=True))
        if toolkit == "openbabel":
            return get_ob_canonical_SMILES(smiles)
        return ""

    if target == "inchi":
        if toolkit == "cdk":
            return str(get_InChI(parse_input(smiles, "cdk", False)))
        if toolkit == "rdkit":
            return str(Chem.inchi.MolToInchi(parse_input(smiles, "rdkit", False)))
        if toolkit == "openbabel":
            return get_ob_InChI(smiles)
        return ""

    if target == "inchikey":
        if toolkit == "cdk":
            return str(get_InChI(parse_input(smiles, "cdk", False), InChIKey=True))
        if toolkit == "rdkit":
            return str(Chem.inchi.MolToInchiKey(parse_input(smiles, "rdkit", False)))
        if toolkit == "openbabel":
            return get_ob_InChI(smiles, InChIKey=True)
        return ""

    if target == "selfies":
        # SELFIES encoding does not depend on the selected toolkit.
        return str(sf.encoder(smiles))

    if target == "cxsmiles":
        if toolkit == "cdk":
            return str(get_CXSMILES(parse_input(smiles, "cdk", False)))
        if toolkit == "rdkit":
            return str(get_rdkit_CXSMILES(parse_input(smiles, "rdkit", False)))
        raise ValueError(f"CXSMILES conversion not supported by toolkit: {toolkit}")

    if target == "smarts":
        if toolkit == "rdkit":
            return str(Chem.MolToSmarts(parse_input(smiles, "rdkit", False)))
        raise ValueError(f"SMARTS conversion not supported by toolkit: {toolkit}")

    if target == "mol2d":
        if toolkit == "cdk":
            mol = parse_input(smiles, "cdk", False)
            # Strip the SDF record terminator from the CDK output.
            return get_CDK_SDG_mol(mol).replace("$$$$\n", "")
        if toolkit == "rdkit":
            return get_2d_mol(parse_input(smiles, "rdkit", False))
        if toolkit == "openbabel":
            return get_ob_mol(smiles)
        return ""

    if target == "mol3d":
        if toolkit == "rdkit":
            mol = parse_input(smiles, "rdkit", False)
            return get_3d_conformers(mol, depict=False)
        if toolkit == "openbabel":
            return get_ob_mol(smiles, threeD=True)
        raise ValueError(
            f"3D coordinates generation not supported by toolkit: {toolkit}"
        )

    raise ValueError(f"Unsupported output format: {output_format}")


@router.post(
    "/batch",
    summary="Batch convert chemical structures to various formats",
    responses={
        200: {"description": "Successful response"},
        400: {"description": "Bad Request", "model": BadRequestModel},
        404: {"description": "Not Found", "model": NotFoundModel},
        422: {"description": "Unprocessable Entity", "model": ErrorResponse},
    },
)
@limiter.limit("10/minute")
async def batch_convert(
    request: Request,
    body: dict = Body(...),
    output_format: str = Query(
        default="smiles",
        description="Format to convert to (smiles, canonicalsmiles, inchi, inchikey, selfies, cxsmiles, smarts, mol2d, mol3d)",
    ),
    toolkit: Literal["cdk", "rdkit", "openbabel"] = Query(
        default="cdk",
        description="Cheminformatics toolkit to use for conversion",
    ),
):
    """Batch convert chemical structures to various formats.

    This endpoint accepts a list of inputs with different formats and converts them
    to the specified output format using the selected toolkit.

    Parameters:
    - **body**: required (dict): JSON object with a list of inputs to convert.
        - Structure:
        ```json
        {
            "inputs": [
                {
                    "value": "CN1C=NC2=C1C(=O)N(C)C(=O)N2C",
                    "input_format": "smiles"
                }
            ]
        }
        ```
    - **output_format**: optional (str): Format to convert to.
        - Supported values: "smiles", "canonicalsmiles", "inchi", "inchikey", "selfies", "cxsmiles", "smarts", "mol2d", "mol3d".
    - **toolkit**: optional (str): Toolkit to use for conversion.
        - Supported values: "cdk" (default), "rdkit", "openbabel".

    Returns:
    - JSON object containing conversion results and summary.

    Raises:
    - HTTPException (422): If "inputs" is present but is not a list.

    Note:
    - Some conversion combinations may not be supported by all toolkits.
    - Failed conversions will be included in the response with error messages.
    - Malformed entries (not an object, or non-string fields) are reported as
      failed conversions instead of aborting the whole batch.
    """
    # Imported locally so this block does not depend on an edit to the
    # module-level import section.
    from fastapi import HTTPException

    # A missing "inputs" key is treated as an empty batch.
    inputs = body.get("inputs", [])
    if not isinstance(inputs, list):
        raise HTTPException(
            status_code=422,
            detail="'inputs' must be a list of objects with 'value' and 'input_format'",
        )

    results = []
    for input_item in inputs:
        echoed_input = _batch_input_echo(input_item)
        try:
            if not isinstance(input_item, dict):
                raise ValueError(
                    "Each input must be an object with 'value' and 'input_format'"
                )

            value = echoed_input["value"]
            input_format = echoed_input["input_format"]
            if not value or not input_format:
                raise ValueError("Missing required fields: value or input_format")
            if not isinstance(value, str) or not isinstance(input_format, str):
                raise ValueError("Fields 'value' and 'input_format' must be strings")

            # First normalise the input to SMILES, then convert to the target format.
            smiles = _batch_input_to_smiles(value, input_format)
            output_value = _batch_smiles_to_output(smiles, output_format, toolkit)
        except Exception as e:
            # Deliberately broad: any toolkit failure is reported for this item
            # only, so one bad structure does not fail the whole batch.
            results.append(
                {
                    "input": echoed_input,
                    "output": "",
                    "success": False,
                    "error": str(e),
                }
            )
        else:
            results.append(
                {
                    "input": {"value": value, "input_format": input_format},
                    "output": output_value,
                    "success": True,
                    "error": "",
                }
            )

    success_count = sum(1 for entry in results if entry["success"])
    summary = {
        "total": len(inputs),
        "successful": success_count,
        "failed": len(results) - success_count,
    }

    # Return the response as a dictionary
    return {
        "results": results,
        "summary": summary,
    }
0 commit comments