|
27 | 27 |
|
28 | 28 | from __future__ import annotations |
29 | 29 |
|
30 | | -import re |
31 | | -import subprocess |
32 | 30 | import warnings |
33 | | -from shutil import which |
| 31 | +from typing import TYPE_CHECKING |
34 | 32 |
|
35 | 33 | import requests |
36 | | -from monty.dev import requires |
37 | 34 |
|
38 | 35 | from pymatgen.core.composition import Composition |
39 | 36 | from pymatgen.core.structure import Structure |
40 | 37 |
|
| 38 | +if TYPE_CHECKING: |
| 39 | + from typing import Literal |
| 40 | + |
41 | 41 |
|
class COD:
    """An interface to the Crystallography Open Database.

    Reference:
        https://wiki.crystallography.net/RESTful_API/
    """

    def __init__(self, timeout: int = 60):
        """Initialize the COD class.

        Args:
            timeout (int): request timeout in seconds, used for every
                HTTP request made by this instance.
        """
        self.timeout = timeout
        self.url = "https://www.crystallography.net"
        self.api_url = f"{self.url}/cod/result"

    def _get_entries_by_formula(self, formula: str) -> list[dict]:
        """Fetch the raw JSON entries the COD result API returns for a formula.

        Args:
            formula (str): Chemical formula; it is converted to Hill notation
                before being sent, as per COD request.

        Returns:
            The decoded JSON payload. Presumably a list of dicts carrying at
            least a "file" key (the COD ID) -- callers rely on that key.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        params = {
            "formula": Composition(formula).hill_formula,
            "format": "json",
        }
        response = requests.get(self.api_url, params=params, timeout=self.timeout)

        # Fail loudly on HTTP errors instead of trying to decode an error page
        response.raise_for_status()
        return response.json()

    def get_cod_ids(self, formula: str) -> list[int]:
        """Query the COD for all COD IDs associated with a formula.

        Args:
            formula (str): The formula to request

        Returns:
            List of COD IDs.

        Raises:
            requests.HTTPError: if the request fails.
        """
        return [int(entry["file"]) for entry in self._get_entries_by_formula(formula)]

    def get_structure_by_id(self, cod_id: int, timeout: int | None = None, **kwargs) -> Structure:
        """Query the COD for a structure by ID.

        Args:
            cod_id (int): COD ID.
            timeout (int): DEPRECATED. request timeout in seconds.
            kwargs: kwargs passed to Structure.from_str.

        Returns:
            A Structure.

        Raises:
            requests.HTTPError: if the CIF file cannot be downloaded.
        """
        # TODO: remove timeout arg and use class level timeout after 2025-10-17
        if timeout is not None:
            # stacklevel=2 attributes the warning to the caller's line
            warnings.warn(
                "separate timeout arg is deprecated, please use class level timeout",
                DeprecationWarning,
                stacklevel=2,
            )
        timeout = timeout or self.timeout

        response = requests.get(f"{self.url}/cod/{cod_id}.cif", timeout=timeout)

        # Do not hand an HTTP error page to the CIF parser
        response.raise_for_status()
        return Structure.from_str(response.text, fmt="cif", **kwargs)

    def get_structure_by_formula(
        self,
        formula: str,
        **kwargs,
    ) -> list[dict[Literal["structure", "cod_id", "sg"], str | int | Structure]]:
        """Query the COD for structures by formula.

        Args:
            formula (str): Chemical formula.
            kwargs: All kwargs supported by Structure.from_str.

        Returns:
            A list of dict of: {"structure": Structure, "cod_id": int, "sg": "P n m a"}
            "sg" is taken from the entry with .get(), so it is None when the
            entry has no such key.

        Raises:
            requests.HTTPError: if the formula query or a CIF download fails.
            Exception: whatever Structure.from_str raises on an unparsable CIF
                (re-raised after a warning naming the offending COD ID).
        """
        structures: list[dict[Literal["structure", "cod_id", "sg"], str | int | Structure]] = []

        for entry in self._get_entries_by_formula(formula):
            # Normalize once so the ID has the same type everywhere below
            cod_id = int(entry["file"])
            sg = entry.get("sg")

            try:
                struct = self.get_structure_by_id(cod_id, **kwargs)
            except requests.RequestException:
                # Download problem, not a parsing problem: propagate without
                # the (misleading) CIF parsing warning
                raise
            except Exception:
                warnings.warn(f"Structure.from_str failed while parsing CIF file for COD ID {cod_id}", stacklevel=2)
                raise

            structures.append({"structure": struct, "cod_id": cod_id, "sg": sg})

        return structures
0 commit comments