Skip to content

Commit 524ec23

Browse files
Updated NCBI API processing to require an API key
Updated merge function to not attempt to run unless both input files exist. Updated apiWorker to use the v2 API instead of v2alpha.
1 parent 6767270 commit 524ec23

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

dataSources/ncbi/llib/apiWorker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def apiWorker(queue: Queue, id: int, apiKey: str, recordsPerCall: int, accession
4444

4545
try:
4646
for string in accessionStrings:
47-
url = f"https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/{string}/dataset_report"
47+
url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{string}/dataset_report"
4848
response = session.get(url, headers=headers, params=params)
4949
data = response.json()
5050
records = data.get("reports", [])

dataSources/ncbi/llib/sharedProcessing.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,12 @@
1313

1414
def getStats(summaryFile: DataFile, outputPath: Path):
1515
apiKey = secrets.ncbi.key
16+
if not isinstance(apiKey, str):
17+
logging.error("No API key found in secrets file, and is required to access NCBI api. Please update 'secrets.toml' with 'key' field under 'ncbi'.")
18+
return
19+
20+
logging.info("Found API key")
1621
processes = 10
17-
if apiKey is None:
18-
apiKey = "" # Use empty string
19-
processes = 3 # Reduce processes count due to reduced api call rate
20-
logging.info("No API key found")
21-
22-
else:
23-
logging.info("Found API key")
24-
2522
recordsPerCall = 200
2623
recordsPerSubsection = 30000
2724
accessionCol = "#assembly_accession"
@@ -68,6 +65,14 @@ def getStats(summaryFile: DataFile, outputPath: Path):
6865
writer.combine(False, index=False)
6966

7067
def merge(summaryFile: DataFile, statsFilePath: Path, outputPath: Path) -> None:
68+
if not summaryFile.exists():
69+
logging.error("Unable to merge files as summary file doesn't exist")
70+
return
71+
72+
if not statsFilePath.exists():
73+
logging.error("Unable to merge files as stats file doesn't exist")
74+
return
75+
7176
df = summaryFile.read(low_memory=False)
7277
df2 = pd.read_csv(statsFilePath, low_memory=False)
7378
df = df.merge(df2, how="outer", left_on="#assembly_accession", right_on="current_accession")

0 commit comments

Comments
 (0)