Skip to content

Commit aad59d5

Browse files
committed
Merge branch 'master'
2 parents 93454e2 + c568c26 commit aad59d5

File tree

64 files changed

+1292
-214
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+1292
-214
lines changed

dataSources/42bp/genomeArk/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,11 @@
1212
},
1313
"conversion": {
1414
"mapID": 84855374
15+
},
16+
"update": {
17+
"type": "weekly",
18+
"day": "sunday",
19+
"time": 9,
20+
"repeat": 2
1521
}
1622
}

dataSources/ala/avh/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,11 @@
1111
},
1212
"conversion": {
1313
"mapID": 404635334
14+
},
15+
"update": {
16+
"type": "weekly",
17+
"day": "sunday",
18+
"time": 9,
19+
"repeat": 2
1420
}
1521
}

dataSources/ala/lists/config.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"retrieveType": "script",
3+
"download": {
4+
"path": "./processing.py",
5+
"function": "collect",
6+
"args": [
7+
"{OUTPATH}"
8+
],
9+
"output": "lists.csv"
10+
},
11+
"conversion": {}
12+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from pathlib import Path
2+
import pandas as pd
3+
import requests
4+
5+
def collect(outputPath: Path) -> None:
    """Download the ALA species lists tagged "arga" and write them to a CSV.

    The list metadata is fetched page by page from the ``speciesList/``
    endpoint and cached in ``metadata.csv`` next to ``outputPath``; when that
    cache file already exists it is read instead of being downloaded again.
    The items of every list are then fetched from ``speciesListItems/<id>``
    until an empty page is returned.

    The file written to ``outputPath`` is the outer merge of the list
    metadata with the list items on ``speciesListID``.
    NOTE(review): the previous revision computed this merge but then wrote
    only the un-merged items table; confirm downstream consumers expect the
    merged columns.

    Args:
        outputPath: Destination CSV file. Its parent directory also holds the
            ``metadata.csv`` cache.

    Raises:
        requests.HTTPError: If the web service answers with an error status.
        requests.Timeout: If a request exceeds the timeout.
    """
    baseURL = "https://lists-ws.test.ala.org.au/"
    recordsPerPage = 100
    requestTimeout = 120  # seconds; without a timeout a stalled connection hangs forever

    with requests.Session() as session:

        def getURL(endpoint: str, params: dict, pageSize: int, page: int = 1):
            """Return the decoded JSON body of one page of ``endpoint``."""
            fields = dict(params)
            fields["page"] = page
            fields["pageSize"] = pageSize

            # Let requests build (and URL-encode) the query string.
            response = session.get(f"{baseURL}{endpoint}", params=fields, timeout=requestTimeout)
            # Fail loudly on HTTP errors: a JSON error object would otherwise
            # be treated as a page of records by the loops below.
            response.raise_for_status()
            return response.json()

        listsMetadata = outputPath.parent / "metadata.csv"
        if not listsMetadata.exists():
            metadataEndpoint = "speciesList/"
            query = {"tag": "arga"}

            data = getURL(metadataEndpoint, query, recordsPerPage)
            records = list(data["lists"])

            # Integer ceiling division: number of pages needed for all lists.
            totalPages = -(-data["listCount"] // recordsPerPage)
            for page in range(2, totalPages + 1):
                data = getURL(metadataEndpoint, query, recordsPerPage, page)
                records.extend(data["lists"])

            df = pd.DataFrame.from_records(records)
            df = df.drop(columns=["description"], errors="ignore")
            if records:
                # Do not cache an empty result, so the next run retries the download.
                df.to_csv(listsMetadata, index=False)
        else:
            df = pd.read_csv(listsMetadata)

        records = []
        listIDs = df["id"] if "id" in df.columns else []
        for listID in listIDs:
            page = 1
            while True:
                print(f"Getting page #{page} for id {listID}", end="\r")
                data = getURL(f"speciesListItems/{listID}", {}, recordsPerPage, page)
                if not data:  # an empty page marks the end of this list
                    break

                records.extend(data)
                page += 1

            print()

    df2 = pd.DataFrame.from_records(records)
    df = df.rename(columns={"id": "speciesListID", "version": "speciesListVersion"})
    if not df2.empty:
        # With no items there is no "speciesListID" column to join on, and the
        # outer merge would reduce to the metadata rows anyway.
        df = df.merge(df2, "outer", on="speciesListID")

    df.to_csv(outputPath, index=False)
Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
{
22
"retrieveType": "script",
3-
"subsections": [
4-
"kamilaroi",
5-
"noongar",
6-
"southeastarnhemland",
7-
"mangrovewatch",
8-
"weeds-australia"
9-
],
3+
"subsections": {
4+
"kamilaroi": {},
5+
"noongar": {},
6+
"southeastarnhemland": {},
7+
"mangrovewatch": {},
8+
"weeds-australia": {}
9+
},
1010
"download": {
1111
"path": "sourceProcessing/ala.py",
1212
"function": "collect",
@@ -16,5 +16,11 @@
1616
"./token.json"
1717
],
1818
"output": "{SUBSECTION}.csv"
19+
},
20+
"update": {
21+
"type": "weekly",
22+
"day": "sunday",
23+
"time": 9,
24+
"repeat": 2
1925
}
2026
}

dataSources/algaeBase/api/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,11 @@
99
"./apiKey.txt"
1010
],
1111
"output": "algaeBase.csv"
12+
},
13+
"update": {
14+
"type": "weekly",
15+
"day": "sunday",
16+
"time": 9,
17+
"repeat": 2
1218
}
1319
}

dataSources/anemone/db/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,11 @@
3030
"function": "dwcAugment"
3131
}
3232
]
33+
},
34+
"update": {
35+
"type": "weekly",
36+
"day": "sunday",
37+
"time": 9,
38+
"repeat": 2
3339
}
3440
}

dataSources/bold/austsv/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,11 @@
1515
},
1616
"conversion": {
1717
"mapID": 78385490
18+
},
19+
"update": {
20+
"type": "weekly",
21+
"day": "sunday",
22+
"time": 9,
23+
"repeat": 2
1824
}
1925
}

dataSources/bold/ausxml/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,11 @@
3333
},
3434
"conversion": {
3535
"mapID": 984983691
36+
},
37+
"update": {
38+
"type": "weekly",
39+
"day": "sunday",
40+
"time": 9,
41+
"repeat": 2
3642
}
3743
}

dataSources/bold/datapackage/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,11 @@
3232
},
3333
"conversion": {
3434
"mapID": 1154592624
35+
},
36+
"update": {
37+
"type": "weekly",
38+
"day": "sunday",
39+
"time": 9,
40+
"repeat": 2
3541
}
3642
}

0 commit comments

Comments
 (0)