ARGA-Genomes
diff --git a/‎dataSources/42bp/genomeArk/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/42bp/genomeArk/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/ala/avh/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/ala/avh/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/ala/lists/config.json‎
Lines changed: 12 additions & 0 deletions b/‎dataSources/ala/lists/config.json‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎dataSources/ala/lists/processing.py‎
Lines changed: 58 additions & 0 deletions b/‎dataSources/ala/lists/processing.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎dataSources/ala/profiles/config.json‎
Lines changed: 13 additions & 7 deletions b/‎dataSources/ala/profiles/config.json‎
Lines changed: 13 additions & 7 deletions
diff --git a/‎dataSources/algaeBase/api/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/algaeBase/api/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/anemone/db/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/anemone/db/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/bold/austsv/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/bold/austsv/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/bold/ausxml/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/bold/ausxml/config.json‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎dataSources/bold/datapackage/config.json‎
Lines changed: 6 additions & 0 deletions b/‎dataSources/bold/datapackage/config.json‎
Lines changed: 6 additions & 0 deletions
@@ -12,5 +12,11 @@
     },
     "conversion": {
         "mapID": 84855374
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -11,5 +11,11 @@
     },
     "conversion": {
         "mapID": 404635334
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -0,0 +1,12 @@
+{
+    "retrieveType": "script",
+    "download": {
+        "path": "./processing.py",
+        "function": "collect",
+        "args": [
+            "{OUTPATH}"
+        ],
+        "output": "lists.csv"
+    },
+    "conversion": {}
+}
@@ -0,0 +1,58 @@
+from pathlib import Path
+import pandas as pd
+import requests
+
+def collect(outputPath: Path) -> None:
+    baseURL = "https://lists-ws.test.ala.org.au/"
+    session = requests.Session()
+    recordsPerPage = 100
+    
+    def getURL(endpoint: str, params: dict, pageSize: int, page: int = 1) -> dict:
+        fields = dict(params)
+        fields["page"] = page
+        fields["pageSize"] = pageSize
+
+        url = f"{baseURL}{endpoint}?" + "&".join(f"{k}={v}" for k, v in fields.items())
+        response = session.get(url)
+        data = response.json()
+        return data
+    
+    listsMetadata = outputPath.parent / "metadata.csv"
+    if not listsMetadata.exists():
+        records = []
+        metadataEndpoint = "speciesList/"
+        
+        query = {"tag": "arga"}
+        data = getURL(metadataEndpoint, query, recordsPerPage)
+        records.extend(data["lists"])
+        totalItems = data["listCount"]
+        remainingCalls = ((totalItems / recordsPerPage).__ceil__()) - 1
+        
+        for call, _ in enumerate(range(remainingCalls), start=2):
+            data = getURL(metadataEndpoint, query, recordsPerPage, call)
+            records.extend(data["lists"])
+
+        df = pd.DataFrame.from_records(records)
+        df = df.drop(["description"], axis=1)
+        df.to_csv(listsMetadata, index=False)
+    else:
+        df = pd.read_csv(listsMetadata)
+
+    records = []
+    for id in df["id"]:
+        page = 1
+        while True:
+            print(f"Getting page #{page} for id {id}", end="\r")
+            data = getURL(f"speciesListItems/{id}", {}, recordsPerPage, page)
+            if not data:
+                break
+
+            records.extend(data)
+            page += 1
+
+        print()
+        
+    df2 = pd.DataFrame.from_records(records)
+    df = df.rename(columns={"id": "speciesListID", "version": "speciesListVersion"})
+    df = df.merge(df2, "outer", on="speciesListID")
+    df2.to_csv(outputPath, index=False)
@@ -1,12 +1,12 @@
 {
     "retrieveType": "script",
-    "subsections": [
-        "kamilaroi",
-        "noongar",
-        "southeastarnhemland",
-        "mangrovewatch",
-        "weeds-australia"
-    ],
+    "subsections": {
+        "kamilaroi": {},
+        "noongar": {},
+        "southeastarnhemland": {},
+        "mangrovewatch": {},
+        "weeds-australia": {}
+    },
     "download": {
         "path": "sourceProcessing/ala.py",
         "function": "collect",
@@ -16,5 +16,11 @@
             "./token.json"
         ],
         "output": "{SUBSECTION}.csv"
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -9,5 +9,11 @@
             "./apiKey.txt"
         ],
         "output": "algaeBase.csv"
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -30,5 +30,11 @@
                 "function": "dwcAugment"
             }
         ]
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -15,5 +15,11 @@
     },
     "conversion": {
         "mapID": 78385490
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -33,5 +33,11 @@
     },
     "conversion": {
         "mapID": 984983691
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
@@ -32,5 +32,11 @@
     },
     "conversion": {
         "mapID": 1154592624
+    },
+    "update": {
+        "type": "weekly",
+        "day": "sunday",
+        "time": 9,
+        "repeat": 2
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -12,5 +12,11 @@`
`12`	`12`	`},`
`13`	`13`	`"conversion": {`
`14`	`14`	`"mapID": 84855374`
	`15`	`+ },`
	`16`	`+ "update": {`
	`17`	`+ "type": "weekly",`
	`18`	`+ "day": "sunday",`
	`19`	`+ "time": 9,`
	`20`	`+ "repeat": 2`
`15`	`21`	`}`
`16`	`22`	`}`
Original file line number	Diff line number	Diff line change
`@@ -11,5 +11,11 @@`
`11`	`11`	`},`
`12`	`12`	`"conversion": {`
`13`	`13`	`"mapID": 404635334`
	`14`	`+ },`
	`15`	`+ "update": {`
	`16`	`+ "type": "weekly",`
	`17`	`+ "day": "sunday",`
	`18`	`+ "time": 9,`
	`19`	`+ "repeat": 2`
`14`	`20`	`}`
`15`	`21`	`}`
Original file line number	Diff line number	Diff line change
`@@ -9,5 +9,11 @@`
`9`	`9`	`"./apiKey.txt"`
`10`	`10`	`],`
`11`	`11`	`"output": "algaeBase.csv"`
	`12`	`+ },`
	`13`	`+ "update": {`
	`14`	`+ "type": "weekly",`
	`15`	`+ "day": "sunday",`
	`16`	`+ "time": 9,`
	`17`	`+ "repeat": 2`
`12`	`18`	`}`
`13`	`19`	`}`
Original file line number	Diff line number	Diff line change
`@@ -30,5 +30,11 @@`
`30`	`30`	`"function": "dwcAugment"`
`31`	`31`	`}`
`32`	`32`	`]`
	`33`	`+ },`
	`34`	`+ "update": {`
	`35`	`+ "type": "weekly",`
	`36`	`+ "day": "sunday",`
	`37`	`+ "time": 9,`
	`38`	`+ "repeat": 2`
`33`	`39`	`}`
`34`	`40`	`}`
Original file line number	Diff line number	Diff line change
`@@ -15,5 +15,11 @@`
`15`	`15`	`},`
`16`	`16`	`"conversion": {`
`17`	`17`	`"mapID": 78385490`
	`18`	`+ },`
	`19`	`+ "update": {`
	`20`	`+ "type": "weekly",`
	`21`	`+ "day": "sunday",`
	`22`	`+ "time": 9,`
	`23`	`+ "repeat": 2`
`18`	`24`	`}`
`19`	`25`	`}`
Original file line number	Diff line number	Diff line change
`@@ -33,5 +33,11 @@`
`33`	`33`	`},`
`34`	`34`	`"conversion": {`
`35`	`35`	`"mapID": 984983691`
	`36`	`+ },`
	`37`	`+ "update": {`
	`38`	`+ "type": "weekly",`
	`39`	`+ "day": "sunday",`
	`40`	`+ "time": 9,`
	`41`	`+ "repeat": 2`
`36`	`42`	`}`
`37`	`43`	`}`
Original file line number	Diff line number	Diff line change
`@@ -32,5 +32,11 @@`
`32`	`32`	`},`
`33`	`33`	`"conversion": {`
`34`	`34`	`"mapID": 1154592624`
	`35`	`+ },`
	`36`	`+ "update": {`
	`37`	`+ "type": "weekly",`
	`38`	`+ "day": "sunday",`
	`39`	`+ "time": 9,`
	`40`	`+ "repeat": 2`
`35`	`41`	`}`
`36`	`42`	`}`