Skip to content

Commit 8605cfb

Browse files
authored
Merge pull request #388 from ktehranchi/master
Fix PUDL link
2 parents: d13d3e5 + e2d5766 · commit 8605cfb

File tree

1 file changed

+9
-25
lines changed

1 file changed

+9
-25
lines changed

workflow/scripts/retrieve_pudl.py

Lines changed: 9 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import zlib
7+
import zipfile
78
from pathlib import Path
89

910
import requests
@@ -12,27 +13,6 @@
1213

1314
logger = logging.getLogger(__name__)
1415

15-
16-
def retrieve_gzip(url: str, save: str):
17-
"""
18-
Retrieves a gzip file from a URL and saves it to a local file.
19-
20-
Args:
21-
url (str): URL of the gzip file to retrieve.
22-
save (str): Path to save the gzip file to.
23-
"""
24-
logger.info(f"Downloading Data from '{url}'")
25-
d = zlib.decompressobj(16 + zlib.MAX_WBITS)
26-
with requests.get(url, stream=True) as r:
27-
r.raise_for_status()
28-
total_size = int(r.headers.get("content-length", 0))
29-
with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar:
30-
with open(save, "wb") as fd:
31-
for chunk in r.iter_content(chunk_size=128):
32-
progress_bar.update(len(chunk))
33-
fd.write(d.decompress(chunk))
34-
35-
3616
if __name__ == "__main__":
3717
if "snakemake" not in globals():
3818
from _helpers import mock_snakemake
@@ -44,19 +24,23 @@ def retrieve_gzip(url: str, save: str):
4424

4525
# Recommended to use the stable version of PUDL documented here: https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#stable-builds
4626
url_pudl = (
47-
"http://pudl.catalyst.coop.s3.us-west-2.amazonaws.com/stable/pudl.sqlite.gz"
27+
"https://zenodo.org/records/13346011/files/pudl.sqlite.zip?download=1"
4828
)
4929
url_census = (
50-
"https://zenodo.org/records/11292273/files/censusdp1tract.sqlite.gz?download=1"
30+
"https://zenodo.org/records/13346011/files/censusdp1tract.sqlite.zip?download=1"
5131
)
5232
save_pudl = snakemake.output.pudl
5333
save_census = snakemake.output.census
5434

5535
if not Path(save_census).exists():
56-
retrieve_gzip(url_census, save_census)
36+
progress_retrieve(url_census, save_census + ".zip")
37+
with zipfile.ZipFile(save_census + ".zip", "r") as zip_ref:
38+
zip_ref.extractall(Path(save_census).parent)
5739

5840
if not Path(save_pudl).exists():
59-
retrieve_gzip(url_pudl, save_pudl)
41+
progress_retrieve(url_pudl, save_pudl + ".zip")
42+
with zipfile.ZipFile(save_pudl + ".zip", "r") as zip_ref:
43+
zip_ref.extractall(Path(save_pudl).parent)
6044

6145
# Get PUDL FERC Form 714 Parquet
6246
parquet = f"https://zenodo.org/records/11292273/files/out_ferc714__hourly_estimated_state_demand.parquet?download=1"

0 commit comments

Comments (0)