Skip to content

Commit

Permalink
Merge pull request #388 from ktehranchi/master
Browse files Browse the repository at this point in the history
Fix PUDL link
  • Loading branch information
ktehranchi authored Aug 23, 2024
2 parents d13d3e5 + e2d5766 commit 8605cfb
Showing 1 changed file with 9 additions and 25 deletions.
34 changes: 9 additions & 25 deletions workflow/scripts/retrieve_pudl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import logging
import zlib
import zipfile
from pathlib import Path

import requests
Expand All @@ -12,27 +13,6 @@

logger = logging.getLogger(__name__)


def retrieve_gzip(url: str, save: str):
    """
    Download a gzip-compressed file from a URL and save it decompressed.

    The response body is streamed, gunzipped on the fly and written to a
    temporary ``<save>.part`` file next to the destination. The temporary
    file is renamed to ``save`` only once the whole stream has been written,
    so a failed or interrupted download never leaves a truncated file at
    ``save``.

    Args:
        url (str): URL of the gzip file to retrieve.
        save (str): Path to save the decompressed file to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zlib.error: If the payload is not a valid gzip stream.
    """
    logger.info("Downloading Data from '%s'", url)
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    target = Path(save)
    partial = target.with_name(target.name + ".part")
    try:
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            with tqdm(total=total_size, unit="B", unit_scale=True) as progress_bar:
                with open(partial, "wb") as fd:
                    # 1 MiB chunks: far fewer Python-level iterations and
                    # write calls than the previous 128-byte chunks.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        progress_bar.update(len(chunk))
                        fd.write(decompressor.decompress(chunk))
                    # Emit whatever the decompressor is still buffering.
                    fd.write(decompressor.flush())
    except BaseException:
        # Also covers Ctrl-C during a long download: drop the partial file
        # so nothing half-written is left behind, then re-raise unchanged.
        partial.unlink(missing_ok=True)
        raise
    # Publish the finished file under its final name in one step.
    partial.replace(target)


if __name__ == "__main__":
if "snakemake" not in globals():
from _helpers import mock_snakemake
Expand All @@ -44,19 +24,23 @@ def retrieve_gzip(url: str, save: str):

# Recommended to use the stable version of PUDL documented here: https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#stable-builds
url_pudl = (
"http://pudl.catalyst.coop.s3.us-west-2.amazonaws.com/stable/pudl.sqlite.gz"
"https://zenodo.org/records/13346011/files/pudl.sqlite.zip?download=1"
)
url_census = (
"https://zenodo.org/records/11292273/files/censusdp1tract.sqlite.gz?download=1"
"https://zenodo.org/records/13346011/files/censusdp1tract.sqlite.zip?download=1"
)
save_pudl = snakemake.output.pudl
save_census = snakemake.output.census

if not Path(save_census).exists():
retrieve_gzip(url_census, save_census)
progress_retrieve(url_census, save_census + ".zip")
with zipfile.ZipFile(save_census + ".zip", "r") as zip_ref:
zip_ref.extractall(Path(save_census).parent)

if not Path(save_pudl).exists():
retrieve_gzip(url_pudl, save_pudl)
progress_retrieve(url_pudl, save_pudl + ".zip")
with zipfile.ZipFile(save_pudl + ".zip", "r") as zip_ref:
zip_ref.extractall(Path(save_pudl).parent)

# Get PUDL FERC Form 714 Parquet
parquet = f"https://zenodo.org/records/11292273/files/out_ferc714__hourly_estimated_state_demand.parquet?download=1"
Expand Down

0 comments on commit 8605cfb

Please sign in to comment.