4
4
5
5
import logging
6
6
import zlib
7
+ import zipfile
7
8
from pathlib import Path
8
9
9
10
import requests
12
13
13
14
logger = logging .getLogger (__name__ )
14
15
15
-
16
- def retrieve_gzip (url : str , save : str ):
17
- """
18
- Retrieves a gzip file from a URL and saves it to a local file.
19
-
20
- Args:
21
- url (str): URL of the gzip file to retrieve.
22
- save (str): Path to save the gzip file to.
23
- """
24
- logger .info (f"Downloading Data from '{ url } '" )
25
- d = zlib .decompressobj (16 + zlib .MAX_WBITS )
26
- with requests .get (url , stream = True ) as r :
27
- r .raise_for_status ()
28
- total_size = int (r .headers .get ("content-length" , 0 ))
29
- with tqdm (total = total_size , unit = "B" , unit_scale = True ) as progress_bar :
30
- with open (save , "wb" ) as fd :
31
- for chunk in r .iter_content (chunk_size = 128 ):
32
- progress_bar .update (len (chunk ))
33
- fd .write (d .decompress (chunk ))
34
-
35
-
36
16
if __name__ == "__main__" :
37
17
if "snakemake" not in globals ():
38
18
from _helpers import mock_snakemake
@@ -44,19 +24,23 @@ def retrieve_gzip(url: str, save: str):
44
24
45
25
# Recommended to use the stable version of PUDL documented here: https://catalystcoop-pudl.readthedocs.io/en/latest/data_access.html#stable-builds
46
26
url_pudl = (
47
- "http://pudl.catalyst.coop.s3.us-west-2.amazonaws.com/stable/pudl.sqlite.gz"
27
+ "https://zenodo.org/records/13346011/files/pudl.sqlite.zip?download=1"
48
28
)
49
29
url_census = (
50
- "https://zenodo.org/records/11292273/files/censusdp1tract.sqlite.gz?download=1"
30
+ "https://zenodo.org/records/13346011/files/censusdp1tract.sqlite.zip?download=1"
51
31
)
52
32
save_pudl = snakemake .output .pudl
53
33
save_census = snakemake .output .census
54
34
55
35
if not Path (save_census ).exists ():
56
- retrieve_gzip (url_census , save_census )
36
+ progress_retrieve (url_census , save_census + ".zip" )
37
+ with zipfile .ZipFile (save_census + ".zip" , "r" ) as zip_ref :
38
+ zip_ref .extractall (Path (save_census ).parent )
57
39
58
40
if not Path (save_pudl ).exists ():
59
- retrieve_gzip (url_pudl , save_pudl )
41
+ progress_retrieve (url_pudl , save_pudl + ".zip" )
42
+ with zipfile .ZipFile (save_pudl + ".zip" , "r" ) as zip_ref :
43
+ zip_ref .extractall (Path (save_pudl ).parent )
60
44
61
45
# Get PUDL FERC Form 714 Parquet
62
46
parquet = f"https://zenodo.org/records/11292273/files/out_ferc714__hourly_estimated_state_demand.parquet?download=1"
0 commit comments