
Commit ca8f37b

update links for datasets I had prior to the website going down
1 parent 48d89e9 commit ca8f37b

File tree

3 files changed: +62 −20 lines changed


torch_geometric_temporal/dataset/metr_la.py

Lines changed: 18 additions & 6 deletions
@@ -9,7 +9,8 @@
 from torch.utils.data.distributed import DistributedSampler
 from typing import Tuple
 from ..signal import StaticGraphTemporalSignal
-
+import requests
+from tqdm import tqdm
 
 class METRLADatasetLoader(object):
     """A traffic forecasting dataset based on Los Angeles
@@ -35,13 +36,24 @@ def __init__(self, raw_data_dir=os.path.join(os.getcwd(), "data"), index: bool =
             self.IndexDataset = IndexDataset
 
     def _download_url(self, url, save_path): # pragma: no cover
-        context = ssl._create_unverified_context()
-        with urllib.request.urlopen(url, context=context) as dl_file:
-            with open(save_path, "wb") as out_file:
-                out_file.write(dl_file.read())
+        # Check if file is in data folder from working directory, otherwise download
+        if not os.path.isfile(
+            os.path.join(self.raw_data_dir, save_path)
+        ):
+            print("Downloading to", save_path, flush=True)
+
+            response = requests.get(url, stream=True)
+            file_size = int(response.headers.get('content-length', 0))
+
+            with open(os.path.join(self.raw_data_dir, save_path), "wb") as file, tqdm(
+                total=file_size, unit="B", unit_scale=True, unit_divisor=1024
+            ) as progress_bar:
+                for chunk in response.iter_content(chunk_size=33554432):
+                    file.write(chunk)
+                    progress_bar.update(len(chunk))
 
     def _read_web_data(self):
-        url = "https://graphmining.ai/temporal_datasets/METR-LA.zip"
+        url = "https://anl.app.box.com/shared/static/plgsv3te0akmqluiuqva34su60nn93c2"
 
         # Check if zip file is in data folder from working directory, otherwise download
         if not os.path.isfile(
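All three files replace the urllib download with an unverified SSL context by a streamed requests download written in 32 MiB chunks (33554432 bytes) with a tqdm progress bar. A minimal standalone sketch of that pattern follows; the download_file name, the raise_for_status() call, and the directory creation are illustrative additions, not part of the committed code.

import os
import requests
from tqdm import tqdm

def download_file(url, save_path, chunk_size=33554432):
    """Stream a remote file to disk with a tqdm progress bar (sketch)."""
    if os.path.isfile(save_path):
        return  # already cached, skip the download
    # Extra safety not present in the commit: create the target folder
    # and fail loudly on HTTP errors.
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)
    response = requests.get(url, stream=True)
    response.raise_for_status()
    file_size = int(response.headers.get("content-length", 0))
    with open(save_path, "wb") as f, tqdm(
        total=file_size, unit="B", unit_scale=True, unit_divisor=1024
    ) as progress_bar:
        for chunk in response.iter_content(chunk_size=chunk_size):
            f.write(chunk)
            progress_bar.update(len(chunk))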

torch_geometric_temporal/dataset/pems_bay.py

Lines changed: 20 additions & 6 deletions
@@ -9,6 +9,8 @@
 from torch.utils.data.distributed import DistributedSampler
 from torch.utils.data import DataLoader
 from typing import Tuple
+import requests
+from tqdm import tqdm
 
 class PemsBayDatasetLoader(object):
     """A traffic forecasting dataset as described in Diffusion Convolution Layer Paper.
@@ -39,13 +41,25 @@ def __init__(self, raw_data_dir: str =os.path.join(os.getcwd(), "data"),index: b
             self.IndexDataset = IndexDataset
 
     def _download_url(self, url, save_path): # pragma: no cover
-        context = ssl._create_unverified_context()
-        with urllib.request.urlopen(url, context=context) as dl_file:
-            with open(save_path, "wb") as out_file:
-                out_file.write(dl_file.read())
-
+
+        # Check if file is in data folder from working directory, otherwise download
+        if not os.path.isfile(
+            os.path.join(self.raw_data_dir, save_path)
+        ):
+            print("Downloading to", save_path, flush=True)
+
+            response = requests.get(url, stream=True)
+            file_size = int(response.headers.get('content-length', 0))
+
+            with open(os.path.join(self.raw_data_dir, save_path), "wb") as file, tqdm(
+                total=file_size, unit="B", unit_scale=True, unit_divisor=1024
+            ) as progress_bar:
+                for chunk in response.iter_content(chunk_size=33554432):
+                    file.write(chunk)
+                    progress_bar.update(len(chunk))
+
     def _read_web_data(self):
-        url = "https://graphmining.ai/temporal_datasets/PEMS-BAY.zip"
+        url = "https://anl.app.box.com/shared/static/7ealcaw862pm12sglyt5g71743eu7s5l"
 
         # Check if zip file is in data folder from working directory, otherwise download
        if not os.path.isfile(
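The PEMS-BAY helper is identical to the METR-LA one apart from the Box URL. Assuming the loader's public interface is otherwise unchanged (a get_dataset() method returning a StaticGraphTemporalSignal), a usage sketch with the new raw_data_dir argument might look like the following; the keyword values are illustrative defaults, not taken from this commit.

from torch_geometric_temporal.dataset import PemsBayDatasetLoader

# The zip is only downloaded when it is not already cached in raw_data_dir.
loader = PemsBayDatasetLoader(raw_data_dir="./data")
dataset = loader.get_dataset(num_timesteps_in=12, num_timesteps_out=12)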

torch_geometric_temporal/dataset/windmilllarge.py

Lines changed: 24 additions & 8 deletions
@@ -5,7 +5,9 @@
 from ..signal import StaticGraphTemporalSignal
 import torch
 from torch.utils.data import DataLoader
-
+import os
+import requests
+from tqdm import tqdm
 
 class WindmillOutputLargeDatasetLoader(object):
     """Hourly energy output of windmills from a European country
@@ -18,21 +20,35 @@ class WindmillOutputLargeDatasetLoader(object):
            Defaults to False.
     """
 
-    def __init__(self, index=False):
+    def __init__(self, raw_data_dir=os.path.join(os.getcwd(), "data"), index=False):
+        self.raw_data_dir = raw_data_dir
         self._read_web_data()
         self.index = index
-
         if index:
             from ..signal.index_dataset import IndexDataset
             self.IndexDataset = IndexDataset
 
 
     def _read_web_data(self):
-        url = "https://graphmining.ai/temporal_datasets/windmill_output.json"
-        context = ssl._create_unverified_context()
-        self._dataset = json.loads(
-            urllib.request.urlopen(url, context=context).read().decode()
-        )
+        if not os.path.isfile(
+            os.path.join(self.raw_data_dir, "windmill_output.json")
+        ):
+            url = "https://anl.app.box.com/shared/static/wgwb75lt3ty3pv5a15y9bilx1mjhcq59"
+            save_path = f"{self.raw_data_dir}/windmill_output.json"
+            print("Downloading to", save_path, flush=True)
+
+            response = requests.get(url, stream=True)
+            file_size = int(response.headers.get('content-length', 0))
+
+            with open(os.path.join(self.raw_data_dir, save_path), "wb") as file, tqdm(
+                total=file_size, unit="B", unit_scale=True, unit_divisor=1024
+            ) as progress_bar:
+                for chunk in response.iter_content(chunk_size=33554432):
+                    file.write(chunk)
+                    progress_bar.update(len(chunk))
+
+        with open(f"{self.raw_data_dir}/windmill_output.json", 'r') as f:
+            self._dataset = json.load(f)
 
     def _get_edges(self):
         self._edges = np.array(self._dataset["edges"]).T
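Unlike the two zip-based loaders, the windmill loader now caches the raw JSON on disk and reads it back with json.load. A quick sanity check of the cached file, relying only on the "edges" key that _get_edges() uses in this diff (other keys would need to be confirmed against the full loader), could look like this sketch.

import json
import numpy as np

# Path assumes the default raw_data_dir of os.path.join(os.getcwd(), "data").
with open("./data/windmill_output.json", "r") as f:
    dataset = json.load(f)

# _get_edges() builds the edge index by transposing the "edges" list.
edges = np.array(dataset["edges"]).T
print("edge index shape:", edges.shape)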
