# Reconstructed from a whitespace-collapsed patch that touched three files:
#   .env.example - TRANSLOC_API_KEY was replaced by BUS_API_URL and
#                  BUS_API_KEY, TRAFFIC_API_KEY was kept, and
#                  GOOGLE_MAPS_API_KEY was added.
#   .gitignore   - __pycache__ was added.
#   Platform/pipeline/extractors/BusExtractor.py - the module below.
"""Extractor that turns live bus positions and stop estimates into a table.

Route Ids
Red: 20
Blue: 21
Green: 17
Emory: 18
Gold: 29
Clough: 28
Night Gold/Clough: 30
Northside Dr. - Atlantic Station: 26
Nara/Science SQ: 22
"""
from datetime import datetime, timedelta
import re
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd
import pytz

from BaseExtractor import ABCBaseExtractor as BaseExtractor

# All wall-clock values emitted by this module are expressed in this zone.
_EASTERN = pytz.timezone('America/New_York')

# Microsoft JSON date literal: "/Date(<epoch ms>)/" with an optional "+hhmm" /
# "-hhmm" suffix; the slashes may arrive backslash-escaped.
_JSON_DATE_RE = re.compile(r'\\?/Date\((\d+)(?:[+-]\d{4})?\)\\?/')

_TIME_OF_DAY_FORMAT = '%H:%M:%S'

# Column order of the frame returned by BusExtractor.extract().
_BUS_COLUMNS = [
    "busid",
    "routeid",
    "latitude",
    "longitude",
    "day_of_week",
    "month",
    "time_of_day",
    "bus_speed",
    "stop_id",
    "eta_to_stop",
]


def _parse_json_date(raw: Any) -> Optional[datetime]:
    """Convert a ``/Date(ms[+-hhmm])/`` literal to an aware Eastern datetime.

    The millisecond count is already measured from the UTC epoch, so the
    optional ``+hhmm`` / ``-hhmm`` suffix is matched but deliberately not
    added to it: applying it would shift the instant a second time.

    Returns ``None`` when *raw* is not a string or does not contain a
    recognisable date literal.
    """
    if not isinstance(raw, str):
        return None
    match = _JSON_DATE_RE.search(raw)
    if match is None:
        return None
    instant = datetime.fromtimestamp(int(match.group(1)) / 1000, tz=pytz.utc)
    return instant.astimezone(_EASTERN)


class BusExtractor(BaseExtractor):
    """Builds one row per reported vehicle, enriched with its next stop.

    Relies on ``self._get`` and ``self.api_key``, which are not defined in
    this module (presumably supplied by the base extractor - verify there).
    """

    def get_bus_data(self) -> List[Dict[str, Any]]:
        """Fetch every vehicle position and return one dict per vehicle.

        Vehicles whose ``TimeStamp`` cannot be parsed are skipped. For each
        remaining vehicle one extra request is made to look up its first stop
        estimate.

        ``eta_to_stop`` in each dict is:
          * ``None``  - no usable estimate was returned for the vehicle;
          * ``0``     - the estimate lies before the vehicle's own timestamp,
                        i.e. the bus already arrived at or passed the stop;
          * ``"HH:MM:SS"`` (Eastern time) otherwise.
        """
        endpoint = "Services/JSONPRelay.svc/GetMapVehiclePoints"
        params = {
            "apiKey": self.api_key,
            "isPublicMap": "true"
        }

        vehicles = self._get(endpoint, params=params)

        bus_data = []
        # `or []` keeps an empty/None payload from crashing the loop.
        for vehicle in vehicles or []:
            timestamp = _parse_json_date(vehicle["TimeStamp"])
            if timestamp is None:
                continue

            bus_id = vehicle["VehicleID"]
            stop_id, eta = self._next_stop_estimate(bus_id)

            # Compare full datetimes rather than "HH:MM:SS" strings so that a
            # missing estimate does not raise and an estimate just after
            # midnight is not mistaken for one in the past.
            if eta is None:
                eta_to_stop = None
            elif eta < timestamp:
                eta_to_stop = 0
            else:
                eta_to_stop = eta.strftime(_TIME_OF_DAY_FORMAT)

            bus_data.append({
                "busid": bus_id,
                "routeid": vehicle["RouteID"],
                "latitude": vehicle["Latitude"],
                "longitude": vehicle["Longitude"],
                "day_of_week": timestamp.strftime('%A'),
                "month": timestamp.strftime('%m'),
                "time_of_day": timestamp.strftime(_TIME_OF_DAY_FORMAT),
                "bus_speed": vehicle["GroundSpeed"],
                "stop_id": stop_id,
                "eta_to_stop": eta_to_stop,
            })

        return bus_data

    def _next_stop_estimate(
        self, vehicle_id: Any
    ) -> Tuple[Optional[Any], Optional[datetime]]:
        """Return ``(stop_id, eta)`` for the vehicle's first stop estimate.

        ``eta`` is an aware datetime in Eastern time. ``(None, None)`` is
        returned when the response is empty, carries no estimates, or the
        estimate time cannot be parsed.
        """
        endpoint = "Services/JSONPRelay.svc/GetVehicleRouteStopEstimates"
        params = {
            "vehicleIdStrings": vehicle_id,
        }

        response = self._get(endpoint, params=params)
        if not response:
            return None, None

        estimates = response[0].get("Estimates")
        if not estimates:
            return None, None

        first = estimates[0]
        eta = _parse_json_date(first.get("EstimateTime"))
        if eta is None:
            return None, None
        return first.get("RouteStopID"), eta

    def get_stop_info(self, vehicle_id: Any) -> Tuple[Optional[Any], Optional[str]]:
        """Return ``(stop_id, "HH:MM:SS")`` for the vehicle's next stop.

        The time is the first estimate's arrival time of day in Eastern time.
        ``(None, None)`` is returned when no usable estimate is available.
        """
        stop_id, eta = self._next_stop_estimate(vehicle_id)
        if eta is None:
            return None, None
        return stop_id, eta.strftime(_TIME_OF_DAY_FORMAT)

    def extract(self) -> pd.DataFrame:
        """Return the current vehicle snapshot as a DataFrame.

        The columns are fixed by ``_BUS_COLUMNS`` so that a poll with no
        vehicles still yields a frame with the expected schema.
        """
        bus_data = self.get_bus_data()
        return pd.DataFrame(bus_data, columns=_BUS_COLUMNS)