diff --git a/.gitignore b/.gitignore
index f21726c76..ef705bca9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,5 @@
npm-debug.log*
yarn-debug.log*
yarn-error.log*
+
+# IDE settings and Jupyter autosave copies (local artifacts, not source)
+.idea
+.ipynb_checkpoints/
diff --git a/Analysis/.ipynb_checkpoints/Stinger_LSTM-checkpoint.ipynb b/Analysis/.ipynb_checkpoints/Stinger_LSTM-checkpoint.ipynb
new file mode 100644
index 000000000..e3ff40a09
--- /dev/null
+++ b/Analysis/.ipynb_checkpoints/Stinger_LSTM-checkpoint.ipynb
@@ -0,0 +1,318 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "8a77807f92f26ee",
+ "metadata": {},
+ "source": [
+ "1. Setup + Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "f0f09687eb0be6fa",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-11-01T22:47:31.485735Z",
+ "start_time": "2025-11-01T22:47:31.481782Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import tensorflow as tf\n",
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a4683df52874161",
+ "metadata": {},
+ "source": [
+ "2. Data Preparation\n",
+ " - loading dataset\n",
+ " - preprocessing\n",
+ " - creating sequences\n",
+ " - splitting data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ac5ec41278b7be4e",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-11-01T22:58:20.940274Z",
+ "start_time": "2025-11-01T22:58:20.891444Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(2878, 7)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Distance Between Stations (km) | \n",
+ " Weather Conditions | \n",
+ " Day of the Week | \n",
+ " Time of Day | \n",
+ " Train Type | \n",
+ " Historical Delay (min) | \n",
+ " Route Congestion | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 100 | \n",
+ " Clear | \n",
+ " Monday | \n",
+ " Morning | \n",
+ " Express | \n",
+ " 5 | \n",
+ " Low | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 150 | \n",
+ " Rainy | \n",
+ " Tuesday | \n",
+ " Afternoon | \n",
+ " Superfast | \n",
+ " 10 | \n",
+ " Medium | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 200 | \n",
+ " Foggy | \n",
+ " Wednesday | \n",
+ " Evening | \n",
+ " Local | \n",
+ " 15 | \n",
+ " High | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 50 | \n",
+ " Clear | \n",
+ " Thursday | \n",
+ " Night | \n",
+ " Express | \n",
+ " 2 | \n",
+ " Low | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 75 | \n",
+ " Rainy | \n",
+ " Friday | \n",
+ " Morning | \n",
+ " Superfast | \n",
+ " 8 | \n",
+ " Medium | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Distance Between Stations (km) Weather Conditions Day of the Week \\\n",
+ "0 100 Clear Monday \n",
+ "1 150 Rainy Tuesday \n",
+ "2 200 Foggy Wednesday \n",
+ "3 50 Clear Thursday \n",
+ "4 75 Rainy Friday \n",
+ "\n",
+ " Time of Day Train Type Historical Delay (min) Route Congestion \n",
+ "0 Morning Express 5 Low \n",
+ "1 Afternoon Superfast 10 Medium \n",
+ "2 Evening Local 15 High \n",
+ "3 Night Express 2 Low \n",
+ "4 Morning Superfast 8 Medium "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read the raw delay records; the relative path is resolved against the kernel's working directory.\n",
+ "df = pd.read_csv(\"data/train_delay_data.csv\")\n",
+ "\n",
+ "# Report (rows, columns) on stdout, then let the cell's last expression render a preview of the frame.\n",
+ "print(df.shape)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac335af4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_lstm_sequences(df, steps):\n",
+ "    \"\"\"Turn a time-ordered DataFrame into LSTM-ready rolling windows.\n",
+ "\n",
+ "    Inputs:\n",
+ "    - df: pandas DataFrame, sorted by time (oldest -> newest).\n",
+ "          The last column is the target to predict; every other column\n",
+ "          is an input feature. Values are used as-is: no encoding or\n",
+ "          scaling happens here.\n",
+ "    - steps: number of consecutive past rows in each window (must be >= 1),\n",
+ "          e.g. steps=24 uses rows i..i+23 to predict the target of row i+24.\n",
+ "\n",
+ "    Outputs:\n",
+ "    - X: NumPy array of shape (num_samples, steps, num_features)\n",
+ "    - y: NumPy array of shape (num_samples,) holding the target of the row\n",
+ "         that immediately follows each window.\n",
+ "    num_samples is len(df) - steps; when that is not positive, empty arrays\n",
+ "    with the shapes above are returned.\n",
+ "\n",
+ "    Raises:\n",
+ "    - ValueError: if steps is smaller than 1.\n",
+ "    \"\"\"\n",
+ "    if steps < 1:\n",
+ "        raise ValueError(f\"steps must be a positive integer, got {steps!r}\")\n",
+ "\n",
+ "    values = df.to_numpy()  # one 2-D array: feature columns first, target last\n",
+ "    num_samples = len(values) - steps\n",
+ "\n",
+ "    if num_samples <= 0:\n",
+ "        # Too few rows for a single window: np.array([]) would be 1-D, so build\n",
+ "        # the empty results explicitly to keep the documented ranks.\n",
+ "        num_features = max(values.shape[1] - 1, 0)\n",
+ "        empty_X = np.empty((0, steps, num_features), dtype=values.dtype)\n",
+ "        empty_y = np.empty((0,), dtype=values.dtype)\n",
+ "        return empty_X, empty_y\n",
+ "\n",
+ "    # Window i covers rows [i, i + steps) of every column except the last ...\n",
+ "    X = np.array([values[i:i + steps, :-1] for i in range(num_samples)])\n",
+ "    # ... and is paired with the last-column value of the row right after it.\n",
+ "    y = np.array([values[i + steps, -1] for i in range(num_samples)])\n",
+ "\n",
+ "    return X, y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6714067e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def split_lstm_data(X, y, train_ratio=0.7, val_ratio=0.15):\n",
+ "    \"\"\"Split sequence data into contiguous train, validation, and test sets.\n",
+ "\n",
+ "    The split is positional (no shuffling): the first block is training,\n",
+ "    the next is validation, and whatever remains is test.\n",
+ "\n",
+ "    Inputs:\n",
+ "    - X, y: arrays from make_lstm_sequences(); must have the same length.\n",
+ "    - train_ratio: fraction of samples used for training (default 0.7).\n",
+ "    - val_ratio: fraction of samples used for validation (default 0.15).\n",
+ "      The remainder (0.15 with the defaults) becomes test data.\n",
+ "\n",
+ "    Outputs:\n",
+ "    - X_train, y_train\n",
+ "    - X_val, y_val\n",
+ "    - X_test, y_test\n",
+ "\n",
+ "    Raises:\n",
+ "    - ValueError: if X and y differ in length, if a ratio is negative,\n",
+ "      or if the two ratios add up to more than 1.\n",
+ "    \"\"\"\n",
+ "    n = len(X)\n",
+ "    if len(y) != n:\n",
+ "        raise ValueError(f\"X and y must have the same length, got {n} and {len(y)}\")\n",
+ "    # Small tolerance so float sums such as 0.7 + 0.3 are never rejected by rounding.\n",
+ "    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:\n",
+ "        raise ValueError(\n",
+ "            \"train_ratio and val_ratio must be non-negative and sum to at most 1, \"\n",
+ "            f\"got {train_ratio!r} and {val_ratio!r}\"\n",
+ "        )\n",
+ "\n",
+ "    # Boundaries are truncated toward zero, so rounding leftovers land in the test set.\n",
+ "    train_end = int(n * train_ratio)\n",
+ "    val_end = int(n * (train_ratio + val_ratio))\n",
+ "\n",
+ "    X_train, y_train = X[:train_end], y[:train_end]\n",
+ "    X_val, y_val = X[train_end:val_end], y[train_end:val_end]\n",
+ "    X_test, y_test = X[val_end:], y[val_end:]\n",
+ "\n",
+ "    return X_train, y_train, X_val, y_val, X_test, y_test"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8883303e2711cfa",
+ "metadata": {},
+ "source": [
+ "3. Model Setup\n",
+ " - Define LSTM architecture\n",
+ " - Initialize model, optimizer, and loss function\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6e20ffcba1ebc7c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40be92eea5ae33e9",
+ "metadata": {},
+ "source": [
+ "4. Training Loop\n",
+ " - Train model over epochs\n",
+ " - Track loss and accuracy\n",
+ " - Save best model (optional)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "85429d2f7577e9b5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4f0e8f82bc3d5508",
+ "metadata": {},
+ "source": [
+ "5. Evaluation & Results\n",
+ " - Evaluate on test set\n",
+ " - Plot training/validation loss curves\n",
+ " - Visualize predicted vs actual delays\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Analysis/Stinger_LSTM.ipynb b/Analysis/Stinger_LSTM.ipynb
index b14d4bc12..e3ff40a09 100644
--- a/Analysis/Stinger_LSTM.ipynb
+++ b/Analysis/Stinger_LSTM.ipynb
@@ -4,10 +4,13 @@
"cell_type": "markdown",
"id": "8a77807f92f26ee",
"metadata": {},
- "source": "1. Setup + Imports"
+ "source": [
+ "1. Setup + Imports"
+ ]
},
{
"cell_type": "code",
+ "execution_count": 3,
"id": "f0f09687eb0be6fa",
"metadata": {
"ExecuteTime": {
@@ -15,41 +18,36 @@
"start_time": "2025-11-01T22:47:31.481782Z"
}
},
+ "outputs": [],
"source": [
"import os\n",
"import tensorflow as tf\n",
"import pandas as pd\n",
"import numpy as np"
- ],
- "outputs": [],
- "execution_count": 3
+ ]
},
{
- "metadata": {},
"cell_type": "markdown",
+ "id": "8a4683df52874161",
+ "metadata": {},
"source": [
"2. Data Preparation\n",
" - loading dataset\n",
" - preprocessing\n",
" - creating sequences\n",
" - splitting data"
- ],
- "id": "8a4683df52874161"
+ ]
},
{
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ac5ec41278b7be4e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-01T22:58:20.940274Z",
"start_time": "2025-11-01T22:58:20.891444Z"
}
},
- "cell_type": "code",
- "source": [
- "df = pd.read_csv(\"data/train_delay_data.csv\")\n",
- "print(df.shape)\n",
- "df.head()\n"
- ],
- "id": "ac5ec41278b7be4e",
"outputs": [
{
"name": "stdout",
@@ -60,21 +58,6 @@
},
{
"data": {
- "text/plain": [
- " Distance Between Stations (km) Weather Conditions Day of the Week \\\n",
- "0 100 Clear Monday \n",
- "1 150 Rainy Tuesday \n",
- "2 200 Foggy Wednesday \n",
- "3 50 Clear Thursday \n",
- "4 75 Rainy Friday \n",
- "\n",
- " Time of Day Train Type Historical Delay (min) Route Congestion \n",
- "0 Morning Express 5 Low \n",
- "1 Afternoon Superfast 10 Medium \n",
- "2 Evening Local 15 High \n",
- "3 Night Express 2 Low \n",
- "4 Morning Superfast 8 Medium "
- ],
"text/html": [
"\n",
"