Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dashboard #7

Merged
merged 3 commits into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 5 additions & 0 deletions .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[theme]
primaryColor = '#1c83e1'
backgroundColor = '#ffffff'
secondaryBackgroundColor = '#f0f2f6'
textColor = '#000000'
Binary file added app/__pycache__/data_processing.cpython-312.pyc
Binary file not shown.
Binary file added app/__pycache__/plots.cpython-312.pyc
Binary file not shown.
Binary file added app/__pycache__/utils.cpython-312.pyc
Binary file not shown.
79 changes: 79 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import streamlit as st
from data_processing import load_data, clean_and_prepare_data
from utils import data_quality_check
from plots import plot_time_series, plot_area, create_scatter_plot, create_correlation_analysis
import pandas as pd

# Streamlit entry point: pick a dataset, optionally clean it, then render the
# analyses ticked in the sidebar. Streamlit re-runs this script top to bottom
# on every widget interaction.

# Load datasets
datasets = load_data()

# Streamlit UI
st.title("Solar Radiation Data Analysis")

# Sidebar for dataset selection
dataset_name = st.sidebar.selectbox("Select Dataset", ("Benin", "Togo", "Sierra Leone"))
df = datasets[dataset_name]

# Display the dataset summary
st.write(f"### {dataset_name} Dataset Summary")
st.write(df.describe())

# Sidebar: Want to Clean Section
clean_data = st.sidebar.checkbox("Want to Clean Data")

# Only bound when cleaning is requested; kept as None otherwise so later code
# can never hit an undefined name.
df_cleaned = None

if clean_data:
    # Data Quality Check Before Cleaning
    quality_results_before = data_quality_check(df)
    st.write("#### Data Quality Check Results (Before Cleaning)")
    st.write(pd.DataFrame(quality_results_before).T)

    # Clean Data
    df_cleaned = clean_and_prepare_data(df)

    # Data Quality Check After Cleaning
    quality_results_after = data_quality_check(df_cleaned)
    st.write("#### Data Quality Check Results (After Cleaning)")
    st.write(pd.DataFrame(quality_results_after).T)

    # Display cleaned data
    st.write(f"### {dataset_name} Cleaned Data")
    st.write(df_cleaned.head())

# Sidebar: Analysis Selection
st.sidebar.write("### Which to Analyze?")
analyze_uncleaned = st.sidebar.checkbox("Analyze Uncleaned Data")
analyze_cleaned = st.sidebar.checkbox("Analyze Cleaned Data")

# Determine which dataset to analyze (uncleaned wins when both are ticked)
df_to_analyze = None
data_label = ""

if analyze_uncleaned:
    df_to_analyze = df
    data_label = "Uncleaned Data"
elif analyze_cleaned and clean_data:
    df_to_analyze = df_cleaned
    data_label = "Cleaned Data"

if df_to_analyze is not None:
    # Display analysis options for the chosen dataset
    st.sidebar.write(f"### Analysis Options for {data_label}")
    plot_area_selected = st.sidebar.checkbox("Area Plot")
    plot_time_series_selected = st.sidebar.checkbox("Time Series Plot")
    plot_scatter_selected = st.sidebar.checkbox("Scatter Plot")
    plot_correlation_selected = st.sidebar.checkbox("Correlation Analysis")

    # Perform selected analyses
    if plot_correlation_selected:
        create_correlation_analysis(df_to_analyze, dataset_name)

    if plot_area_selected:
        # plot_area takes (df, title, columns); the columns argument was
        # previously omitted, which raised a TypeError when this box was ticked.
        plot_area(df_to_analyze, dataset_name, ['GHI', 'DNI', 'DHI'])

    if plot_scatter_selected:
        create_scatter_plot(df_to_analyze)

    if plot_time_series_selected:
        plot_time_series(df_to_analyze, dataset_name)
elif analyze_cleaned:
    # "Analyze Cleaned Data" is ticked but no cleaned frame exists yet; the
    # generic warning below would tell the user to tick a box they already did.
    st.warning("Enable 'Want to Clean Data' in the sidebar to analyze the cleaned dataset.")
else:
    st.warning("Please select either 'Analyze Uncleaned Data' or 'Analyze Cleaned Data'.")
28 changes: 28 additions & 0 deletions app/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
import os


COLUMNS_TO_CHECK = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']


def load_data():
    """Read the three country CSV files located relative to this module.

    Returns:
        dict: display name ("Benin", "Togo", "Sierra Leone") mapped to the
        DataFrame produced by ``pd.read_csv`` for that country's file.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    relative_paths = {
        "Benin": '../data/benin-malanville.csv',
        "Togo": '../data/togo-dapaong_qc.csv',
        "Sierra Leone": '../data/sierraleone-bumbuna.csv',
    }
    return {
        country: pd.read_csv(os.path.join(module_dir, rel_path))
        for country, rel_path in relative_paths.items()
    }

def clean_data(df):
    """Filter out rows with invalid or outlying values in ``COLUMNS_TO_CHECK``.

    Two passes are applied, in order:

    1. Keep only rows where every checked column is ``>= 0``. Missing values
       compare as False, so rows with a NaN in a checked column are dropped too.
    2. Keep only rows whose absolute z-score is below 3 in every checked
       column, with mean and standard deviation computed on the rows that
       survived pass 1.

    Args:
        df: DataFrame containing at least the columns in ``COLUMNS_TO_CHECK``.

    Returns:
        A filtered DataFrame; the input frame is not modified.
    """
    non_negative = (df[COLUMNS_TO_CHECK] >= 0).all(axis=1)
    df = df[non_negative]

    checked = df[COLUMNS_TO_CHECK]
    z_scores = ((checked - checked.mean()) / checked.std()).abs()
    # After pass 1 the checked columns hold no NaN, so a NaN z-score can only
    # come from a zero (or undefined, single-row) standard deviation. Such
    # values sit exactly on the mean and are not outliers; without this,
    # `NaN < 3` is False and one constant column would discard every row.
    z_scores = z_scores.fillna(0)
    return df[(z_scores < 3).all(axis=1)]

def clean_and_prepare_data(df):
    """Clean ``df`` and index the result by its parsed ``Timestamp`` column.

    Args:
        df: DataFrame with a ``Timestamp`` column plus the columns that
            ``clean_data`` filters on.

    Returns:
        A new DataFrame of the cleaned rows, indexed by ``Timestamp``
        converted with ``pd.to_datetime``. The input frame is left untouched.
    """
    # clean_data returns a boolean-filtered selection of df; take an explicit
    # copy before assigning a column so pandas does not emit
    # SettingWithCopyWarning.
    df_cleaned = clean_data(df).copy()
    df_cleaned['Timestamp'] = pd.to_datetime(df_cleaned['Timestamp'])
    return df_cleaned.set_index('Timestamp')
9 changes: 5 additions & 4 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
st.title("Solar Radiation Data Analysis")

# Load data
df = da.load_data('data/benin-malanville.csv')
df = da.load_data('../data/benin-malanville.csv')


# Sidebar
option = st.sidebar.selectbox("Select Analysis", ("Summary Statistics", "Time Series Analysis",
"Correlation Analysis", "Wind Analysis",
"Correlation Analysis", "Create Wind Plot",
"Temperature Analysis", "Histograms",
"Z-Score Analysis", "Bubble Chart"))

Expand All @@ -31,8 +32,8 @@
da.time_series_analysis(df)
elif option == "Correlation Analysis":
da.correlation_analysis(df)
elif option == "Wind Analysis":
da.wind_analysis(df)
elif option == "Create Wind Analysis":
da.create_polar_plot(df, 'Wind Direction')
elif option == "Temperature Analysis":
da.temperature_analysis(df)
elif option == "Histograms":
Expand Down
42 changes: 42 additions & 0 deletions app/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

def plot_time_series(df, dataset_name):
    """Render a line plot of GHI, DNI, DHI and Tamb in the Streamlit app.

    Args:
        df: DataFrame containing the columns 'GHI', 'DNI', 'DHI' and 'Tamb';
            its index is used as the x-axis.
        dataset_name: Name interpolated into the plot title.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        df[['GHI', 'DNI', 'DHI', 'Tamb']].plot(ax=ax)
        # Title the axes directly instead of relying on pyplot's "current figure".
        ax.set_title(f'Time Series Analysis of GHI, DNI, DHI, and Tamb in {dataset_name}')
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed; without
        # this each Streamlit rerun leaves another figure in memory.
        plt.close(fig)

import matplotlib.pyplot as plt
import streamlit as st

def plot_area(df, title, columns=None):
    """Render a stacked area plot of ``columns`` in the Streamlit app.

    Columns holding both negative and positive values are rejected up front
    with an ``st.error`` message. A ``ValueError`` raised while plotting is
    reported through ``st.error`` as well rather than propagated.

    Args:
        df: DataFrame holding the data to plot.
        title: Title placed on the axes.
        columns: Column names to plot. Defaults to ``['GHI', 'DNI', 'DHI']``
            so that callers passing only ``(df, title)`` keep working.
    """
    if columns is None:
        columns = ['GHI', 'DNI', 'DHI']

    # Check if any column contains both positive and negative values
    mixed_sign = next(
        (col for col in columns if df[col].min() < 0 and df[col].max() > 0),
        None,
    )
    if mixed_sign is not None:
        st.error(
            "Error in plotting area chart: "
            f"Column '{mixed_sign}' contains both positive and negative values, "
            "which is not allowed in an area plot."
        )
        return

    # Create area plot
    fig, ax = plt.subplots()
    try:
        df[columns].plot(kind='area', ax=ax, alpha=0.5)
        ax.set_title(title)
        plt.xticks(rotation=45)
        plt.tight_layout()
        st.pyplot(fig)
    except ValueError as e:
        # Handle the ValueError and provide an appropriate message
        st.error(f"Error in plotting area chart: {e}")
    finally:
        # Release the pyplot-registered figure on success and failure alike so
        # figures do not pile up across Streamlit reruns.
        plt.close(fig)

def create_scatter_plot(df):
    """Render a scatter plot of Tamb against RH in the Streamlit app.

    Args:
        df: DataFrame containing the columns 'RH' (x-axis) and 'Tamb' (y-axis).
    """
    fig, ax = plt.subplots()
    try:
        sns.scatterplot(data=df, x='RH', y='Tamb', ax=ax)
        ax.set_title("Scatter Plot: Temperature (Tamb) vs Relative Humidity (RH)")
        st.pyplot(fig)
    finally:
        # Close the figure once rendered; pyplot otherwise keeps it alive and
        # every rerun of the app would add another one.
        plt.close(fig)

def create_correlation_analysis(df, dataset_name):
    """Write the correlation matrix of RH, Tamb, TModA and TModB to the app.

    Args:
        df: DataFrame containing the columns 'RH', 'Tamb', 'TModA', 'TModB'.
        dataset_name: Name interpolated into the section heading.
    """
    selected = df[['RH', 'Tamb', 'TModA', 'TModB']]
    corr_matrix = selected.corr()
    heading = f"### Correlation Analysis - {dataset_name}"
    st.write(heading)
    st.write(corr_matrix)
12 changes: 12 additions & 0 deletions app/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pandas as pd
import numpy as np

def data_quality_check(df):
    """Summarise missing, outlying and negative values per checked column.

    Args:
        df: DataFrame containing the columns 'GHI', 'DNI', 'DHI', 'ModA',
            'ModB', 'WS' and 'WSgust'.

    Returns:
        DataFrame with one row per checked column and the columns
        "Column", "Missing Values", "Outliers" (count of values whose
        absolute z-score exceeds 3) and "Incorrect Entries" (count of
        values below zero).
    """
    COLUMNS_TO_CHECK = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
    subset = df[COLUMNS_TO_CHECK]

    missing_counts = subset.isnull().sum().values

    # z-score of every value relative to its own column's mean and std
    z_scores = (subset - subset.mean()) / subset.std()
    outlier_counts = (np.abs(z_scores) > 3).sum().values

    negative_counts = (subset < 0).sum().values

    report = pd.DataFrame({
        "Column": COLUMNS_TO_CHECK,
        "Missing Values": missing_counts,
        "Outliers": outlier_counts,
        "Incorrect Entries": negative_counts,
    })
    return report
Loading