Skip to content

Commit

Permalink
Merge pull request #7 from AschalewMathewosDamtew/dashboard
Browse files Browse the repository at this point in the history
Dashboard
  • Loading branch information
AschalewMathewosDamtew authored Aug 24, 2024
2 parents 087b6fd + c0e5c90 commit 046256f
Show file tree
Hide file tree
Showing 439 changed files with 230 additions and 1,270 deletions.
5 changes: 5 additions & 0 deletions .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[theme]
primaryColor = '#1c83e1'
backgroundColor = '#ffffff'
secondaryBackgroundColor = '#f0f2f6'
textColor = '#000000'
Binary file added app/__pycache__/data_processing.cpython-312.pyc
Binary file not shown.
Binary file added app/__pycache__/plots.cpython-312.pyc
Binary file not shown.
Binary file added app/__pycache__/utils.cpython-312.pyc
Binary file not shown.
79 changes: 79 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import streamlit as st
from data_processing import load_data, clean_and_prepare_data
from utils import data_quality_check
from plots import plot_time_series, plot_area, create_scatter_plot, create_correlation_analysis
import pandas as pd

# Streamlit entry point: choose a dataset, optionally clean it, then render
# the analyses ticked in the sidebar.

# Columns drawn by the area plot. plot_area() requires an explicit column
# list; the irradiance columns are used here (the same ones the time-series
# plot draws).
AREA_PLOT_COLUMNS = ['GHI', 'DNI', 'DHI']

# Load datasets
datasets = load_data()

# Streamlit UI
st.title("Solar Radiation Data Analysis")

# Sidebar for dataset selection
dataset_name = st.sidebar.selectbox("Select Dataset", ("Benin", "Togo", "Sierra Leone"))
df = datasets[dataset_name]

# Display the dataset summary
st.write(f"### {dataset_name} Dataset Summary")
st.write(df.describe())

# Sidebar: Want to Clean Section
clean_data = st.sidebar.checkbox("Want to Clean Data")

if clean_data:
    # Data Quality Check Before Cleaning
    quality_results_before = data_quality_check(df)
    st.write("#### Data Quality Check Results (Before Cleaning)")
    st.write(pd.DataFrame(quality_results_before).T)

    # Clean Data
    df_cleaned = clean_and_prepare_data(df)

    # Data Quality Check After Cleaning
    quality_results_after = data_quality_check(df_cleaned)
    st.write("#### Data Quality Check Results (After Cleaning)")
    st.write(pd.DataFrame(quality_results_after).T)

    # Display cleaned data
    st.write(f"### {dataset_name} Cleaned Data")
    st.write(df_cleaned.head())

# Sidebar: Analysis Selection
st.sidebar.write("### Which to Analyze?")
analyze_uncleaned = st.sidebar.checkbox("Analyze Uncleaned Data")
analyze_cleaned = st.sidebar.checkbox("Analyze Cleaned Data")

# Determine which dataset to analyze. The uncleaned data wins when both boxes
# are ticked; the cleaned data only exists when cleaning was requested above.
df_to_analyze = None
data_label = ""

if analyze_uncleaned:
    df_to_analyze = df
    data_label = "Uncleaned Data"
elif analyze_cleaned and clean_data:
    df_to_analyze = df_cleaned
    data_label = "Cleaned Data"

if df_to_analyze is None:
    # Nothing usable was selected (or cleaned data was requested without
    # enabling cleaning), so there is nothing to plot.
    st.warning("Please select either 'Analyze Uncleaned Data' or 'Analyze Cleaned Data'.")
else:
    st.sidebar.write(f"### Analysis Options for {data_label}")
    plot_area_selected = st.sidebar.checkbox("Area Plot")
    plot_time_series_selected = st.sidebar.checkbox("Time Series Plot")
    plot_scatter_selected = st.sidebar.checkbox("Scatter Plot")
    plot_correlation_selected = st.sidebar.checkbox("Correlation Analysis")

    # Perform selected analyses (rendered in this fixed order on the page)
    if plot_correlation_selected:
        create_correlation_analysis(df_to_analyze, dataset_name)

    if plot_area_selected:
        # Pass the column list explicitly: plot_area(df, title, columns)
        # has no default for `columns`, so omitting it raises TypeError.
        plot_area(df_to_analyze, dataset_name, AREA_PLOT_COLUMNS)

    if plot_scatter_selected:
        create_scatter_plot(df_to_analyze)

    if plot_time_series_selected:
        plot_time_series(df_to_analyze, dataset_name)
28 changes: 28 additions & 0 deletions app/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
import os


COLUMNS_TO_CHECK = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']


def load_data():
    """Read the three country CSV files into DataFrames.

    File locations are resolved relative to the directory containing this
    module, so the result does not depend on the current working directory.

    Returns:
        dict: Maps the display name of each dataset ("Benin", "Togo",
        "Sierra Leone") to the DataFrame read from its CSV file.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    relative_paths = {
        "Benin": '../data/benin-malanville.csv',
        "Togo": '../data/togo-dapaong_qc.csv',
        "Sierra Leone": '../data/sierraleone-bumbuna.csv',
    }
    return {
        country: pd.read_csv(os.path.join(module_dir, rel_path))
        for country, rel_path in relative_paths.items()
    }

def clean_data(df, columns=None, z_threshold=3):
    """Drop rows with negative readings or z-score outliers.

    Filtering happens in two passes over the checked columns:

    1. Keep only rows where every checked value is ``>= 0``. A missing value
       fails this comparison, so rows with NaN in a checked column are
       dropped as well.
    2. Compute per-column z-scores on the surviving rows and drop any row
       whose absolute z-score reaches ``z_threshold`` in any checked column.

    Args:
        df: Input DataFrame containing at least the checked columns.
        columns: Column names to validate. Defaults to the module-level
            ``COLUMNS_TO_CHECK``.
        z_threshold: Absolute z-score at or above which a value counts as an
            outlier. Defaults to 3.

    Returns:
        A filtered DataFrame (the input is not modified).
    """
    if columns is None:
        columns = COLUMNS_TO_CHECK
    columns = list(columns)

    non_negative = (df[columns] >= 0).all(axis=1)
    df = df[non_negative]

    values = df[columns]
    z_scores = ((values - values.mean()) / values.std()).abs()

    # A z-score is NaN when the column's std is 0 (constant column) or
    # undefined (a single remaining row). Such values cannot be outliers, so
    # flag rows by "z >= threshold" and keep the rest; testing "z < threshold"
    # instead would treat NaN as a failure and drop every row.
    is_outlier = (z_scores >= z_threshold).any(axis=1)
    return df[~is_outlier]

def clean_and_prepare_data(df):
    """Clean ``df`` and index the result by its parsed ``Timestamp`` column.

    Args:
        df: Raw DataFrame with a ``Timestamp`` column plus the columns that
            ``clean_data`` validates.

    Returns:
        A new DataFrame of the rows kept by ``clean_data``, with
        ``Timestamp`` converted to datetime and used as the index. The input
        frame is left untouched.
    """
    # clean_data() returns a boolean-filtered selection of the input; take an
    # explicit copy so the column assignment below writes to an independent
    # frame instead of triggering pandas' SettingWithCopyWarning.
    df_cleaned = clean_data(df).copy()
    df_cleaned['Timestamp'] = pd.to_datetime(df_cleaned['Timestamp'])
    return df_cleaned.set_index('Timestamp')
9 changes: 5 additions & 4 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
st.title("Solar Radiation Data Analysis")

# Load data
df = da.load_data('data/benin-malanville.csv')
df = da.load_data('../data/benin-malanville.csv')


# Sidebar
option = st.sidebar.selectbox("Select Analysis", ("Summary Statistics", "Time Series Analysis",
"Correlation Analysis", "Wind Analysis",
"Correlation Analysis", "Create Wind Plot",
"Temperature Analysis", "Histograms",
"Z-Score Analysis", "Bubble Chart"))

Expand All @@ -31,8 +32,8 @@
da.time_series_analysis(df)
elif option == "Correlation Analysis":
da.correlation_analysis(df)
elif option == "Wind Analysis":
da.wind_analysis(df)
elif option == "Create Wind Analysis":
da.create_polar_plot(df, 'Wind Direction')
elif option == "Temperature Analysis":
da.temperature_analysis(df)
elif option == "Histograms":
Expand Down
42 changes: 42 additions & 0 deletions app/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

def plot_time_series(df, dataset_name):
    """Render a line plot of GHI, DNI, DHI and Tamb in the Streamlit page.

    Args:
        df: DataFrame containing the ``GHI``, ``DNI``, ``DHI`` and ``Tamb``
            columns; its index is used as the x-axis.
        dataset_name: Name shown in the plot title.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        df[['GHI', 'DNI', 'DHI', 'Tamb']].plot(ax=ax)
        ax.set_title(f'Time Series Analysis of GHI, DNI, DHI, and Tamb in {dataset_name}')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed.
        # Streamlit re-runs the script on each interaction, so close the
        # figure once rendered to avoid accumulating them.
        plt.close(fig)

import matplotlib.pyplot as plt
import streamlit as st

def plot_area(df, title, columns=None):
    """Render a stacked area plot of the given columns in the Streamlit page.

    A column containing both positive and negative values cannot be drawn as
    an area plot; in that case an error message is shown instead of a chart.

    Args:
        df: DataFrame holding the data; its index is used as the x-axis.
        title: Title displayed above the plot.
        columns: Column names to plot. Defaults to the irradiance columns
            ``['GHI', 'DNI', 'DHI']`` so that callers passing only
            ``(df, title)`` keep working.
    """
    if columns is None:
        columns = ['GHI', 'DNI', 'DHI']
    columns = list(columns)

    # Check if any column contains both positive and negative values before
    # creating a figure, and report the first offending column.
    for col in columns:
        if df[col].min() < 0 and df[col].max() > 0:
            reason = f"Column '{col}' contains both positive and negative values, which is not allowed in an area plot."
            st.error(f"Error in plotting area chart: {reason}")
            return

    fig, ax = plt.subplots()
    try:
        df[columns].plot(kind='area', ax=ax, alpha=0.5)
        ax.set_title(title)
        plt.xticks(rotation=45)
        plt.tight_layout()
        st.pyplot(fig)
    except ValueError as e:
        # Plotting itself can still reject the data; surface the reason to
        # the user rather than crashing the page.
        st.error(f"Error in plotting area chart: {e}")
    finally:
        # Release the figure whether or not rendering succeeded, otherwise
        # figures pile up in pyplot across Streamlit reruns.
        plt.close(fig)

def create_scatter_plot(df):
    """Render a scatter plot of Tamb against RH in the Streamlit page.

    Args:
        df: DataFrame containing the ``RH`` (x-axis) and ``Tamb`` (y-axis)
            columns.
    """
    fig, ax = plt.subplots()
    try:
        sns.scatterplot(data=df, x='RH', y='Tamb', ax=ax)
        ax.set_title("Scatter Plot: Temperature (Tamb) vs Relative Humidity (RH)")
        st.pyplot(fig)
    finally:
        # Close the figure after rendering so figures do not accumulate in
        # pyplot across Streamlit reruns.
        plt.close(fig)

def create_correlation_analysis(df, dataset_name):
    """Show the correlation matrix of RH, Tamb, TModA and TModB.

    Args:
        df: DataFrame containing the ``RH``, ``Tamb``, ``TModA`` and
            ``TModB`` columns.
        dataset_name: Name shown in the section heading.
    """
    analysed_columns = ['RH', 'Tamb', 'TModA', 'TModB']
    # Compute the matrix first so the heading is only written when the
    # required columns are present.
    corr_matrix = df[analysed_columns].corr()
    heading = f"### Correlation Analysis - {dataset_name}"
    st.write(heading)
    st.write(corr_matrix)
12 changes: 12 additions & 0 deletions app/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pandas as pd
import numpy as np

def data_quality_check(df):
    """Summarise basic data-quality counts for the measurement columns.

    For each of GHI, DNI, DHI, ModA, ModB, WS and WSgust the report counts:

    * missing values,
    * outliers, i.e. values whose absolute z-score exceeds 3,
    * incorrect entries, i.e. negative values.

    Args:
        df: DataFrame containing all seven checked columns.

    Returns:
        pandas.DataFrame with one row per checked column and the columns
        ``Column``, ``Missing Values``, ``Outliers`` and
        ``Incorrect Entries``.
    """
    checked_columns = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
    subset = df[checked_columns]

    # Per-column standardisation; the comparison below yields a boolean frame.
    z_scores = np.abs((subset - subset.mean()) / subset.std())

    missing_counts = subset.isnull().sum().values
    outlier_counts = (z_scores > 3).sum().values
    negative_counts = (subset < 0).sum().values

    report = {
        "Column": checked_columns,
        "Missing Values": missing_counts,
        "Outliers": outlier_counts,
        "Incorrect Entries": negative_counts,
    }
    return pd.DataFrame(report)
Loading

0 comments on commit 046256f

Please sign in to comment.