-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheda.py
74 lines (64 loc) · 2.37 KB
/
eda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import streamlit as st
st.title("The EDA Page")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
st.set_option('deprecation.showPyplotGlobalUse', False)
plt.style.use('fivethirtyeight')
#load and clean the data
data=pd.read_csv("data/data.csv")
data.drop('Unnamed: 0', axis=1, inplace=True)
zerofiller=lambda x:x.replace(0, x.median())
cols=data.columns[1:6]
data[cols]=data[cols].apply(zerofiller, 0)
# encode the categorical outcome variable
df=data.copy()
d={"Yes":1, 'No':0}
df['Outcome']=df['Outcome'].map(d)
def view_data(data):
st.write(df.head(10))
st.pyplot()
def histograms(df):
df.hist()
plt.tight_layout()
st.pyplot()
def barplot(data, feature):
print("Bar plot of the variable ", feature)
plt.figure(figsize=(10,7))
ax=sns.countplot(data=data, x=feature, color='green')
for p in ax.patches:
x=p.get_bbox().get_points()[:,0]
y=p.get_bbox().get_points()[1,1]
ax.annotate("{:.2g}%".format(100.*y/len(df)), (x.mean(), y), ha='center', va='bottom')
st.pyplot()
def boxplot_histplot(data, feature, bins=None, figsize=(12,7)):
print('Boxplot and Histplot for ', feature)
fig, (ax_box, ax_hist)=plt.subplots(
nrows=2,
sharex=True,
gridspec_kw = {"height_ratios":(0.25, 0.75)},
figsize=figsize
)
sns.boxplot(data=data, x=feature, color='violet', ax=ax_box, showmeans=True)
sns.histplotplot(data=data, x=feature, ax=ax_hist, bins=bins) if bins else sns.histplot(data\
=data, x=feature, ax=ax_hist)
ax_hist.axvline(data[feature].mean(), color='green', linestyle='--')
ax_hist.axvline(data[feature].median(), color='black', linestyle='-')
st.pyplot()
st.sidebar.subheader("Choose the Plot")
plot=st.sidebar.selectbox('plot', ('Data', 'Histograms', 'Barchart', 'Boxplot_Scatterplot', 'Correlations'))
if st.sidebar.button('PLOT'):
if plot=='Data':
view_data(df)
if plot=='Histograms':
histograms(df)
if plot=='Barchart':
barplot(df, feature='Outcome')
if plot=='Boxplot_Scatterplot':
for col in df.select_dtypes(exclude='O').columns:
boxplot_histplot(df, col)
if plot=='Correlations':
plt.figure(figsize=(12,7))
sns.heatmap(df.corr(), cmap='Spectral', vmax=+1, vmin=-1, annot=True)
st.pyplot()