Skip to content

Commit fe216de

Browse files
authored
Add file
1 parent 79bfa1c commit fe216de

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

data_preprocessing.py

+35
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
# Import the libraries
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
import pandas as pd
5+
6+
# Import the dataset
dataset = pd.read_csv('data.csv')
# Features are every column except the last; the target is the last column.
# Selecting the target with -1 (rather than a hard-coded index) keeps X and y
# disjoint even if data.csv gains or loses feature columns.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
10+
11+
# Taking care of missing data
# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
# SimpleImputer is its replacement and always imputes column by column
# (the behaviour the old `axis=0` argument selected).
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# Replace missing entries in columns 1 and 2 with the mean of each column.
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])
16+
17+
# Encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Column 0 is first mapped to integer labels, as before, so that
# `labelencoder_X` stays available to later code.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
# OneHotEncoder's `categorical_features` argument was removed in
# scikit-learn 0.22; a ColumnTransformer now selects the column to encode.
# NOTE: ColumnTransformer fits a clone, so the fitted encoder is
# column_transformer.named_transformers_['onehot'], not `onehotencoder`.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    [('onehot', onehotencoder, [0])],
    remainder='passthrough',  # keep the other columns, placed after the dummies
    sparse_threshold=0,  # always return a dense array (replaces .toarray())
)
# The passthrough columns come from an object array, so cast to float to
# match the numeric matrix the old encoder produced.
X = np.asarray(column_transformer.fit_transform(X), dtype=float)
# Encode the target labels as integers.
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
25+
26+
# Splitting the dataset into the training set and test set
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
29+
30+
# Feature scaling
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so the test data does not influence the fit.
sc_X = StandardScaler().fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)
35+

0 commit comments

Comments
 (0)