|
# Data-preprocessing pipeline: load a CSV, impute missing numeric values,
# one-hot/label encode categoricals, split train/test, and standardize features.
# Assumes data.csv has a categorical column 0, numeric columns 1-2, and the
# target in column 3 — TODO confirm against the actual dataset.

# Import the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import the dataset
dataset = pd.read_csv('data.csv')
X = dataset.iloc[:, :-1].values   # all columns except the last (features)
y = dataset.iloc[:, 3].values     # fourth column as the target

# Taking care of missing data.
# NOTE: sklearn.preprocessing.Imputer was removed in scikit-learn 0.22.
# SimpleImputer is the replacement: it has no `axis` argument (it always
# works column-wise) and expects np.nan rather than the string 'NaN'.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])

# Encoding categorical data.
# OneHotEncoder's `categorical_features` argument was removed in 0.22;
# ColumnTransformer is the supported way to one-hot encode column 0 while
# passing the remaining (numeric) columns through untouched.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)
# Densify to a float ndarray (replaces the old `.toarray()` call, which
# would fail when ColumnTransformer returns a dense result).
X = np.asarray(ct.fit_transform(X), dtype=float)
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

# Splitting the dataset into the training set and test set.
# sklearn.cross_validation was removed in 0.20 → sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Feature scaling: fit the scaler on the training set only, then apply the
# same transform to the test set to avoid data leakage.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
0 commit comments