 from sklearn.model_selection import train_test_split
 from sklearn.metrics import confusion_matrix, classification_report
 from sklearn.preprocessing import MinMaxScaler
+
 # module initializations
 sns.set()
 HERE = os.path.abspath(os.path.dirname(__file__))
@@ -59,6 +60,7 @@ def prepare_data():
     - split dependent / independent variables
     - split training / test data sets
     """
+    print("Preparing data sets")
     original_db = pd.read_csv(os.path.join(HERE, "data", "reservations-db.csv"))
 
     # need to be careful to only work with a **COPY** of the original
@@ -86,71 +88,88 @@ def prepare_data():
     return train_test_split(x, y, test_size=0.30, stratify=y, random_state=1)
 
 
-def linear_kernal():
+def linear_Kernel():
     """
     - create training and test data sets
     - create a Logistic Regression model
     - train the model
     - generate confusion matrix and f-score for the training set
     - generate confusion matrix and f-score for the test set
     """
+    print("Linear Kernel")
     x_train, x_test, y_train, y_test = prepare_data()
 
-    scaling = MinMaxScaler(feature_range=(-1,1)).fit(x_train)
+    print("- scaling")
+    scaling = MinMaxScaler(feature_range=(-1, 1)).fit(x_train)
     x_train_scaled = scaling.transform(x_train)
     x_test_scaled = scaling.transform(x_test)
 
-    # Linear kernal or linear decision boundary
-    svm = SVC(kernel='linear', probability=True)
-    model = svm.fit(X=x_train_scaled, y=y_train)
+    # Linear Kernel or linear decision boundary
+    print("- training")
+    svm = SVC(kernel="linear", probability=True)
+    model = svm.fit(X=x_train_scaled, y=y_train)
 
+    print("- modeling on training data")
     y_pred_train_svm = model.predict(x_train_scaled)
     metrics_score(y_train, y_pred_train_svm)
 
+    print("- modeling on test data")
     y_pred_test_svm = model.predict(x_test_scaled)
     metrics_score(y_test, y_pred_test_svm)
 
     # Set the optimal threshold (refer to the Jupyter Notebook to see how we arrived at 42)
-    optimal_threshold_svm = 0.40
+    optimal_threshold_svm = 0.40
 
+    print("- remodeling on training data")
     y_pred_train_svm = model.predict_proba(x_train_scaled)
-    metrics_score(y_train, y_pred_train_svm[:,1] > optimal_threshold_svm)
+    metrics_score(y_train, y_pred_train_svm[:, 1] > optimal_threshold_svm)
 
+    print("- remodeling on test data")
     y_pred_test = model.predict_proba(x_test_scaled)
-    metrics_score(y_test, y_pred_test[:,1]> optimal_threshold_svm)
+    metrics_score(y_test, y_pred_test[:, 1] > optimal_threshold_svm)
+
 
-def rbf_kernal():
+def rbf_Kernel():
     """
     - create training and test data sets
     - create a Logistic Regression model
     - train the model
     - generate confusion matrix and f-score for the training set
     - generate confusion matrix and f-score for the test set
     """
+    print("RBF Kernel")
     x_train, x_test, y_train, y_test = prepare_data()
 
-    scaling = MinMaxScaler(feature_range=(-1,1)).fit(x_train)
+    print("- scaling")
+    scaling = MinMaxScaler(feature_range=(-1, 1)).fit(x_train)
     x_train_scaled = scaling.transform(x_train)
     x_test_scaled = scaling.transform(x_test)
 
-    # Linear kernal or linear decision boundary
-    svm_rbf = SVC(kernel='rbf',probability=True)
-    model = svm_rbf.fit(x_train_scaled,y_train)
+    # Linear Kernel or linear decision boundary
+    print("- training")
+    svm_rbf = SVC(kernel="rbf", probability=True)
+    model = svm_rbf.fit(x_train_scaled, y_train)
 
+    print("- modeling on training data")
     y_pred_train_svm = model.predict(x_train_scaled)
     metrics_score(y_train, y_pred_train_svm)
 
+    print("- modeling on test data")
     y_pred_test_svm = model.predict(x_test_scaled)
     metrics_score(y_test, y_pred_test_svm)
 
     # Set the optimal threshold (refer to the Jupyter Notebook to see how we arrived at 42)
-    optimal_threshold_svm = 0.41
+    optimal_threshold_svm = 0.41
 
+    print("- remodeling on training data")
     y_pred_train_svm = model.predict_proba(x_train_scaled)
-    metrics_score(y_train, y_pred_train_svm[:,1] > optimal_threshold_svm)
+    metrics_score(y_train, y_pred_train_svm[:, 1] > optimal_threshold_svm)
 
+    print("- remodeling on test data")
     y_pred_test = model.predict_proba(x_test_scaled)
-    metrics_score(y_test, y_pred_test[:,1]> optimal_threshold_svm)
+    metrics_score(y_test, y_pred_test[:, 1] > optimal_threshold_svm)
+
 
 if __name__ == "__main__":
-    linear_kernal()
+    linear_Kernel()
+    rbf_Kernel()
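
The thresholding step above compares the positive-class column of predict_proba against a fixed cutoff (0.40 for the linear kernel, 0.41 for RBF); the notebook that derives those values is not part of this diff. A minimal sketch of one common way to pick such a cutoff, maximizing F1 over the precision-recall curve on the training set, using synthetic stand-in data rather than the repository's prepare_data():

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Synthetic stand-in for the reservations data used by the script.
x, y = make_classification(n_samples=500, n_features=8, weights=[0.7], random_state=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, stratify=y, random_state=1)

scaling = MinMaxScaler(feature_range=(-1, 1)).fit(x_train)
model = SVC(kernel="linear", probability=True).fit(scaling.transform(x_train), y_train)

# Positive-class probabilities on the training set.
proba = model.predict_proba(scaling.transform(x_train))[:, 1]

# precision_recall_curve returns one threshold per point except the last; keep the F1-maximizing one.
precision, recall, thresholds = precision_recall_curve(y_train, proba)
f1 = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + 1e-12)
print(f"F1-optimal threshold: {thresholds[np.argmax(f1)]:.2f}")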
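
metrics_score itself is defined earlier in the file and is not touched by this diff. Judging from the confusion_matrix / classification_report imports at the top, a plausible minimal version (an assumption, not the repository's actual implementation) would be:

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix


def metrics_score(actual, predicted):
    """Print the classification report and plot the confusion matrix as a heatmap."""
    print(classification_report(actual, predicted))
    cm = confusion_matrix(actual, predicted)
    plt.figure(figsize=(8, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.ylabel("Actual")
    plt.xlabel("Predicted")
    plt.show()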