
Commit 06f57a2

add kfold cross validation tutorial
1 parent 4150ce8 commit 06f57a2

5 files changed: 181 additions, 0 deletions

README.md (+1)

@@ -90,6 +90,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
 - [Handling Imbalanced Datasets: A Case Study with Customer Churn](https://www.thepythoncode.com/article/handling-imbalanced-datasets-sklearn-in-python). ([code](machine-learning/handling-inbalance-churn-data))
 - [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch). ([code](machine-learning/logistic-regression-in-pytorch))
 - [Dropout Regularization using PyTorch in Python](https://www.thepythoncode.com/article/dropout-regularization-in-pytorch). ([code](machine-learning/dropout-in-pytorch))
+- [K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python). ([code](machine-learning/k-fold-cross-validation-sklearn))
 
 - ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
 - [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb (new file)

@@ -0,0 +1,133 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "cLkNm1Ywb3Eh"
      },
      "outputs": [],
      "source": [
        "# Load libraries\n",
        "from sklearn import datasets\n",
        "from sklearn import metrics\n",
        "from sklearn.model_selection import KFold, cross_val_score\n",
        "from sklearn.pipeline import make_pipeline\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.preprocessing import StandardScaler"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "TvR-6-N55ZGJ"
      },
      "outputs": [],
      "source": [
        "# Load the digits dataset\n",
        "digits = datasets.load_digits()\n",
        "# Create the feature matrix\n",
        "features = digits.data\n",
        "# Create the target vector\n",
        "target = digits.target"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "zlsG8vbu5cqh"
      },
      "outputs": [],
      "source": [
        "# Create the standardizer\n",
        "standard_scaler = StandardScaler()\n",
        "# Create the logistic regression model\n",
        "logit = LogisticRegression()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4LcbDlYC5m-E",
        "outputId": "dc10db6a-272d-4a37-d083-758b4a83dcfb"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "array([0.92682927, 0.98170732, 0.95731707, 0.95121951, 0.98159509,\n",
              "       0.97546012, 0.98159509, 0.98773006, 0.96319018, 0.97546012,\n",
              "       0.96932515])"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Create a pipeline that standardizes the data, then fits the logistic regression\n",
        "pipeline = make_pipeline(standard_scaler, logit)\n",
        "# Create the k-fold cross-validation splitter\n",
        "kf = KFold(n_splits=11, shuffle=True, random_state=2)\n",
        "# Run k-fold cross-validation\n",
        "cv_results = cross_val_score(pipeline, # Pipeline\n",
        "                             features, # Feature matrix\n",
        "                             target, # Target vector\n",
        "                             cv=kf, # Cross-validation technique\n",
        "                             scoring=\"accuracy\", # Scoring metric\n",
        "                             n_jobs=-1) # Use all CPU cores\n",
        "# View the score for each of the 11 folds\n",
        "cv_results"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hdX0sbfBaWsI",
        "outputId": "9fdc89ce-c2f7-432d-8c6a-35a65f751066"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "0.968311727177506"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Calculate the mean accuracy across the folds\n",
        "cv_results.mean()"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "name": "CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
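A note on why the pipeline matters in the notebook above: cross_val_score clones and refits the whole pipeline on each training split, so the StandardScaler never sees the held-out fold and no preprocessing statistics leak across folds. For illustration only (not part of this commit), a minimal sketch of the equivalent manual loop; max_iter is raised here to avoid convergence warnings, so the per-fold scores may differ slightly from the notebook output:

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

digits = datasets.load_digits()
features, target = digits.data, digits.target

kf = KFold(n_splits=11, shuffle=True, random_state=2)
fold_scores = []
for train_idx, test_idx in kf.split(features):
    # Fit the scaler on the training fold only, then apply it to both splits
    scaler = StandardScaler().fit(features[train_idx])
    X_train = scaler.transform(features[train_idx])
    X_test = scaler.transform(features[test_idx])
    # Fit the classifier on the scaled training fold
    model = LogisticRegression(max_iter=1000).fit(X_train, target[train_idx])
    # Score on the held-out fold
    fold_scores.append(accuracy_score(target[test_idx], model.predict(X_test)))

print(np.mean(fold_scores))  # close to the ~0.968 mean reported in the notebook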
@@ -0,0 +1 @@
# [K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python)
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
"""CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/15FFmKBlvdAFCP4-Ka2SoFsWC93PjdxJH
"""

# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the digits dataset
digits = datasets.load_digits()
# Create the feature matrix
features = digits.data
# Create the target vector
target = digits.target

# Create the standardizer
standard_scaler = StandardScaler()
# Create the logistic regression model
logit = LogisticRegression()

# Create a pipeline that standardizes the data, then fits the logistic regression
pipeline = make_pipeline(standard_scaler, logit)
# Create the k-fold cross-validation splitter
kf = KFold(n_splits=11, shuffle=True, random_state=2)
# Run k-fold cross-validation
cv_results = cross_val_score(pipeline, # Pipeline
                             features, # Feature matrix
                             target, # Target vector
                             cv=kf, # Cross-validation technique
                             scoring="accuracy", # Scoring metric
                             n_jobs=-1) # Use all CPU cores
# View the score for each of the 11 folds
cv_results

# Calculate the mean accuracy across the folds
cv_results.mean()
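As a possible follow-up to the script above (a sketch, not part of the commit, assuming the pipeline, features, target, and kf objects defined there are in scope): cross_validate, from the same sklearn.model_selection module, reports per-fold fit and score times and, optionally, training scores alongside the test accuracy that cross_val_score returns.

from sklearn.model_selection import cross_validate

cv_report = cross_validate(pipeline,            # same standardize-then-fit pipeline
                           features,
                           target,
                           cv=kf,                # same 11-fold splitter
                           scoring="accuracy",
                           return_train_score=True,
                           n_jobs=-1)
print(cv_report["test_score"].mean())   # mean validation accuracy across folds
print(cv_report["train_score"].mean())  # mean training accuracy, a quick overfitting check
print(cv_report["fit_time"].sum())      # total seconds spent fitting the 11 pipelines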
@@ -0,0 +1 @@
scikit-learn
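scikit-learn is the tutorial's only listed dependency; the usual `pip install -r requirements.txt` from the tutorial folder (or simply `pip install scikit-learn`) should be enough to run both the notebook and the script.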
