
Commit 06f57a2

add kfold cross validation tutorial
1 parent 4150ce8 commit 06f57a2

5 files changed: 181 additions, 0 deletions

README.md (+1)

@@ -90,6 +90,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
 - [Handling Imbalanced Datasets: A Case Study with Customer Churn](https://www.thepythoncode.com/article/handling-imbalanced-datasets-sklearn-in-python). ([code](machine-learning/handling-inbalance-churn-data))
 - [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch). ([code](machine-learning/logistic-regression-in-pytorch))
 - [Dropout Regularization using PyTorch in Python](https://www.thepythoncode.com/article/dropout-regularization-in-pytorch). ([code](machine-learning/dropout-in-pytorch))
+- [K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python). ([code](machine-learning/k-fold-cross-validation-sklearn))
 
 - ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
 - [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb (new file)

@@ -0,0 +1,133 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "cLkNm1Ywb3Eh"
      },
      "outputs": [],
      "source": [
        "# Load libraries\n",
        "from sklearn import datasets\n",
        "from sklearn import metrics\n",
        "from sklearn.model_selection import KFold, cross_val_score\n",
        "from sklearn.pipeline import make_pipeline\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.preprocessing import StandardScaler"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "TvR-6-N55ZGJ"
      },
      "outputs": [],
      "source": [
        "# Load the digits dataset\n",
        "digits = datasets.load_digits()\n",
        "# Create the feature matrix\n",
        "features = digits.data\n",
        "# Create the target vector\n",
        "target = digits.target"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "zlsG8vbu5cqh"
      },
      "outputs": [],
      "source": [
        "# Create the standardizer\n",
        "standard_scaler = StandardScaler()\n",
        "# Create the logistic regression model\n",
        "logit = LogisticRegression()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4LcbDlYC5m-E",
        "outputId": "dc10db6a-272d-4a37-d083-758b4a83dcfb"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "array([0.92682927, 0.98170732, 0.95731707, 0.95121951, 0.98159509,\n",
              "       0.97546012, 0.98159509, 0.98773006, 0.96319018, 0.97546012,\n",
              "       0.96932515])"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Create a pipeline that standardizes the data, then fits the logistic regression\n",
        "pipeline = make_pipeline(standard_scaler, logit)\n",
        "# Create the k-fold cross-validation splitter\n",
        "kf = KFold(n_splits=11, shuffle=True, random_state=2)\n",
        "# Run k-fold cross-validation\n",
        "cv_results = cross_val_score(pipeline, # Pipeline\n",
        "                             features, # Feature matrix\n",
        "                             target, # Target vector\n",
        "                             cv=kf, # Cross-validation technique\n",
        "                             scoring=\"accuracy\", # Scoring metric\n",
        "                             n_jobs=-1) # Use all CPU cores\n",
        "# View the score for each of the 11 folds\n",
        "cv_results"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hdX0sbfBaWsI",
        "outputId": "9fdc89ce-c2f7-432d-8c6a-35a65f751066"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "0.968311727177506"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Calculate the mean accuracy across the folds\n",
        "cv_results.mean()"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "name": "CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
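A note on why the pipeline matters in the notebook above: cross_val_score clones and refits the whole pipeline on each training split, so the StandardScaler never sees the held-out fold and no preprocessing statistics leak across folds. For illustration only (not part of this commit), a minimal sketch of the equivalent manual loop; max_iter is raised here to avoid convergence warnings, so the per-fold scores may differ slightly from the notebook output:

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

digits = datasets.load_digits()
features, target = digits.data, digits.target

kf = KFold(n_splits=11, shuffle=True, random_state=2)
fold_scores = []
for train_idx, test_idx in kf.split(features):
    # Fit the scaler on the training fold only, then apply it to both splits
    scaler = StandardScaler().fit(features[train_idx])
    X_train = scaler.transform(features[train_idx])
    X_test = scaler.transform(features[test_idx])
    # Fit the classifier on the scaled training fold
    model = LogisticRegression(max_iter=1000).fit(X_train, target[train_idx])
    # Score on the held-out fold
    fold_scores.append(accuracy_score(target[test_idx], model.predict(X_test)))

print(np.mean(fold_scores))  # close to the ~0.968 mean reported in the notebook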
@@ -0,0 +1 @@
# [K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python)
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
"""CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/15FFmKBlvdAFCP4-Ka2SoFsWC93PjdxJH
"""

# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the digits dataset
digits = datasets.load_digits()
# Create the feature matrix
features = digits.data
# Create the target vector
target = digits.target

# Create the standardizer
standard_scaler = StandardScaler()
# Create the logistic regression model
logit = LogisticRegression()

# Create a pipeline that standardizes the data, then fits the logistic regression
pipeline = make_pipeline(standard_scaler, logit)
# Create the k-fold cross-validation splitter
kf = KFold(n_splits=11, shuffle=True, random_state=2)
# Run k-fold cross-validation
cv_results = cross_val_score(pipeline, # Pipeline
                             features, # Feature matrix
                             target, # Target vector
                             cv=kf, # Cross-validation technique
                             scoring="accuracy", # Scoring metric
                             n_jobs=-1) # Use all CPU cores
# View the score for each of the 11 folds
cv_results

# Calculate the mean accuracy across the folds
cv_results.mean()
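As a possible follow-up to the script above (a sketch, not part of the commit, assuming the pipeline, features, target, and kf objects defined there are in scope): cross_validate, from the same sklearn.model_selection module, reports per-fold fit and score times and, optionally, training scores alongside the test accuracy that cross_val_score returns.

from sklearn.model_selection import cross_validate

cv_report = cross_validate(pipeline,            # same standardize-then-fit pipeline
                           features,
                           target,
                           cv=kf,                # same 11-fold splitter
                           scoring="accuracy",
                           return_train_score=True,
                           n_jobs=-1)
print(cv_report["test_score"].mean())   # mean validation accuracy across folds
print(cv_report["train_score"].mean())  # mean training accuracy, a quick overfitting check
print(cv_report["fit_time"].sum())      # total seconds spent fitting the 11 pipelines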
@@ -0,0 +1 @@
scikit-learn
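scikit-learn is the tutorial's only listed dependency; the usual `pip install -r requirements.txt` from the tutorial folder (or simply `pip install scikit-learn`) should be enough to run both the notebook and the script.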
