x4nth055 · Apr 18, 2022
diff --git a/‎README.md
+1 b/‎README.md
+1
diff --git a/‎machine-learning/logistic-regression-in-pytorch/LogisticRegressionPyTorch_PythonCodeTutorial.ipynb
+409 b/‎machine-learning/logistic-regression-in-pytorch/LogisticRegressionPyTorch_PythonCodeTutorial.ipynb
+409
diff --git a/‎machine-learning/logistic-regression-in-pytorch/README.md
+1 b/‎machine-learning/logistic-regression-in-pytorch/README.md
+1
diff --git a/‎machine-learning/logistic-regression-in-pytorch/logistic_regression_pytorch.py
+158 b/‎machine-learning/logistic-regression-in-pytorch/logistic_regression_pytorch.py
+158
diff --git a/‎machine-learning/logistic-regression-in-pytorch/requirements.txt
+5 b/‎machine-learning/logistic-regression-in-pytorch/requirements.txt
+5
@@ -87,6 +87,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [Customer Churn Prediction in Python](https://www.thepythoncode.com/article/customer-churn-detection-using-sklearn-in-python). ([code](machine-learning/customer-churn-detection))
     - [Recommender Systems using Association Rules Mining in Python](https://www.thepythoncode.com/article/build-a-recommender-system-with-association-rule-mining-in-python). ([code](machine-learning/recommender-system-using-association-rules))
     - [Handling Imbalanced Datasets: A Case Study with Customer Churn](https://www.thepythoncode.com/article/handling-imbalanced-datasets-sklearn-in-python). ([code](machine-learning/handling-inbalance-churn-data))
+    - [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch). ([code](machine-learning/logistic-regression-in-pytorch))
 
 - ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
     - [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
 
@@ -0,0 +1,409 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "YUssqHFr0VM8",
+        "outputId": "47dcb9c2-b276-43f3-9263-ef9c7fcdf45a"
+      },
+      "outputs": [],
+      "source": [
+        "!gdown --id 12vfq3DYFId3bsXuNj_PhsACMzrLTfObs"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "CLw5KFxzz-vw"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "from sklearn.utils import resample\n",
+        "from sklearn import preprocessing\n",
+        "from sklearn.preprocessing import StandardScaler\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from warnings import filterwarnings\n",
+        "filterwarnings('ignore')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 379
+        },
+        "id": "llEWd-dM0ZQg",
+        "outputId": "c8c3348b-d68c-4f7d-c587-5279e4b867b7"
+      },
+      "outputs": [],
+      "source": [
+        "#reading data\n",
+        "data = pd.read_csv(\"data_regression.csv\")\n",
+        "##The dimension of the data is seen, and the output column is checked to see whether it is continuous or discrete. \n",
+        "##In this case, the output is discrete, so a classification algorithm should be applied.\n",
+        "data = data.drop([\"year\", \"customer_id\", \"phone_no\"], axis=1)\n",
+        "print(data.shape)         # Looking at the shape of the data\n",
+        "print(data.columns)       # Looking how many columns data has\n",
+        "data.dtypes  \n",
+        "data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wHAGp4M10cUr",
+        "outputId": "20095ecf-22fd-4a0a-d1ee-e125d303154b"
+      },
+      "outputs": [],
+      "source": [
+        "data.isnull().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 270
+        },
+        "id": "qt8ctl6m0gm-",
+        "outputId": "eb870fb2-a28d-4e0a-f6f7-55771f2bdaa7"
+      },
+      "outputs": [],
+      "source": [
+        "final_data = data.dropna()         # Dropping the null values\n",
+        "final_data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yL_bQ-mH0hzn",
+        "outputId": "1537150a-c09c-4abb-b46c-082ee6bff1ce"
+      },
+      "outputs": [],
+      "source": [
+        "final_data[\"churn\"].value_counts()       \n",
+        "# let us see how many data is there in each class for deciding the sampling data number"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TqetEXX50i7Z",
+        "outputId": "632fbf54-5189-4d61-8d8f-fb1a6735aeb0"
+      },
+      "outputs": [],
+      "source": [
+        "data_majority = final_data[final_data['churn']==0] # class 0\n",
+        "data_minority = final_data[final_data['churn']==1] # class 1\n",
+        "# upsampling minority class\n",
+        "data_minority_upsampled = resample(data_minority, replace=True, n_samples=900, random_state=123) \n",
+        "# downsampling majority class\n",
+        "data_majority_downsampled = resample(data_majority, replace=False, n_samples=900, random_state=123)\n",
+        "# concanating both upsampled and downsampled class\n",
+        "## Data Concatenation:  Concatenating the dataframe after upsampling and downsampling \n",
+        "# concanating both upsampled and downsampled class\n",
+        "data2 = pd.concat([data_majority_downsampled, data_minority_upsampled])\n",
+        "## Encoding Catagoricals:  We need to encode the categorical variables before feeding it to the model\n",
+        "data2[['gender', 'multi_screen', 'mail_subscribed']]\n",
+        "# label encoding categorical variables\n",
+        "label_encoder = preprocessing.LabelEncoder()\n",
+        "data2['gender'] = label_encoder.fit_transform(data2['gender'])\n",
+        "data2['multi_screen'] = label_encoder.fit_transform(data2['multi_screen'])\n",
+        "data2['mail_subscribed'] = label_encoder.fit_transform(data2['mail_subscribed'])\n",
+        "## Lets now check again the distribution of the oputut class after sampling\n",
+        "data2[\"churn\"].value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GUpLqdEw0tXb",
+        "outputId": "041cd5a3-f9ec-447a-c0d9-f09e1e8bd4e0"
+      },
+      "outputs": [],
+      "source": [
+        "# Shuffle the rows first so that features and labels are read from the same row order\n",
+        "data2 = data2.sample(frac=1, random_state=42).reset_index(drop=True)\n",
+        "# Independent variables: every column except the target\n",
+        "X = data2.drop(columns=\"churn\")\n",
+        "# Output column\n",
+        "Y = data2[\"churn\"]\n",
+        "n_samples, n_features = X.shape  # number of samples and number of features\n",
+        "## Data Splitting:\n",
+        "## Split before scaling so that the scaler statistics come from the training rows only\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=42, stratify=Y)\n",
+        "# Scale the data: bring the mean to 0 and the variance to 1 (applied exactly once)\n",
+        "sc = StandardScaler()\n",
+        "X_train = sc.fit_transform(X_train)\n",
+        "X_test = sc.transform(X_test)\n",
+        "print((y_train == 1).sum())\n",
+        "print((y_train == 0).sum())"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "VzYgdjlU0tof",
+        "outputId": "5e188583-3dcd-4ddc-8b1b-dee6bb7b524a"
+      },
+      "outputs": [],
+      "source": [
+        "print(type(X_train))\n",
+        "print(type(X_test))\n",
+        "print(type(y_train.values))\n",
+        "print(type(y_test.values))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "y0raeQhA0u4z"
+      },
+      "outputs": [],
+      "source": [
+        "X_train = torch.from_numpy(X_train.astype(np.float32))\n",
+        "X_test = torch.from_numpy(X_test.astype(np.float32))\n",
+        "y_train = torch.from_numpy(y_train.values.astype(np.float32))\n",
+        "y_test = torch.from_numpy(y_test.values.astype(np.float32))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TBb_XrZt00_p",
+        "outputId": "4c44564c-a8c1-4071-9527-afebd5fa310f"
+      },
+      "outputs": [],
+      "source": [
+        "y_train.shape, y_test.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "6-9CiBo40vxM"
+      },
+      "outputs": [],
+      "source": [
+        "y_train = y_train.view(y_train.shape[0], 1)\n",
+        "y_test = y_test.view(y_test.shape[0], 1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "07XkWAA20w0U",
+        "outputId": "63cb82b1-6650-442d-c911-cec1a0778e20"
+      },
+      "outputs": [],
+      "source": [
+        "y_train.shape, y_test.shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fQxkh0FK02i5"
+      },
+      "outputs": [],
+      "source": [
+        "# logistic regression class\n",
+        "class LogisticRegression(nn.Module):\n",
+        "    def __init__(self, n_input_features):\n",
+        "        super(LogisticRegression, self).__init__()\n",
+        "        self.linear = nn.Linear(n_input_features, 1)\n",
+        "    \n",
+        "    #sigmoid transformation of the input \n",
+        "    def forward(self, x):\n",
+        "        y_pred = torch.sigmoid(self.linear(x))\n",
+        "        return y_pred"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "C8GvUnxQ05Ik"
+      },
+      "outputs": [],
+      "source": [
+        "lr = LogisticRegression(n_features)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "RdYvQs1a06JP"
+      },
+      "outputs": [],
+      "source": [
+        "num_epochs = 500\n",
+        "# Training the model for a large number of epochs to see better results  \n",
+        "learning_rate = 0.0001\n",
+        "criterion = nn.BCELoss()                                \n",
+        "# We are working on logistic regression, so we use Binary Cross Entropy\n",
+        "optimizer = torch.optim.SGD(lr.parameters(), lr=learning_rate)      "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "qT5pK7jr0_Ez",
+        "outputId": "abf0e908-173d-447f-f8bf-0ce55c9907e3"
+      },
+      "outputs": [],
+      "source": [
+        "for epoch in range(num_epochs):\n",
+        "    y_pred = lr(X_train)\n",
+        "    loss = criterion(y_pred, y_train)             \n",
+        "    loss.backward()\n",
+        "    optimizer.step()\n",
+        "    optimizer.zero_grad()\n",
+        "    if (epoch+1) % 20 == 0:                                         \n",
+        "        # printing loss values every 20 epochs to keep track\n",
+        "        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "KYDPNSBm1C_T",
+        "outputId": "77c4070e-20de-4cb0-94b1-33962a36cce8"
+      },
+      "outputs": [],
+      "source": [
+        "with torch.no_grad():\n",
+        "    y_predicted = lr(X_test)\n",
+        "    y_predicted_cls = y_predicted.round()\n",
+        "    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])\n",
+        "    print(f'accuracy: {acc.item():.4f}')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0miFH7DO1oOq",
+        "outputId": "38352b86-1590-490a-9f45-377ed543a3bc"
+      },
+      "outputs": [],
+      "source": [
+        "#classification report\n",
+        "from sklearn.metrics import classification_report\n",
+        "print(classification_report(y_test, y_predicted_cls))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "BXKCNp_q2zhp",
+        "outputId": "2fe7e571-64a6-4dc5-9be7-20d365a96a05"
+      },
+      "outputs": [],
+      "source": [
+        "#confusion matrix\n",
+        "from sklearn.metrics import confusion_matrix\n",
+        "# Store the result under its own name so the imported function is not shadowed when the cell is re-run\n",
+        "conf_matrix = confusion_matrix(y_test, y_predicted_cls)\n",
+        "print(conf_matrix)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "x6l2_Yxr21kT"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "LogisticRegressionPyTorch_PythonCodeTutorial.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -0,0 +1 @@
+# [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch)
@@ -0,0 +1,158 @@
+# %%
+# IPython shell escape that invokes the gdown command-line tool with a file id.
+# NOTE(review): presumably this fetches data_regression.csv, which is read further below - confirm.
+# NOTE(review): `!` is not plain-Python syntax, so this cell presumably only runs under Jupyter/IPython - confirm.
+!gdown --id 12vfq3DYFId3bsXuNj_PhsACMzrLTfObs
+
+# %%
+import pandas as pd
+import numpy as np
+import torch
+import torch.nn as nn
+from sklearn.utils import resample
+from sklearn import preprocessing
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+from warnings import filterwarnings
+filterwarnings('ignore')
+
+# %%
+# Load the raw CSV and discard three columns that are not used as features.
+data = pd.read_csv("data_regression.csv").drop(columns=["year", "customer_id", "phone_no"])
+# The output column is discrete, so this is treated as a classification problem.
+print(data.shape)      # (rows, columns)
+print(data.columns)    # remaining column labels
+data.dtypes
+data.head()
+
+# %%
+# Count the missing values in each column.
+data.isna().sum()
+
+# %%
+# Keep only the rows that have no missing values.
+final_data = data.dropna()
+final_data.head()
+
+# %%
+# Class counts of the output column, used to decide the resampling size.
+final_data["churn"].value_counts()
+
+# %%
+# Split the cleaned rows by class label.
+data_majority = final_data[final_data['churn'] == 0]  # class 0
+data_minority = final_data[final_data['churn'] == 1]  # class 1
+# Draw 900 rows from each class with a fixed seed: class 1 with replacement,
+# class 0 without replacement.
+resample_options = dict(n_samples=900, random_state=123)
+data_minority_upsampled = resample(data_minority, replace=True, **resample_options)
+data_majority_downsampled = resample(data_majority, replace=False, **resample_options)
+# Stack the two resampled frames, class 0 rows first.
+data2 = pd.concat([data_majority_downsampled, data_minority_upsampled])
+# The categorical columns must be turned into integers before they reach the model.
+data2[['gender', 'multi_screen', 'mail_subscribed']]
+# One encoder object is refit on each categorical column in turn.
+label_encoder = preprocessing.LabelEncoder()
+for categorical_column in ('gender', 'multi_screen', 'mail_subscribed'):
+    data2[categorical_column] = label_encoder.fit_transform(data2[categorical_column])
+# Distribution of the output class after resampling.
+data2["churn"].value_counts()
+
+# %%
+# Shuffle the rows BEFORE taking features and labels, so both come from the
+# same row order. (Previously X was extracted before the shuffle and Y after
+# it, which paired every feature row with another row's label.)
+# The seed keeps the shuffle reproducible, like the other seeded steps.
+data2 = data2.sample(frac=1, random_state=42).reset_index(drop=True)
+# Independent variables: every column except the target. Selecting by name
+# does not depend on "churn" being the last column.
+X = data2.drop(columns="churn")
+# Output column
+Y = data2["churn"]
+n_samples, n_features = X.shape  # number of samples and number of features
+## Data Splitting:
+## Split into train and test first, so the scaler below is fit on training rows only.
+X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=42, stratify=Y)
+# Scale the data: bring the mean to 0 and the variance to 1.
+# The scaling is applied exactly once; the test rows reuse the training statistics.
+sc = StandardScaler()
+X_train = sc.fit_transform(X_train)
+X_test = sc.transform(X_test)
+print((y_train == 1).sum())
+print((y_train == 0).sum())
+
+# %%
+# Show the container type of each split before converting to tensors.
+for split_values in (X_train, X_test, y_train.values, y_test.values):
+    print(type(split_values))
+
+# %%
+# Convert every split to a float32 torch tensor.
+X_train, X_test = [
+    torch.from_numpy(features.astype(np.float32)) for features in (X_train, X_test)
+]
+y_train, y_test = [
+    torch.from_numpy(labels.values.astype(np.float32)) for labels in (y_train, y_test)
+]
+
+# %%
+y_train.shape, y_test.shape
+
+# %%
+# Reshape the label vectors into single-column matrices.
+y_train = y_train.view(-1, 1)
+y_test = y_test.view(-1, 1)
+
+# %%
+y_train.shape, y_test.shape
+
+# %%
+# logistic regression class
+class LogisticRegression(nn.Module):
+    """One linear layer with a single output unit, followed by a sigmoid."""
+
+    def __init__(self, n_input_features):
+        """Create the linear layer mapping n_input_features inputs to 1 output."""
+        super().__init__()
+        self.linear = nn.Linear(n_input_features, 1)
+
+    def forward(self, x):
+        """Return the sigmoid of the linear layer's output for x."""
+        logits = self.linear(x)
+        return torch.sigmoid(logits)
+
+# %%
+# Build the model with one input per feature column.
+lr = LogisticRegression(n_features)
+
+# %%
+# Training configuration.
+learning_rate = 0.0001
+num_epochs = 500
+# Binary cross-entropy, applied to the sigmoid outputs of the model.
+criterion = nn.BCELoss()
+optimizer = torch.optim.SGD(lr.parameters(), lr=learning_rate)
+
+# %%
+# Each epoch runs one forward/backward pass over the whole training tensor.
+for epoch in range(num_epochs):
+    y_pred = lr(X_train)
+    loss = criterion(y_pred, y_train)
+    loss.backward()
+    optimizer.step()
+    optimizer.zero_grad()
+    # Report the loss only on every 20th epoch.
+    if (epoch + 1) % 20 != 0:
+        continue
+    print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')
+
+# %%
+# Evaluate on the test tensors without tracking gradients.
+with torch.no_grad():
+    y_predicted = lr(X_test)
+    # Round the sigmoid outputs to get hard 0/1 class predictions.
+    y_predicted_cls = y_predicted.round()
+    # Fraction of test rows whose predicted class equals the true label.
+    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
+    print(f'accuracy: {acc.item():.4f}')
+
+# %%
+#classification report
+from sklearn.metrics import classification_report
+print(classification_report(y_test, y_predicted_cls))
+
+# %%
+#confusion matrix
+from sklearn.metrics import confusion_matrix
+# Store the result under its own name: rebinding `confusion_matrix` to the
+# returned array would shadow the imported function and break later calls to it.
+conf_matrix = confusion_matrix(y_test, y_predicted_cls)
+print(conf_matrix)
+
+# %%
+
+
+
@@ -0,0 +1,5 @@
+matplotlib
+numpy
+pandas
+scikit_learn==1.0.2
+torch==1.10.1
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch)`