Skip to content

Commit d506b2a

Browse files
committed
Fixing data imputation
1 parent 0650fa9 commit d506b2a

File tree

2 files changed

+30
-32
lines changed

2 files changed

+30
-32
lines changed

01_exploratory_analysis_pre_cleaning.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@
943943
"cell_type": "markdown",
944944
"metadata": {},
945945
"source": [
946-
"- Most of the clients who did not meet the bank guidelines (Credit_History) had their loan applications rejected.\n",
946+
"- Most of the clients who did not meet the bank guidelines for Credit_History had their loan applications rejected.\n",
947947
"- Most of the applicants' incomes are under 5700.\n",
948948
"- Most of the co-applicants' incomes are below 2200, and almost half of them have 0 income.\n",
949949
"- Most of the loan terms are 360 months.\n",

02_pre_processing.ipynb

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,14 @@
1313
"source": [
1414
"Based on the conclusions of the Exploratory Data Analysis, we fill in some missing data under the following assumptions:\n",
1515
"\n",
16-
"- If a client has no information about the Credit_History and had their application rejected, they do not meet the bank guidelines (Credit_History = 0).\n",
17-
"- Fill in the Self_employed with 'Employed', which is the most common case.\n",
16+
"- Fill in missing Credit_History values with 1.\n",
1817
"- Fill in the Loan_Amount_Term with 360 for clients who have this value missing.\n",
1918
"- Fill in the CoapplicantIncome with 0."
2019
]
2120
},
2221
{
2322
"cell_type": "code",
24-
"execution_count": 127,
23+
"execution_count": 2,
2524
"metadata": {},
2625
"outputs": [],
2726
"source": [
@@ -41,7 +40,7 @@
4140
},
4241
{
4342
"cell_type": "code",
44-
"execution_count": 128,
43+
"execution_count": 3,
4544
"metadata": {},
4645
"outputs": [
4746
{
@@ -50,7 +49,7 @@
5049
"(614, 14)"
5150
]
5251
},
53-
"execution_count": 128,
52+
"execution_count": 3,
5453
"metadata": {},
5554
"output_type": "execute_result"
5655
}
@@ -70,15 +69,14 @@
7069
},
7170
{
7271
"cell_type": "code",
73-
"execution_count": 129,
72+
"execution_count": 4,
7473
"metadata": {},
7574
"outputs": [],
7675
"source": [
7776
"df_fill = df_import.copy()\n",
78-
"# df_fill.loc[df_fill['Credit_History'].isnull(),'Credit_History'] = 1\n",
79-
"df_fill.loc[(df_fill['Loan_Status'] == 'N') & (df_fill['Credit_History'].isnull()),'Credit_History'] = 0\n",
77+
"df_fill.loc[df_fill['Credit_History'].isnull(),'Credit_History'] = 1\n",
8078
"df_fill.loc[df_fill['Loan_Amount_Term'].isnull(),'Loan_Amount_Term'] = 360\n",
81-
"df_fill.loc[df_fill['Self_Employed'].isnull(),'Self_Employed'] = 'No'\n",
79+
"# df_fill.loc[df_fill['Self_Employed'].isnull(),'Self_Employed'] = 'No'\n",
8280
"df_fill.loc[df_fill['CoapplicantIncome'].isnull(),'CoapplicantIncome'] = 0"
8381
]
8482
},
@@ -91,7 +89,7 @@
9189
},
9290
{
9391
"cell_type": "code",
94-
"execution_count": 130,
92+
"execution_count": 5,
9593
"metadata": {},
9694
"outputs": [],
9795
"source": [
@@ -113,30 +111,30 @@
113111
},
114112
{
115113
"cell_type": "code",
116-
"execution_count": 131,
114+
"execution_count": 7,
117115
"metadata": {},
118116
"outputs": [
119117
{
120118
"data": {
121119
"text/plain": [
122-
"Gender 530\n",
123-
"Married 530\n",
124-
"Dependents 530\n",
125-
"Education 530\n",
126-
"Self_Employed 530\n",
127-
"ApplicantIncome 530\n",
128-
"CoapplicantIncome 530\n",
129-
"LoanAmount 530\n",
130-
"Loan_Amount_Term 530\n",
131-
"Credit_History 530\n",
132-
"Property_Area 530\n",
133-
"Loan_Status 530\n",
134-
"Base_Loan_Installment 530\n",
135-
"Remaining_Income 530\n",
120+
"Gender 535\n",
121+
"Married 535\n",
122+
"Dependents 535\n",
123+
"Education 535\n",
124+
"Self_Employed 535\n",
125+
"ApplicantIncome 535\n",
126+
"CoapplicantIncome 535\n",
127+
"LoanAmount 535\n",
128+
"Loan_Amount_Term 535\n",
129+
"Credit_History 535\n",
130+
"Property_Area 535\n",
131+
"Loan_Status 535\n",
132+
"Base_Loan_Installment 535\n",
133+
"Remaining_Income 535\n",
136134
"dtype: int64"
137135
]
138136
},
139-
"execution_count": 131,
137+
"execution_count": 7,
140138
"metadata": {},
141139
"output_type": "execute_result"
142140
}
@@ -155,7 +153,7 @@
155153
},
156154
{
157155
"cell_type": "code",
158-
"execution_count": 132,
156+
"execution_count": 8,
159157
"metadata": {},
160158
"outputs": [],
161159
"source": [
@@ -172,7 +170,7 @@
172170
},
173171
{
174172
"cell_type": "code",
175-
"execution_count": 133,
173+
"execution_count": 9,
176174
"metadata": {},
177175
"outputs": [
178176
{
@@ -204,7 +202,7 @@
204202
},
205203
{
206204
"cell_type": "code",
207-
"execution_count": 134,
205+
"execution_count": 10,
208206
"metadata": {},
209207
"outputs": [],
210208
"source": [
@@ -213,7 +211,7 @@
213211
},
214212
{
215213
"cell_type": "code",
216-
"execution_count": 135,
214+
"execution_count": 11,
217215
"metadata": {},
218216
"outputs": [
219217
{
@@ -466,7 +464,7 @@
466464
"9 2.0 1.0 194.444444 0.950142 "
467465
]
468466
},
469-
"execution_count": 135,
467+
"execution_count": 11,
470468
"metadata": {},
471469
"output_type": "execute_result"
472470
}

0 commit comments

Comments
 (0)