-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
121 lines (93 loc) · 3.15 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import time
from colors import colors
def estimate_price(mileage, m_now, b_now):
    """Evaluate the linear model ``b_now + m_now * mileage``.

    Args:
        mileage: Input value at which the line is evaluated.
        m_now: Current slope of the line.
        b_now: Current intercept of the line.

    Returns:
        The value of the line at ``mileage``.
    """
    slope_term = m_now * mileage
    return b_now + slope_term
# def loss_function(m, b, points):
# total_error = 0
# for i in range(len(points)):
# x = points.iloc[i].km
# y = points.iloc[i].price
# total_error += ((y - (m * x + b)) ** 2)
# return (total_error / float(len(points)))
def gradient_descent(m_now, b_now, data, L):
    """Perform one batch gradient-descent step for the line ``price = m * km + b``.

    The gradient is accumulated over every row of ``data`` and averaged
    (the ``1 / n`` factor) before both parameters are updated together from
    the same residuals, so neither update sees a half-updated model.

    Args:
        m_now: Current slope.
        b_now: Current intercept.
        data: pandas DataFrame with numeric ``km`` and ``price`` columns.
        L: Learning rate.

    Returns:
        Tuple ``(m, b)`` holding the updated slope and intercept. When
        ``data`` has no rows the inputs are returned unchanged.
    """
    n = len(data)
    if n == 0:
        # Nothing to learn from; also avoids dividing by zero below.
        return (m_now, b_now)

    km = data['km']
    price = data['price']

    # Residuals of the current line (same linear model as estimate_price),
    # computed for all rows at once instead of row-by-row via iloc.
    errors = (b_now + (m_now * km)) - price

    # Sum over all samples: these are the accumulated gradients.
    m_gradient = (errors * km).sum()
    b_gradient = errors.sum()

    new_m = m_now - L * (1 / n) * m_gradient
    new_b = b_now - L * (1 / n) * b_gradient
    return (float(new_m), float(new_b))
def main():
    """Train a linear price-vs-mileage model on ``data.csv`` and show the fit.

    Steps:
      1. Read ``data.csv`` and check it has non-empty ``km`` and ``price``
         columns.
      2. Scale both columns by their maximum so that the fixed learning rate
         works on values of comparable size.
      3. Run ``epochs`` gradient-descent steps, keeping a snapshot of
         ``(m, b)`` every 50 epochs plus the final values.
      4. Write the parameters, converted back to the original units, to
         ``theta.py`` as ``theta0`` (intercept) and ``theta1`` (slope).
      5. Restore the data to its original units and display a scatter plot
         with an animation of the stored snapshots.

    Raises:
        SystemExit: with status 1 when the file cannot be read or does not
            contain usable data.
    """
    try:
        data = pd.read_csv('data.csv')
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; pandas' EmptyDataError
        # and ParserError (and decode errors) are ValueError subclasses.
        print(f"{colors().RED}Error: could not read file{colors().END}")
        raise SystemExit(1)

    if 'km' not in data.columns or 'price' not in data.columns or data.empty:
        print(f"{colors().RED}Error: data.csv must contain non-empty 'km' and 'price' columns{colors().END}")
        raise SystemExit(1)

    m = 0
    b = 0
    L = 0.1
    epochs = 1000

    # ---- normalize data ----
    print("\nNormalizing data from data.csv...")
    time.sleep(1)
    max_km = data['km'].max()
    max_price = data['price'].max()
    if max_km == 0 or max_price == 0:
        # Scaling by the maximum would divide by zero.
        print(f"{colors().RED}Error: 'km' and 'price' must have a non-zero maximum{colors().END}")
        raise SystemExit(1)
    data['km'] = data['km'] / max_km
    data['price'] = data['price'] / max_price

    # ---- train the model ----
    print("\nTraining the model...")
    animation_store = []
    for i in range(1, epochs + 1):
        if (i % 50) == 0:
            # Snapshot taken before the i-th update, used as an animation frame.
            animation_store.append([m, b])
        m, b = gradient_descent(m, b, data, L)
    animation_store.append([m, b])

    # ---- export m & b as thetas (converted back to original units) ----
    theta_content = "theta0 = " + str((b * max_price)) + "\r\n" + "theta1 = " + str((m * max_price / max_km)) + "\r\n"
    with open('theta.py', 'w') as file:
        file.write(theta_content)

    # ---- denormalize data for plotting ----
    print("\nDenormalizing data...")
    data['km'] = data['km'] * max_km
    data['price'] = data['price'] * max_price

    # ---- graph and animation ----
    fig, ax = plt.subplots()
    ax.set_xlim(0, max_km)
    # 10000 was the original fixed upper bound; grow it if the data exceeds it
    # so no point is drawn outside the axes.
    ax.set_ylim(0, max(10000, max_price))
    ax.set_xlabel('mileage')
    ax.set_ylabel('price')
    ax.grid()
    line, = ax.plot([], [], 'red')

    def animate(n):
        """Draw the regression line of snapshot ``n`` in original units."""
        snap_m, snap_b = animation_store[n]
        slope = snap_m * (max_price / max_km)
        intercept = snap_b * max_price
        # A straight line is fully described by its two end points.
        line.set_data([0, max_km], [intercept, slope * max_km + intercept])
        return (line,)

    plt.scatter(data.km, data.price, color="black")
    # The animation object must stay referenced until plt.show() returns,
    # otherwise it can be garbage-collected and the animation stops.
    anim = animation.FuncAnimation(fig, animate, frames=len(animation_store), interval=20, repeat=False)
    plt.show()
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()