-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathtrain_keras.py
123 lines (103 loc) · 4.26 KB
/
train_keras.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from State import AI_Board
import os
import random
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.core import Dense, Activation, Flatten
from keras.optimizers import Adam
import cv2
def build_network(num_actions):
    """Build and compile the convolutional Q-network.

    The model takes frames of shape ``(80, 160, 3)`` and emits one value
    per action. If a ``dqn.h5`` file exists in the working directory its
    weights are loaded before compilation, so training can resume.

    Args:
        num_actions: Number of units in the output layer (one per action).

    Returns:
        A compiled ``Sequential`` model (MSE loss, Adam optimizer).
    """
    print("Initializing model ....")
    model = Sequential()

    # Convolutional feature extractor.
    model.add(Conv2D(32, (8, 8), padding='same',
                     strides=(4, 4), input_shape=(80, 160, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4), padding='same', strides=(2, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), padding='same', strides=(1, 1)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4), padding='same', strides=(2, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), padding='same', strides=(1, 1)))
    model.add(Activation('relu'))

    # Fully connected head.
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(num_actions))
    # The outputs are regressed with MSE against reward-based targets, so
    # they must be unbounded. A softmax here would force them to sum to 1
    # and make the regression ill-posed. A linear activation layer is kept
    # (rather than dropping the layer) so the layer count is unchanged.
    model.add(Activation('linear'))

    if os.path.exists("dqn.h5"):
        print("Loading weights from dqn.h5 .....")
        model.load_weights("dqn.h5")
        print("Weights loaded successfully.")

    adam = Adam(lr=1e-4)
    model.compile(loss='mse', optimizer=adam)
    print("Finished building model.")
    return model
def process(input):
    """Resize a raw frame and rescale its pixel values.

    The frame is resized with ``cv2.resize`` to a target size of
    ``(160, 80)`` (width, height in OpenCV's convention, i.e. 80 rows by
    160 columns) and then divided by 255.0.

    Args:
        input: Image array accepted by ``cv2.resize``.
            NOTE(review): presumably 8-bit pixels, so the result lies in
            [0, 1] -- confirm against the game's frame format.

    Returns:
        The resized image as a float array.
    """
    resized = cv2.resize(input, (160, 80))
    return resized / 255.0
def _train_on_minibatch(model, replay, batch_size, discount):
    """Run one Q-learning update on a random minibatch and return its loss."""
    minibatch = random.sample(replay, batch_size)
    states, actions, rewards, next_states, alive_flags = zip(*minibatch)

    states = np.concatenate(states)
    next_states = np.concatenate(next_states)
    actions = np.asarray(actions)
    rewards = np.asarray(rewards, dtype=float)
    # 1.0 while the episode continues, 0.0 on a terminal transition, so the
    # bootstrapped value is dropped for terminal states.
    # NOTE(review): assumes the game's `alive` flag is truthy while alive.
    alive_flags = np.asarray(alive_flags, dtype=float)

    # Start from the current predictions so only the taken action's output
    # contributes to the loss.
    targets = model.predict(states)
    print(states.shape, next_states.shape, targets.shape)
    best_next = np.max(model.predict(next_states), axis=1)
    targets[np.arange(batch_size), actions] = (
        rewards + discount * best_next * alive_flags)
    return model.train_on_batch(states, targets)


def train_network():
    """Train the Q-network on the game with epsilon-greedy exploration.

    Runs forever: each iteration picks an action (random with probability
    epsilon, otherwise the network's argmax), advances the game, stores the
    transition in a bounded replay buffer and, once more than ``observe``
    timesteps have passed, fits the network on a random minibatch. Weights
    are written to ``dqn.h5`` every 400 timesteps and a status line is
    printed every timestep.
    """
    # initialize an instance of game
    game = AI_Board()
    num_actions = game.action_num  # number of valid actions
    model = build_network(num_actions)

    discount = 0.99  # decay rate of past observations
    observe = 200  # timesteps to observe before training
    explore = 3000000  # frames over which to anneal epsilon
    FINAL_EPSILON = 0.0001  # final value of epsilon
    INITIAL_EPSILON = 0.1  # starting value of epsilon
    replay_memory = 300  # number of previous transitions to remember
    batch_size = 16  # transitions sampled per training step

    epsilon = INITIAL_EPSILON
    timestep = 0
    loss = 0

    # store the previous observations in replay memory; maxlen discards the
    # oldest transition automatically once the buffer is full
    replay = deque(maxlen=replay_memory)

    image, _, reward, alive = game.next(0)
    # preprocess the frame and add a leading batch axis of size 1
    input_image = process(image)
    input_image = input_image.reshape((1,) + input_image.shape)

    while True:
        if random.random() <= epsilon:
            # randrange excludes the upper bound; randint(0, num_actions)
            # could return the out-of-range action `num_actions`.
            action = random.randrange(num_actions)
        else:
            q = model.predict(input_image)
            action = int(np.argmax(q))

        # decay epsilon linearly
        if epsilon > FINAL_EPSILON and timestep > observe:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / explore

        image1, _, reward, alive = game.next(action)
        image1 = process(image1)
        input_image1 = image1.reshape((1,) + image1.shape)

        replay.append((input_image, action, reward, input_image1, alive))

        if timestep > observe:
            try:
                loss += _train_on_minibatch(
                    model, replay, batch_size, discount)
            except Exception as e:
                # Best effort: report the failed update but fall through,
                # because the game has already advanced and the current
                # state and timestep must stay in sync with it.
                print(e)

        input_image = input_image1
        timestep = timestep + 1

        if timestep % 400 == 0:
            model.save_weights("dqn.h5", overwrite=True)
        print("TIMESTEP: " + str(timestep) + ", EPSILON: " + str(epsilon) +
              ", ACTION: " + str(action) + ", REWARD: " + str(reward) + ", Loss: " + str(loss))
        loss = 0
# Start training only when this file is run as a script, not when imported.
if __name__ == "__main__":
    train_network()