-
Notifications
You must be signed in to change notification settings - Fork 13
/
actor_network.py
156 lines (120 loc) · 6.59 KB
/
actor_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import tensorflow as tf
import numpy as np
# how to synchronize the parameters across different devices
# https://stackoverflow.com/questions/37801137/duplicate-a-tensorflow-graph
# relationship between Session and Graph: one session per graph; one graph can be used in multiple sessions.
# https://www.tensorflow.org/versions/r0.12/api_docs/python/client/session_management
# create multiple instances
# https://stackoverflow.com/questions/41709207/python-create-n-number-of-class-instances
LAYER1_SIZE = 10
LAYER2_SIZE = 10
LEARNING_RATE = 1e-4
# target updating rate
TAU = 0.001
BATCH_SIZE = 64
# when adding new agents, initialize only the newly created (still-uninitialized) variables
class ActorNetwork:
    """Actor (policy) network for DDPG-style multi-agent training (TF 1.x).

    Builds a two-hidden-layer ReLU network with a tanh output head, an
    exponential-moving-average (EMA) target copy of it, and the deterministic
    policy-gradient training op.  A new agent is either created from scratch
    or seeded with the current weights of an existing agent (``pre_nets``),
    and only the newly created variables are initialized so that previously
    built agents keep their trained weights.
    """

    def __init__(self, sess, state_dim, action_dim, agent_name, pre_nets=None):
        """Construct the actor graph inside variable scope ``agent_name``.

        Args:
            sess: tf.Session used to run all ops of this network.
            state_dim: dimensionality of the state input.
            action_dim: dimensionality of the action output.
            agent_name: unique variable-scope name for this agent's variables.
            pre_nets: optional sequence of 6 variables
                [W1, b1, W2, b2, W3, b3] from an existing agent; their
                current values seed the new network's weights.
        """
        self.sess = sess
        self.agent_name = agent_name
        self.state_dim = state_dim
        self.action_dim = action_dim
        # Identity check (`is None`) rather than `== None`, per PEP 8.
        if pre_nets is None:
            print('create new agent')
            self.state_input, self.action_output, self.nets = \
                self.create_new_network(state_dim, action_dim)
        else:
            print('create new agent use previous weights')
            self.state_input, self.action_output, self.nets = \
                self.create_copy_network(state_dim, action_dim, pre_nets)
        self.target_update, \
            self.target_action_output = self.create_target_network(
                self.action_output, self.nets)
        self.create_training_method()
        # Initialize only uninitialized variables so earlier agents keep
        # their trained weights; then sync the EMA target once.
        self.init_new_variables()
        self.update_target()

    def create_new_network(self, state_dim, action_dim):
        """Build a fresh actor network with Xavier-initialized weights.

        Returns:
            Tuple of (state_input placeholder, action_output tensor,
            list of this scope's global variables).
        """
        layer1_size = LAYER1_SIZE
        layer2_size = LAYER2_SIZE
        with tf.variable_scope(self.agent_name):
            state_input = tf.placeholder('float', [None, state_dim])
            W1 = tf.get_variable('W1', [state_dim, layer1_size],
                                 initializer=tf.contrib.layers.xavier_initializer())
            # NOTE(review): Xavier init on bias vectors is unusual (zeros are
            # the common choice) but is kept to preserve existing behavior.
            b1 = tf.get_variable('b1', [layer1_size],
                                 initializer=tf.contrib.layers.xavier_initializer())
            W2 = tf.get_variable('W2', [layer1_size, layer2_size],
                                 initializer=tf.contrib.layers.xavier_initializer())
            b2 = tf.get_variable('b2', [layer2_size],
                                 initializer=tf.contrib.layers.xavier_initializer())
            W3 = tf.get_variable('W3', [layer2_size, action_dim],
                                 initializer=tf.contrib.layers.xavier_initializer())
            # Small uniform init keeps initial tanh outputs close to zero.
            b3 = tf.get_variable(
                'b3', initializer=tf.random_uniform([action_dim], -3e-3, 3e-3))
            layer1 = tf.nn.relu(tf.matmul(state_input, W1) + b1)
            layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
            action_output = tf.tanh(tf.matmul(layer2, W3) + b3)
            nets = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope=self.agent_name)
        return state_input, action_output, nets

    def create_copy_network(self, state_dim, action_dim, pre_nets):
        """Build an actor network seeded with another agent's current weights.

        Each variable is initialized from the *evaluated* value of the
        corresponding variable in ``pre_nets`` (order: W1, b1, W2, b2, W3, b3).

        Returns:
            Tuple of (state_input placeholder, action_output tensor,
            list of this scope's global variables).
        """
        with tf.variable_scope(self.agent_name):
            state_input = tf.placeholder('float', [None, state_dim])
            # sess.run() fetches concrete numpy values, so the copy is a
            # snapshot: the new agent does not share variables with the old.
            W1 = tf.get_variable('W1', initializer=self.sess.run(pre_nets[0]))
            b1 = tf.get_variable('b1', initializer=self.sess.run(pre_nets[1]))
            W2 = tf.get_variable('W2', initializer=self.sess.run(pre_nets[2]))
            b2 = tf.get_variable('b2', initializer=self.sess.run(pre_nets[3]))
            W3 = tf.get_variable('W3', initializer=self.sess.run(pre_nets[4]))
            b3 = tf.get_variable('b3', initializer=self.sess.run(pre_nets[5]))
            layer1 = tf.nn.relu(tf.matmul(state_input, W1) + b1)
            layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
            action_output = tf.tanh(tf.matmul(layer2, W3) + b3)
            nets = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope=self.agent_name)
        return state_input, action_output, nets

    def create_target_network(self, action_output, nets):
        """Create the EMA-tracked target network via graph rewriting.

        Applies an ExponentialMovingAverage (decay = 1 - TAU) over ``nets``
        and rebuilds ``action_output`` with every variable read replaced by
        its moving average, yielding the target policy output.

        Returns:
            Tuple of (target_update op, target_action_output tensor).
        """
        ema = tf.train.ExponentialMovingAverage(decay=1 - TAU, zero_debias=True)
        target_update = ema.apply(nets)
        # Map each variable's read tensor to its EMA shadow's read tensor.
        replace_ts = {}
        for var in nets:
            avg = ema.average(var)
            replace_ts.update({var.value(): avg.value()})
        target_action_output = tf.contrib.graph_editor.graph_replace(
            action_output, replace_ts)
        return target_update, target_action_output

    def create_training_method(self):
        """Create the deterministic policy-gradient training op.

        Feeds dQ/da from the critic (``q_gradient_input``); the negative sign
        turns gradient descent into ascent on the estimated Q value.
        """
        self.q_gradient_input = tf.placeholder('float', [None, self.action_dim])
        self.parameters_gradients = tf.gradients(
            self.action_output, self.nets, -self.q_gradient_input)
        self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(
            zip(self.parameters_gradients, self.nets))

    def update_target(self):
        """Advance the EMA target-network weights by one step."""
        self.sess.run(self.target_update)

    def train(self, q_gradient_batch, state_batch):
        """Run one policy-gradient update.

        Args:
            q_gradient_batch: dQ/da from the critic, [batch_size, action_dim].
            state_batch: states, [batch_size, state_dim].
        """
        self.sess.run(self.optimizer, feed_dict={
            self.q_gradient_input: q_gradient_batch,
            self.state_input: state_batch,
        })

    def action(self, state):
        """Return the policy output for a single state (batched as [state])."""
        return self.sess.run(self.action_output, feed_dict={
            self.state_input: [state],
        })

    def actions(self, state_batch):
        """Return policy outputs for a batch of states."""
        return self.sess.run(self.action_output, feed_dict={
            self.state_input: state_batch,
        })

    def target_actions(self, state_batch):
        """Return target-network outputs for a batch of states."""
        return self.sess.run(self.target_action_output, feed_dict={
            self.state_input: state_batch,
        })

    def init_new_variables(self):
        """Initialize only the variables that are not yet initialized.

        This makes it convenient to add new agents to an existing session
        without resetting the weights of already-trained agents.
        https://asyoulook.com/computers%20&%20internet/tensorflow-how-to-get-the-list-of-uninitialized-variables-from-tf-report-uninitialized-variables/1730337
        https://stackoverflow.com/questions/35164529/in-tensorflow-is-there-any-way-to-just-initialize-uninitialised-variables
        """
        list_of_variables = tf.global_variables()
        # report_uninitialized_variables() returns b'...' names, so decode
        # to str before comparing against Variable.name.
        uninit_names = set(self.sess.run(tf.report_uninitialized_variables()))
        uninit_names = [v.decode('utf-8') for v in uninit_names]
        uninit_variables = [v for v in list_of_variables
                            if v.name.split(':')[0] in uninit_names]
        init_op = tf.variables_initializer(uninit_variables)
        self.sess.run(init_op)