Commit 56e9975

committed Feb 9, 2022
initial commit
0 parents  commit 56e9975

14,434 files changed: +13,093 −0 lines changed

‎.gitignore

+8
@@ -0,0 +1,8 @@
# Others
###################
.git/

# Python stuff
###################
__pycache__/
model/

‎ACNetComm_old.py

+139
@@ -0,0 +1,139 @@
import tensorflow as tf
import tensorflow.contrib.layers as layers
import numpy as np

# Parameters for training
GRAD_CLIP = 1000.0
KEEP_PROB1 = 1  # was 0.5
KEEP_PROB2 = 1  # was 0.7
RNN_SIZE = 512
GOAL_REPR_SIZE = 12

# Used to initialize weights for policy and value output layers
# (Do we need to use that? Maybe not now)
def normalized_columns_initializer(std=1.0):
    def _initializer(shape, dtype=None, partition_info=None):
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer

class ACNet:
    def __init__(self, scope, a_size, trainer, TRAINING, GRID_SIZE, GLOBAL_NET_SCOPE):
        with tf.variable_scope(str(scope)+'/qvalues'):
            # The input size may require more work to fit the interface.
            self.inputs = tf.placeholder(shape=[None, 4, GRID_SIZE, GRID_SIZE], dtype=tf.float32)
            self.goal_pos = tf.placeholder(shape=[None, 3], dtype=tf.float32)
            self.myinput = tf.transpose(self.inputs, perm=[0, 2, 3, 1])  # NCHW -> NHWC

            # Incoming message vector (one RNN_SIZE-dim vector per sample), combined
            # with the LSTM output inside _build_net before the policy head
            self.message = tf.placeholder(shape=[None, RNN_SIZE], dtype=tf.float32)

            self.policy, self.value, self.state_out, self.state_in, self.state_init, self.blocking, self.on_goal, self.valids, self.priority = self._build_net(self.myinput, self.goal_pos, self.message, RNN_SIZE, TRAINING, a_size)
        if TRAINING:
            # Placeholders for training targets: RL targets (value, advantages)
            # plus supervised labels for the auxiliary and imitation heads
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(self.actions, a_size, dtype=tf.float32)
            self.train_valid = tf.placeholder(shape=[None, a_size], dtype=tf.float32)
            self.target_v = tf.placeholder(tf.float32, [None], 'Vtarget')
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)
            self.target_blockings = tf.placeholder(tf.float32, [None])
            self.target_on_goals = tf.placeholder(tf.float32, [None])
            self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])
            self.train_value = tf.placeholder(tf.float32, [None])
            self.optimal_actions = tf.placeholder(tf.int32, [None])
            self.optimal_actions_onehot = tf.one_hot(self.optimal_actions, a_size, dtype=tf.float32)
            self.target_priority = tf.placeholder(tf.float32, [None])

            # Loss functions
            self.value_loss = tf.reduce_sum(self.train_value*tf.square(self.target_v - tf.reshape(self.value, shape=[-1])))
            self.entropy = - tf.reduce_sum(self.policy * tf.log(tf.clip_by_value(self.policy, 1e-10, 1.0)))
            self.policy_loss = - tf.reduce_sum(tf.log(tf.clip_by_value(self.responsible_outputs, 1e-15, 1.0)) * self.advantages)
            self.valid_loss = - tf.reduce_sum(tf.log(tf.clip_by_value(self.valids, 1e-10, 1.0)) *
                self.train_valid + tf.log(tf.clip_by_value(1-self.valids, 1e-10, 1.0)) * (1-self.train_valid))
            self.blocking_loss = - tf.reduce_sum(self.target_blockings*tf.log(tf.clip_by_value(self.blocking, 1e-10, 1.0))
                + (1-self.target_blockings)*tf.log(tf.clip_by_value(1-self.blocking, 1e-10, 1.0)))
            self.on_goal_loss = - tf.reduce_sum(self.target_on_goals*tf.log(tf.clip_by_value(self.on_goal, 1e-10, 1.0))
                + (1-self.target_on_goals)*tf.log(tf.clip_by_value(1-self.on_goal, 1e-10, 1.0)))
            # Combined RL objective (on_goal_loss is built above but not included here)
            self.loss = 0.5 * self.value_loss + self.policy_loss + 0.5*self.valid_loss \
                - self.entropy * 0.01 + .5*self.blocking_loss

            self.priority_loss = - tf.reduce_mean(self.target_priority*tf.log(tf.clip_by_value(self.priority, 1e-10, 1.0))
                + (1-self.target_priority)*tf.log(tf.clip_by_value(1-self.priority, 1e-10, 1.0)))
            # self.imitation_loss = tf.reduce_mean(tf.contrib.keras.backend.categorical_crossentropy(self.optimal_actions_onehot,self.policy))
            self.behavior_cloning_loss = tf.reduce_mean(tf.contrib.keras.backend.categorical_crossentropy(self.optimal_actions_onehot, self.policy))
            self.imitation_loss = self.behavior_cloning_loss + 0.5*self.priority_loss

            # Get gradients from the local network using local losses and
            # normalize the gradients using clipping
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope+'/qvalues')
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, GRAD_CLIP)

            # Apply local gradients to the global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GLOBAL_NET_SCOPE+'/qvalues')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))

            # Now the gradients for the imitation loss
            self.i_gradients = tf.gradients(self.imitation_loss, local_vars)
            self.i_var_norms = tf.global_norm(local_vars)
            i_grads, self.i_grad_norms = tf.clip_by_global_norm(self.i_gradients, GRAD_CLIP)

            # Apply local imitation gradients to the global network
            self.apply_imitation_grads = trainer.apply_gradients(zip(i_grads, global_vars))

        print("Hello World... From "+str(scope))  # :)

    def _build_net(self, inputs, goal_pos, message, RNN_SIZE, TRAINING, a_size):
        w_init = layers.variance_scaling_initializer()

        # Convolutional encoder for the observation map
        conv1 = layers.conv2d(inputs=inputs, padding="SAME", num_outputs=RNN_SIZE//4, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        conv1a = layers.conv2d(inputs=conv1, padding="SAME", num_outputs=RNN_SIZE//4, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        conv1b = layers.conv2d(inputs=conv1a, padding="SAME", num_outputs=RNN_SIZE//4, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        pool1 = layers.max_pool2d(inputs=conv1b, kernel_size=[2, 2])
        conv2 = layers.conv2d(inputs=pool1, padding="SAME", num_outputs=RNN_SIZE//2, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        conv2a = layers.conv2d(inputs=conv2, padding="SAME", num_outputs=RNN_SIZE//2, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        conv2b = layers.conv2d(inputs=conv2a, padding="SAME", num_outputs=RNN_SIZE//2, kernel_size=[3, 3], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=tf.nn.relu)
        pool2 = layers.max_pool2d(inputs=conv2b, kernel_size=[2, 2])
        conv3 = layers.conv2d(inputs=pool2, padding="VALID", num_outputs=RNN_SIZE-GOAL_REPR_SIZE, kernel_size=[2, 2], stride=1, data_format="NHWC", weights_initializer=w_init, activation_fn=None)

        # Flattened observation features concatenated with the goal embedding,
        # then two fully connected layers with a residual connection
        flat = tf.nn.relu(layers.flatten(conv3))
        goal_layer = layers.fully_connected(inputs=goal_pos, num_outputs=GOAL_REPR_SIZE)
        hidden_input = tf.concat([flat, goal_layer], 1)
        h1 = layers.fully_connected(inputs=hidden_input, num_outputs=RNN_SIZE)
        d1 = layers.dropout(h1, keep_prob=KEEP_PROB1, is_training=TRAINING)
        h2 = layers.fully_connected(inputs=d1, num_outputs=RNN_SIZE, activation_fn=None)
        d2 = layers.dropout(h2, keep_prob=KEEP_PROB2, is_training=TRAINING)
        self.h3 = tf.nn.relu(d2 + hidden_input)

        # Recurrent network for temporal dependencies
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(RNN_SIZE, state_is_tuple=True)
        c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
        state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
        rnn_in = tf.expand_dims(self.h3, [0])
        step_size = tf.shape(inputs)[:1]
        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size,
            time_major=False)
        lstm_c, lstm_h = lstm_state
        state_out = (lstm_c[:1, :], lstm_h[:1, :])
        self.rnn_out = tf.reshape(lstm_outputs, [-1, RNN_SIZE])

        # Communication: squash and project the incoming message, then
        # concatenate it with the LSTM output for the policy head
        message_sig = tf.sigmoid(message)
        comm_layer = layers.fully_connected(inputs=message_sig, num_outputs=RNN_SIZE, weights_initializer=normalized_columns_initializer(1./float(RNN_SIZE)), biases_initializer=None, activation_fn=None)
        # comm_sig = tf.sigmoid(comm_layer)
        comm_rnn_concat = tf.concat([comm_layer, self.rnn_out], 1)
        # comm_rnn_concat = tf.nn.relu(comm_layer+self.rnn_out)

        # Output heads
        policy_layer = layers.fully_connected(inputs=comm_rnn_concat, num_outputs=a_size, weights_initializer=normalized_columns_initializer(1./float(a_size)), biases_initializer=None, activation_fn=None)
        # dp = layers.dropout(policy_layer, keep_prob=KEEP_PROB1, is_training=TRAINING)
        policy = tf.nn.softmax(policy_layer)
        policy_sig = tf.sigmoid(policy_layer)  # returned as 'valids' (per-action validity estimate)
        value = layers.fully_connected(inputs=self.rnn_out, num_outputs=1, weights_initializer=normalized_columns_initializer(1.0), biases_initializer=None, activation_fn=None)
        blocking = layers.fully_connected(inputs=self.rnn_out, num_outputs=1, weights_initializer=normalized_columns_initializer(1.0), biases_initializer=None, activation_fn=tf.sigmoid)
        on_goal = layers.fully_connected(inputs=self.rnn_out, num_outputs=1, weights_initializer=normalized_columns_initializer(1.0), biases_initializer=None, activation_fn=tf.sigmoid)
        priority = layers.fully_connected(inputs=self.rnn_out, num_outputs=1, weights_initializer=normalized_columns_initializer(1.0), biases_initializer=None, activation_fn=tf.sigmoid)

        return policy, value, state_out, state_in, state_init, blocking, on_goal, policy_sig, priority

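For reference, a minimal usage sketch of the class above (not part of this commit), assuming TensorFlow 1.x with tf.contrib available. The scope names, optimizer, learning rate, GRID_SIZE, and a_size values below are illustrative assumptions, not values taken from this repository.

# Usage sketch (assumed setup, TF 1.x): one global network plus one worker, A3C-style.
import numpy as np
import tensorflow as tf
from ACNetComm_old import ACNet, RNN_SIZE

GRID_SIZE = 10  # assumed observation window size
a_size = 5      # assumed number of discrete actions

# Global (shared) network is built without training ops; the worker builds
# losses and applies its clipped gradients to the global variables.
master = ACNet('global', a_size, None, False, GRID_SIZE, 'global')
worker = ACNet('worker_0', a_size, tf.train.RMSPropOptimizer(learning_rate=2e-5),
               True, GRID_SIZE, 'global')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Dummy observation: 4-channel GRID_SIZE x GRID_SIZE map, 3-dim goal vector,
    # and an all-zero incoming message.
    obs = np.zeros((1, 4, GRID_SIZE, GRID_SIZE), dtype=np.float32)
    goal = np.zeros((1, 3), dtype=np.float32)
    msg = np.zeros((1, RNN_SIZE), dtype=np.float32)
    c_init, h_init = worker.state_init

    policy, value, (c_out, h_out) = sess.run(
        [worker.policy, worker.value, worker.state_out],
        feed_dict={worker.inputs: obs,
                   worker.goal_pos: goal,
                   worker.message: msg,
                   worker.state_in[0]: c_init,
                   worker.state_in[1]: h_init})
    print(policy.shape, value.shape)  # (1, a_size), (1, 1)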