Commit 946d3a6

first commit (0 parents)

File tree

102 files changed: +1170, -0 lines


.idea/A3C_breakoutv0.iml

+11 (generated IDE file, not rendered)

.idea/misc.xml

+4 (generated IDE file, not rendered)

.idea/modules.xml

+8 (generated IDE file, not rendered)

.idea/workspace.xml

+574 (generated IDE file, not rendered)
A3C_Network.py

+128
@@ -0,0 +1,128 @@
import tensorflow as tf
import numpy as np
import tensorflow.contrib.layers as nn
import tensorflow.contrib.slim as slim


SMALL_VALUE = 1e-20


class A3C_Network(object):

    def __init__(self, args, no_action, scope):
        self.scope = scope
        self.lstm_input_dim = args.lstm_input_dim
        self.lstm_size = args.lstm_size
        self.no_action = no_action
        self.initializer = tf.truncated_normal_initializer(stddev=0.02)
        self.biases_initializer = tf.constant_initializer(0.0)
        self.create_network()
        self.checkpoint_path = args.checkpoint_dir
        self.environment = args.environment

    def create_network(self):

        with tf.variable_scope(self.scope):
            # Input: stack of 4 preprocessed 84x84 frames.
            self.s = tf.placeholder("float", [None, 84, 84, 4])

            self.conv1 = nn.conv2d(inputs=self.s, num_outputs=16, kernel_size=8, stride=4,
                                   padding='valid', activation_fn=tf.nn.relu,
                                   biases_initializer=self.biases_initializer, scope='conv1')
            self.conv2 = nn.conv2d(inputs=self.conv1, num_outputs=32, kernel_size=4, stride=2,
                                   padding='valid', activation_fn=tf.nn.relu,
                                   biases_initializer=self.biases_initializer, scope='conv2')
            #self.conv3 = nn.conv2d(inputs=self.conv2, num_outputs=64, kernel_size=3, stride=2,
            #                       padding='SAME', activation_fn=tf.nn.relu,
            #                       weights_initializer=self.initializer, scope='conv3')
            #self.flatten1 = tf.reshape(self.conv2, shape=[-1, 6400])
            self.flatten1 = slim.flatten(self.conv2)
            self.fc1 = tf.contrib.layers.fully_connected(inputs=self.flatten1, num_outputs=self.lstm_input_dim,
                                                         activation_fn=tf.nn.relu,
                                                         biases_initializer=self.biases_initializer, scope='fc1')

            with tf.variable_scope("lstm1"):
                # Single-layer LSTM over the fc1 features; the batch dimension is
                # treated as the time dimension of one rollout.
                lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.lstm_size, state_is_tuple=True)
                c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
                h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
                self.lstm_state_init = [c_init, h_init]
                c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
                h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
                self.lstm_state_in = (c_in, h_in)
                rnn_in = tf.expand_dims(self.fc1, [0])
                step_size = tf.shape(self.s)[:1]
                state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
                lstm_outputs, lstm_state_out = tf.nn.dynamic_rnn(
                    lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size,
                    time_major=False)
                self.rnn_out = tf.reshape(lstm_outputs, [-1, self.lstm_size])

                lstm_c, lstm_h = lstm_state_out
                self.lstm_state = [lstm_c[:1, :], lstm_h[:1, :]]

            # Actor (policy) and critic (value) heads on top of the LSTM output.
            self.policy = tf.contrib.layers.fully_connected(inputs=self.rnn_out, num_outputs=self.no_action,
                                                            activation_fn=tf.nn.softmax,
                                                            weights_initializer=self.normalized_columns_initializer(0.01),
                                                            scope='policy')  # initializer std 0.01
            self.value = tf.contrib.layers.fully_connected(inputs=self.rnn_out, num_outputs=1,
                                                           activation_fn=None,
                                                           weights_initializer=self.normalized_columns_initializer(1.0),
                                                           scope='value')  # initializer std 1.0

            self.prepare_loss()

    def prepare_loss(self):
        self.a = tf.placeholder(shape=[None], dtype=tf.int32)
        self.a_onehot = tf.one_hot(self.a, self.no_action, dtype=tf.float32)
        self.y = tf.placeholder(shape=[None], dtype=tf.float32)
        self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)
        # Clip the policy before taking the log to avoid log(0).
        log_policy = tf.log(tf.clip_by_value(self.policy, SMALL_VALUE, 1.0))
        self.readout_action = tf.reduce_sum(tf.multiply(log_policy, self.a_onehot), reduction_indices=1)
        self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.y - tf.reshape(self.value, [-1])))
        self.policy_loss = -tf.reduce_sum(self.readout_action * self.advantages)
        self.entropy = -tf.reduce_sum(self.policy * log_policy)
        self.loss = 0.5 * self.value_loss + self.policy_loss - 0.01 * self.entropy

        # Gradients of the local loss w.r.t. this network's variables, clipped by global norm.
        grads = tf.gradients(self.loss, self.get_var_list())
        self.var_norms = tf.global_norm(self.get_var_list())
        self.grads, self.grad_norms = tf.clip_by_global_norm(grads, 40.0)

    def get_var_list(self):
        self.vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
        return self.vars

    def update_graph(self, from_net):
        # Build an op that copies the variables of from_net (e.g. the global
        # network) into this network's variables.
        with tf.variable_scope(self.scope):
            to_vars = self.get_var_list()
            from_vars = from_net.get_var_list()
            op_holder = []
            for from_var, self_var in zip(from_vars, to_vars):
                op_holder.append(self_var.assign(from_var))

            return tf.group(*op_holder)

    def load_model(self, sess, saver):
        checkpoint = tf.train.get_checkpoint_state(self.checkpoint_path)

        if checkpoint:
            saver.restore(sess, checkpoint.model_checkpoint_path)
            print('.............Model restored to global.............')
        else:
            init = tf.global_variables_initializer()
            sess.run(init)
            print('................No model is found.................')

    def save_model(self, sess, saver, time_step):
        print('............save model ............')
        saver.save(sess, self.checkpoint_path + '/' + self.environment + '-' + str(time_step) + '.ckpt')

    def normalized_columns_initializer(self, std=1.0):
        def _initializer(shape, dtype=None, partition_info=None):
            out = np.random.randn(*shape).astype(np.float32)
            out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)

        return _initializer

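For orientation, here is a minimal sketch of how `A3C_Network` might be instantiated and its copy op used. The `args` fields, action count, and scope names below are illustrative assumptions; the real wiring lives in `trainer.py`, whose contents are not shown in this commit excerpt.

```python
# Usage sketch (assumption: args values, no_action, and scope names are illustrative,
# not taken from trainer.py).
import argparse
import tensorflow as tf

from A3C_Network import A3C_Network

args = argparse.Namespace(lstm_input_dim=256, lstm_size=256,
                          checkpoint_dir='./checkpoints', environment='Breakout-v0')
no_action = 4  # number of discrete actions in the environment (illustrative)

# One shared "global" network plus one network per worker thread.
global_net = A3C_Network(args, no_action, scope='global')
worker_net = A3C_Network(args, no_action, scope='worker_0')

# Op that copies the global weights into the worker before each rollout.
sync_op = worker_net.update_graph(global_net)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(sync_op)
```

In the usual A3C setup, each worker syncs from the global network, collects a rollout, evaluates `grads` on its own loss, and applies them to the global network's variables.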
README.md

+54
@@ -0,0 +1,54 @@
# Implementation of A3C (Asynchronous Advantage Actor-Critic)

This is a TensorFlow implementation of the Asynchronous Advantage Actor-Critic algorithm with a CNN-LSTM as the function approximator.

## Original Paper
[Asynchronous Methods for Deep Reinforcement Learning](https://arxiv.org/abs/1602.01783)

## Demo

![Breakout_v0](/src/Breakout_v0.gif)

## Results

Training on Breakout-v0 was done on an Nvidia GeForce GTX 1070 GPU for 28 hours.

## Total Score vs. Number of Iterations (Breakout_v0)

![Scores](/src/Training_Breakout_Total_Scores.png)

## Episode Length vs. Number of Iterations (Breakout_v0)

![Episode_Length](/src/Training_Breakout_episode_length.png)


## Dependencies

* python 3.5
* tensorflow 1.1.0
* opencv 3.2.0
* OpenAI Gym

## Usage

For training, run:

```
$ python3 trainer.py
```

For a demo, run:

```
$ python3 play.py
```

## Credit

Got important help from this [project](https://github.com/MatheusMRFM/A3C-LSTM-with-Tensorflow).


# A3C_CNN_LSTM

Summary.py

+18
@@ -0,0 +1,18 @@
import tensorflow as tf


class Summary_Parameters():

    def __init__(self):

        # Non-trainable variables holding per-episode statistics for TensorBoard.
        self.total_reward = tf.Variable(0, dtype=tf.float32, trainable=False)
        self.episode_length = tf.Variable(0, dtype=tf.float32, trainable=False)
        self.total_loss = tf.Variable(0, dtype=tf.float32, trainable=False)
        self.policy_loss = tf.Variable(0, dtype=tf.float32, trainable=False)
        self.value_loss = tf.Variable(0, dtype=tf.float32, trainable=False)

        tf.summary.scalar('total rewards', self.total_reward)
        tf.summary.scalar('episode length', self.episode_length)
        tf.summary.scalar('total loss', self.total_loss)
        tf.summary.scalar('policy loss', self.policy_loss)
        tf.summary.scalar('value loss', self.value_loss)