Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull RAC updated changes #9

Open
wants to merge 63 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
308ac43
Update Readme.md
ysharma1126 Apr 10, 2017
739d219
updated launcher, runner
vladfi1 Apr 10, 2017
ad24474
Merge branch 'master' of https://github.com/ysharma1126/phillip
vladfi1 Apr 10, 2017
cc9cce3
Blah blah
vladfi1 Apr 10, 2017
d0cd2f2
blah
ysharma1126 Apr 10, 2017
190e688
fix merge conflicts
ysharma1126 Apr 13, 2017
6d7b7a8
Delete .runner.py.swp
ysharma1126 Apr 13, 2017
b984e2a
Delete .launcher.py.swp
ysharma1126 Apr 13, 2017
25c0b88
Delete .RL.py.swp
ysharma1126 Apr 13, 2017
a337391
Delete .ac.py.swp
ysharma1126 Apr 13, 2017
8ea5dbd
Delete .rac.py.swp
ysharma1126 Apr 13, 2017
93e9bfc
Delete .tf_lib.py.swp
ysharma1126 Apr 13, 2017
1f8f1c9
recurrent actor critic initial train
ysharma1126 Apr 13, 2017
73a88c0
fixed runner
ysharma1126 Apr 13, 2017
339bb36
Fixing merge conflicts
ysharma1126 Apr 13, 2017
a48a772
readme + vladnotes
ysharma1126 Apr 13, 2017
2cba316
agents
ysharma1126 Apr 13, 2017
40298cd
Update Readme.md
ysharma1126 Apr 14, 2017
6fd60cd
updated runner
ysharma1126 Apr 14, 2017
3a09d61
Merge branch 'master' of https://github.com/ysharma1126/phillip_actio…
ysharma1126 Apr 14, 2017
3361f44
rac add fully connected before GRU
vladfi1 Apr 16, 2017
8279800
fix syntax error
vladfi1 Apr 16, 2017
2e1b3c4
updated vlad notes
ysharma1126 Apr 17, 2017
856a4bd
python paths
ysharma1126 Apr 17, 2017
17d17b9
updated runner
ysharma1126 Apr 18, 2017
7b71a68
updated half life
ysharma1126 Apr 18, 2017
bd00f83
vlad notes
ysharma1126 Apr 20, 2017
e8359b1
Tune list
ysharma1126 Apr 21, 2017
db0088d
add fully-connected to rac
vladfi1 Apr 21, 2017
c0df44f
Merge branch 'master' of http://www.github.com/ysharma1126/phillip_ac…
vladfi1 Apr 21, 2017
7a210a5
TODO
ysharma1126 Apr 21, 2017
7e94830
Merge branch 'master' of https://github.com/ysharma1126/phillip_actio…
ysharma1126 Apr 21, 2017
03dd359
fix merge conflicts
ysharma1126 Apr 22, 2017
0052400
rac entropy reward_halflife
ysharma1126 Apr 22, 2017
2a2b4aa
RDQN
ysharma1126 Apr 22, 2017
1db976a
q_layers
ysharma1126 Apr 22, 2017
5537a4a
syntax
ysharma1126 Apr 22, 2017
68e1c34
Readme Slurm
ysharma1126 Apr 23, 2017
5b4d683
Fix RDQN
ysharma1126 Apr 23, 2017
59cb0c1
Adding delay
ysharma1126 Apr 24, 2017
97dc478
RecurrentDQN
ysharma1126 Apr 24, 2017
2e4b3af
Initial state - zero
ysharma1126 Apr 24, 2017
39000e2
search params and run batch jobs
vladfi1 Apr 25, 2017
dcf053f
update gridsearch to randomly sample
vladfi1 Apr 25, 2017
f89092f
Update Readme.md
elifriedman Apr 25, 2017
387e26d
Update Readme.md
elifriedman Apr 25, 2017
cffac6f
Update
elifriedman Apr 25, 2017
75d08f3
log file name includes job id
vladfi1 Apr 25, 2017
dec9776
Merge branch 'master' of http://www.github.com/ysharma1126/phillip_ac…
vladfi1 Apr 25, 2017
8f08b41
fix bugs in batcher
vladfi1 Apr 25, 2017
cd997a2
delay 2
ysharma1126 Apr 25, 2017
ce68bd0
Merge branch 'master' of https://github.com/ysharma1126/phillip_actio…
ysharma1126 Apr 25, 2017
c749470
fix entropy_scale
elifriedman Apr 25, 2017
30eda33
Merge branch 'master' of https://github.com/ysharma1126/phillip_actio…
ysharma1126 Apr 25, 2017
d03a755
delay 2
ysharma1126 Apr 25, 2017
6301a9d
fix q layers
ysharma1126 Apr 25, 2017
2ad614e
vlad notes
ysharma1126 Apr 25, 2017
7c6061a
try against self
ysharma1126 Apr 29, 2017
d5b8853
Retrace returns for off-policy RL.
vladfi1 May 1, 2017
60c6abe
update batcher
vladfi1 May 2, 2017
95c6033
Merge branch 'master' of https://github.com/vladfi1/phillip
vladfi1 May 2, 2017
417ce8e
test
vladfi1 May 2, 2017
58914e1
Update Readme.md
ysharma1126 Aug 24, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update batcher
vladfi1 committed May 2, 2017
commit 60c6abedf3a2d4b64f3e2633741cb7ad86ecb12f
46 changes: 27 additions & 19 deletions batcher.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@

SAVE_DIR = "saves/"
LOG_DIR = "slurm_logs/"
TRAIN_TIME = 3600 * 4 # 4 hours
TRAIN_TIME = 3600 * 10 # 10 hours

def get_jobs():
not_ran = set()
@@ -21,25 +21,28 @@ def get_jobs():
return not_ran

def get_jobid(job):
logs = os.listdir(LOG_DIR)
for log in logs:
if job in log:
l = log.rfind("_")
r = log.rfind(".")
return int(log[l+1:r])
return 4294967294 # Seems like this is the default job id
cmd = "squeue -u vladfi1 -o '%i %j %t' | grep {0}".format(job)
output = subprocess.check_output(cmd, shell=True).strip().split()
return int(output[0])

def get_status(job_id):
cmd = "squeue --job {0} -o '%t'".format(job_id)
output = subprocess.check_output(cmd, shell=True).splitlines()
if len(output) != 2:
raise ValueError("Bad output:" + str(output))

status = output[1]
if type(status)==bytes:
status = status.decode("utf-8")
return status

def get_trainnode(job_id):
cmd = "squeue --job {0}".format(job_id)
cmd = "squeue --job {0} -o '%N'".format(job_id)
output = subprocess.check_output(cmd, shell=True).splitlines()
if len(output) != 2:
return None, None
output = output[1].split()
if len(output) != 8:
return None, None
status = output[4]
node = output[7][4:]
return status, int(node)
raise ValueError("Bad output:" + str(output))
node = output[1][4:]
return int(node)

def main():
queue = set()
@@ -54,7 +57,7 @@ def main():

# start training
train_cmd = "python launcher.py {0}/{1} --init".format(SAVE_DIR,job)
print("Running train command:" train_cmd)
print("Running train command:", train_cmd)
os.system(train_cmd)

# make sure the job started
@@ -66,10 +69,15 @@ def main():
status = "PD"
while status == "PD":
time.sleep(5)
status, train_machine = get_trainnode(job_id)
status = get_status(job_id)

if status != "R":
raise ValueError("Bad Status: " + str(status))

train_machine = get_trainnode(job_id)

print("Done waiting status =",status,"train machine =",str(train_machine))
if status == None:
if status is None:
continue

agent_cmd = "python launcher.py {0}/{1} --trainer {2}".format(SAVE_DIR, job, train_machine)
4 changes: 2 additions & 2 deletions phillip/dqn.py
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ class DQN(Default):
hidden_size = []

_options = [
Option('q_layers', type=int, nargs='+', default=[128, 128], help="sizes of the dqn hidden layers"),
Option('q_fc_layers', type=int, nargs='+', default=[128, 128], help="sizes of the dqn hidden layers"),
Option('epsilon', type=float, default=0.02, help="pick random action with probability EPSILON"),
Option('temperature', type=float, default=0.01, help="Boltzmann distribution over actions"),
Option('sarsa', type=bool, default=True, help="use action taken instead of max when computing target Q-values"),
@@ -32,7 +32,7 @@ def __init__(self, embedGame, embedAction, global_step, rlConfig, scope='q', **k
with tf.variable_scope(scope):
self.net = tfl.Sequential()
prev_size = history_size
for i, size in enumerate(self.q_layers):
for i, size in enumerate(self.q_fc_layers):
with tf.variable_scope("layer_%d" % i):
self.net.append(tfl.FCLayer(prev_size, size, self.nl))
prev_size = size