Skip to content

Commit 48d9584

Browse files
authored
Merge pull request #61 from beduffy/master
Renamed args.tau to args.gae_lambda
2 parents 8826e21 + 26a9678 commit 48d9584

File tree

2 files changed, with 4 additions (+4) and 4 deletions (-4).

main.py

+2 -2 (2 additions, 2 deletions)
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
20 20
help='learning rate (default: 0.0001)')
21 21
parser.add_argument('--gamma', type=float, default=0.99,
22 22
help='discount factor for rewards (default: 0.99)')
23 -
parser.add_argument('--tau', type=float, default=1.00,
24 -
help='parameter for GAE (default: 1.00)')
23 +
parser.add_argument('--gae-lambda', type=float, default=1.00,
24 +
help='lambda parameter for GAE (default: 1.00)')
25 25
parser.add_argument('--entropy-coef', type=float, default=0.01,
26 26
help='entropy term coefficient (default: 0.01)')
27 27
parser.add_argument('--value-loss-coef', type=float, default=0.5,

train.py

+2 -2 (2 additions, 2 deletions)
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,10 @@ def train(rank, args, shared_model, counter, lock, optimizer=None):
92 92
advantage = R - values[i]
93 93
value_loss = value_loss + 0.5 * advantage.pow(2)
94 94

95 -
# Generalized Advantage Estimataion
95 +
# Generalized Advantage Estimation
96 96
delta_t = rewards[i] + args.gamma * \
97 97
values[i + 1] - values[i]
98 -
gae = gae * args.gamma * args.tau + delta_t
98 +
gae = gae * args.gamma * args.gae_lambda + delta_t
99 99

100 100
policy_loss = policy_loss - \
101 101
log_probs[i] * gae.detach() - args.entropy_coef * entropies[i]

0 commit comments

Comments (0)