
Commit f9843c1

[Config] updated configs to match latest experiments
1 parent 9880eab commit f9843c1

7 files changed (+19, -37 lines)


configs/experiment/scheduling/am-pomo.yaml

+1

@@ -14,6 +14,7 @@ model:
     _target_: rl4co.models.L2DAttnPolicy
     env_name: ${env.name}
     scaling_factor: ${scaling_factor}
+    normalization: "batch"
   batch_size: 64
   num_starts: 10
   num_augment: 0

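Several of the updated policies now expose a normalization option ("batch" here, instance in the POMO/PPO configs below). As a generic illustration of the distinction only, not rl4co's exact layer implementation, a minimal PyTorch sketch:

import torch
import torch.nn as nn

# Illustrative shapes only: (batch, embed_dim, num_nodes)
x = torch.randn(64, 128, 10)

# "batch": normalization statistics are shared across the whole batch
batch_norm = nn.BatchNorm1d(128)

# "instance": statistics are computed separately for each instance
instance_norm = nn.InstanceNorm1d(128)

print(batch_norm(x).shape, instance_norm(x).shape)  # both torch.Size([64, 128, 10])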
configs/experiment/scheduling/am-ppo.yaml

+1 -7

@@ -43,14 +43,8 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   train_data_size: 2000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True

configs/experiment/scheduling/base.yaml

+5 -3

@@ -22,17 +22,19 @@ trainer:

 seed: 12345678

-scaling_factor: 20
+scaling_factor: ${env.generator_params.max_processing_time}

 model:
   _target_: ???
   batch_size: ???
   train_data_size: 2_000
   val_data_size: 1_000
-  test_data_size: 1_000
+  test_data_size: 100
   optimizer_kwargs:
-    lr: 1e-4
+    lr: 2e-4
     weight_decay: 1e-6
   lr_scheduler: "ExponentialLR"
   lr_scheduler_kwargs:
     gamma: 0.95
+  reward_scale: scale
+  max_grad_norm: 1

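scaling_factor is now an OmegaConf interpolation instead of a hard-coded 20, so it follows whatever max_processing_time the environment generator is configured with. A minimal sketch of how such an interpolation resolves (the nested values below are placeholders, not taken from the actual configs):

from omegaconf import OmegaConf

# Placeholder structure mirroring base.yaml; the value 20 is illustrative only.
cfg = OmegaConf.create({
    "env": {"generator_params": {"max_processing_time": 20}},
    "scaling_factor": "${env.generator_params.max_processing_time}",
})

# The interpolation is resolved when the key is accessed.
print(cfg.scaling_factor)  # -> 20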
configs/experiment/scheduling/gnn-ppo.yaml

+6 -8

@@ -12,24 +12,22 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: False
+  normalization: instance
+  test_decode_type: greedy
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
+

 trainer:
   max_epochs: 10


 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True

configs/experiment/scheduling/hgnn-pomo.yaml

+1

@@ -18,6 +18,7 @@ model:
   stepwise_encoding: False
   scaling_factor: ${scaling_factor}
   het_emb: True
+  normalization: instance
   num_starts: 10
   batch_size: 64
   num_augment: 0

configs/experiment/scheduling/hgnn-ppo.yaml

+4 -12

@@ -12,24 +12,16 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: True
+  normalization: instance
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
-
-trainer:
-  max_epochs: 10
-

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True

configs/experiment/scheduling/matnet-ppo.yaml

+1 -7

@@ -36,13 +36,7 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True

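These files live under configs/experiment/scheduling/, so they are Hydra experiment configs selected through an experiment= override. A rough sketch of composing one of them programmatically, assuming a root config named "main" in the configs/ directory (that name is an assumption, not part of this commit):

from hydra import compose, initialize

# "main" as the entry-point config name is an assumption for illustration.
with initialize(config_path="configs", version_base=None):
    cfg = compose(
        config_name="main",
        overrides=["experiment=scheduling/gnn-ppo"],
    )

# After this commit, e.g. the GNN policy embedding size:
print(cfg.model.policy_kwargs.embed_dim)  # -> 256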
0 commit comments
