configs/experiment/scheduling: 7 files changed, +19 additions, -37 deletions

File 1:
   _target_: rl4co.models.L2DAttnPolicy
   env_name: ${env.name}
   scaling_factor: ${scaling_factor}
+  normalization: "batch"
   batch_size: 64
   num_starts: 10
   num_augment: 0
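The new normalization field selects which normalization layer the policy encoder uses. A minimal sketch of what a "batch" versus "instance" choice typically maps to in PyTorch follows; the make_norm helper is hypothetical and illustrates the general pattern, not rl4co's actual implementation.

import torch
import torch.nn as nn

def make_norm(kind: str, embed_dim: int) -> nn.Module:
    # Hypothetical helper: map the config string to a normalization layer.
    # "batch" normalizes statistics across the batch, "instance" within each
    # instance's own sequence; both act over the embedding dimension here.
    if kind == "batch":
        return nn.BatchNorm1d(embed_dim)
    if kind == "instance":
        return nn.InstanceNorm1d(embed_dim, affine=True)
    raise ValueError(f"unknown normalization: {kind}")

# Usage: inputs of shape (batch, seq_len, embed_dim) are transposed so the
# embedding dimension sits where BatchNorm1d/InstanceNorm1d expect channels.
x = torch.randn(64, 20, 128)
norm = make_norm("batch", 128)
y = norm(x.transpose(1, 2)).transpose(1, 2)
print(y.shape)  # torch.Size([64, 20, 128])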
File 2:
@@ -43,14 +43,8 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   train_data_size: 2000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
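Across these experiments the env block now keeps only stepwise_reward: True and drops the _torchrl_mode flag. A stepwise reward means the environment emits a dense per-step signal rather than a single terminal one, which is the kind of signal the step-wise PPO models in these configs are set up to consume. A toy sketch of that distinction (conceptual only, not rl4co's environment code):

# Toy illustration: with stepwise_reward=True the environment emits a reward
# at every dispatching step; otherwise a makespan-style reward arrives only
# once the schedule is complete.
def rollout(step_costs, stepwise_reward: bool):
    rewards = []
    total_cost = 0.0
    for step_cost in step_costs:          # e.g. increase in partial makespan
        total_cost += step_cost
        rewards.append(-step_cost if stepwise_reward else 0.0)
    if not stepwise_reward:
        rewards[-1] = -total_cost         # sparse terminal reward
    return rewards

print(rollout([1.0, 2.0, 0.5], stepwise_reward=True))   # [-1.0, -2.0, -0.5]
print(rollout([1.0, 2.0, 0.5], stepwise_reward=False))  # [0.0, 0.0, -3.5]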
File 3:
@@ -22,17 +22,19 @@ trainer:

 seed: 12345678

-scaling_factor: 20
+scaling_factor: ${env.generator_params.max_processing_time}

 model:
   _target_: ???
   batch_size: ???
   train_data_size: 2_000
   val_data_size: 1_000
-  test_data_size: 1_000
+  test_data_size: 100
   optimizer_kwargs:
-    lr: 1e-4
+    lr: 2e-4
     weight_decay: 1e-6
   lr_scheduler: "ExponentialLR"
   lr_scheduler_kwargs:
     gamma: 0.95
+  reward_scale: scale
+  max_grad_norm: 1
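Two details in this base config are worth spelling out: ${env.generator_params.max_processing_time} is an OmegaConf interpolation, so the scaling factor now tracks whatever the environment generator is configured with, and ??? marks mandatory values that an experiment config must override. A small self-contained sketch of both behaviours with plain OmegaConf; the snippet mocks only the relevant keys and is not the actual rl4co config tree.

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

# Standalone mock of the relevant keys; the real configs define many more.
cfg = OmegaConf.create(
    """
    env:
      generator_params:
        max_processing_time: 99
    scaling_factor: ${env.generator_params.max_processing_time}
    model:
      _target_: ???
    """
)

print(cfg.scaling_factor)   # 99, resolved from the generator params

try:
    cfg.model._target_      # still missing until an experiment overrides it
except MissingMandatoryValue:
    print("model._target_ must be set by the experiment config")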
File 4:
@@ -12,24 +12,22 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: False
+  normalization: instance
+  test_decode_type: greedy
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
+

 trainer:
   max_epochs: 10


 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
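Since the model section is a Hydra _target_ config, the experiment file is typically turned into an object with hydra.utils.instantiate, with the remaining keys (policy_kwargs, ppo_epochs, batch_size, ...) forwarded to the constructor. A hedged sketch of that wiring; the environment object is a placeholder, and the exact constructor signature of L2DPPOModel is assumed rather than quoted from the library.

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Minimal stand-in for the experiment's model section (values from the diff).
model_cfg = OmegaConf.create(
    {
        "_target_": "rl4co.models.L2DPPOModel",
        "policy_kwargs": {"embed_dim": 256, "num_encoder_layers": 3},
        "ppo_epochs": 2,
        "batch_size": 128,
    }
)

print(OmegaConf.to_yaml(model_cfg))

# env is assumed to be an already-constructed scheduling environment
# (e.g. built from the experiment's `env` section); extra keyword arguments
# to instantiate() are passed through to the target's constructor.
# model = instantiate(model_cfg, env=env)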
File 5:
   stepwise_encoding: False
   scaling_factor: ${scaling_factor}
   het_emb: True
+  normalization: instance
   num_starts: 10
   batch_size: 64
   num_augment: 0
File 6:
@@ -12,24 +12,16 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: True
+  normalization: instance
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
-
-trainer:
-  max_epochs: 10
-

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
File 7:
@@ -36,13 +36,7 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True