configs/experiment/scheduling: 7 files changed, +19 additions, -37 deletions

File 1:
   _target_: rl4co.models.L2DAttnPolicy
   env_name: ${env.name}
   scaling_factor: ${scaling_factor}
+  normalization: "batch"
   batch_size: 64
   num_starts: 10
   num_augment: 0
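The new normalization field selects which normalization layer the policy encoder uses. A minimal sketch of what a "batch" versus "instance" choice typically maps to in PyTorch follows; the make_norm helper is hypothetical and illustrates the general pattern, not rl4co's actual implementation.

import torch
import torch.nn as nn

def make_norm(kind: str, embed_dim: int) -> nn.Module:
    # Hypothetical helper: map the config string to a normalization layer.
    # "batch" normalizes statistics across the batch, "instance" within each
    # instance's own sequence; both act over the embedding dimension here.
    if kind == "batch":
        return nn.BatchNorm1d(embed_dim)
    if kind == "instance":
        return nn.InstanceNorm1d(embed_dim, affine=True)
    raise ValueError(f"unknown normalization: {kind}")

# Usage: inputs of shape (batch, seq_len, embed_dim) are transposed so the
# embedding dimension sits where BatchNorm1d/InstanceNorm1d expect channels.
x = torch.randn(64, 20, 128)
norm = make_norm("batch", 128)
y = norm(x.transpose(1, 2)).transpose(1, 2)
print(y.shape)  # torch.Size([64, 20, 128])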
File 2:
@@ -43,14 +43,8 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   train_data_size: 2000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
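Across these experiments the env block now keeps only stepwise_reward: True and drops the _torchrl_mode flag. A stepwise reward means the environment emits a dense per-step signal rather than a single terminal one, which is the kind of signal the step-wise PPO models in these configs are set up to consume. A toy sketch of that distinction (conceptual only, not rl4co's environment code):

# Toy illustration: with stepwise_reward=True the environment emits a reward
# at every dispatching step; otherwise a makespan-style reward arrives only
# once the schedule is complete.
def rollout(step_costs, stepwise_reward: bool):
    rewards = []
    total_cost = 0.0
    for step_cost in step_costs:          # e.g. increase in partial makespan
        total_cost += step_cost
        rewards.append(-step_cost if stepwise_reward else 0.0)
    if not stepwise_reward:
        rewards[-1] = -total_cost         # sparse terminal reward
    return rewards

print(rollout([1.0, 2.0, 0.5], stepwise_reward=True))   # [-1.0, -2.0, -0.5]
print(rollout([1.0, 2.0, 0.5], stepwise_reward=False))  # [0.0, 0.0, -3.5]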
File 3:
@@ -22,17 +22,19 @@ trainer:

 seed: 12345678

-scaling_factor: 20
+scaling_factor: ${env.generator_params.max_processing_time}

 model:
   _target_: ???
   batch_size: ???
   train_data_size: 2_000
   val_data_size: 1_000
-  test_data_size: 1_000
+  test_data_size: 100
   optimizer_kwargs:
-    lr: 1e-4
+    lr: 2e-4
     weight_decay: 1e-6
   lr_scheduler: "ExponentialLR"
   lr_scheduler_kwargs:
     gamma: 0.95
+  reward_scale: scale
+  max_grad_norm: 1
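Two details in this base config are worth spelling out: ${env.generator_params.max_processing_time} is an OmegaConf interpolation, so the scaling factor now tracks whatever the environment generator is configured with, and ??? marks mandatory values that an experiment config must override. A small self-contained sketch of both behaviours with plain OmegaConf; the snippet mocks only the relevant keys and is not the actual rl4co config tree.

from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

# Standalone mock of the relevant keys; the real configs define many more.
cfg = OmegaConf.create(
    """
    env:
      generator_params:
        max_processing_time: 99
    scaling_factor: ${env.generator_params.max_processing_time}
    model:
      _target_: ???
    """
)

print(cfg.scaling_factor)   # 99, resolved from the generator params

try:
    cfg.model._target_      # still missing until an experiment overrides it
except MissingMandatoryValue:
    print("model._target_ must be set by the experiment config")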
File 4:
@@ -12,24 +12,22 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: False
+  normalization: instance
+  test_decode_type: greedy
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
+

 trainer:
   max_epochs: 10


 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
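Since the model section is a Hydra _target_ config, the experiment file is typically turned into an object with hydra.utils.instantiate, with the remaining keys (policy_kwargs, ppo_epochs, batch_size, ...) forwarded to the constructor. A hedged sketch of that wiring; the environment object is a placeholder, and the exact constructor signature of L2DPPOModel is assumed rather than quoted from the library.

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Minimal stand-in for the experiment's model section (values from the diff).
model_cfg = OmegaConf.create(
    {
        "_target_": "rl4co.models.L2DPPOModel",
        "policy_kwargs": {"embed_dim": 256, "num_encoder_layers": 3},
        "ppo_epochs": 2,
        "batch_size": 128,
    }
)

print(OmegaConf.to_yaml(model_cfg))

# env is assumed to be an already-constructed scheduling environment
# (e.g. built from the experiment's `env` section); extra keyword arguments
# to instantiate() are passed through to the target's constructor.
# model = instantiate(model_cfg, env=env)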
File 5:
   stepwise_encoding: False
   scaling_factor: ${scaling_factor}
   het_emb: True
+  normalization: instance
   num_starts: 10
   batch_size: 64
   num_augment: 0
File 6:
@@ -12,24 +12,16 @@ logger:
 model:
   _target_: rl4co.models.L2DPPOModel
   policy_kwargs:
-    embed_dim: 128
+    embed_dim: 256
     num_encoder_layers: 3
     scaling_factor: ${scaling_factor}
-  max_grad_norm: 1
-  ppo_epochs: 3
+  ppo_epochs: 2
   het_emb: True
+  normalization: instance
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4
-
-trainer:
-  max_epochs: 10
-

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True
File 7:
@@ -36,13 +36,7 @@ model:
   batch_size: 128
   val_batch_size: 512
   test_batch_size: 64
-  # Song et al use 1000 iterations over batches of 20 = 20_000
-  # We train 10 epochs on a set of 2000 instance = 20_000
   mini_batch_size: 512
-  reward_scale: scale
-  optimizer_kwargs:
-    lr: 1e-4

 env:
-  stepwise_reward: True
-  _torchrl_mode: True
+  stepwise_reward: True