
Commit ef9a7bf

llava_agent: Add frameskipping and cleanup
1 parent: 2442b9d · commit: ef9a7bf

File tree

1 file changed: +22 / -11 lines

llava_agent.py

Lines changed: 22 additions & 11 deletions
@@ -17,10 +17,12 @@ def parse_args():
 
     parser.add_argument("--frames", type=str, default="frames")
     parser.add_argument("--train-mins", type=int, default=60)
-    parser.add_argument("--log", type=str, default="llm_agent_"+str(uuid4())+".csv")
+    parser.add_argument("--log", type=str,
+                        default="llm_agent_"+str(uuid4())+".csv")
 
     return parser.parse_args()
 
+
 def obs_to_bytes(observation):
     """Converts an observation encoded as a numpy array into a bytes representation of a PNG image."""
     image = Image.fromarray(observation)
@@ -35,23 +37,25 @@ def obs_to_bytes(observation):
 args = parse_args()
 
 config = dict(
-    max_block_generate_distance=3, # 16x3 blocks
+    max_block_generate_distance=3,  # 16x3 blocks
     # hud_scaling=0.9,
     fov=90,
     console_alpha=0,
     smooth_lighting=False,
     performance_tradeoffs=True,
     enable_particles=False,
 )
-
 env = gym.make(
     "Craftium/OpenWorld-v0",
-    frameskip=1,
+    frameskip=3,
     obs_width=520,
     obs_height=520,
     # render_mode="human",
     # pipe_proc=False,
     minetest_conf=config,
+    sync_mode=True,
+    # max_fps=60,
+    pmul=20,
 )
 
 observation, info = env.reset()
@@ -62,7 +66,8 @@ def obs_to_bytes(observation):
              "select hotbar slot 1", "select hotbar slot 2", "select hotbar slot 3", "select hotbar slot 4", "select hotbar slot 5",
              "move camera right", "move camera left", "move camera up", "move camera down"]
 
-objectives = ["is to chop a tree", "is to collect stone", "is to collect iron", "is to find diamond blocks"]
+objectives = ["is to chop a tree", "is to collect stone",
+              "is to collect iron", "is to find diamond blocks"]
 obj_rwds = [128, 256, 1024, 2048]
 objective_id = 0
 
@@ -72,7 +77,8 @@ def obs_to_bytes(observation):
 episode = 0
 while (time.time() - start) / 60 < args.train_mins:
     img_bytes, img = obs_to_bytes(observation)
-    img.save(os.path.join(args.frames, f"frame_{str(t_step).zfill(7)}.png"), "PNG")
+    img.save(os.path.join(
+        args.frames, f"frame_{str(t_step).zfill(7)}.png"), "PNG")
 
     prompt = f"You are a reinforcement learning agent in the Minecraft game. You will be presented the current observation, and you have to select the next action with the ultimate objective to fulfill your goal. In this case, the goal {objectives[objective_id]}. You should fight monsters and hunt animals just as a secondary objective and survival. Available actions are: do nothing, move forward, move backward, move left, move right, jump, sneak, use tool, place, select hotbar slot 1, select hotbar slot 2, select hotbar slot 3, select hotbar slot 4, select hotbar slot 5, move camera right, move camera left, move camera up, move camera down. From now on, your responses must only contain the name of the action you will take, nothing else."
     print("Prompt:", prompt)
@@ -86,7 +92,7 @@ def obs_to_bytes(observation):
     incorrect = False
     candidates = [i for i, name in enumerate(act_names) if name in act_str]
     print(candidates)
-    if len(candidates) == 0: # if the response is in an incorrect format
+    if len(candidates) == 0:  # if the response is in an incorrect format
         action = env.action_space.sample() # take a random action
         incorrect = True
         print("[WARNING] Incorrect action. Using random action.")
@@ -98,13 +104,16 @@ def obs_to_bytes(observation):
     print(f"* Action: {action}")
 
     if act_names[action] == "jump":
+        # jump forward
         _, _, _, _, _ = env.step(action)
         observation, reward, terminated, truncated, _info = env.step(1)
     else:
-        observation, reward, terminated, truncated, _info = env.step(action)
+        observation, reward, terminated, truncated, _info = env.step(
+            action)
 
     ep_ret += reward
-    print(f"Step: {t_step}, Elapsed: {int(time.time()-start)}s, Reward: {reward}, Ep. ret.: {ep_ret}")
+    print(
+        f"Step: {t_step}, Elapsed: {int(time.time()-start)}s, Reward: {reward}, Ep. ret.: {ep_ret}")
 
     # check if a stage has been completed
     if reward >= 128.0:
@@ -116,8 +125,10 @@ def obs_to_bytes(observation):
 
     with open(args.log, "a" if t_step > 0 else "w") as f:
         if t_step == 0:
-            f.write("t_step,episode,elapsed mins,reward,ep_ret,objective_id,id\n")
-        f.write(f"{t_step},{episode},{(time.time()-start)/60},{reward},{ep_ret},{objective_id},{args.log}\n")
+            f.write(
+                "t_step,episode,elapsed mins,reward,ep_ret,objective_id,id\n")
+        f.write(
+            f"{t_step},{episode},{(time.time()-start)/60},{reward},{ep_ret},{objective_id},{args.log}\n")
 
     if terminated or truncated:
         episode += 1
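
Note on the frameskip=1 → frameskip=3 change above: frameskip conventionally means that each selected action is repeated for that many simulation frames and the per-frame rewards are summed, so the costly LLaVA query runs once every few frames of game time instead of on every frame. The sketch below is only an illustration of that conventional behaviour as a generic Gymnasium wrapper, assuming Craftium's built-in frameskip argument works along the same lines; the FrameSkip class and skip parameter are names made up for the example, not part of Craftium's API.

import gymnasium as gym


class FrameSkip(gym.Wrapper):
    """Repeat each chosen action for `skip` consecutive frames, summing rewards.

    Illustrative only: Craftium's frameskip argument is assumed to behave
    roughly like this, but its exact semantics may differ.
    """

    def __init__(self, env, skip=3):
        super().__init__(env)
        self.skip = skip

    def step(self, action):
        total_reward = 0.0
        observation, info = None, {}
        terminated = truncated = False
        for _ in range(self.skip):
            observation, reward, terminated, truncated, info = self.env.step(action)
            total_reward += reward
            if terminated or truncated:
                break  # stop repeating once the episode ends
        return observation, total_reward, terminated, truncated, info

Under these semantics, with frameskip=3 each env.step() call in the agent loop advances the game about three frames, which is what makes the per-step LLM round trip affordable.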
