Add "auto" dtype to RayLLMEngineArgs and some minor fixes (#89)

SumanthRH · web-flow · commit 4bb8f3ef3b8d · 2025-02-27T15:23:56.000-08:00
# What does this PR do?

- Adds `auto` as an accepted `dtype` value in `RayLLMEngineArgs`.
- Fixes a multiprocessing error in the TACO handler by moving the `_temp_run` helper out of `check_correctness` to module level.
- Calls `ray.shutdown()` after inference with the Ray backend so that the Ray session does not interfere with the multiprocessing code in the scoring stage.
diff --git a/skythought/evals/common/entities.py b/skythought/evals/common/entities.py
@@ -83,8 +83,8 @@ class RayLLMEngineArgs(BaseModel):
     gpu_memory_utilization: Optional[float] = Field(
         default=None, description="GPU memory utilization for the inference engine"
     )
-    dtype: Optional[Literal["float32", "float16", "bfloat16", "float8"]] = Field(
-        default=None, description="Data type for inference engine."
+    dtype: Optional[Literal["float32", "float16", "bfloat16", "float8", "auto"]] = (
+        Field(default=None, description="Data type for inference engine.")
     )
 
     def get_ray_llm_config(self):
diff --git a/skythought/evals/inference_and_check.py b/skythought/evals/inference_and_check.py
@@ -167,6 +167,8 @@ def inference(
         # TODO: revisit the underlying issue and remove the deepcopy if possible
         responses = copy.deepcopy(responses)
         responses = sorted(responses, key=lambda x: x.index)
+        # Cleanup ray session
+        ray.shutdown()
     elif backend == Backend.OPENAI:
         llm = OpenAI(**backend_params.to_dict())
         assert isinstance(sampling_params.params, OpenAISamplingParams)
diff --git a/skythought/evals/tasks/taco/taco_handler.py b/skythought/evals/tasks/taco/taco_handler.py
@@ -51,12 +51,6 @@ def generate_prompt(self, problem):
     def check_correctness(self, problem, generation):
         TIME_OUT = 300
 
-        def _temp_run(problem, generation, debug, result):
-            try:
-                result.append(taco_run_test(problem, test=generation, debug=debug))
-            except Exception as e:
-                print(f"Error in _temp_run: {e}")
-
         manager = Manager()
         result = manager.list()
         p = multiprocessing.Process(
@@ -106,3 +100,10 @@ def load_and_filter_dataset(
             )
 
         return dataset.iloc[start:end] if end > 0 else dataset.iloc[start:]
+
+
+def _temp_run(problem, generation, debug, result):
+    try:
+        result.append(taco_run_test(problem, test=generation, debug=debug))
+    except Exception as e:
+        print(f"Error in _temp_run: {e}")

Original file line number	Diff line number	Diff line change
`@@ -83,8 +83,8 @@ class RayLLMEngineArgs(BaseModel):`
`83`	`83`	`gpu_memory_utilization: Optional[float] = Field(`
`84`	`84`	`default=None, description="GPU memory utilization for the inference engine"`
`85`	`85`	`)`
`86`		`- dtype: Optional[Literal["float32", "float16", "bfloat16", "float8"]] = Field(`
`87`		`- default=None, description="Data type for inference engine."`
	`86`	`+ dtype: Optional[Literal["float32", "float16", "bfloat16", "float8", "auto"]] = (`
	`87`	`+ Field(default=None, description="Data type for inference engine.")`
`88`	`88`	`)`
`89`	`89`
`90`	`90`	`def get_ray_llm_config(self):`