Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions skythought/evals/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ class RayLLMEngineArgs(BaseModel):
gpu_memory_utilization: Optional[float] = Field(
default=None, description="GPU memory utilization for the inference engine"
)
dtype: Optional[Literal["float32", "float16", "bfloat16", "float8"]] = Field(
default=None, description="Data type for inference engine."
dtype: Optional[Literal["float32", "float16", "bfloat16", "float8", "auto"]] = (
Field(default=None, description="Data type for inference engine.")
)

def get_ray_llm_config(self):
Expand Down
2 changes: 2 additions & 0 deletions skythought/evals/inference_and_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def inference(
# TODO: revisit the underlying issue and remove the deepcopy if possible
responses = copy.deepcopy(responses)
responses = sorted(responses, key=lambda x: x.index)
# Cleanup ray session
ray.shutdown()
elif backend == Backend.OPENAI:
llm = OpenAI(**backend_params.to_dict())
assert isinstance(sampling_params.params, OpenAISamplingParams)
Expand Down
13 changes: 7 additions & 6 deletions skythought/evals/tasks/taco/taco_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,6 @@ def generate_prompt(self, problem):
def check_correctness(self, problem, generation):
TIME_OUT = 300

def _temp_run(problem, generation, debug, result):
try:
result.append(taco_run_test(problem, test=generation, debug=debug))
Comment on lines -54 to -56
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixes a multiprocessing error with TACO:

Traceback (most recent call last):
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/concurrent/futures/process.py", line 246, in _process_worker
    r = call_item.fn(*call_item.args, **call_item.kwargs)
  File "/Users/sumanthrh/Documents/SkyThought/skythought/evals/tasks/taco/taco_handler.py", line 84, in update_results
    curr_res = self.check_correctness(problem, generation=last_code)
  File "/Users/sumanthrh/Documents/SkyThought/skythought/evals/tasks/taco/taco_handler.py", line 65, in check_correctness
    p.start()
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/process.py", line 121, in start
    self._popen = self._Popen(self)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/context.py", line 288, in _Popen
    return Popen(process_obj)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__
    super().__init__(process_obj)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
    self._launch(process_obj)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch
    reduction.dump(process_obj, fp)
  File "/Users/sumanthrh/.local/share/uv/python/cpython-3.10.15-macos-aarch64-none/lib/python3.10/multiprocessing/reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'TACOTaskHandler.check_correctness.<locals>._temp_run'

except Exception as e:
print(f"Error in _temp_run: {e}")

manager = Manager()
result = manager.list()
p = multiprocessing.Process(
Expand Down Expand Up @@ -106,3 +100,10 @@ def load_and_filter_dataset(
)

return dataset.iloc[start:end] if end > 0 else dataset.iloc[start:]


def _temp_run(problem, generation, debug, result):
    """Execute the TACO test runner for one (problem, generation) pair.

    Defined at module level (not as a closure) so it stays picklable as a
    ``multiprocessing.Process`` target under the spawn start method.

    Args:
        problem: TACO problem record passed through to ``taco_run_test``.
        generation: Candidate solution code to test.
        debug: Debug flag forwarded to ``taco_run_test``.
        result: Shared list (``Manager().list()``) the outcome is appended to;
            on failure nothing is appended, which the caller treats as a miss.
    """
    try:
        outcome = taco_run_test(problem, test=generation, debug=debug)
    except Exception as e:
        # Best-effort: report and swallow so the worker process exits cleanly.
        print(f"Error in _temp_run: {e}")
    else:
        result.append(outcome)