x

SumanthRH · SumanthRH · commit 27bf724e42e8 · 2025-02-26T21:57:23.000-08:00
Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
diff --git a/recipes/sky-t1-preview/preprocess.py b/recipes/sky-t1-preview/preprocess.py
@@ -1,5 +1,8 @@
 import json
 
+import pyarrow as pa
+from ray.data import Schema
+
 
 class APPSPreprocessor:
     WITH_FN_NAME_TEMPLATE = "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"  # noqa: E501
@@ -68,3 +71,17 @@ def __call__(self, row):
         prompt = row["problem"]
         _input = self.TEMPLATE.format(prompt=prompt)
         return {**row, "user_input": _input}
+
+
+def taco_coerce_types(row, schema: Schema):
+    """Coerce values in ``row`` toward the column types declared in ``schema``.
+
+    For each column named in ``schema``, the Arrow type of the row's value is
+    compared with the schema's type for that column. On a mismatch:
+
+    * string columns are replaced by ``str(value)``, or ``""`` if that raises;
+    * null columns are replaced by ``None``;
+    * columns of any other schema type are left unchanged.
+
+    Args:
+        row: Mapping from column name to value. Every name in ``schema`` is
+            looked up with ``row[key]``, and the mapping is modified in place.
+        schema: Object exposing parallel ``names`` and ``types`` sequences.
+
+    Returns:
+        The same ``row`` object, after coercion.
+    """
+    # Walk the column names and their declared types in lockstep.
+    for key, schema_type in zip(schema.names, schema.types):
+        # Wrap the value in a one-element array so its Arrow type can be read.
+        value = pa.array([row[key]])
+        if value.type != schema_type:
+            if schema_type == pa.string():
+                try:
+                    row[key] = str(row[key])
+                except Exception:
+                    # Best effort: any failure in str() falls back to "".
+                    row[key] = ""
+            elif schema_type == pa.null():
+                row[key] = None
+            # NOTE(review): mismatches against other schema types are not
+            # coerced here -- confirm that is intended.
+    return row
diff --git a/recipes/sky-t1-preview/recipe.py b/recipes/sky-t1-preview/recipe.py
@@ -18,7 +18,12 @@
 from skythought.evals.scoring.taco import TACOScorer
 
 from .postprocess import convert_to_sharegpt_format
-from .preprocess import APPSPreprocessor, NUMINAPreprocessor, TACOPreprocessor
+from .preprocess import (
+    APPSPreprocessor,
+    NUMINAPreprocessor,
+    TACOPreprocessor,
+    taco_coerce_types,
+)
 from .prompts import CONVERT_PROMPT, CONVERT_PROMPT_EXAMPLE
 
 parser = argparse.ArgumentParser()
@@ -38,6 +43,9 @@
 # convert all to ray dataset
 apps_ds = ray.data.from_huggingface(apps_ds)
 taco_ds_medium = ray.data.from_huggingface(taco_ds_medium)
+taco_ds_medium = taco_ds_medium.map(
+    taco_coerce_types, fn_args=(taco_ds_medium.schema(),)
+)
 numina_ds = ray.data.from_huggingface(numina_ds)
 
 
@@ -77,7 +85,7 @@
 )
 scorers = [
     APPSScorer(response_column="formatted_response"),
-    TACOScorer(response_column="formatted_response"),
+    TACOScorer(response_column="formatted_response", backend="ray"),
     numina_scorer,
     numina_scorer,
     numina_scorer,
@@ -168,4 +176,6 @@
 
     # 6. Save datasets
     dir_name = f"sky-t1-preview-{i}_parquet"
+    datasets[i] = datasets[i].materialize()
+    # breakpoint()
     datasets[i].write_parquet(os.path.abspath(dir_name))
diff --git a/skythought/evals/scoring/apps/apps.py b/skythought/evals/scoring/apps/apps.py
@@ -1,6 +1,6 @@
 import copy
 import json
-from typing import Any, Dict
+from typing import Any, Dict, Literal
 
 import numpy as np
 import ray
@@ -19,11 +19,13 @@ def __init__(
         response_column="response",
         answer_column="solutions",
         input_column="input_output",
+        backend: Literal["mp", "ray"] = "ray",
     ) -> None:
         super().__init__()
         self.response_column = response_column
         self.answer_column = answer_column
         self.input_column = input_column
+        self.backend = backend
 
     def score(self, row: Dict[str, Any]):
         TIMEOUT = 10