Skip to content

Commit cbf83dc

Browse files
committed
initial commit
Signed-off-by: SumanthRH <[email protected]>
1 parent a0df2e6 commit cbf83dc

File tree

24 files changed

+4725
-553
lines changed

24 files changed

+4725
-553
lines changed

recipes/sky-t1-preview/__init__.py

Whitespace-only changes.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import copy
2+
import json
3+
from typing import Any, Dict
4+
5+
import numpy as np
6+
import ray
7+
8+
from skythought.evals.scoring.base import Scorer
9+
from skythought.evals.tasks.apps.apps_util import run_test as apps_run_test
10+
from skythought.evals.util.common import has_code
11+
12+
# System prompt stored under the "system" key of every ShareGPT-style record
# built by `convert_to_sharegpt_format`. It instructs the model to answer in two
# sections: a Thought section wrapped in <|begin_of_thought|>/<|end_of_thought|>
# and a Solution section wrapped in <|begin_of_solution|>/<|end_of_solution|>.
# NOTE(review): the text is consumed verbatim at runtime (including the
# "analisying" spelling); presumably it must match the prompt used for the
# released data, so do not "fix" the wording without confirming.
# The trailing backslashes are line continuations inside one string literal, so
# the value is a single line apart from the two real newlines produced by '\n\n'.
STILL2_SYSTEM_PROMPT = "Your role as an assistant involves thoroughly exploring questions through a systematic long \
thinking process before providing the final precise and accurate solutions. This requires \
engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, \
backtracing, and iteration to develop well-considered thinking process. \
Please structure your response into two main sections: Thought and Solution. \
In the Thought section, detail your reasoning process using the specified format: \
<|begin_of_thought|> {thought with steps separated with '\n\n'} \
<|end_of_thought|> \
Each step should include detailed considerations such as analisying questions, summarizing \
relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining \
any errors, and revisiting previous steps. \
In the Solution section, based on various attempts, explorations, and reflections from the Thought \
section, systematically present the final solution that you deem correct. The solution should \
remain a logical, accurate, concise expression style and detail necessary step needed to reach the \
conclusion, formatted as follows: \
<|begin_of_solution|> \
{final formatted, precise, and clear solution} \
<|end_of_solution|> \
Now, try to solve the following question through the above guidelines:"
31+
32+
33+
@ray.remote
def _run_apps_tests(problem, generation, debug):
    """Run the APPS test harness for one candidate program in a Ray task.

    Returns whatever `apps_run_test` returns, or None when the harness itself
    raises (a crashing candidate is simply treated as a failed one).
    """
    try:
        return apps_run_test(problem=problem, test=generation, debug=debug)
    except Exception:
        # Deliberate best-effort: untrusted generated code may fail in
        # arbitrary ways; the caller maps None to "not accepted".
        return None


class APPSScorer(Scorer):
    """Accept a row only if its last generated code block passes the APPS tests."""

    def score(self, row: Dict[str, Any]):
        """Return True when the last code block in ``row["response"]`` passes.

        Args:
            row: dataset row; reads ``response`` (model output), ``input_output``
                (JSON-encoded test cases) and ``solutions`` (JSON-encoded
                reference solutions, allowed to be malformed).

        Returns:
            bool: False when the response contains no code, when the test run
            fails or raises, or when it does not finish within the timeout;
            True only when every entry of the first element of the harness
            result is truthy.
        """
        TIMEOUT = 10
        code_filter_result = has_code(row["response"])
        if len(code_filter_result) == 0:
            return False

        last_code = code_filter_result[-1]
        problem_to_check = copy.deepcopy(row)
        problem_to_check["input_output"] = json.loads(row["input_output"])
        try:
            problem_to_check["solutions"] = json.loads(row["solutions"])
        except Exception:
            # Reference solutions are optional for scoring; fall back to empty.
            problem_to_check["solutions"] = ""

        result_ref = _run_apps_tests.remote(problem_to_check, last_code, False)
        try:
            result = ray.get(result_ref, timeout=TIMEOUT + 1)
        except ray.exceptions.GetTimeoutError:
            # A candidate that hangs must be rejected, not crash the pipeline;
            # cancel the task so it stops consuming a worker.
            ray.cancel(result_ref, force=True)
            return False

        return bool(result and np.all(result[0]))
61+
62+
63+
class TACOScorer(Scorer):
    """Pass-through scorer: every TACO row is accepted unconditionally."""

    def score(self, row: Dict[str, Any]):
        # No verification is performed here; the row content is ignored.
        return True
66+
67+
68+
def convert_to_sharegpt_format(row: Dict[str, Any]):
    """Build a ShareGPT-style training record from a processed row.

    Reads ``row["user_input"]`` (the prompt) and ``row["formatted_response"]``
    (the reformatted model answer) and returns a dict with the shared system
    prompt plus a two-turn user/assistant conversation.
    """
    user_turn = {"from": "user", "value": row["user_input"]}
    assistant_turn = {"from": "assistant", "value": row["formatted_response"]}
    return {
        "system": STILL2_SYSTEM_PROMPT,
        "conversations": [user_turn, assistant_turn],
    }
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import json
2+
3+
4+
class APPSPreprocessor:
    """Turn an APPS row into a prompt stored under the ``user_input`` key."""

    # NOTE: the two names below are historical and read "backwards": this
    # template (stdin/stdout style) is the one used when the test cases carry
    # NO ``fn_name``. The names are kept so existing references keep working.
    WITH_FN_NAME_TEMPLATE = "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition. {prompt}"  # noqa: E501

    # Used when the test cases DO name a function to call (call-based style).
    WITHOUT_FN_NAME_TEMPLATE = "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution. {prompt}"  # noqa: E501

    WITH_STARTER_CODE_TEMPLATE = "{input}\n{starter_code}"

    def __call__(self, row):
        """Return a copy of ``row`` with an added ``user_input`` prompt.

        Args:
            row: mapping with ``question`` (problem text), ``starter_code``
                (may be empty or None) and ``input_output`` (JSON string whose
                optional ``fn_name`` selects the call-based instructions).

        Returns:
            dict: all original columns plus ``user_input``.

        Raises:
            KeyError: if a required column is missing.
            ValueError: if ``input_output`` is not valid JSON.
        """
        test_case = json.loads(row["input_output"])
        starter_code = row["starter_code"]
        prompt = row["question"]

        if not test_case.get("fn_name"):
            _input = self.WITH_FN_NAME_TEMPLATE.format(prompt=prompt)
        else:
            _input = self.WITHOUT_FN_NAME_TEMPLATE.format(prompt=prompt)

        # An empty starter code is treated like a missing one (as
        # TACOPreprocessor does); otherwise the prompt would end in a dangling
        # newline with nothing after it.
        if starter_code:
            _input = self.WITH_STARTER_CODE_TEMPLATE.format(
                input=_input, starter_code=starter_code
            )

        return {**row, "user_input": _input}
26+
27+
28+
class TACOPreprocessor:
    """Turn a TACO row into a prompt stored under the ``user_input`` key."""

    INITIAL_TEMPLATE = "\nQUESTION:\n{prompt}"
    STARTER_CODE_TEMPLATE = "{input}\n{starter_code}"
    STDIN_TEMPLATE = "{input}\nUse Standard Input format\nANSWER:\n"
    CALL_TEMPLATE = "{input}\nUse Call-Based format\nANSWER:\n"

    @staticmethod
    def _extract_fn_name(raw_input_output):
        """Return the non-empty ``fn_name`` from the JSON test spec, else None."""
        try:
            input_output = json.loads(raw_input_output)
        except (TypeError, ValueError):
            # Missing (None) or malformed test spec: treat as "no function name"
            # rather than aborting the whole preprocessing pass.
            return None
        if not isinstance(input_output, dict):
            return None
        return input_output.get("fn_name") or None

    def __call__(self, problem):
        """Return a copy of ``problem`` with an added ``user_input`` prompt.

        Args:
            problem: mapping with ``question``, ``starter_code`` (empty or None
                means "no starter code") and ``input_output`` (JSON string; an
                optional ``fn_name`` marks a call-based problem).

        Returns:
            dict: all original columns plus ``user_input``. The prompt asks for
            the Standard Input format only when there is neither a function
            name nor starter code; otherwise it asks for the Call-Based format.

        Raises:
            KeyError: if a required column is missing.
        """
        prompt = problem["question"]
        starter_code = problem["starter_code"] or None
        fn_name = self._extract_fn_name(problem["input_output"])

        _input = self.INITIAL_TEMPLATE.format(prompt=prompt)
        if starter_code:
            _input = self.STARTER_CODE_TEMPLATE.format(
                input=_input, starter_code=starter_code
            )

        if (not fn_name) and (not starter_code):
            _input = self.STDIN_TEMPLATE.format(input=_input)
        else:
            _input = self.CALL_TEMPLATE.format(input=_input)

        return {**problem, "user_input": _input}
62+
63+
64+
class NUMINAPreprocessor:
    """Turn a NuminaMath row into a prompt stored under the ``user_input`` key."""

    # The doubled braces render as a literal "\boxed{}" after formatting.
    TEMPLATE = "Return your final response within \\boxed{{}}. {prompt}"

    def __call__(self, row):
        """Return a copy of ``row`` extended with the ``user_input`` prompt."""
        augmented = dict(row)
        augmented["user_input"] = self.TEMPLATE.format(prompt=row["problem"])
        return augmented

recipes/sky-t1-preview/prompts.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Worked example (thought section + solution section) shown to the reformatting
# model so it can copy the expected output layout.
convert_prompt_example = (  # noqa: E501
    "<|begin_of_thought|>\n\n"
    "Okay, so I've got this problem here. Mr. Wang leaves home at 6 AM, riding his bike at 12 km/h, "
    "and he stops to rest for 6 minutes after every 30 minutes of riding. Then, when he arrives at a park "
    "that's 16.8 km away, I need to find out the angle between the hour and minute hands on his watch.\n\n"
    "Alright, first things first, I need to figure out how long it takes Mr. Wang to ride 16.8 km, including "
    "his rest periods.\n\n"
    "So, his speed is 12 km/h. To find out how long it takes to go 16.8 km without any stops, I can use the formula "
    "time = distance/speed. That would be 16.8 divided by 12, which is 1.4 hours. To make it easier, that's 1 hour and 24 minutes.\n\n"
    "But wait, he doesn't ride straight through. He stops for 6 minutes after every 30 minutes of riding. So, I need to see how many "
    "of those 30-minute riding periods are there in his total riding time.\n\n"
    "In 1 hour and 24 minutes of riding, how many 30-minute segments are there? Well, 1 hour is 60 minutes, plus 24 minutes makes 84 minutes "
    "total riding time. So, 84 divided by 30 is 2.8. That means he has two full 30-minute riding periods and a partial one.\n\n"
    "After each full 30-minute riding period, he rests for 6 minutes. So, for two full periods, he rests twice, which is 12 minutes of rest.\n\n"
    "Now, for the partial riding period. Since 2 times 30 minutes is 60 minutes, and he has 84 minutes of riding, the remaining riding time is 84 minus 60, "
    "which is 24 minutes. So, he rides for 24 minutes without another rest because he doesn't complete another 30-minute segment.\n\n"
    "So, total time taken is riding time plus rest time. That's 84 minutes riding plus 12 minutes resting, totaling 96 minutes.\n\n"
    "Wait a minute, but he stops after every 30 minutes of riding, but in the last partial period of 24 minutes, does he rest again? I think he only rests after "  # noqa: E501
    "completing 30 minutes of riding, so in this case, since the last riding period is only 24 minutes, he doesn't take an additional rest after that.\n\n"
    "So, total time should be 84 minutes riding plus 12 minutes resting, which is indeed 96 minutes, or 1 hour and 36 minutes.\n\n"
    "So, he leaves at 6 AM and takes 1 hour and 36 minutes to reach the park, arriving at 7:36 AM.\n\n"
    "Now, I need to find the angle between the hour and minute hands at 7:36.\n\n"
    "To find the angle between the hour and minute hands, I can use the formula:\n\n"
    "|30H - 5.5M|\n\n"
    "where H is the hour and M is the minutes.\n\n"
    "At 7:36, H is 7 and M is 36.\n\n"
    "So, plugging in:\n\n"
    "30*7 = 210\n\n"
    "5.5*36 = 198\n\n"
    "210 - 198 = 12\n\n"
    "So, the angle is 12 degrees.\n\n"
    "Wait, but I should make sure that's the smaller angle. Sometimes, the larger angle is considered, but usually, the smaller one is what is asked for.\n\n"
    "So, the angle between the hour and minute hands at 7:36 AM is 12 degrees.\n\n"
    "I think that's the answer.<|end_of_thought|>\n\n"
    "<|begin_of_solution|>\n\n"
    "Mr. Wang leaves home at 6 AM and rides at a speed of 12 km/h, stopping to rest for 6 minutes after every 30 minutes of riding. "
    "He arrives at a park 16.8 km away. To determine the angle between the hour and minute hands on his watch when he arrives, we first calculate the total time taken.\n\n"  # noqa: E501
    "1. **Riding time without stops**:\n\n"
    "$$\\text{Time} = \\frac{\\text{Distance}}{\\text{Speed}} = \\frac{16.8 \\text{ km}}{12 \\text{ km/h}} = 1.4 \\text{ hours} = 84 \\text{ minutes}$$\n\n"
    "2. **Rest periods**:\n\n"
    "  - He rests for 6 minutes after every 30 minutes of riding.\n\n"
    "  - In 84 minutes of riding, he completes 2 full 30-minute segments and a partial 24-minute segment.\n\n"
    "  - He rests twice, totaling 12 minutes of rest.\n\n"
    "3. **Total time**:\n\n"
    "$$\\text{Total time} = 84 \\text{ minutes (riding)} + 12 \\text{ minutes (rest)} = 96 \\text{ minutes} = 1 \\text{ hour and } 36 \\text{ minutes}$$\n\n"
    "  - He arrives at 7:36 AM.\n\n"
    "4. **Angle between hour and minute hands at 7:36**:\n\n"
    "  - Use the formula:\n\n"
    "$$\\text{Angle} = |30H - 5.5M|$$\n\n"
    "  - At 7:36, $H = 7$ and $M = 36$:\n\n"
    "$$\\text{Angle} = |30 \\times 7 - 5.5 \\times 36| = |210 - 198| = 12 \\text{ degrees}$$\n\n"
    "Thus, the angle between the hour and minute hands on his watch is $\\boxed{12}$.<|end_of_solution|>\n"  # noqa: E501
)


def _escape_format_braces(text):
    """Double every brace so ``text`` survives a later ``str.format`` verbatim."""
    return text.replace("{", "{{").replace("}", "}}")


# From https://arxiv.org/pdf/2412.09413
#
# Template with a single remaining placeholder, ``{content}``; callers render it
# with ``CONVERT_PROMPT.format(content=...)``.
#
# The example is inlined here, and it contains literal LaTeX braces
# (``\text{...}``, ``\boxed{12}``). They are escaped before inlining, otherwise
# the caller's ``.format(content=...)`` would try to interpret them as
# replacement fields and raise KeyError/ValueError.
#
# NOTE(review): the "Make sure you include" sentence names
# <|begin_of_slow_thought|>/<|end_of_slow_thought|> while the section headers
# above use <|begin_of_thought|>/<|end_of_thought|>. Left untouched because it
# is runtime prompt text; confirm which spelling is intended.
CONVERT_PROMPT = (
    "Another solution is written in an unstructured way. Your job is to convert them into two sections:"
    "<|begin_of_thought|>"
    "(Thought process, you should copy exactly the thinking process of the original solution.)"
    "<|end_of_thought|>"
    "<|begin_of_solution|>"
    "(Final formatted, precise, and clear solution; make sure there is only one solution in this section; If it is a coding problem, make sure there is only one code block)"  # noqa: E501
    "<|end_of_solution|>"
    "Here is an example demonstration of a different question, you can refer to its format: "
    "{example}\n"
    "Important: You should almost copy all the contents word-by-word of the original solution. Just convert them into two sections. "
    "Make sure you include: <|begin_of_slow_thought|>, <|end_of_slow_thought|>, <|begin_of_solution|>,<|end_of_solution|> These four headers explicitly. "
    "Content to be converted: {{content}}"
).format(example=_escape_format_braces(convert_prompt_example))

recipes/sky-t1-preview/recipe.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
"""
2+
This is the recipe for data curation for the Sky T1 Preview model.
3+
"""
4+
5+
import os

import datasets
import ray
from ray.data.llm import (
    HttpRequestProcessorConfig,
    build_llm_processor,
    vLLMEngineProcessorConfig,
)

from skythought.evals.scoring.math import MathEqualScorer

from .postprocess import APPSScorer, TACOScorer, convert_to_sharegpt_format
from .preprocess import APPSPreprocessor, NUMINAPreprocessor, TACOPreprocessor
from .prompts import CONVERT_PROMPT
18+
19+
SYSTEM_PROMPT = "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."  # noqa: E501

# Generation budget for the reasoning model. Long chain-of-thought answers need
# far more than a handful of tokens; this matches the budget given to the
# reformatting step below.
MAX_RESPONSE_TOKENS = 16384

# 1. Load datasets
apps_ds = datasets.load_dataset("codeparrot/apps", split="test", streaming=True)
taco_ds_medium = datasets.load_dataset(
    "BAAI/TACO", split="test", name="MEDIUM", streaming=True
)
numina_ds = datasets.load_dataset("AI-MO/NuminaMath-CoT", split="train", streaming=True)

# convert all to ray dataset
apps_ds = ray.data.from_huggingface(apps_ds)
taco_ds_medium = ray.data.from_huggingface(taco_ds_medium)
numina_ds = ray.data.from_huggingface(numina_ds)

# get subsets from numina based on the source column
numina_ds_amc_aime = numina_ds.filter(lambda x: x["source"] == "amc_aime")
numina_ds_olympiads = numina_ds.filter(lambda x: x["source"] == "olympiads")
numina_ds_math = numina_ds.filter(lambda x: x["source"] == "math")

# 2. Get model responses for each of the datasets
# Order: apps, taco, numina-amc-aime, numina-olympiads, numina-math. Every
# per-dataset list below follows this order.
# (Named `pipeline_datasets` so the imported `datasets` module is not shadowed.)
pipeline_datasets = [
    apps_ds,
    taco_ds_medium,
    numina_ds_amc_aime,
    numina_ds_olympiads,
    numina_ds_math,
]

# these are user-defined simple preprocessing functions to go from entry -> prompt
preprocess_fns = [
    APPSPreprocessor(),
    TACOPreprocessor(),
    NUMINAPreprocessor(),
    NUMINAPreprocessor(),
    NUMINAPreprocessor(),
]

# The engine configuration and the processor do not depend on the dataset, so
# they are built once and reused for every dataset.
config = vLLMEngineProcessorConfig(
    model="Qwen/QwQ-32B-Preview",
    engine_kwargs=dict(
        enable_prefix_caching=True,
        enable_chunked_prefill=True,
        max_num_batched_tokens=16384,
    ),
    concurrency=2,
    batch_size=64,
)

processor = build_llm_processor(
    config,
    preprocess=lambda row: dict(
        messages=[
            # Chat messages must be role/content dicts, not bare strings.
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": row["user_input"]},
        ],
        sampling_params=dict(
            temperature=0.3,
            max_tokens=MAX_RESPONSE_TOKENS,
            detokenize=False,
        ),
    ),
    postprocess=lambda row: dict(
        assistant_response=row["generated_text"],
        **row,  # This will return all the original columns in the dataset.
    ),
)

# Run the model on the *preprocessed* rows: the processor reads the
# "user_input" column that the preprocessing functions add.
pipeline_datasets = [
    processor(ds.map(preprocess_fn))
    for ds, preprocess_fn in zip(pipeline_datasets, preprocess_fns)
]


# 3. Reformat the examples into a structured format
def _question_text(row):
    """Return the problem statement: "question" (APPS/TACO) or "problem" (NuminaMath)."""
    return row["question"] if "question" in row else row["problem"]


# define a configuration for the reformatter
config = HttpRequestProcessorConfig(
    url="https://api.openai.com/v1/chat/completions",
    # Read the key from the environment instead of hard-coding a secret in the
    # recipe; fails fast with KeyError when OPENAI_API_KEY is not set.
    headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
    # number of processors to run in parallel
    # Each handles a batch of requests
    concurrency=1,
)
# define the reformatter
reformatter = build_llm_processor(
    config=config,
    preprocess=lambda row: dict(
        # define the payload / the exact arguments to the OpenAI chat completions API
        payload=dict(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a solution format convertor."},
                {
                    "role": "user",
                    "content": CONVERT_PROMPT.format(
                        content=f"{_question_text(row)}\n{row['assistant_response']}"
                    ),
                },
            ],
            temperature=0.7,
            max_tokens=16384,
        ),
    ),
    # Keep the original columns: scoring and the ShareGPT conversion below still
    # need "user_input", "input_output", "solution", ...
    postprocess=lambda row: {
        **row,
        "formatted_response": row["http_response"]["choices"][0]["message"][
            "content"
        ],
    },
    batch_size=64,
)

pipeline_datasets = [reformatter(ds) for ds in pipeline_datasets]


# 4. Rejection Sampling based on scoring
# apps, taco, numina-amc-aime, numina-olympiads, numina-math
apps_scorer = APPSScorer()
taco_scorer = TACOScorer()
numina_scorer = MathEqualScorer(
    response_key="formatted_response", answer_key="solution"
)


def _accept_apps(row):
    """Score an APPS row on its reformatted answer.

    APPSScorer reads the model output from the "response" column, which this
    pipeline does not produce, so the reformatted answer is exposed under it.
    """
    return apps_scorer.score({**row, "response": row["formatted_response"]})


# Rejection sampling keeps only rows whose score is truthy, hence `filter`
# rather than `map`.
# NOTE(review): assumes MathEqualScorer.score returns a truthy/falsy verdict
# like the two scorers defined in this recipe; confirm against its definition.
accept_fns = [
    _accept_apps,
    taco_scorer.score,
    numina_scorer.score,
    numina_scorer.score,
    numina_scorer.score,
]
pipeline_datasets = [
    ds.filter(accept) for ds, accept in zip(pipeline_datasets, accept_fns)
]

# 5. Convert to ShareGPT format
pipeline_datasets = [ds.map(convert_to_sharegpt_format) for ds in pipeline_datasets]

# 6. Union + Save datasets
final_dataset = pipeline_datasets[0].union(*pipeline_datasets[1:])
final_dataset.write_parquet("sky-t1-preview.parquet")
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .base import Scorer
2+
from .gsm8k import GSM8KScorer
3+
from .ifeval import IfEvalScorer
4+
from .livecodebench import LiveCodeBenchScorer
5+
from .math import MathScorer
6+
7+
__all__ = ["Scorer", "MathScorer", "GSM8KScorer", "LiveCodeBenchScorer", "IfEvalScorer"]

0 commit comments

Comments
 (0)