Skip to content

Commit 5bf9613

Browse files
committed
Start fine-tuning job using SDK with monitoring
1 parent 6a86399 commit 5bf9613

10 files changed

+270
-526
lines changed

pipelines/__init__.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
11
# Apache Software License 2.0
2-
#
2+
#
33
# Copyright (c) ZenML GmbH 2025. All rights reserved.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16-
#
16+
#
1717

1818
from .openpipe_finetuning import openpipe_finetuning

pipelines/openpipe_finetuning.py

+27-60
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,19 @@
11
# Apache Software License 2.0
2-
#
2+
#
33
# Copyright (c) ZenML GmbH 2025. All rights reserved.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16-
#
16+
#
1717

1818
from typing import Dict, List, Optional
1919

@@ -25,7 +25,6 @@
2525
openpipe_data_converter,
2626
openpipe_dataset_creator,
2727
openpipe_finetuning_starter,
28-
openpipe_finetuning_starter_sdk,
2928
)
3029

3130
logger = get_logger(__name__)
@@ -42,12 +41,9 @@ def openpipe_finetuning(
4241
system_prompt: str = "You are a helpful assistant",
4342
split_ratio: float = 0.9,
4443
metadata_columns: Optional[List[str]] = None,
45-
4644
# OpenPipe dataset parameters
4745
dataset_name: str = "zenml_dataset",
4846
openpipe_api_key: Optional[str] = None,
49-
base_url: str = "https://api.openpipe.ai/api/v1",
50-
5147
# Fine-tuning parameters
5248
model_name: str = "zenml_finetuned_model",
5349
base_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -62,9 +58,6 @@ def openpipe_finetuning(
6258
verbose_logs: bool = True,
6359
auto_rename: bool = True,
6460
force_overwrite: bool = False,
65-
66-
# Implementation options
67-
use_sdk: bool = False,
6861
):
6962
"""
7063
OpenPipe fine-tuning pipeline.
@@ -83,7 +76,6 @@ def openpipe_finetuning(
8376
metadata_columns: Optional columns to include as metadata
8477
dataset_name: Name for the OpenPipe dataset
8578
openpipe_api_key: OpenPipe API key
86-
base_url: OpenPipe API base URL
8779
model_name: Name for the fine-tuned model
8880
base_model: Base model to fine-tune
8981
enable_sft: Whether to enable supervised fine-tuning
@@ -97,7 +89,6 @@ def openpipe_finetuning(
9789
verbose_logs: Whether to log detailed model information during polling
9890
auto_rename: If True, automatically append a timestamp to model name if it already exists
9991
force_overwrite: If True, delete existing model with the same name before creating new one
100-
use_sdk: If True, use the Python OpenPipe SDK instead of direct API calls
10192
10293
Returns:
10394
A dictionary with details about the fine-tuning job, including model information
@@ -108,7 +99,7 @@ def openpipe_finetuning(
10899
sample_size=sample_size,
109100
data_source=data_source,
110101
)
111-
102+
112103
# Convert data to OpenPipe format
113104
jsonl_path = openpipe_data_converter(
114105
data=data,
@@ -118,55 +109,31 @@ def openpipe_finetuning(
118109
split_ratio=split_ratio,
119110
metadata_columns=metadata_columns,
120111
)
121-
112+
122113
# Create OpenPipe dataset and upload data
123114
dataset_id = openpipe_dataset_creator(
124115
jsonl_path=jsonl_path,
125116
dataset_name=dataset_name,
126117
openpipe_api_key=openpipe_api_key,
127-
base_url=base_url,
128118
)
129-
130-
# Choose between SDK and direct API implementation
131-
if use_sdk:
132-
# Use the SDK implementation
133-
finetuning_result = openpipe_finetuning_starter_sdk(
134-
dataset_id=dataset_id,
135-
model_name=model_name,
136-
base_model=base_model,
137-
openpipe_api_key=openpipe_api_key,
138-
base_url=base_url,
139-
enable_sft=enable_sft,
140-
enable_preference_tuning=enable_preference_tuning,
141-
learning_rate_multiplier=learning_rate_multiplier,
142-
num_epochs=num_epochs,
143-
batch_size=batch_size,
144-
default_temperature=default_temperature,
145-
wait_for_completion=wait_for_completion,
146-
timeout_minutes=timeout_minutes,
147-
verbose_logs=verbose_logs,
148-
auto_rename=auto_rename,
149-
force_overwrite=force_overwrite,
150-
)
151-
else:
152-
# Use the original direct API implementation
153-
finetuning_result = openpipe_finetuning_starter(
154-
dataset_id=dataset_id,
155-
model_name=model_name,
156-
base_model=base_model,
157-
openpipe_api_key=openpipe_api_key,
158-
base_url=base_url,
159-
enable_sft=enable_sft,
160-
enable_preference_tuning=enable_preference_tuning,
161-
learning_rate_multiplier=learning_rate_multiplier,
162-
num_epochs=num_epochs,
163-
batch_size=batch_size,
164-
default_temperature=default_temperature,
165-
wait_for_completion=wait_for_completion,
166-
timeout_minutes=timeout_minutes,
167-
verbose_logs=verbose_logs,
168-
auto_rename=auto_rename,
169-
force_overwrite=force_overwrite,
170-
)
171-
119+
120+
# Start fine-tuning using the SDK implementation
121+
finetuning_result = openpipe_finetuning_starter(
122+
dataset_id=dataset_id,
123+
model_name=model_name,
124+
base_model=base_model,
125+
openpipe_api_key=openpipe_api_key,
126+
enable_sft=enable_sft,
127+
enable_preference_tuning=enable_preference_tuning,
128+
learning_rate_multiplier=learning_rate_multiplier,
129+
num_epochs=num_epochs,
130+
batch_size=batch_size,
131+
default_temperature=default_temperature,
132+
wait_for_completion=wait_for_completion,
133+
timeout_minutes=timeout_minutes,
134+
verbose_logs=verbose_logs,
135+
auto_rename=auto_rename,
136+
force_overwrite=force_overwrite,
137+
)
138+
172139
return finetuning_result

run.py

+26-32
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,19 @@
11
# Apache Software License 2.0
2-
#
2+
#
33
# Copyright (c) ZenML GmbH 2025. All rights reserved.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16-
#
16+
#
1717

1818
import os
1919
import json
@@ -141,12 +141,6 @@
141141
default=False,
142142
help="Disable caching for the pipeline run.",
143143
)
144-
@click.option(
145-
"--use-sdk",
146-
is_flag=True,
147-
default=False,
148-
help="Use the Python OpenPipe SDK instead of direct API calls.",
149-
)
150144
def main(
151145
openpipe_api_key: Optional[str] = None,
152146
dataset_name: str = "ultra_customer_service",
@@ -162,7 +156,6 @@ def main(
162156
force_overwrite: bool = False,
163157
fetch_details_only: bool = False,
164158
no_cache: bool = False,
165-
use_sdk: bool = False,
166159
):
167160
"""Main entry point for the OpenPipe fine-tuning pipeline.
168161
@@ -183,7 +176,6 @@ def main(
183176
force_overwrite: If True, delete existing model with the same name before creating new one.
184177
fetch_details_only: Only fetch model details without running the fine-tuning pipeline.
185178
no_cache: If `True` cache will be disabled.
186-
use_sdk: If `True` use the Python OpenPipe SDK instead of direct API calls.
187179
"""
188180
client = Client()
189181

@@ -196,61 +188,65 @@ def main(
196188
if not openpipe_api_key:
197189
openpipe_api_key = os.environ.get("OPENPIPE_API_KEY")
198190
if not openpipe_api_key:
199-
logger.error("OpenPipe API key not provided. Please set --openpipe-api-key "
200-
"or the OPENPIPE_API_KEY environment variable.")
191+
logger.error(
192+
"OpenPipe API key not provided. Please set --openpipe-api-key "
193+
"or the OPENPIPE_API_KEY environment variable."
194+
)
201195
return
202-
196+
203197
# Check for conflicting options
204198
if force_overwrite and auto_rename:
205-
logger.warning("Both force_overwrite and auto_rename are enabled. force_overwrite will take precedence.")
206-
199+
logger.warning(
200+
"Both force_overwrite and auto_rename are enabled. force_overwrite will take precedence."
201+
)
202+
207203
# If fetch_details_only is True, just fetch model details without running the pipeline
208204
if fetch_details_only:
209205
logger.info(f"Fetching details for model: {model_name}")
210-
206+
211207
# Set up headers for API request
212208
headers = {
213209
"Authorization": f"Bearer {openpipe_api_key}",
214-
"Content-Type": "application/json"
210+
"Content-Type": "application/json",
215211
}
216-
212+
217213
# Construct the URL
218214
base_url = "https://api.openpipe.ai/api/v1"
219215
url = f"{base_url}/models/{model_name}"
220-
216+
221217
try:
222218
# Make the API request
223219
response = requests.get(url, headers=headers)
224220
response.raise_for_status()
225221
model_info = response.json()
226-
222+
227223
# Log important model information
228224
status = model_info.get("openpipe", {}).get("status", "UNKNOWN")
229225
error_message = model_info.get("openpipe", {}).get("errorMessage")
230226
base_model = model_info.get("openpipe", {}).get("baseModel", "unknown")
231227
created = model_info.get("created", "unknown")
232-
228+
233229
logger.info(f"Model: {model_name}")
234230
logger.info(f"Status: {status}")
235231
logger.info(f"Base model: {base_model}")
236232
logger.info(f"Created: {created}")
237-
233+
238234
if status == "ERROR" and error_message:
239235
logger.error(f"Error message: {error_message}")
240-
236+
241237
# Log training parameters if available
242238
hyperparams = model_info.get("openpipe", {}).get("hyperparameters", {})
243239
if hyperparams:
244240
logger.info("Training parameters:")
245241
for key, value in hyperparams.items():
246242
logger.info(f" {key}: {value}")
247-
243+
248244
# Print full JSON response for detailed debugging
249245
logger.info(f"Full model details: {json.dumps(model_info, indent=2)}")
250246
return
251247
except Exception as e:
252248
logger.error(f"Failed to fetch model details: {str(e)}")
253-
if hasattr(e, 'response') and e.response is not None:
249+
if hasattr(e, "response") and e.response is not None:
254250
logger.error(f"Response status code: {e.response.status_code}")
255251
logger.error(f"Response body: {e.response.text}")
256252
return
@@ -262,7 +258,7 @@ def main(
262258
pipeline_args["config_path"] = os.path.join(
263259
config_folder, "openpipe_finetuning.yaml"
264260
)
265-
261+
266262
# Set up run arguments
267263
run_args_openpipe = {
268264
"dataset_name": dataset_name,
@@ -277,12 +273,10 @@ def main(
277273
"auto_rename": auto_rename,
278274
"force_overwrite": force_overwrite,
279275
"openpipe_api_key": openpipe_api_key,
280-
"use_sdk": use_sdk,
281276
}
282-
277+
283278
# Run the pipeline
284279
openpipe_finetuning.with_options(**pipeline_args)(**run_args_openpipe)
285-
286280

287281

288282
if __name__ == "__main__":

steps/__init__.py

+5-6
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,19 @@
11
# Apache Software License 2.0
2-
#
2+
#
33
# Copyright (c) ZenML GmbH 2025. All rights reserved.
4-
#
4+
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
77
# You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing, software
1212
# distributed under the License is distributed on an "AS IS" BASIS,
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16-
#
16+
#
1717

1818
from .data_loader import (
1919
data_loader,
@@ -26,5 +26,4 @@
2626
)
2727
from .openpipe_finetuning_starter import (
2828
openpipe_finetuning_starter,
29-
openpipe_finetuning_starter_sdk,
3029
)

0 commit comments

Comments
 (0)