# Apache Software License 2.0
- #
+ #
# Copyright (c) ZenML GmbH 2025. All rights reserved.
- #
+ #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
- #
+ #
# http://www.apache.org/licenses/LICENSE-2.0
- #
+ #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- #
+ #

from typing import Dict, List, Optional

@@ -25,7 +25,6 @@
    openpipe_data_converter,
    openpipe_dataset_creator,
    openpipe_finetuning_starter,
-     openpipe_finetuning_starter_sdk,
)

logger = get_logger(__name__)
@@ -42,12 +41,9 @@ def openpipe_finetuning(
    system_prompt: str = "You are a helpful assistant",
    split_ratio: float = 0.9,
    metadata_columns: Optional[List[str]] = None,
-
    # OpenPipe dataset parameters
    dataset_name: str = "zenml_dataset",
    openpipe_api_key: Optional[str] = None,
-     base_url: str = "https://api.openpipe.ai/api/v1",
-
    # Fine-tuning parameters
    model_name: str = "zenml_finetuned_model",
    base_model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -62,9 +58,6 @@ def openpipe_finetuning(
    verbose_logs: bool = True,
    auto_rename: bool = True,
    force_overwrite: bool = False,
-
-     # Implementation options
-     use_sdk: bool = False,
):
    """
    OpenPipe fine-tuning pipeline.
@@ -83,7 +76,6 @@ def openpipe_finetuning(
        metadata_columns: Optional columns to include as metadata
        dataset_name: Name for the OpenPipe dataset
        openpipe_api_key: OpenPipe API key
-         base_url: OpenPipe API base URL
        model_name: Name for the fine-tuned model
        base_model: Base model to fine-tune
        enable_sft: Whether to enable supervised fine-tuning
@@ -97,7 +89,6 @@ def openpipe_finetuning(
        verbose_logs: Whether to log detailed model information during polling
        auto_rename: If True, automatically append a timestamp to model name if it already exists
        force_overwrite: If True, delete existing model with the same name before creating new one
-         use_sdk: If True, use the Python OpenPipe SDK instead of direct API calls

    Returns:
        A dictionary with details about the fine-tuning job, including model information
@@ -108,7 +99,7 @@ def openpipe_finetuning(
        sample_size=sample_size,
        data_source=data_source,
    )
-
+
    # Convert data to OpenPipe format
    jsonl_path = openpipe_data_converter(
        data=data,
@@ -118,55 +109,31 @@ def openpipe_finetuning(
        split_ratio=split_ratio,
        metadata_columns=metadata_columns,
    )
-
+
    # Create OpenPipe dataset and upload data
    dataset_id = openpipe_dataset_creator(
        jsonl_path=jsonl_path,
        dataset_name=dataset_name,
        openpipe_api_key=openpipe_api_key,
-         base_url=base_url,
    )
-
-     # Choose between SDK and direct API implementation
-     if use_sdk:
-         # Use the SDK implementation
-         finetuning_result = openpipe_finetuning_starter_sdk(
-             dataset_id=dataset_id,
-             model_name=model_name,
-             base_model=base_model,
-             openpipe_api_key=openpipe_api_key,
-             base_url=base_url,
-             enable_sft=enable_sft,
-             enable_preference_tuning=enable_preference_tuning,
-             learning_rate_multiplier=learning_rate_multiplier,
-             num_epochs=num_epochs,
-             batch_size=batch_size,
-             default_temperature=default_temperature,
-             wait_for_completion=wait_for_completion,
-             timeout_minutes=timeout_minutes,
-             verbose_logs=verbose_logs,
-             auto_rename=auto_rename,
-             force_overwrite=force_overwrite,
-         )
-     else:
-         # Use the original direct API implementation
-         finetuning_result = openpipe_finetuning_starter(
-             dataset_id=dataset_id,
-             model_name=model_name,
-             base_model=base_model,
-             openpipe_api_key=openpipe_api_key,
-             base_url=base_url,
-             enable_sft=enable_sft,
-             enable_preference_tuning=enable_preference_tuning,
-             learning_rate_multiplier=learning_rate_multiplier,
-             num_epochs=num_epochs,
-             batch_size=batch_size,
-             default_temperature=default_temperature,
-             wait_for_completion=wait_for_completion,
-             timeout_minutes=timeout_minutes,
-             verbose_logs=verbose_logs,
-             auto_rename=auto_rename,
-             force_overwrite=force_overwrite,
-         )
-
+
+     # Start fine-tuning using the SDK implementation
+     finetuning_result = openpipe_finetuning_starter(
+         dataset_id=dataset_id,
+         model_name=model_name,
+         base_model=base_model,
+         openpipe_api_key=openpipe_api_key,
+         enable_sft=enable_sft,
+         enable_preference_tuning=enable_preference_tuning,
+         learning_rate_multiplier=learning_rate_multiplier,
+         num_epochs=num_epochs,
+         batch_size=batch_size,
+         default_temperature=default_temperature,
+         wait_for_completion=wait_for_completion,
+         timeout_minutes=timeout_minutes,
+         verbose_logs=verbose_logs,
+         auto_rename=auto_rename,
+         force_overwrite=force_overwrite,
+     )
+
    return finetuning_result
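With the SDK-backed openpipe_finetuning_starter as the only implementation, callers no longer pass use_sdk or base_url; the rest of the signature is unchanged. For reference, a minimal sketch of how the simplified pipeline might be invoked after this change; the import path, the environment-variable handling for the API key, and the concrete values are assumptions for illustration, while the parameter names come from the pipeline signature above.

# Hypothetical usage sketch (not part of this diff). Parameter names match the
# pipeline signature above; the import path and concrete values are assumptions.
import os

from pipelines.openpipe_finetuning import openpipe_finetuning  # assumed module path

if __name__ == "__main__":
    # Calling a ZenML @pipeline-decorated function triggers a pipeline run.
    openpipe_finetuning(
        system_prompt="You are a helpful assistant",
        split_ratio=0.9,
        dataset_name="zenml_dataset",
        openpipe_api_key=os.environ.get("OPENPIPE_API_KEY"),  # assumed env var
        model_name="zenml_finetuned_model",
        base_model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        wait_for_completion=True,
        timeout_minutes=120,  # illustrative value
    )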