Skip to content

Commit e790456

Browse files
committed
feat regression benchmark service
1 parent 6f44b37 commit e790456

File tree

4 files changed

+582
-38
lines changed

4 files changed

+582
-38
lines changed

Diff for: requirements-all.txt

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
click==8.1.7
2+
joblib==1.3.2
3+
lazypredict-nightly==0.3.0
4+
lightgbm==4.3.0
5+
numpy==1.26.4
6+
pandas==2.2.1
7+
python-dateutil==2.9.0.post0
8+
pytz==2024.1
9+
scikit-learn==1.4.1.post1
10+
scipy==1.12.0
11+
six==1.16.0
12+
threadpoolctl==3.3.0
13+
tqdm==4.66.2
14+
tzdata==2024.1
15+
xgboost==2.0.3
16+
Werkzeug==2.3.6

Diff for: requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
common-code[test] @ git+https://github.com/swiss-ai-center/common-code.git@main
2+
lazypredict-nightly==0.3.0

Diff for: src/main.py

+58-38
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,26 @@
1414
from common_code.tasks.models import TaskData
1515
from common_code.service.models import Service
1616
from common_code.service.enums import ServiceStatus
17-
from common_code.common.enums import FieldDescriptionType, ExecutionUnitTagName, ExecutionUnitTagAcronym
17+
from common_code.common.enums import (
18+
FieldDescriptionType,
19+
ExecutionUnitTagName,
20+
ExecutionUnitTagAcronym,
21+
)
1822
from common_code.common.models import FieldDescription, ExecutionUnitTag
1923
from contextlib import asynccontextmanager
2024

2125
# Imports required by the service's model
22-
# TODO: 1. ADD REQUIRED IMPORTS (ALSO IN THE REQUIREMENTS.TXT)
26+
import pandas as pd
27+
from lazypredict.Supervised import LazyRegressor
28+
from sklearn.model_selection import train_test_split
29+
import io
2330

2431
settings = get_settings()
2532

2633

2734
class MyService(Service):
28-
# TODO: 2. CHANGE THIS DESCRIPTION
2935
"""
30-
My service model
36+
Benchmark multiple models on a dataset and return the results.
3137
"""
3238

3339
# Any additional fields must be excluded for Pydantic to work
@@ -36,32 +42,22 @@ class MyService(Service):
3642

3743
def __init__(self):
3844
super().__init__(
39-
# TODO: 3. CHANGE THE SERVICE NAME AND SLUG
40-
name="My Service",
41-
slug="my-service",
45+
name="Regression Benchmark",
46+
slug="regression-benchmark",
4247
url=settings.service_url,
4348
summary=api_summary,
4449
description=api_description,
4550
status=ServiceStatus.AVAILABLE,
46-
# TODO: 4. CHANGE THE INPUT AND OUTPUT FIELDS, THE TAGS AND THE HAS_AI VARIABLE
4751
data_in_fields=[
48-
FieldDescription(
49-
name="image",
50-
type=[
51-
FieldDescriptionType.IMAGE_PNG,
52-
FieldDescriptionType.IMAGE_JPEG,
53-
],
54-
),
52+
FieldDescription(name="dataset", type=[FieldDescriptionType.TEXT_CSV]),
5553
],
5654
data_out_fields=[
57-
FieldDescription(
58-
name="result", type=[FieldDescriptionType.APPLICATION_JSON]
59-
),
55+
FieldDescription(name="result", type=[FieldDescriptionType.TEXT_PLAIN]),
6056
],
6157
tags=[
6258
ExecutionUnitTag(
63-
name=ExecutionUnitTagName.IMAGE_PROCESSING,
64-
acronym=ExecutionUnitTagAcronym.IMAGE_PROCESSING,
59+
name=ExecutionUnitTagName.DATA_PREPROCESSING,
60+
acronym=ExecutionUnitTagAcronym.DATA_PREPROCESSING,
6561
),
6662
],
6763
has_ai=False,
@@ -70,18 +66,38 @@ def __init__(self):
7066
)
7167
self._logger = get_logger(settings)
7268

73-
# TODO: 5. CHANGE THE PROCESS METHOD (CORE OF THE SERVICE)
7469
def process(self, data):
75-
# NOTE that the data is a dictionary with the keys being the field names set in the data_in_fields
76-
# The objects in the data variable are always bytes. It is necessary to convert them to the desired type
77-
# before using them.
78-
# raw = data["image"].data
79-
# input_type = data["image"].type
80-
# ... do something with the raw data
81-
82-
# NOTE that the result must be a dictionary with the keys being the field names set in the data_out_fields
70+
71+
raw = str(data["dataset"].data)
72+
raw = (
73+
raw.replace(",", ";")
74+
.replace("\\n", "\n")
75+
.replace("\\r", "\n")
76+
.replace("b'", "")
77+
)
78+
79+
lines = raw.splitlines()
80+
if lines[-1] == "" or lines[-1] == "'":
81+
lines.pop()
82+
raw = "\n".join(lines)
83+
84+
data_df = pd.read_csv(io.StringIO(raw), sep=";")
85+
86+
X = data_df.drop("target", axis=1)
87+
y = data_df["target"]
88+
X_train, X_test, y_train, y_test = train_test_split(
89+
X, y, test_size=0.2, random_state=42
90+
)
91+
reg = LazyRegressor(verbose=0, custom_metric=None)
92+
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
93+
94+
buf = io.BytesIO()
95+
buf.write(models.to_string().encode("utf-8"))
96+
8397
return {
84-
"result": TaskData(data=..., type=FieldDescriptionType.APPLICATION_JSON)
98+
"result": TaskData(
99+
data=buf.getvalue(), type=FieldDescriptionType.TEXT_PLAIN
100+
),
85101
}
86102

87103

@@ -115,7 +131,9 @@ async def announce():
115131
for engine_url in settings.engine_urls:
116132
announced = False
117133
while not announced and retries > 0:
118-
announced = await service_service.announce_service(my_service, engine_url)
134+
announced = await service_service.announce_service(
135+
my_service, engine_url
136+
)
119137
retries -= 1
120138
if not announced:
121139
time.sleep(settings.engine_announce_retry_delay)
@@ -135,19 +153,21 @@ async def announce():
135153
await service_service.graceful_shutdown(my_service, engine_url)
136154

137155

138-
# TODO: 6. CHANGE THE API DESCRIPTION AND SUMMARY
139-
api_description = """My service
140-
bla bla bla...
156+
api_description = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
157+
In order for the service to work your dataset label column must be called "target".
158+
Also, to improve the results, you may want to remove unnecessary columns from the dataset.
159+
Finally, avoid having multiple empty lines at the end of the file.
141160
"""
142-
api_summary = """My service
143-
bla bla bla...
161+
api_summary = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
162+
In order for the service to work your dataset label column must be called "target".
163+
Also, to improve the results, you may want to remove unnecessary columns from the dataset.
164+
Finally, avoid having multiple empty lines at the end of the file.
144165
"""
145166

146167
# Define the FastAPI application with information
147-
# TODO: 7. CHANGE THE API TITLE, VERSION, CONTACT AND LICENSE
148168
app = FastAPI(
149169
lifespan=lifespan,
150-
title="Sample Service API.",
170+
title="Regression benchmark API.",
151171
description=api_description,
152172
version="0.0.1",
153173
contact={

0 commit comments

Comments
 (0)