Skip to content

Commit

Permalink
feat regression benchmark service
Browse files Browse the repository at this point in the history
  • Loading branch information
Kayzz99 committed Mar 5, 2024
1 parent 6f44b37 commit e790456
Show file tree
Hide file tree
Showing 4 changed files with 582 additions and 38 deletions.
16 changes: 16 additions & 0 deletions requirements-all.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
click==8.1.7
joblib==1.3.2
lazypredict-nightly==0.3.0
lightgbm==4.3.0
numpy==1.26.4
pandas==2.2.1
python-dateutil==2.9.0.post0
pytz==2024.1
scikit-learn==1.4.1.post1
scipy==1.12.0
six==1.16.0
threadpoolctl==3.3.0
tqdm==4.66.2
tzdata==2024.1
xgboost==2.0.3
Werkzeug==2.3.6
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
common-code[test] @ git+https://github.com/swiss-ai-center/common-code.git@main
lazypredict-nightly==0.3.0
96 changes: 58 additions & 38 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,26 @@
from common_code.tasks.models import TaskData
from common_code.service.models import Service
from common_code.service.enums import ServiceStatus
from common_code.common.enums import FieldDescriptionType, ExecutionUnitTagName, ExecutionUnitTagAcronym
from common_code.common.enums import (
FieldDescriptionType,
ExecutionUnitTagName,
ExecutionUnitTagAcronym,
)
from common_code.common.models import FieldDescription, ExecutionUnitTag
from contextlib import asynccontextmanager

# Imports required by the service's model
# TODO: 1. ADD REQUIRED IMPORTS (ALSO IN THE REQUIREMENTS.TXT)
import pandas as pd
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import io

settings = get_settings()


class MyService(Service):
# TODO: 2. CHANGE THIS DESCRIPTION
"""
My service model
Benchmark multiple models on a dataset and return the results.
"""

# Any additional fields must be excluded for Pydantic to work
Expand All @@ -36,32 +42,22 @@ class MyService(Service):

def __init__(self):
super().__init__(
# TODO: 3. CHANGE THE SERVICE NAME AND SLUG
name="My Service",
slug="my-service",
name="Regression Benchmark",
slug="regression-benchmark",
url=settings.service_url,
summary=api_summary,
description=api_description,
status=ServiceStatus.AVAILABLE,
# TODO: 4. CHANGE THE INPUT AND OUTPUT FIELDS, THE TAGS AND THE HAS_AI VARIABLE
data_in_fields=[
FieldDescription(
name="image",
type=[
FieldDescriptionType.IMAGE_PNG,
FieldDescriptionType.IMAGE_JPEG,
],
),
FieldDescription(name="dataset", type=[FieldDescriptionType.TEXT_CSV]),
],
data_out_fields=[
FieldDescription(
name="result", type=[FieldDescriptionType.APPLICATION_JSON]
),
FieldDescription(name="result", type=[FieldDescriptionType.TEXT_PLAIN]),
],
tags=[
ExecutionUnitTag(
name=ExecutionUnitTagName.IMAGE_PROCESSING,
acronym=ExecutionUnitTagAcronym.IMAGE_PROCESSING,
name=ExecutionUnitTagName.DATA_PREPROCESSING,
acronym=ExecutionUnitTagAcronym.DATA_PREPROCESSING,
),
],
has_ai=False,
Expand All @@ -70,18 +66,38 @@ def __init__(self):
)
self._logger = get_logger(settings)

# TODO: 5. CHANGE THE PROCESS METHOD (CORE OF THE SERVICE)
def process(self, data):
# NOTE that the data is a dictionary with the keys being the field names set in the data_in_fields
# The objects in the data variable are always bytes. It is necessary to convert them to the desired type
# before using them.
# raw = data["image"].data
# input_type = data["image"].type
# ... do something with the raw data

# NOTE that the result must be a dictionary with the keys being the field names set in the data_out_fields

raw = str(data["dataset"].data)
raw = (
raw.replace(",", ";")
.replace("\\n", "\n")
.replace("\\r", "\n")
.replace("b'", "")
)

lines = raw.splitlines()
if lines[-1] == "" or lines[-1] == "'":
lines.pop()
raw = "\n".join(lines)

data_df = pd.read_csv(io.StringIO(raw), sep=";")

X = data_df.drop("target", axis=1)
y = data_df["target"]
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
reg = LazyRegressor(verbose=0, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)

buf = io.BytesIO()
buf.write(models.to_string().encode("utf-8"))

return {
"result": TaskData(data=..., type=FieldDescriptionType.APPLICATION_JSON)
"result": TaskData(
data=buf.getvalue(), type=FieldDescriptionType.TEXT_PLAIN
),
}


Expand Down Expand Up @@ -115,7 +131,9 @@ async def announce():
for engine_url in settings.engine_urls:
announced = False
while not announced and retries > 0:
announced = await service_service.announce_service(my_service, engine_url)
announced = await service_service.announce_service(
my_service, engine_url
)
retries -= 1
if not announced:
time.sleep(settings.engine_announce_retry_delay)
Expand All @@ -135,19 +153,21 @@ async def announce():
await service_service.graceful_shutdown(my_service, engine_url)


# TODO: 6. CHANGE THE API DESCRIPTION AND SUMMARY
api_description = """My service
bla bla bla...
api_description = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
In order for the service to work your dataset label column must be called "target".
Also to improve the results you may want to remove uneccessary columns from the dataset.
Finally, avoid having multiple empty lines at the end of the file.
"""
api_summary = """My service
bla bla bla...
api_summary = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
In order for the service to work your dataset label column must be called "target".
Also to improve the results you may want to remove uneccessary columns from the dataset.
Finally, avoid having multiple empty lines at the end of the file.
"""

# Define the FastAPI application with information
# TODO: 7. CHANGE THE API TITLE, VERSION, CONTACT AND LICENSE
app = FastAPI(
lifespan=lifespan,
title="Sample Service API.",
title="Regression benchmark API.",
description=api_description,
version="0.0.1",
contact={
Expand Down
Loading

0 comments on commit e790456

Please sign in to comment.