14
14
from common_code .tasks .models import TaskData
15
15
from common_code .service .models import Service
16
16
from common_code .service .enums import ServiceStatus
17
- from common_code .common .enums import FieldDescriptionType , ExecutionUnitTagName , ExecutionUnitTagAcronym
17
+ from common_code .common .enums import (
18
+ FieldDescriptionType ,
19
+ ExecutionUnitTagName ,
20
+ ExecutionUnitTagAcronym ,
21
+ )
18
22
from common_code .common .models import FieldDescription , ExecutionUnitTag
19
23
from contextlib import asynccontextmanager
20
24
21
25
# Imports required by the service's model
22
- # TODO: 1. ADD REQUIRED IMPORTS (ALSO IN THE REQUIREMENTS.TXT)
26
+ import pandas as pd
27
+ from lazypredict .Supervised import LazyRegressor
28
+ from sklearn .model_selection import train_test_split
29
+ import io
23
30
24
31
# Load the service settings once at module level; the service definition below
# reads values such as `settings.service_url` from this object.
settings = get_settings ()
25
32
26
33
27
34
class MyService (Service ):
28
- # TODO: 2. CHANGE THIS DESCRIPTION
29
35
"""
30
- My service model
36
+ Benchmark multiple models on a dataset and return the results.
31
37
"""
32
38
33
39
# Any additional fields must be excluded for Pydantic to work
@@ -36,32 +42,22 @@ class MyService(Service):
36
42
37
43
def __init__ (self ):
38
44
super ().__init__ (
39
- # TODO: 3. CHANGE THE SERVICE NAME AND SLUG
40
- name = "My Service" ,
41
- slug = "my-service" ,
45
+ name = "Regression Benchmark" ,
46
+ slug = "regression-benchmark" ,
42
47
url = settings .service_url ,
43
48
summary = api_summary ,
44
49
description = api_description ,
45
50
status = ServiceStatus .AVAILABLE ,
46
- # TODO: 4. CHANGE THE INPUT AND OUTPUT FIELDS, THE TAGS AND THE HAS_AI VARIABLE
47
51
data_in_fields = [
48
- FieldDescription (
49
- name = "image" ,
50
- type = [
51
- FieldDescriptionType .IMAGE_PNG ,
52
- FieldDescriptionType .IMAGE_JPEG ,
53
- ],
54
- ),
52
+ FieldDescription (name = "dataset" , type = [FieldDescriptionType .TEXT_CSV ]),
55
53
],
56
54
data_out_fields = [
57
- FieldDescription (
58
- name = "result" , type = [FieldDescriptionType .APPLICATION_JSON ]
59
- ),
55
+ FieldDescription (name = "result" , type = [FieldDescriptionType .TEXT_PLAIN ]),
60
56
],
61
57
tags = [
62
58
ExecutionUnitTag (
63
- name = ExecutionUnitTagName .IMAGE_PROCESSING ,
64
- acronym = ExecutionUnitTagAcronym .IMAGE_PROCESSING ,
59
+ name = ExecutionUnitTagName .DATA_PREPROCESSING ,
60
+ acronym = ExecutionUnitTagAcronym .DATA_PREPROCESSING ,
65
61
),
66
62
],
67
63
has_ai = False ,
@@ -70,18 +66,38 @@ def __init__(self):
70
66
)
71
67
self ._logger = get_logger (settings )
72
68
73
- # TODO: 5. CHANGE THE PROCESS METHOD (CORE OF THE SERVICE)
74
69
def process(self, data):
    """Benchmark regression models on the uploaded CSV dataset.

    Args:
        data: Mapping of input field names to task data. The raw CSV payload
            is read from ``data["dataset"].data``. The file may be comma- or
            semicolon-separated and must contain a label column named
            ``target``.

    Returns:
        A dict with a single ``"result"`` key holding a ``TaskData`` of type
        ``TEXT_PLAIN`` whose payload is the UTF-8 encoded text rendering of
        the models table returned by ``LazyRegressor.fit``.

    Raises:
        KeyError: If the dataset has no ``target`` column.
    """
    payload = data["dataset"].data

    # Decode the bytes directly. Round-tripping through ``str(bytes)`` (the
    # ``b'...'`` repr) and un-escaping it by hand corrupts non-ASCII text,
    # strips any "b'" sequence found inside the data, and leaves a stray
    # quote on the last value when the file has no trailing newline.
    if isinstance(payload, (bytes, bytearray)):
        # "utf-8-sig" also drops a leading BOM; errors="replace" keeps the
        # previous best-effort behaviour for files that are not valid UTF-8.
        text = bytes(payload).decode("utf-8-sig", errors="replace")
    else:
        text = str(payload)

    # Both "," and ";" separated files are accepted. Pick the separator from
    # the header row instead of rewriting every comma in the file, so that
    # commas inside quoted fields are left untouched.
    header = next((line for line in text.splitlines() if line.strip()), "")
    separator = ";" if ";" in header else ","
    data_df = pd.read_csv(io.StringIO(text), sep=separator)

    if "target" not in data_df.columns:
        raise KeyError('The dataset must contain a label column named "target".')

    X = data_df.drop("target", axis=1)
    y = data_df["target"]
    # Fixed random_state keeps the train/test split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    reg = LazyRegressor(verbose=0, custom_metric=None)
    # Only the per-model score table is reported; the second return value
    # (predictions) is not used.
    models, _ = reg.fit(X_train, X_test, y_train, y_test)

    return {
        "result": TaskData(
            data=models.to_string().encode("utf-8"),
            type=FieldDescriptionType.TEXT_PLAIN,
        ),
    }
86
102
87
103
@@ -115,7 +131,9 @@ async def announce():
115
131
for engine_url in settings .engine_urls :
116
132
announced = False
117
133
while not announced and retries > 0 :
118
- announced = await service_service .announce_service (my_service , engine_url )
134
+ announced = await service_service .announce_service (
135
+ my_service , engine_url
136
+ )
119
137
retries -= 1
120
138
if not announced :
121
139
time .sleep (settings .engine_announce_retry_delay )
@@ -135,19 +153,21 @@ async def announce():
135
153
await service_service .graceful_shutdown (my_service , engine_url )
136
154
137
155
138
- # TODO: 6. CHANGE THE API DESCRIPTION AND SUMMARY
139
- api_description = """My service
140
- bla bla bla...
156
# Human-readable texts passed as `description` / `summary` to the service
# definition and as `description` to the FastAPI application.
api_description = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
In order for the service to work your dataset label column must be called "target".
Also to improve the results you may want to remove unnecessary columns from the dataset.
Finally, avoid having multiple empty lines at the end of the file.
"""
api_summary = """This service benchmarks a dataset with various models and outputs the results sorted by accuracy.
In order for the service to work your dataset label column must be called "target".
Also to improve the results you may want to remove unnecessary columns from the dataset.
Finally, avoid having multiple empty lines at the end of the file.
"""
145
166
146
167
# Define the FastAPI application with information
147
- # TODO: 7. CHANGE THE API TITLE, VERSION, CONTACT AND LICENSE
148
168
app = FastAPI (
149
169
lifespan = lifespan ,
150
- title = "Sample Service API." ,
170
+ title = "Regression benchmark API." ,
151
171
description = api_description ,
152
172
version = "0.0.1" ,
153
173
contact = {
0 commit comments