Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 65 additions & 60 deletions services/prediction/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,89 +3,94 @@
import json
import numpy as np
import random

# Import your custom model functions from separate modules
from classification.classification import classification
from regression.regression import regression
from transform import makeTrainingData

# Flask application object; the route decorators further down register their
# handlers on it.
app = Flask(__name__)
# Configure cross-origin access for every path matching /api/*, allowing any
# origin ("*"). CORS is presumably flask_cors.CORS — its import is not visible
# in this part of the file; confirm.
cors = CORS(app, resources={r"/api/*": {"origins": "*"}})

def controlSplitTrainTest(X, y, split_states: 'list[int]'):
    """Split X and y into train and test parts using per-row flags.

    Args:
        X: Feature data; must provide ``take(indices, axis=0)`` (e.g. a
            NumPy array).
        y: Target data; same requirement as ``X``.
        split_states: One flag per row: 1 puts the row in the training set,
            0 puts it in the test set. Any other value leaves the row out of
            both sets.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. Rows keep their
        original relative order inside each part.
    """
    flags = np.asarray(split_states)
    # flatnonzero always yields an integer index array, even when no row
    # matches. Building the indices with np.array([]) produced a float64
    # array in that case, which `take` refuses to use as indices.
    train_indices = np.flatnonzero(flags == 1)
    test_indices = np.flatnonzero(flags == 0)
    X_train = X.take(train_indices, axis=0)
    X_test = X.take(test_indices, axis=0)
    y_train = y.take(train_indices, axis=0)
    y_test = y.take(test_indices, axis=0)
    # The caller unpacks four values, so the split must be returned.
    return X_train, X_test, y_train, y_test
# Allow cross-origin requests from any origin ("*") on every /api/* route.
# NOTE(review): `cors = CORS(app, ...)` earlier in this file already applies
# the same configuration — confirm only one of the two calls is meant to stay.
CORS(app, resources={r"/api/*": {"origins": "*"}})

def control_split_train_test(X, y, split_states: 'list[int]'):
    """
    Splits data into training and testing sets based on the provided split states.

    Args:
        X: Feature data (NumPy array or any array-like such as a list of rows).
        y: Target data (NumPy array or array-like).
        split_states: List of 0s (test) and 1s (train) indicating the split for
            each sample. Samples flagged with any other value end up in
            neither set.

    Returns:
        X_train, X_test, y_train, y_test: Split feature and target data as
        NumPy arrays, each preserving the original sample order.
    """
    # asarray is a no-op for ndarrays and lets plain lists be indexed with an
    # index array, which a Python list does not support.
    X = np.asarray(X)
    y = np.asarray(y)
    flags = np.asarray(split_states)
    train_indices = np.flatnonzero(flags == 1)
    test_indices = np.flatnonzero(flags == 0)
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    return X_train, X_test, y_train, y_test

def mockSplitIndices(size: int, ratio: float):
    """Generate a random train/test flag for each of ``size`` samples.

    Args:
        size: Number of flags to generate. A value of 0 or less yields an
            empty list.
        ratio: Probability that a sample is flagged as test (0). Values at or
            below 0.0 flag every sample as train; values at or above 1.0 flag
            every sample as test.

    Returns:
        List of ``size`` integers, 1 for train and 0 for test.
    """
    # random.random() is drawn from [0.0, 1.0). Comparing with `>=` keeps both
    # boundaries exact: with ratio 0.0 a draw of exactly 0.0 still counts as
    # train, and with ratio 1.0 no draw can reach the threshold.
    return [1 if random.random() >= ratio else 0 for _ in range(size)]
def mock_split_indices(size: int, test_ratio: float):
    """
    Generates mock training/testing split indices based on a given test ratio.

    Args:
        size: Number of samples. A value of 0 or less yields an empty list.
        test_ratio: Proportion of samples to be allocated to the test set
            (0.0 to 1.0). Values outside that range behave like the nearest
            bound: everything train below 0.0, everything test above 1.0.

    Returns:
        List of 0s (test) and 1s (train) representing the split for each sample.
    """
    # random.random() lies in [0.0, 1.0); `>=` guarantees that test_ratio=0.0
    # never produces a test sample (a draw of exactly 0.0 would with `>`),
    # while test_ratio=1.0 still sends every sample to the test set.
    return [1 if random.random() >= test_ratio else 0 for _ in range(size)]


@app.route('/api/ping', methods=['GET'])
def ping():
    """
    Health check endpoint.

    Returns:
        A dict ``{"success": True}``; the handler takes no input and has no
        other effect.
    """
    return {"success": True}


@app.route("/api/train_test", methods=['POST'])
def run_model():
    """
    Endpoint for running classification or regression models.

    Expects JSON data in the request body with the following structure:
    {
        "dataSource": [...],   // List of data points
        "fields": [...],       // List of field descriptions
        "model": {             // Model configuration
            "features": [...],
            "targets": [...],  // Only the first target is used
            "algorithm": "..."
        },
        "mode": "classification" or "regression",
        "trainTestSplitIndices": [...]  // Optional list of 1 (train) / 0 (test)
                                        // flags; a random split with test
                                        // ratio 0.2 is generated when absent
    }

    Returns:
        On success:
            {"success": True, "data": {"accuracy": <score>, "result": [...]}}
        where each entry of "result" is ``[test_row_index, diff]``.
        On any error (malformed body, missing key, unsupported mode, model
        failure): {"success": False, "message": <error text>}.
    """
    try:
        # Parse the body once; the model configuration lives in the same
        # document, so there is no need to decode request.data a second time.
        dataset = json.loads(request.data)
        data = dataset['dataSource']
        fields = dataset['fields']
        model = dataset['model']
        features = model['features']
        targets = model['targets']
        algorithm = model['algorithm']
        mode = dataset['mode']

        if 'trainTestSplitIndices' in dataset:
            split_flags = dataset['trainTestSplitIndices']
        else:
            split_flags = mockSplitIndices(len(data), 0.2)

        # Positions of the rows flagged as test, used to label each diff below.
        testset_indices = [i for i, flag in enumerate(split_flags) if flag == 0]

        X, y, headers = makeTrainingData(
            data=data, fields=fields, features=features, target=targets[0]
        )
        X_train, X_test, y_train, y_test = controlSplitTrainTest(X, y, split_flags)

        if mode == 'classification':
            score, diffs = classification(X_train, X_test, y_train, y_test, headers, algorithm)
        elif mode == 'regression':
            score, diffs = regression(X_train, X_test, y_train, y_test, headers, algorithm)
        else:
            # Report an unknown mode as a failure instead of answering with a
            # "successful" accuracy of 0 and an empty result.
            raise ValueError(
                "unsupported mode %r; expected 'classification' or 'regression'" % (mode,)
            )

        if len(diffs) != len(testset_indices):
            print('[warning] diffs and testset_indices have different lengths')
        # zip pairs only as many entries as both sequences have, so a length
        # mismatch is reported by the warning above rather than an IndexError.
        result = [[index, diff] for index, diff in zip(testset_indices, diffs)]

        return {
            "success": True,
            "data": {
                "accuracy": score,
                "result": result
            }
        }
    except Exception as e:
        # Top-level boundary of the endpoint: every failure is turned into a
        # JSON error payload for the client.
        return {
            "success": False,
            "message": str(e)
        }


# Earlier name of the handler, kept so existing references keep resolving.
runClassificationModel = run_model


if __name__ == '__main__':
    # Start the server once, listening on all interfaces at port 5533.
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader); combined with host 0.0.0.0 this is only
    # appropriate for local development — confirm it is not used in production.
    app.run(host='0.0.0.0', port=5533, debug=True)