diff --git a/src/api.py b/src/api.py
index 36c257a..84c8cf0 100644
--- a/src/api.py
+++ b/src/api.py
@@ -1,28 +1,49 @@
-from fastapi import FastAPI, UploadFile, File
-from PIL import Image
+"""
+This script creates an API endpoint using FastAPI. The endpoint accepts an image file,
+applies the necessary transformations, and uses a pre-trained PyTorch model to make a prediction.
+"""
+
 import torch
+from fastapi import FastAPI, File, UploadFile
+from PIL import Image
 from torchvision import transforms
+
 from main import Net  # Importing Net class from main.py
 
-# Load the model
+# Load the pre-trained model
 model = Net()
 model.load_state_dict(torch.load("mnist_model.pth"))
-model.eval()
+model.eval()  # Set the model to evaluation mode
 
-# Transform used for preprocessing the image
-transform = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize((0.5,), (0.5,))
-])
+# Define the transformations to be applied to the input images
+transform = transforms.Compose(
+    [
+        transforms.ToTensor(),  # Convert the images to tensors
+        transforms.Normalize((0.5,), (0.5,)),  # Normalize the images
+    ]
+)
 
 app = FastAPI()
 
+
 @app.post("/predict/")
 async def predict(file: UploadFile = File(...)):
-    image = Image.open(file.file).convert("L")
-    image = transform(image)
-    image = image.unsqueeze(0)  # Add batch dimension
-    with torch.no_grad():
-        output = model(image)
-        _, predicted = torch.max(output.data, 1)
-    return {"prediction": int(predicted[0])}
+    """
+    This function accepts an image file, applies the necessary transformations,
+    and uses a pre-trained PyTorch model to make a prediction.
+
+    Parameters:
+    file (UploadFile): The image file to be processed.
+
+    Returns:
+    dict: A dictionary with the key 'prediction' and the predicted class as the value.
+    """
+    image = Image.open(file.file).convert("L")  # Convert the image to grayscale
+    image = transform(image)  # Apply the transformations
+    image = image.unsqueeze(0)  # Add a batch dimension
+    with torch.no_grad():  # Disable gradient calculation
+        output = model(image)  # Make a prediction
+        _, predicted = torch.max(
+            output.data, 1
+        )  # Get the class with the highest probability
+    return {"prediction": int(predicted[0])}  # Return the prediction as a dictionary
diff --git a/src/main.py b/src/main.py
index 243a31e..a8a111b 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,48 +1,66 @@
-from PIL import Image
+"""
+This script defines and trains a PyTorch model for the MNIST dataset.
+"""
+
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
-from torchvision import datasets, transforms
 from torch.utils.data import DataLoader
-import numpy as np
+from torchvision import datasets, transforms
 
-# Step 1: Load MNIST Data and Preprocess
-transform = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize((0.5,), (0.5,))
-])
+# Load and preprocess the MNIST dataset
+transform = transforms.Compose(
+    [
+        transforms.ToTensor(),  # Convert the images to tensors
+        transforms.Normalize((0.5,), (0.5,)),  # Normalize the images
+    ]
+)
 
-trainset = datasets.MNIST('.', download=True, train=True, transform=transform)
+# Download the MNIST dataset and apply the transformations
+trainset = datasets.MNIST(".", download=True, train=True, transform=transform)
+# Create a DataLoader to handle batching of the MNIST dataset
 trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
 
-# Step 2: Define the PyTorch Model
+
+# Define the PyTorch model
 class Net(nn.Module):
+    """
+    This class defines a simple feed-forward neural network for the MNIST dataset.
+
+    The network consists of three fully connected layers. The first two layers use
+    the ReLU activation function, and the final layer uses the log softmax function
+    for multi-class classification.
+    """
+
     def __init__(self):
         super().__init__()
-        self.fc1 = nn.Linear(28 * 28, 128)
-        self.fc2 = nn.Linear(128, 64)
-        self.fc3 = nn.Linear(64, 10)
-        
+        self.fc1 = nn.Linear(28 * 28, 128)  # First fully connected layer
+        self.fc2 = nn.Linear(128, 64)  # Second fully connected layer
+        self.fc3 = nn.Linear(64, 10)  # Final fully connected layer
+
     def forward(self, x):
-        x = x.view(-1, 28 * 28)
-        x = nn.functional.relu(self.fc1(x))
-        x = nn.functional.relu(self.fc2(x))
-        x = self.fc3(x)
+        x = x.view(-1, 28 * 28)  # Flatten the input images
+        x = nn.functional.relu(self.fc1(x))  # Apply ReLU activation function
+        x = nn.functional.relu(self.fc2(x))  # Apply ReLU activation function
+        x = self.fc3(x)  # Final linear layer; log softmax is applied on return
         return nn.functional.log_softmax(x, dim=1)
 
-# Step 3: Train the Model
+
+# Initialize the model, optimizer, and loss function
 model = Net()
 optimizer = optim.SGD(model.parameters(), lr=0.01)
 criterion = nn.NLLLoss()
 
-# Training loop
+# Train the model
 epochs = 3
 for epoch in range(epochs):
     for images, labels in trainloader:
-        optimizer.zero_grad()
-        output = model(images)
-        loss = criterion(output, labels)
-        loss.backward()
-        optimizer.step()
+        optimizer.zero_grad()  # Reset the gradients
+        output = model(images)  # Forward pass
+        loss = criterion(output, labels)  # Compute the loss
+        loss.backward()  # Backward pass
+        optimizer.step()  # Update the weights
 
-torch.save(model.state_dict(), "mnist_model.pth")
\ No newline at end of file
+# Save the trained model
+torch.save(model.state_dict(), "mnist_model.pth")