# multilayer_perceptron.py: Machine learning implementation of a Multilayer Perceptron classifier from scratch.
#
# Submitted by: [enter your full name here] -- [enter your IU username here]
#
# Based on skeleton code by CSCI-B 551 Fall 2021 Course Staff
import numpy as np
from utils import identity, sigmoid, tanh, relu, softmax, cross_entropy, one_hot_encoding
class MultilayerPerceptron:
"""
A class representing the machine learning implementation of a Multilayer Perceptron classifier from scratch.
Attributes:
n_hidden
An integer representing the number of neurons in the one hidden layer of the neural network.
hidden_activation
A string representing the activation function of the hidden layer. The possible options are
{'identity', 'sigmoid', 'tanh', 'relu'}.
n_iterations
An integer representing the number of gradient descent iterations performed by the fit(X, y) method.
learning_rate
A float representing the learning rate used when updating neural network weights during gradient descent.
_output_activation
An attribute representing the activation function of the output layer. This is set to the softmax function
defined in utils.py.
_loss_function
An attribute representing the loss function used to compute the loss for each iteration. This is set to the
cross_entropy function defined in utils.py.
_loss_history
A Python list of floats representing the history of the loss function for every 20 iterations that the
algorithm runs for. The first index of the list is the loss function computed at iteration 0, the second
index is the loss function computed at iteration 20, and so on and so forth. Once all the iterations are
complete, the _loss_history list should have length n_iterations / 20.
_X
A numpy array of shape (n_samples, n_features) representing the input data used when fitting the model. This
is set in the _initialize(X, y) method.
_y
A numpy array of shape (n_samples, n_outputs) representing the one-hot encoded target class values for the
input data used when fitting the model.
_h_weights
A numpy array of shape (n_features, n_hidden) representing the weights applied between the input layer
features and the hidden layer neurons.
_h_bias
A numpy array of shape (1, n_hidden) representing the weights applied between the input layer bias term
and the hidden layer neurons.
_o_weights
A numpy array of shape (n_hidden, n_outputs) representing the weights applied between the hidden layer
neurons and the output layer neurons.
_o_bias
A numpy array of shape (1, n_outputs) representing the weights applied between the hidden layer bias term
neuron and the output layer neurons.
Methods:
_initialize(X, y)
Function called at the beginning of fit(X, y) that performs one-hot encoding for the target class values and
initializes the neural network weights (_h_weights, _h_bias, _o_weights, and _o_bias).
fit(X, y)
Fits the model to the provided data matrix X and targets y.
predict(X)
Predicts class target values for the given test data matrix X using the fitted classifier model.
"""
def __init__(self, n_hidden=16, hidden_activation='sigmoid', n_iterations=1000, learning_rate=0.01):
# Create a dictionary linking the hidden_activation strings to the functions defined in utils.py
activation_functions = {'identity': identity,
'sigmoid': sigmoid, 'tanh': tanh, 'relu': relu}
# Check if the provided arguments are valid
if not isinstance(n_hidden, int) \
or hidden_activation not in activation_functions \
or not isinstance(n_iterations, int) \
or not isinstance(learning_rate, float):
raise ValueError(
'The provided class parameter arguments are not recognized.')
# Define and setup the attributes for the MultilayerPerceptron model object
self.n_hidden = n_hidden
self.hidden_activation = activation_functions[hidden_activation]
self.n_iterations = n_iterations
self.learning_rate = learning_rate
self._output_activation = softmax
self._loss_function = cross_entropy
self._loss_history = []
self._X = None
self._y = None
self._h_weights = None
self._h_bias = None
self._o_weights = None
self._o_bias = None
def _initialize(self, X, y):
"""
Function called at the beginning of fit(X, y) that performs one hot encoding for the target class values and
initializes the neural network weights (_h_weights, _h_bias, _o_weights, and _o_bias).
Args:
X: A numpy array of shape (n_samples, n_features) representing the input data.
y: A numpy array of shape (n_samples,) representing the true class values for each sample in the input data.
Returns:
None.
"""
np.random.seed(42)
self._X = X
self._y = one_hot_encoding(y)
n_x = self._X.shape[1] # size of input layer
n_h = self.n_hidden
n_y = self._y.shape[1] # size of output layer
self._h_weights = np.random.randn(n_x, n_h) * 0.01
self._h_bias = np.random.randn(1, n_h) * 0.01
self._o_weights = np.random.randn(n_h, n_y) * 0.01
self._o_bias = np.random.randn(1, n_y) * 0.01
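        # Hypothetical shape walk-through (illustrative values only): with 150 samples,
        # 4 features, 3 classes, and n_hidden=16, the arrays are _X (150, 4),
        # _y (150, 3), _h_weights (4, 16), _h_bias (1, 16), _o_weights (16, 3),
        # and _o_bias (1, 3).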
def fit(self, X, y):
"""
Fits the model to the provided data matrix X and targets y and stores the cross-entropy loss every 20
iterations.
Args:
X: A numpy array of shape (n_samples, n_features) representing the input data.
y: A numpy array of shape (n_samples,) representing the true class values for each sample in the input data.
Returns:
None.
"""
        self._initialize(X, y)
        for epoch in range(self.n_iterations):
            # Forward pass over the full training set (batch gradient descent).
            self.forward_propagation(self._X)
            # Record the cross-entropy loss every 20 iterations, starting at iteration 0.
            if epoch % 20 == 0:
                cost = self._loss_function(self.A2, self._y)
                self._loss_history.append(cost)
            # Backward pass: compute gradients and update all weights and biases once.
            self.backward_propagation(self._X, self._y)
    def forward_propagation(self, X):
        """
        Computes and caches the pre-activations (Z1, Z2) and activations (A1, A2)
        of the hidden and output layers for the given input data X.
        """
        W1 = self._h_weights
        b1 = self._h_bias
        W2 = self._o_weights
        b2 = self._o_bias
        # Hidden layer: linear combination followed by the chosen hidden activation.
        self.Z1 = np.dot(X, W1) + b1
        self.A1 = self.hidden_activation(self.Z1)
        # Output layer: linear combination followed by softmax class probabilities.
        self.Z2 = np.dot(self.A1, W2) + b2
        self.A2 = self._output_activation(self.Z2)
    def backward_propagation(self, X, y):
        """
        Computes the gradients of the cross-entropy loss with respect to all weights
        and biases and applies one gradient descent update.
        """
        W2 = self._o_weights
        A1 = self.A1
        A2 = self.A2
        # Output layer -> hidden layer: for a softmax output combined with
        # cross-entropy loss, the gradient of the loss with respect to Z2
        # simplifies to (A2 - y), so no extra softmax derivative is applied.
        loss_gradient_o_h = A2 - y
        grad_o = A1.T.dot(loss_gradient_o_h)
        grad_ob = np.sum(loss_gradient_o_h, axis=0, keepdims=True)
        # Hidden layer -> input layer: backpropagate through W2 and the hidden
        # activation. This assumes the utils.py activation functions return the
        # derivative with respect to their input when derivative=True, so the
        # cached pre-activation Z1 is used here rather than the activation A1.
        gradient_hidden_input = np.dot(loss_gradient_o_h, W2.T) * self.hidden_activation(self.Z1, derivative=True)
        grad_h = X.T.dot(gradient_hidden_input)
        grad_hb = np.sum(gradient_hidden_input, axis=0, keepdims=True)
        # Gradient descent updates.
        self._o_weights -= self.learning_rate * grad_o
        self._o_bias -= self.learning_rate * grad_ob
        self._h_weights -= self.learning_rate * grad_h
        self._h_bias -= self.learning_rate * grad_hb
def predict(self, X):
"""
Predicts class target values for the given test data matrix X using the fitted classifier model.
Args:
X: A numpy array of shape (n_samples, n_features) representing the test data.
Returns:
A numpy array of shape (n_samples,) representing the predicted target class values for the given test data.
"""
        self.forward_propagation(X)
        # The predicted class is the column index with the highest softmax probability,
        # assuming one_hot_encoding in utils.py maps class k to column k.
        return self.A2.argmax(axis=1)
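# Minimal usage sketch (not part of the assignment skeleton). It assumes the
# utils.py helpers imported above behave as described in the class docstring and
# that scikit-learn is available to provide a toy dataset; the dataset choice and
# train/test split below are purely illustrative.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    data = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42)

    mlp = MultilayerPerceptron(n_hidden=16, hidden_activation='relu',
                               n_iterations=1000, learning_rate=0.01)
    mlp.fit(X_train, y_train)
    predictions = mlp.predict(X_test)
    print('Test accuracy:', np.mean(predictions == y_test))
    print('Loss every 20 iterations:', mlp._loss_history)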