# multilayer_perceptron.py: Machine learning implementation of a Multilayer Perceptron classifier from scratch.
#
# Submitted by: [enter your full name here] -- [enter your IU username here]
#
# Based on skeleton code by CSCI-B 551 Fall 2021 Course Staff
import numpy as np
from utils import identity, sigmoid, tanh, relu, softmax, cross_entropy, one_hot_encoding
class MultilayerPerceptron:
"""
A class representing the machine learning implementation of a Multilayer Perceptron classifier from scratch.
Attributes:
n_hidden
An integer representing the number of neurons in the one hidden layer of the neural network.
hidden_activation
A string representing the activation function of the hidden layer. The possible options are
{'identity', 'sigmoid', 'tanh', 'relu'}.
n_iterations
An integer representing the number of gradient descent iterations performed by the fit(X, y) method.
learning_rate
A float representing the learning rate used when updating neural network weights during gradient descent.
_output_activation
An attribute representing the activation function of the output layer. This is set to the softmax function
defined in utils.py.
_loss_function
An attribute representing the loss function used to compute the loss for each iteration. This is set to the
cross_entropy function defined in utils.py.
_loss_history
A Python list of floats representing the history of the loss function for every 20 iterations that the
algorithm runs for. The first index of the list is the loss function computed at iteration 0, the second
index is the loss function computed at iteration 20, and so on and so forth. Once all the iterations are
complete, the _loss_history list should have length n_iterations / 20.
_X
A numpy array of shape (n_samples, n_features) representing the input data used when fitting the model. This
is set in the _initialize(X, y) method.
_y
A numpy array of shape (n_samples, n_outputs) representing the one-hot encoded target class values for the
input data used when fitting the model.
_h_weights
A numpy array of shape (n_features, n_hidden) representing the weights applied between the input layer
features and the hidden layer neurons.
_h_bias
A numpy array of shape (1, n_hidden) representing the weights applied between the input layer bias term
and the hidden layer neurons.
_o_weights
A numpy array of shape (n_hidden, n_outputs) representing the weights applied between the hidden layer
neurons and the output layer neurons.
_o_bias
A numpy array of shape (1, n_outputs) representing the weights applied between the hidden layer bias term
neuron and the output layer neurons.
Methods:
_initialize(X, y)
Function called at the beginning of fit(X, y) that performs one-hot encoding for the target class values and
initializes the neural network weights (_h_weights, _h_bias, _o_weights, and _o_bias).
fit(X, y)
Fits the model to the provided data matrix X and targets y.
predict(X)
Predicts class target values for the given test data matrix X using the fitted classifier model.
"""
def __init__(self, n_hidden=16, hidden_activation='sigmoid', n_iterations=1000, learning_rate=0.01):
# Create a dictionary linking the hidden_activation strings to the functions defined in utils.py
activation_functions = {'identity': identity,
'sigmoid': sigmoid, 'tanh': tanh, 'relu': relu}
# Check if the provided arguments are valid
if not isinstance(n_hidden, int) \
or hidden_activation not in activation_functions \
or not isinstance(n_iterations, int) \
or not isinstance(learning_rate, float):
raise ValueError(
'The provided class parameter arguments are not recognized.')
# Define and setup the attributes for the MultilayerPerceptron model object
self.n_hidden = n_hidden
self.hidden_activation = activation_functions[hidden_activation]
self.n_iterations = n_iterations
self.learning_rate = learning_rate
self._output_activation = softmax
self._loss_function = cross_entropy
self._loss_history = []
self._X = None
self._y = None
self._h_weights = None
self._h_bias = None
self._o_weights = None
self._o_bias = None
def _initialize(self, X, y):
"""
Function called at the beginning of fit(X, y) that performs one hot encoding for the target class values and
initializes the neural network weights (_h_weights, _h_bias, _o_weights, and _o_bias).
Args:
X: A numpy array of shape (n_samples, n_features) representing the input data.
y: A numpy array of shape (n_samples,) representing the true class values for each sample in the input data.
Returns:
None.
"""
np.random.seed(42)
self._X = X
self._y = one_hot_encoding(y)
n_x = self._X.shape[1] # size of input layer
n_h = self.n_hidden
n_y = self._y.shape[1] # size of output layer
self._h_weights = np.random.randn(n_x, n_h) * 0.01
self._h_bias = np.random.randn(1, n_h) * 0.01
self._o_weights = np.random.randn(n_h, n_y) * 0.01
self._o_bias = np.random.randn(1, n_y) * 0.01
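        # Hypothetical shape walk-through (illustrative values only): with 150 samples,
        # 4 features, 3 classes, and n_hidden=16, the arrays are _X (150, 4),
        # _y (150, 3), _h_weights (4, 16), _h_bias (1, 16), _o_weights (16, 3),
        # and _o_bias (1, 3).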
def fit(self, X, y):
"""
Fits the model to the provided data matrix X and targets y and stores the cross-entropy loss every 20
iterations.
Args:
X: A numpy array of shape (n_samples, n_features) representing the input data.
y: A numpy array of shape (n_samples,) representing the true class values for each sample in the input data.
Returns:
None.
"""
        self._initialize(X, y)
        for epoch in range(self.n_iterations):
            # Forward pass over the full training set (batch gradient descent).
            self.forward_propagation(self._X)
            # Record the cross-entropy loss every 20 iterations, starting at iteration 0.
            if epoch % 20 == 0:
                cost = self._loss_function(self.A2, self._y)
                self._loss_history.append(cost)
            # Backward pass: compute gradients and update all weights and biases once.
            self.backward_propagation(self._X, self._y)
    def forward_propagation(self, X):
        """
        Computes and caches the pre-activations (Z1, Z2) and activations (A1, A2)
        of the hidden and output layers for the given input data X.
        """
        W1 = self._h_weights
        b1 = self._h_bias
        W2 = self._o_weights
        b2 = self._o_bias
        # Hidden layer: linear combination followed by the chosen hidden activation.
        self.Z1 = np.dot(X, W1) + b1
        self.A1 = self.hidden_activation(self.Z1)
        # Output layer: linear combination followed by softmax class probabilities.
        self.Z2 = np.dot(self.A1, W2) + b2
        self.A2 = self._output_activation(self.Z2)
    def backward_propagation(self, X, y):
        """
        Computes the gradients of the cross-entropy loss with respect to all weights
        and biases and applies one gradient descent update.
        """
        W2 = self._o_weights
        A1 = self.A1
        A2 = self.A2
        # Output layer -> hidden layer: for a softmax output combined with
        # cross-entropy loss, the gradient of the loss with respect to Z2
        # simplifies to (A2 - y), so no extra softmax derivative is applied.
        loss_gradient_o_h = A2 - y
        grad_o = A1.T.dot(loss_gradient_o_h)
        grad_ob = np.sum(loss_gradient_o_h, axis=0, keepdims=True)
        # Hidden layer -> input layer: backpropagate through W2 and the hidden
        # activation. This assumes the utils.py activation functions return the
        # derivative with respect to their input when derivative=True, so the
        # cached pre-activation Z1 is used here rather than the activation A1.
        gradient_hidden_input = np.dot(loss_gradient_o_h, W2.T) * self.hidden_activation(self.Z1, derivative=True)
        grad_h = X.T.dot(gradient_hidden_input)
        grad_hb = np.sum(gradient_hidden_input, axis=0, keepdims=True)
        # Gradient descent updates.
        self._o_weights -= self.learning_rate * grad_o
        self._o_bias -= self.learning_rate * grad_ob
        self._h_weights -= self.learning_rate * grad_h
        self._h_bias -= self.learning_rate * grad_hb
def predict(self, X):
"""
Predicts class target values for the given test data matrix X using the fitted classifier model.
Args:
X: A numpy array of shape (n_samples, n_features) representing the test data.
Returns:
A numpy array of shape (n_samples,) representing the predicted target class values for the given test data.
"""
        self.forward_propagation(X)
        # The predicted class is the column index with the highest softmax probability,
        # assuming one_hot_encoding in utils.py maps class k to column k.
        return self.A2.argmax(axis=1)
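# Minimal usage sketch (not part of the assignment skeleton). It assumes the
# utils.py helpers imported above behave as described in the class docstring and
# that scikit-learn is available to provide a toy dataset; the dataset choice and
# train/test split below are purely illustrative.
if __name__ == '__main__':
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    data = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.25, random_state=42)

    mlp = MultilayerPerceptron(n_hidden=16, hidden_activation='relu',
                               n_iterations=1000, learning_rate=0.01)
    mlp.fit(X_train, y_train)
    predictions = mlp.predict(X_test)
    print('Test accuracy:', np.mean(predictions == y_test))
    print('Loss every 20 iterations:', mlp._loss_history)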