
Commit 0722bee

submit
1 parent cebff4d commit 0722bee

5 files changed: +80 -75 lines changed

data_loader.py (+2 -2)

@@ -10,7 +10,7 @@
 
 TRAIN_PATH = '/seg_train/seg_train/'
 VALID_PATH = '/seg_dev/seg_dev/'
-TEST_PATH = '/seg_test/'
+# TEST_PATH = '/seg_test/'
 VGG19_PATH = 'vgg19_features/'
 CLASS_NAMES = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
 RANDOM_SEED = 42
@@ -68,7 +68,7 @@ def load_test(self):
         self.get_vgg_features('test')
         self.X_test, self.y_test = [], []
 
-        cur_path = self.opt.data_path + TEST_PATH  # TODO
+        cur_path = self.opt.data_path
         img_paths = os.listdir(cur_path)
         img_paths.sort(key=lambda f: int(re.sub('\D', '', f)))
         csv_reader = csv.reader(open(self.opt.test_label_path), delimiter=';')
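Note on the change above: load_test sorts file names numerically before pairing them with the labels read via test_label_path, because os.listdir returns names in arbitrary order and a plain lexicographic sort would place '10.jpg' before '2.jpg'. A minimal standalone sketch of that sort key (the file names here are invented for illustration):

import re

# hypothetical file names, in the arbitrary order os.listdir might return them
img_paths = ['10.jpg', '2.jpg', '1.jpg', '21.jpg']

print(sorted(img_paths))  # lexicographic: ['1.jpg', '10.jpg', '2.jpg', '21.jpg']

# strip every non-digit and compare the remaining integer, as load_test does
img_paths.sort(key=lambda f: int(re.sub(r'\D', '', f)))
print(img_paths)          # numeric: ['1.jpg', '2.jpg', '10.jpg', '21.jpg']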

neural_network.py (+55 -54)
@@ -17,17 +17,18 @@ def __init__(self, hidden_sizes=None, activation_func=None, error_func=None, lr=
         self.error_func = error_func
         self.lr = lr
         self.output_size = output_size
-        self.init_weights(input_size, hidden_sizes, output_size)
+        self.init_weights(input_size, hidden_sizes, output_size)  # initialize random weights
 
     def init_weights(self, input_size, hidden_sizes, output_size):
-        # get all dimensions in the network
+        # get all layer sizes in the network
        layer_sizes = np.concatenate((input_size, hidden_sizes, output_size), axis=None).astype(int)
 
         for i in range(self.layer_num):
-            stdv = 1. / math.sqrt(layer_sizes[i])
-            self.net['w_' + str(i + 1)] = np.random.uniform(-stdv, stdv, (layer_sizes[i], layer_sizes[i + 1])).astype(
+            std = 1. / math.sqrt(layer_sizes[i])
+            # use float32 to avoid overflow in the upcoming calculations
+            self.net['w_' + str(i + 1)] = np.random.uniform(-std, std, (layer_sizes[i], layer_sizes[i + 1])).astype(
                 'float32')
-            self.net['b_' + str(i + 1)] = np.random.uniform(-stdv, stdv, layer_sizes[i + 1]).astype('float32')
+            self.net['b_' + str(i + 1)] = np.random.uniform(-std, std, layer_sizes[i + 1]).astype('float32')
 
     # Activation functions - Start
     def sigmoid(self, z):
@@ -40,7 +41,7 @@ def tanh(self, z):
     def relu(self, z):
         return np.maximum(0, z).astype('float32')
 
-    # Activation functions and derivatives - End
+    # Activation functions - End
 
     # Activation functions derivatives - Start
     def d_sigmoid(self, a):
@@ -55,60 +56,53 @@ def d_relu(self, z):
     # Activation functions derivatives - End
 
     def softmax(self, z):
-        shifted = z - np.max(z, axis=1, keepdims=True)
-        z = np.sum(np.exp(shifted), axis=1, keepdims=True)
-        log_probs = shifted - np.log(z)
+        shift_z = z - np.max(z, axis=1, keepdims=True)  # shift for stable softmax
+        exp_z = np.sum(np.exp(shift_z), axis=1, keepdims=True)
+        log_probs = shift_z - np.log(exp_z)
         probs = np.exp(log_probs)
         return log_probs, probs
 
     # Error functions - Start
-    def sum_neg_log_likelihood(self, z, y):
-        log_probs, probs = self.softmax(z)
-        n = z.shape[0]
+    def sum_neg_log_likelihood(self, y, probs, log_probs, n):
         loss = -np.sum(log_probs[np.arange(n), y]) / n
         d_x = probs.copy()
-        d_x[np.arange(n), y] -= 1
-        d_x /= n
+        d_x[np.arange(n), y] = d_x[np.arange(n), y] - 1
+        d_x = d_x / n
         return loss, d_x
 
-    def mean_squared_err(self, z, y):
-        _, probs = self.softmax(z)
-        n = z.shape[0]
+    def sum_squared_err(self, y, probs, n):
         one_hot_y = np.zeros((n, self.output_size), dtype='float32')
         one_hot_y[np.arange(n), y] = 1.
-        loss = np.sum(np.power(one_hot_y - probs, 2)) / n
-        d_x = -2 * (one_hot_y - probs) / n
+        loss = np.sum(np.power(one_hot_y - probs, 2))
+        d_x = -2 * (one_hot_y - probs)
         return loss, d_x
 
-    def sum_squared_err(self, z, y):
-        _, probs = self.softmax(z)
-        n = z.shape[0]
+    def mean_squared_err(self, y, probs, n):
         one_hot_y = np.zeros((n, self.output_size), dtype='float32')
         one_hot_y[np.arange(n), y] = 1.
-        loss = np.sum(np.power(one_hot_y - probs, 2))
-        d_x = -2 * (one_hot_y - probs)
+        loss = np.sum(np.power(one_hot_y - probs, 2)) / n
+        d_x = -2 * (one_hot_y - probs) / n
         return loss, d_x
 
     # Error functions - End
 
     # Forward - Start
-    def forward_pass(self, X, valid=False):
+    def forward_pass(self, X):
         inputs = X
-        self.caches = []
+        self.layer_history = []  # keep forward pass information for backward pass
 
-        for i in range(self.layer_num - 1):
-            inputs, cache = self.activated_forward(inputs, self.net['w_' + str(i + 1)], self.net['b_' + str(i + 1)])
-            self.caches.append(cache)
+        for i in range(self.layer_num - 1):  # apply forward pass and activation for each layer except last one
+            inputs, history = self.activated_forward(inputs, self.net['w_' + str(i + 1)], self.net['b_' + str(i + 1)])
+            self.layer_history.append(history)
 
-        scores, cache = self.forward(inputs, self.net['w_' + str(self.layer_num)], self.net['b_' + str(self.layer_num)])
-        if not valid:
-            self.caches.append(cache)
+        scores, history = self.forward(inputs, self.net['w_' + str(self.layer_num)],
+                                       self.net['b_' + str(self.layer_num)])
+        self.layer_history.append(history)
         return scores
 
     def forward(self, x, w, b):
-        z = x.reshape(x.shape[0], -1).dot(w) + b
-        cache = (x, w, b)
-        return z, cache
+        z = x.reshape(x.shape[0], -1).dot(w) + b  # linear formula computation
+        return z, (x, w, b)
 
     def activate(self, z):
         if self.activation_func == 'sigmoid':
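Note on the softmax above: subtracting the row maximum before exponentiating (the log-sum-exp shift) leaves the resulting probabilities unchanged but keeps np.exp from overflowing when the scores are large. A small self-contained check of that identity, independent of the class:

import numpy as np

def stable_softmax(z):
    shift_z = z - np.max(z, axis=1, keepdims=True)   # row max becomes 0, so exp() stays finite
    exp_sum = np.sum(np.exp(shift_z), axis=1, keepdims=True)
    log_probs = shift_z - np.log(exp_sum)
    return log_probs, np.exp(log_probs)

scores = np.array([[1000., 1001., 1002.],   # naive exp(1000.) would overflow to inf
                   [-1., 0., 1.]])
_, probs = stable_softmax(scores)
print(probs)              # finite values in every row
print(probs.sum(axis=1))  # [1. 1.] -- each row is still a valid distribution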
@@ -120,35 +114,41 @@ def activate(self, z):
         return activated
 
     def activated_forward(self, x, w, b):
-        z, fwd_cache = self.forward(x, w, b)
+        z, fwd_history = self.forward(x, w, b)
         activated = self.activate(z)
-        return activated, (fwd_cache, z, activated)
+        return activated, (fwd_history, z, activated)
 
     # Forward - End
 
     # Backward - Start
     def backward_pass(self, scores, y):
         gradients = {}
+        log_probs, probs = self.softmax(scores)
+        n = scores.shape[0]
+
+        # get loss and derivative of error wrt output
         if self.error_func == 'log':
-            loss, d_o = self.sum_neg_log_likelihood(scores, y)
+            loss, d_o = self.sum_neg_log_likelihood(y, probs, log_probs, n)
         elif self.error_func == 'sse':
-            loss, d_o = self.sum_squared_err(scores, y)
+            loss, d_o = self.sum_squared_err(y, probs, n)
         elif self.error_func == 'mse':
-            loss, d_o = self.mean_squared_err(scores, y)
+            loss, d_o = self.mean_squared_err(y, probs, n)
 
-        d_o, d_w, d_b = self.backward(d_o, self.caches.pop())
+        # apply backward pass to compute gradients
+        d_o, d_w, d_b = self.backward(d_o, self.layer_history.pop())
         gradients['w_' + str(self.layer_num)] = d_w
         gradients['b_' + str(self.layer_num)] = d_b
 
         for i in range(self.layer_num - 2, -1, -1):
-            d_o, d_w, d_b = self.activated_backward(d_o, self.caches.pop())
+            d_o, d_w, d_b = self.activated_backward(d_o, self.layer_history.pop())
             gradients['w_' + str(i + 1)] = d_w
             gradients['b_' + str(i + 1)] = d_b
 
         return loss, gradients
 
-    def backward(self, d_o, cache):
-        x, w, b = cache
+    def backward(self, d_o, history):
+        x, w, b = history
+        # compute gradients of input, weight and bias
         d_x = d_o.dot(w.T).reshape(x.shape)
         d_w = x.reshape(x.shape[0], -1).T.dot(d_o)
         d_b = np.sum(d_o, axis=0)
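Note on backward_pass above: for the 'log' objective the gradient flowing back from the loss is just the softmax probabilities with 1 subtracted at the true class, scaled by 1/n, and the weight gradient is x transposed dotted with that. A minimal single-layer sketch of the same arithmetic on random data (standalone NumPy, not the repository's class):

import numpy as np

np.random.seed(0)
n, d, c = 4, 5, 3                                   # batch size, input dim, class count
x = np.random.randn(n, d).astype('float32')
w = np.random.uniform(-0.1, 0.1, (d, c)).astype('float32')
b = np.zeros(c, dtype='float32')
y = np.array([0, 2, 1, 2])

z = x.dot(w) + b                                    # forward: linear scores
shift_z = z - z.max(axis=1, keepdims=True)
log_probs = shift_z - np.log(np.exp(shift_z).sum(axis=1, keepdims=True))
probs = np.exp(log_probs)

loss = -np.sum(log_probs[np.arange(n), y]) / n      # negative log likelihood
d_o = probs.copy()
d_o[np.arange(n), y] -= 1
d_o /= n                                            # gradient of the loss w.r.t. the scores

d_w = x.T.dot(d_o)                                  # same shapes backward() would produce
d_b = np.sum(d_o, axis=0)
print(round(float(loss), 4), d_w.shape, d_b.shape)  # scalar loss, (5, 3), (3,)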
@@ -161,20 +161,20 @@ def d_activate(self, d_o, z, a):
             d_x = self.d_tanh(a)
         elif self.activation_func == 'relu':
             d_x = self.d_relu(z)
-        return d_x * d_o
+        return d_x * d_o  # apply chain rule
 
-    def activated_backward(self, d_o, cache):
-        fwd_cache, z_cache, a_cache = cache
-        d_a = self.d_activate(d_o, z_cache, a_cache)
-        return self.backward(d_a, fwd_cache)
+    def activated_backward(self, d_o, history):
+        fwd_history, z_history, a_history = history
+        d_a = self.d_activate(d_o, z_history, a_history)
+        return self.backward(d_a, fwd_history)
 
     def update_weights(self, gradients):
-        for param, w in self.net.items():
-            updated_w = self.sgd(w, gradients[param])
+        for param, w in self.net.items():  # update each parameter in the network
+            updated_w = self.gradient_descent(w, gradients[param])
             self.net[param] = updated_w
 
-    def sgd(self, w, d_w):
-        w -= self.lr * d_w
+    def gradient_descent(self, w, d_w):
+        w = w - self.lr * d_w  # apply gradient descent to update the weights
         return w
 
     # Backward - End
@@ -185,7 +185,8 @@ def train(self, X, y):
         return loss, gradients
 
     def predict(self, X):
-        return self.forward_pass(X, valid=True)
+        scores = self.forward_pass(X)
+        return np.argmax(scores, axis=1)  # predict the label with max score
 
     def extract_model(self):
         name = '%dnn_lr=%0.3f_err=%s_act=%s_vgg.pkl' % (self.layer_num, self.lr, self.error_func, self.activation_func)

options.py (+5 -4)

@@ -8,17 +8,18 @@ def initialize(self, parser):
 
         # train
         parser.add_argument('-model_name', type=str, default='model.txt', help='name of the model to save')
-        parser.add_argument('-hidden_layer_num', type=int, default=1, help='number of hidden layers')
-        parser.add_argument('-hidden_unit_num', type=int, default=300, help='number of hidden units in hidden layers')
+        parser.add_argument('-hidden_layer_num', type=int, default=2, help='number of hidden layers')
+        parser.add_argument('-hidden_unit_num', type=int, default=500, help='number of hidden units in hidden layers')
         parser.add_argument('-epoch_num', type=int, default=50, help='number of epochs')
-        parser.add_argument('-batch_size', type=int, default=32, help='batch size for mini-batch gradient descent')
+        parser.add_argument('-batch_size', type=int, default=64, help='batch size for mini-batch gradient descent')
         parser.add_argument('-learning_rate', type=float, default=0.1, help='learning rate for gradient descent')
         parser.add_argument('-reduce_lr', action='store_true', help='if specified, reduce learning rate')
         parser.add_argument('-activation_func', type=str, default='relu', help='sigmoid | tanh | relu')
         parser.add_argument('-objective_func', type=str, default='log', help='log | sse | mse')
 
         # test
-        parser.add_argument('-model_path', type=str, default='./model/sl_nn_', help='path to saved model')
+        parser.add_argument('-model_path', type=str, default='./model/30x30_best.pkl', help='path to saved model')
+        parser.add_argument('-test_label_path', type=str, default='./data/test_label.csv', help='path to test labels')
 
         self.parser = parser
         return parser
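Note on the new defaults above: with no command-line flags the model is now a 2-hidden-layer, 500-unit network trained with batches of 64, and testing reads labels from ./data/test_label.csv. A quick standalone way to confirm what a script receives from the defaults (only the arguments touched by this commit are reproduced; the real Options class defines more):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-hidden_layer_num', type=int, default=2)
parser.add_argument('-hidden_unit_num', type=int, default=500)
parser.add_argument('-batch_size', type=int, default=64)
parser.add_argument('-model_path', type=str, default='./model/30x30_best.pkl')
parser.add_argument('-test_label_path', type=str, default='./data/test_label.csv')

opt = parser.parse_args([])   # empty argv -> defaults only
print(opt.hidden_layer_num, opt.hidden_unit_num, opt.batch_size)  # 2 500 64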

train.py (+15 -12)
@@ -3,12 +3,12 @@
 from data_loader import DataLoader
 from options import Options
 from neural_network import NeuralNetwork
-from utils import validate, plot_loss, plot_acc
+from utils import validate, plot_loss, plot_acc, plot_parameters
 from statistics import mean
 
 np.random.seed(12345)
-train_loss_cache, mini_batch_loss_cache = [], []
-train_acc_cache, valid_acc_cache = [], []
+train_losses, mini_batch_losses = [], []
+train_accs, valid_accs = [], []
 lr_decay = 0.95
 
 
@@ -17,8 +17,8 @@ def mini_batch_gd(start_idx, end_idx):
     X_batch = X_train[start_idx:end_idx]
     y_batch = y_train[start_idx:end_idx]
     loss, gradients = nn.train(X_batch, y_batch)  # train network with batches
-    mini_batch_loss_cache.append(loss)
     nn.update_weights(gradients)  # update parameters
+    mini_batch_losses.append(loss)
     return loss
 
 
4747
start_idx = i * batch_size
4848
end_idx = (i + 1) * batch_size
4949
loss = mini_batch_gd(start_idx, end_idx)
50-
# print('Iteration %d in Epoch %d - Loss: %f' % (i+1, epoch+1, loss))
50+
print('Iteration %d in Epoch %d - Loss: %f' % (i + 1, epoch + 1, loss))
5151

5252
if opt.reduce_lr:
5353
nn.lr *= lr_decay
5454

5555
train_acc = validate(nn, X_train, y_train)
56-
train_acc_cache.append(train_acc)
56+
train_accs.append(train_acc)
5757
print('Epoch %d/%d - Train acc: %0.2f' % (epoch + 1, epoch_num, train_acc))
5858

5959
valid_acc = validate(nn, X_valid, y_valid)
60-
valid_acc_cache.append(valid_acc)
60+
valid_accs.append(valid_acc)
6161
print('Epoch %d/%d - Validation acc: %0.2f' % (epoch + 1, epoch_num, valid_acc))
6262

6363
print("-------------------")
64-
train_loss_cache.append(mean(mini_batch_loss_cache))
65-
mini_batch_loss_cache = []
64+
train_losses.append(mean(mini_batch_losses))
65+
mini_batch_losses = []
6666

67-
nn.extract_model()
68-
plot_loss(opt, train_loss_cache)
69-
plot_acc(opt, train_acc_cache, valid_acc_cache)
67+
# for i in range(6):
68+
# plot_parameters(nn.net['w_1'][:, i], 30, 30)
69+
70+
# nn.extract_model()
71+
plot_loss(opt, train_losses)
72+
plot_acc(opt, train_accs, valid_accs)
7073
# write_file(opt, train_loss_cache, train_acc_cache, valid_acc_cache)
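Note on the loop above: mini-batches are plain slices of X_train, and with -reduce_lr the learning rate is multiplied by lr_decay = 0.95 once per epoch, so over the default 50 epochs it shrinks to roughly 7.7% of its starting value. A small standalone check of both details (batch size and epoch count taken from the defaults in options.py):

# learning-rate decay as applied by `nn.lr *= lr_decay` at the end of each epoch
lr, lr_decay, epoch_num = 0.1, 0.95, 50
for _ in range(epoch_num):
    lr *= lr_decay
print(round(lr, 5))            # 0.00769 -> about 7.7% of the initial 0.1

# the slices handed to mini_batch_gd for the first few iterations
batch_size = 64
for i in range(3):
    start_idx, end_idx = i * batch_size, (i + 1) * batch_size
    print(start_idx, end_idx)  # 0 64 / 64 128 / 128 192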

utils.py (+3 -3)

@@ -8,18 +8,18 @@
 
 def validate(nn, X, y, valid_batch_size=100):
     preds = []
-
     valid_batch_num = X.shape[0] // valid_batch_size
     if X.shape[0] % valid_batch_size != 0:
         valid_batch_num += 1
 
     for i in range(valid_batch_num):
         start_idx = i * valid_batch_size
         end_idx = (i + 1) * valid_batch_size
-        scores = nn.predict(X[start_idx:end_idx])
-        preds.append(np.argmax(scores, axis=1))
+        pred = nn.predict(X[start_idx:end_idx])  # get predictions for current batch of data
+        preds.append(pred)
 
     preds = np.concatenate(preds, axis=None)
+    # plot_conf_matrix(y, preds)
     correct_classified = np.count_nonzero(preds == y)
     acc = 100 * (correct_classified / len(y))  # calculate the accuracy
     print("%d/%d samples are correctly classified - Accuracy: %0.2f" % (correct_classified, len(y), acc))
