
Commit da3de42

partial progress

1 parent 2f7ebb0 commit da3de42

13 files changed: +62 -214 lines

Assignment4/.gitignore (+2 -1)

@@ -3,4 +3,5 @@
 *.log
 *.synctex.gz
 *problem.pdf
-*.pyc
+*.pyc
+**__pycache__*

Assignment4/src/Criterion.py (+3 -2)

@@ -34,13 +34,14 @@ def forward(self, input, target):
 
     def backward(self, input, target):
         batch_size, num_classes = input.size()
+        # print("input",input)
         inputExp = input.exp()
-        print(input)
+
         probabilities = inputExp/(inputExp.sum(dim=1).unsqueeze(1))
         probabilities[torch.isnan(probabilities)] = 1
         probabilities = probabilities/(probabilities.sum(dim=1).unsqueeze(1))
         labels = torch.eye(num_classes, device=device, dtype=dtype)[target]
 
         grads = (probabilities - labels)/batch_size
-        print(probabilities)
+        # print("probabilities",probabilities,labels)
         return grads
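
Note on the hunk above: the probabilities[torch.isnan(probabilities)] = 1 line papers over NaNs that appear when input.exp() overflows for large logits. A common numerically stable alternative (a sketch only, not what this commit implements; the function name and shapes are illustrative) subtracts the per-row maximum before exponentiating, which leaves the softmax and the (probabilities - labels)/batch_size gradient unchanged:

import torch

def softmax_xent_grad(input, labels_onehot):
    # Softmax is invariant to subtracting a per-row constant, so shifting by
    # the row max keeps exp() from overflowing without changing the result.
    shifted = input - input.max(dim=1, keepdim=True).values
    expd = shifted.exp()
    probabilities = expd / expd.sum(dim=1, keepdim=True)
    # Same gradient form as Criterion.backward: (softmax - one_hot) / batch_size
    return (probabilities - labels_onehot) / input.size(0)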

Assignment4/src/Dropout.py (-42): This file was deleted.

Assignment4/src/LeakyRelu.py (-33): This file was deleted.

Assignment4/src/Linear.py (-48): This file was deleted.

Assignment4/src/Linear.pyc (-2.32 KB): Binary file not shown.

Assignment4/src/Model.py (+2 -2)

@@ -127,7 +127,7 @@ def trainModel(self, learningRate, batchSize, epochs, trainingData,unique_labels
 
             predictions=torch.tensor(predictions)
 
-            print("Training Loss",sum(crit_list))
+            print("Training Loss",sum(crit_list)/len(crit_list))
             print("Training Accuracy: ", (torch.sum(predictions == trainingLabels).item()*100.0/trainingLabels.size()[0]))
             if i%5==0 and i>0:
                 if type(validationData)!=type(None):
@@ -137,7 +137,7 @@ def trainModel(self, learningRate, batchSize, epochs, trainingData,unique_labels
                         predictions.append(self.classify(validationData[j])[0])
                         crit_list.append(criterion.forward(self.forward(validationData[j]), validationLabels[j]).item())
                     predictions=torch.tensor(predictions)
-                    print("validation Loss",sum(crit_list))
+                    print("validation Loss",sum(crit_list)/len(crit_list))
                     print("Validation Accuracy: ", (torch.sum(predictions == validationLabels).item()*100.0/validationLabels.size()[0]))
 
 

Assignment4/src/RNN.py (+43 -30)

@@ -2,23 +2,24 @@
 import torch
 import math
 import ReLU
+import Tanh
 
 dtype = torch.double
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 class RNN:
-    def __init__(self, input_dim, hidden_dim,output_dim,mx=1.0e10):
+    def __init__(self, input_dim, hidden_dim,output_dim,mx=1.0e4):
         self.max = mx
         self.input_dim = input_dim
         self.hidden_dim = hidden_dim
         self.output_dim = output_dim
 
-        self.weights_hh = torch.randn(hidden_dim, hidden_dim, dtype=dtype, device=device)*math.sqrt(2.0/self.hidden_dim)
-        self.weights_hx = torch.randn(hidden_dim, input_dim, dtype=dtype, device=device)*math.sqrt(2.0/self.hidden_dim)
-        self.weights_hy = torch.randn(output_dim, hidden_dim, dtype=dtype, device=device)*math.sqrt(2.0/self.hidden_dim)
-        self.bias_h = torch.randn(hidden_dim, 1, dtype=dtype, device=device)*math.sqrt(2.0/self.hidden_dim) # hidden_dim X 1
-        self.bias_y = torch.randn(output_dim, 1, dtype=dtype, device=device)*math.sqrt(2.0/self.hidden_dim) # output_dim X 1
+        self.weights_hh = torch.randn(hidden_dim, hidden_dim, dtype=dtype, device=device)*0.01 #self.hidden_dim)
+        self.weights_hx = torch.randn(hidden_dim, input_dim, dtype=dtype, device=device)*0.01 #self.hidden_dim)
+        self.weights_hy = torch.randn(output_dim, hidden_dim, dtype=dtype, device=device)*0.01 #self.hidden_dim)
+        self.bias_h = torch.randn(hidden_dim, 1, dtype=dtype, device=device)*0.01 #self.hidden_dim) # hidden_dim X 1
+        self.bias_y = torch.randn(output_dim, 1, dtype=dtype, device=device)*0.01 #self.output_dim) # output_dim X 1
 
         self.y = None
         self.h = None
@@ -32,15 +33,14 @@ def __init__(self, input_dim, hidden_dim,output_dim,mx=1.0e10):
         self.grad_bias_y = None
         self.grad_inp = None
         self.grad_prev = None
-        self.r = ReLU.ReLU()
+        self.r = Tanh.Tanh()
 
     def forward(self, input,isTrain=False):
         # if istrain:
         self.y =[]
         # print(input)
         self.h =[torch.zeros(input[0].size()[0] , self.hidden_dim, dtype=dtype, device=device)]
         self.h_bef_act = [torch.zeros(input[0].size()[0] , self.hidden_dim, dtype=dtype, device=device)]
-        self.prev_h = []
         self.x = input
 
         for i in range(len(input)):
@@ -74,9 +74,9 @@ def backward(self, input, gradOutput):
             self.grad_bias_y = self.grad_bias_y.add(grad_y.sum(dim=0).reshape(self.output_dim,1))
             self.grad_Why = self.grad_Why.add(grad_y.transpose(0,1).mm(self.h[i])) # output X hidden
             # print(self.h_bef_act[i],grad_ht)
-            grad_act = self.r.backward(self.h_bef_act[i],grad_ht) + grad_y.mm(self.weights_hy) # batch X hidden
+            grad_act = self.r.backward(self.h_bef_act[i],grad_ht + grad_y.mm(self.weights_hy)) # batch X hidden
             self.grad_bias_h = self.grad_bias_h.add(grad_act.sum(dim=0).reshape(self.hidden_dim,1)) # hidden X 1
-            self.grad_Whh = self.grad_Whh.add(grad_act.transpose(0,1).mm(self.h[i-1]))
+            self.grad_Whh = self.grad_Whh.add(grad_act.transpose(0,1).mm(self.h_bef_act[i-1]))
             # print(self.grad_Whx.size(),grad_act.size(),input[i].size())
             self.grad_Whx = self.grad_Whx.add(grad_act.transpose(0,1).mm(input[i])) # hidden X input
 
@@ -92,28 +92,41 @@ def clearGradParam(self):
         self.grad_bias_h = torch.zeros(self.hidden_dim, 1, dtype=dtype, device=device)
         self.grad_bias_y = torch.zeros(self.output_dim, 1, dtype=dtype, device=device)
 
+    def clip(self,M):
+        M[M>self.max] = self.max
+        M[M<-self.max] = -self.max
+        return M
+
     def updateParam(self, learningRate, alpha=0, regularizer=0):
         # print('update')
-        # print(self.grad_Whx)
-
-        self.grad_Whh[self.grad_Whh>self.max] = self.max
-        self.grad_Whx[self.grad_Whx>self.max] = self.max
-        self.grad_Why[self.grad_Why>self.max] = self.max
-        self.grad_bias_h[self.grad_bias_h>self.max] = self.max
-        self.grad_bias_y[self.grad_bias_y>self.max] = self.max
-
-        self.grad_Whh[self.grad_Whh<-self.max] = -self.max
-        self.grad_Whx[self.grad_Whx<-self.max] = -self.max
-        self.grad_Why[self.grad_Why<-self.max] = -self.max
-        self.grad_bias_h[self.grad_bias_h<-self.max] = -self.max
-        self.grad_bias_y[self.grad_bias_y<-self.max] = -self.max
-
-
-        self.weights_hh -= self.grad_Whh*learningRate
-        self.weights_hx -= self.grad_Whx*learningRate
-        self.weights_hy -= self.grad_Why*learningRate
-        self.bias_h -= self.grad_bias_h*learningRate
-        self.bias_y -= self.grad_bias_y*learningRate
+
+
+        # self.grad_Whh[self.grad_Whh>self.max] = self.max
+        # self.grad_Whx[self.grad_Whx>self.max] = self.max
+        # self.grad_Why[self.grad_Why>self.max] = self.max
+        # self.grad_bias_h[self.grad_bias_h>self.max] = self.max
+        # self.grad_bias_y[self.grad_bias_y>self.max] = self.max
+
+        # self.grad_Whh[self.grad_Whh<-self.max] = -self.max
+        # self.grad_Whx[self.grad_Whx<-self.max] = -self.max
+        # self.grad_Why[self.grad_Why<-self.max] = -self.max
+        # self.grad_bias_h[self.grad_bias_h<-self.max] = -self.max
+        # self.grad_bias_y[self.grad_bias_y<-self.max] = -self.max
+
+
+        grad_Whh=self.clip(self.grad_Whh)
+        grad_Whx=self.clip(self.grad_Whx)
+        grad_Why=self.clip(self.grad_Why)
+        grad_bias_h=self.clip(self.grad_bias_h)
+        grad_bias_y=self.clip(self.grad_bias_y)
+
+        self.weights_hh -= (self.grad_Whh*learningRate+2*regularizer*self.weights_hh)
+        self.weights_hx -= (self.grad_Whx*learningRate+2*regularizer*self.weights_hx)
+        self.weights_hy -= (self.grad_Why*learningRate+2*regularizer*self.weights_hy)
+        self.bias_h -= (self.grad_bias_h*learningRate+2*regularizer*self.bias_h)
+        self.bias_y -= (self.grad_bias_y*learningRate+2*regularizer*self.bias_y)
+
+        # print(self.weights_hh)
 
         # self.W += (self.momentumW -2*regularizer*self.W)
         # self.B += (self.momentumB -2*regularizer*self.B)
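
Note on the new clip helper above: it clamps every gradient entry to [-self.max, self.max] in place, so the grad_* locals it returns in updateParam are the same tensors as the self.grad_* objects used in the weight update. Two equivalent formulations are sketched below under the assumption that the gradients are plain tensors as in this class (the function names are illustrative): element-wise clipping via clamp_, and clipping by global norm, which is often preferred for recurrent nets because it preserves the gradient's direction:

import torch

def clip_by_value(grad, mx):
    # Element-wise clip; does the same job as the masked assignments in RNN.clip
    return grad.clamp_(-mx, mx)

def clip_by_global_norm(grads, max_norm):
    # Rescale all gradients together when their combined L2 norm exceeds max_norm
    total_norm = torch.sqrt(sum((g ** 2).sum() for g in grads))
    if total_norm > max_norm:
        for g in grads:
            g.mul_(max_norm / total_norm)
    return grads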

Assignment4/src/ReLU.py (-32): This file was deleted.

Assignment4/src/ReLU.pyc (-1.42 KB): Binary file not shown.

Assignment4/src/Tanh.py (+1)

@@ -19,6 +19,7 @@ def forward(self, input, isTrain=False):
         # print("Tanh Layer Forward")
 
     def backward(self, input, gradOutput):
+        self.temp = torch.exp(2.0*input)
         self.gradInput = gradOutput
         self.gradInput *= (4.0*self.temp)/(self.temp + 1.0)**2.0
         # print("Tanh Layer backward")
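
Note on the added line above: recomputing self.temp = torch.exp(2.0*input) makes backward independent of whatever forward last stored in self.temp. Algebraically, 4*exp(2x)/(exp(2x)+1)^2 equals 1 - tanh(x)^2, so an equivalent form that avoids overflow of exp(2x) for large inputs is the following (a sketch only, not the commit's code; the function name is illustrative):

import torch

def tanh_backward(input, gradOutput):
    # d/dx tanh(x) = 1 - tanh(x)^2, the same quantity as 4*exp(2x)/(exp(2x)+1)^2
    return gradOutput * (1.0 - torch.tanh(input) ** 2)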

Assignment4/src/test.py (-15): This file was deleted.

Assignment4/trainModel.py (+11 -9)

@@ -38,7 +38,7 @@ def loadData(dataPath,labelsPath):
 flattened = [val for sublist in Data for val in sublist]
 unique_labels=list(np.unique(flattened))
 
-TRAINING_SIZE = int(0.07*SIZE)
+TRAINING_SIZE = int(0.27*SIZE)
 VALIDATION_SIZE = int(0.3*SIZE)
 
 indices = list(range(SIZE))
@@ -64,13 +64,13 @@ def loadData(dataPath,labelsPath):
 
 
 
-batchSize = 20
+batchSize = 1
 epochs = 50
-lr = 0.00001
-reg = 0.000001
-al = 0.7
-leak = 0.01
-dropout_rate = 0.75
+lr = 1.0e-1
+reg = 0
+# al = 0.7
+# leak = 0.01
+# dropout_rate = 0.75
 
 neuralNetwork = Model.Model()
 # neuralNetwork.addLayer(Linear.Linear(108*108,1024))
@@ -83,8 +83,10 @@ def loadData(dataPath,labelsPath):
 # neuralNetwork.addLayer(Dropout.Dropout(dropout_rate))
 # neuralNetwork.addLayer(LeakyRelu.LeakyRelu(leak))
 # neuralNetwork.addLayer(Linear.Linear(512,6))
-neuralNetwork.addLayer(RNN.RNN(len(unique_labels),64,2))
-neuralNetwork.trainModel(lr, batchSize, epochs, trainingData,unique_labels, trainingLabels, al,reg)
+neuralNetwork.addLayer(RNN.RNN(len(unique_labels),256,2))
+# neuralNetwork.addLayer(RNN.RNN(64,128,16))
+# neuralNetwork.addLayer(RNN.RNN(16,64,2))
+neuralNetwork.trainModel(lr, batchSize, epochs, trainingData,unique_labels, trainingLabels,reg)
 
 
 # directory = "./"+args.modelName+"/"
