import torch
import math
import ReLU
+import Tanh

dtype = torch.double
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class RNN:
-    def __init__(self, input_dim, hidden_dim, output_dim, mx=1.0e10):
+    def __init__(self, input_dim, hidden_dim, output_dim, mx=1.0e4):
        self.max = mx
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

-        self.weights_hh = torch.randn(hidden_dim, hidden_dim, dtype=dtype, device=device) * math.sqrt(2.0 / self.hidden_dim)
-        self.weights_hx = torch.randn(hidden_dim, input_dim, dtype=dtype, device=device) * math.sqrt(2.0 / self.hidden_dim)
-        self.weights_hy = torch.randn(output_dim, hidden_dim, dtype=dtype, device=device) * math.sqrt(2.0 / self.hidden_dim)
-        self.bias_h = torch.randn(hidden_dim, 1, dtype=dtype, device=device) * math.sqrt(2.0 / self.hidden_dim)  # hidden_dim X 1
-        self.bias_y = torch.randn(output_dim, 1, dtype=dtype, device=device) * math.sqrt(2.0 / self.hidden_dim)  # output_dim X 1
+        self.weights_hh = torch.randn(hidden_dim, hidden_dim, dtype=dtype, device=device) * 0.01
+        self.weights_hx = torch.randn(hidden_dim, input_dim, dtype=dtype, device=device) * 0.01
+        self.weights_hy = torch.randn(output_dim, hidden_dim, dtype=dtype, device=device) * 0.01
+        self.bias_h = torch.randn(hidden_dim, 1, dtype=dtype, device=device) * 0.01  # hidden_dim X 1
+        self.bias_y = torch.randn(output_dim, 1, dtype=dtype, device=device) * 0.01  # output_dim X 1
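A note on the scale change above: the removed math.sqrt(2.0 / self.hidden_dim) factor is the He scaling commonly paired with ReLU units; with the switch to tanh in this commit, the parameters are instead drawn at a small fixed scale of 0.01. For comparison only (this helper is an illustration, not code from this repository), a Xavier/Glorot-style scale, the usual choice for tanh, would look like:

import math
import torch

def glorot_scaled(rows, cols, dtype=torch.double, device="cpu"):
    # Xavier/Glorot scaling: variance ~ 2 / (fan_in + fan_out); illustrative helper only,
    # the name and signature are assumptions, not part of this commit.
    return torch.randn(rows, cols, dtype=dtype, device=device) * math.sqrt(2.0 / (rows + cols))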

        self.y = None
        self.h = None
@@ -32,15 +33,14 @@ def __init__(self, input_dim, hidden_dim, output_dim, mx=1.0e10):
        self.grad_bias_y = None
        self.grad_inp = None
        self.grad_prev = None
-        self.r = ReLU.ReLU()
+        self.r = Tanh.Tanh()
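The Tanh module itself is not part of this diff, so its contents are an assumption; judging from the call self.r.backward(self.h_bef_act[i], ...) in backward below, it presumably mirrors the ReLU module with a forward/backward pair along these lines (a sketch, not the committed file):

import torch

class Tanh:
    def forward(self, x):
        # element-wise tanh of the pre-activation
        return torch.tanh(x)

    def backward(self, x, gradOutput):
        # derivative of tanh at the pre-activation x: 1 - tanh(x)^2, applied element-wise
        return gradOutput * (1.0 - torch.tanh(x) ** 2)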

    def forward(self, input, isTrain=False):
        # if istrain:
        self.y = []
        # print(input)
        self.h = [torch.zeros(input[0].size()[0], self.hidden_dim, dtype=dtype, device=device)]
        self.h_bef_act = [torch.zeros(input[0].size()[0], self.hidden_dim, dtype=dtype, device=device)]
-        self.prev_h = []
        self.x = input

        for i in range(len(input)):
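            # The loop body lies outside the changed hunks, so it is not shown in this
            # commit. A hedged reconstruction of what each step presumably computes,
            # based on the shapes and names used in backward():
            #   pre = input[i].mm(self.weights_hx.t()) + self.h[-1].mm(self.weights_hh.t()) + self.bias_h.t()   # batch X hidden
            #   self.h_bef_act.append(pre)
            #   self.h.append(self.r.forward(pre))                                                              # batch X hidden
            #   self.y.append(self.h[-1].mm(self.weights_hy.t()) + self.bias_y.t())                             # batch X output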
@@ -74,9 +74,9 @@ def backward(self, input, gradOutput):
            self.grad_bias_y = self.grad_bias_y.add(grad_y.sum(dim=0).reshape(self.output_dim, 1))
            self.grad_Why = self.grad_Why.add(grad_y.transpose(0, 1).mm(self.h[i]))  # output X hidden
            # print(self.h_bef_act[i], grad_ht)
-            grad_act = self.r.backward(self.h_bef_act[i], grad_ht) + grad_y.mm(self.weights_hy)  # batch X hidden
+            grad_act = self.r.backward(self.h_bef_act[i], grad_ht + grad_y.mm(self.weights_hy))  # batch X hidden
            self.grad_bias_h = self.grad_bias_h.add(grad_act.sum(dim=0).reshape(self.hidden_dim, 1))  # hidden X 1
-            self.grad_Whh = self.grad_Whh.add(grad_act.transpose(0, 1).mm(self.h[i - 1]))
+            self.grad_Whh = self.grad_Whh.add(grad_act.transpose(0, 1).mm(self.h_bef_act[i - 1]))
            # print(self.grad_Whx.size(), grad_act.size(), input[i].size())
            self.grad_Whx = self.grad_Whx.add(grad_act.transpose(0, 1).mm(input[i]))  # hidden X input

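The first change in this hunk folds the output-to-hidden term into the activation backward, which is the chain-rule-consistent form for h_t = tanh(a_t): the gradient reaching the pre-activation a_t is tanh'(a_t) applied element-wise to the sum of the gradient arriving from the next time step and grad_y @ W_hy from the output at this step, rather than tanh'(a_t) applied to only one of the two terms. A small standalone check against autograd (illustrative only, not part of the commit):

import torch

a = torch.randn(3, 4, dtype=torch.double, requires_grad=True)    # pre-activation
g_next = torch.randn(3, 4, dtype=torch.double)                   # gradient from the next time step
g_out = torch.randn(3, 4, dtype=torch.double)                    # gradient from the output at this step
torch.tanh(a).backward(g_next + g_out)
manual = (1.0 - torch.tanh(a.detach()) ** 2) * (g_next + g_out)  # what the '+' line computes
print(torch.allclose(a.grad, manual))                            # True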
@@ -92,28 +92,41 @@ def clearGradParam(self):
        self.grad_bias_h = torch.zeros(self.hidden_dim, 1, dtype=dtype, device=device)
        self.grad_bias_y = torch.zeros(self.output_dim, 1, dtype=dtype, device=device)

+    def clip(self, M):
+        M[M > self.max] = self.max
+        M[M < -self.max] = -self.max
+        return M
+
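clip clamps its argument element-wise to [-self.max, self.max] and does so in place (the boolean-mask assignments write into the tensor it was given), so the grad_Whh = self.clip(self.grad_Whh) calls below work even though the returned values are never used afterwards. An equivalent formulation, not part of this commit, using PyTorch's built-in in-place clamp:

def clip(self, M):
    # clamp every element of M into [-self.max, self.max], in place
    return M.clamp_(-self.max, self.max)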
    def updateParam(self, learningRate, alpha=0, regularizer=0):
        # print('update')
-        # print(self.grad_Whx)
-
-        self.grad_Whh[self.grad_Whh > self.max] = self.max
-        self.grad_Whx[self.grad_Whx > self.max] = self.max
-        self.grad_Why[self.grad_Why > self.max] = self.max
-        self.grad_bias_h[self.grad_bias_h > self.max] = self.max
-        self.grad_bias_y[self.grad_bias_y > self.max] = self.max
-
-        self.grad_Whh[self.grad_Whh < -self.max] = -self.max
-        self.grad_Whx[self.grad_Whx < -self.max] = -self.max
-        self.grad_Why[self.grad_Why < -self.max] = -self.max
-        self.grad_bias_h[self.grad_bias_h < -self.max] = -self.max
-        self.grad_bias_y[self.grad_bias_y < -self.max] = -self.max
-
-        self.weights_hh -= self.grad_Whh * learningRate
-        self.weights_hx -= self.grad_Whx * learningRate
-        self.weights_hy -= self.grad_Why * learningRate
-        self.bias_h -= self.grad_bias_h * learningRate
-        self.bias_y -= self.grad_bias_y * learningRate
+
+        grad_Whh = self.clip(self.grad_Whh)
+        grad_Whx = self.clip(self.grad_Whx)
+        grad_Why = self.clip(self.grad_Why)
+        grad_bias_h = self.clip(self.grad_bias_h)
+        grad_bias_y = self.clip(self.grad_bias_y)
+
+        self.weights_hh -= (self.grad_Whh * learningRate + 2 * regularizer * self.weights_hh)
+        self.weights_hx -= (self.grad_Whx * learningRate + 2 * regularizer * self.weights_hx)
+        self.weights_hy -= (self.grad_Why * learningRate + 2 * regularizer * self.weights_hy)
+        self.bias_h -= (self.grad_bias_h * learningRate + 2 * regularizer * self.bias_h)
+        self.bias_y -= (self.grad_bias_y * learningRate + 2 * regularizer * self.bias_y)
+
+        # print(self.weights_hh)

        # self.W += (self.momentumW - 2*regularizer*self.W)
        # self.B += (self.momentumB - 2*regularizer*self.B)
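For context, the class is driven entirely by clearGradParam, forward, backward, and updateParam. A minimal training-step sketch under assumed shapes (input is a list of length seq_len holding batch X input_dim tensors, gradOutput is a matching list of batch X output_dim gradients, and the loss, data, and forward's return value are assumptions, not from this repository):

seq_len, batch, n_in, n_hid, n_out = 5, 8, 10, 32, 4
rnn = RNN(n_in, n_hid, n_out)

xs = [torch.randn(batch, n_in, dtype=dtype, device=device) for _ in range(seq_len)]
targets = [torch.randn(batch, n_out, dtype=dtype, device=device) for _ in range(seq_len)]

rnn.clearGradParam()                                      # zero the accumulated gradients
ys = rnn.forward(xs, isTrain=True)                        # assumed to return the list of per-step outputs
grad_out = [2.0 * (y - t) for y, t in zip(ys, targets)]   # gradient of a per-step squared-error loss
rnn.backward(xs, grad_out)                                # accumulate grad_Whh, grad_Whx, grad_Why and biases
rnn.updateParam(1e-2, regularizer=1e-4)                   # SGD step with gradient clipping and L2 penalty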