Skip to content

Commit

Permalink
push main
Browse files Browse the repository at this point in the history
  • Loading branch information
matin-ghorbani committed Mar 30, 2024
1 parent b49b0b5 commit a62ec9f
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 0 deletions.
91 changes: 91 additions & 0 deletions session_61_assignment_7.11/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
### Without Dropout layer
<table>
<tr>
<td>Feature Vector Dimension</td>
<td>Train Loss</td>
<td>Train Accuracy</td>
<td>Test Loss</td>
<td>Test Accuracy</td>
<td>Inference Time</td>
</tr>
<tr>
<td>50d</td>
<td>0.3673</td>
<td>0.9394</td>
<td>0.4503</td>
<td>0.8571</td>
<td>0.0686s</td>
</tr>
<tr>
<td>100d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>
<tr>
<td>200d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>
<tr>
<td>300d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>

</table>

<br>

### With Dropout layer
<table>
<tr>
<td>Feature Vector Dimension</td>
<td>Train Loss</td>
<td>Train Accuracy</td>
<td>Test Loss</td>
<td>Test Accuracy</td>
<td>Inference Time</td>
</tr>
<tr>
<td>50d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>
<tr>
<td>100d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>
<tr>
<td>200d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>
<tr>
<td>300d</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...s</td>
</tr>

</table>
32 changes: 32 additions & 0 deletions session_61_assignment_7.11/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from time import time
from argparse import ArgumentParser, BooleanOptionalAction

from text_classifier import EmojiTextClassifier
from tqdm import tqdm

# Command-line entry point: load a trained model, predict the emoji for one
# sentence and optionally measure the average prediction time.
args = ArgumentParser()
args.add_argument('--model', type=str,
                  required=True, help='The model path')
args.add_argument('--dimension', type=int,
                  default=50, help='The dimension of feature vectors')
args.add_argument('--vectors-file', type=str, default=None,
                  help='The feature vectors file path '
                       '(default: ./glove_6B/glove.6B.<dimension>d.txt)')
args.add_argument('--sentence', type=str,
                  required=True, help='The sentence to test the model')
# BooleanOptionalAction already produces a bool; combining it with `type=`
# is deprecated since Python 3.12 and rejected from 3.14 on.
args.add_argument('--infer', default=True,
                  action=BooleanOptionalAction, help='Whether to inferences the model with your sentence or not')
args.add_argument('--n-infer', type=int,
                  default=100, help='Number of inferences on your sentence')

opt = args.parse_args()
if opt.vectors_file is None:
    # Keep the vectors file in step with the requested dimension.
    opt.vectors_file = f'./glove_6B/glove.6B.{opt.dimension}d.txt'

classifier = EmojiTextClassifier(opt.dimension)
classifier.load_feature_vectors(opt.vectors_file)
classifier.load_model(opt.model)
emoji = classifier.predict(opt.sentence)
print(emoji)

# Skip the benchmark when no runs were requested, so the average below can
# never divide by zero.
if opt.infer and opt.n_infer > 0:
    start_time = time()
    for _ in tqdm(range(opt.n_infer)):
        classifier.predict(opt.sentence)

    duration = time() - start_time
    print(f'\nAverage inference time: {duration / opt.n_infer}')
113 changes: 113 additions & 0 deletions session_61_assignment_7.11/text_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from keras.models import Sequential, load_model
from keras import layers
from keras.activations import softmax
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.callbacks import History, ModelCheckpoint

import numpy as np
import pandas as pd


class EmojiTextClassifier:
def __init__(self, dimension: int) -> None:
self.dimension = dimension
self.model = None
self.words_vectors: dict[str, np.ndarray] = {}

@staticmethod
def load_dataset(dataset_path: str) -> list[np.ndarray, np.ndarray]:
df = pd.read_csv(dataset_path)
x = np.array(df['sentence'])
y = np.array(df['label'], dtype=int)

return x, y

def load_feature_vectors(self, file_path: str) -> None:
line: str

file = open(file_path, encoding='utf-8')
for line in file:
line = line.strip().split(' ')
word = line[0]
vector = np.array(line[1:], dtype=np.float64)
self.words_vectors[word] = vector

def sentence_to_feature_vectors_avg(self, sentence: str) -> np.ndarray[np.floating] | None:
sentence = sentence.lower()
words = sentence.strip().split(' ')
sum_vectors = np.zeros((50, ))

try:
for word in words:
vector = self.words_vectors[word]
sum_vectors += vector
return sum_vectors / len(words)
except KeyError:
print(f'There is an unknown word in this sentence: "{sentence}"')

def convert_sentences_to_vectors(self, sentences: np.ndarray) -> np.ndarray:
sentences_avg = []
for sentence in sentences:
sentences_avg.append(
self.sentence_to_feature_vectors_avg(sentence)
)

return np.array(sentences_avg)

def build_model(self, with_dropout: bool = False) -> None:
if with_dropout:
self.model = Sequential([
layers.Dropout(.5, name='DropoutLayer'),
layers.Dense(5, activation=softmax,
input_shape=(self.dimension, ), name='OutputLayer')
])

else:
self.model = Sequential([
layers.Dense(5, activation=softmax,
input_shape=(self.dimension, ), name='OutputLayer')
])

def compile_model(self, optimizer=Adam(), loss=categorical_crossentropy) -> None:
self.model.compile(
optimizer=optimizer,
loss=loss,
metrics=['accuracy']
)

def train_model(self, x_train, y_train, epochs: int, model_path_to_save: str = 'best_emojis_classifier.keras') -> History:
check = ModelCheckpoint(model_path_to_save,
monitor='accuracy', save_best_only=True)

history: History = self.model.fit(
x_train,
y_train,
epochs=epochs,
callbacks=[check]
)

return history

@staticmethod
def evaluate(model_path, x_test, y_test) -> None:
model: Sequential = load_model(model_path)
print('\nEvaluating model...')
model.evaluate(x_test, y_test)

def load_model(self, model_path: str) -> None:
self.model: Sequential = load_model(model_path)

def predict(self, sentence: str) -> str:
sentence_avg = self.sentence_to_feature_vectors_avg(sentence)
sentence_avg = np.array([sentence_avg])

prediction = self.model.predict(sentence_avg)
y_hat = np.argmax(prediction)

return EmojiTextClassifier.covert_label_to_emoji(y_hat)

@staticmethod
def covert_label_to_emoji(label: int) -> str:
emojis = ['🧡', '⚾', '😃', '😔', '🍴']
return emojis[label]
56 changes: 56 additions & 0 deletions session_61_assignment_7.11/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from argparse import ArgumentParser, BooleanOptionalAction

from keras.callbacks import History
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from text_classifier import EmojiTextClassifier

# Command-line entry point: train the emoji classifier, evaluate the best
# checkpoint on the test set and optionally save accuracy/loss plots.
args = ArgumentParser()
args.add_argument('--train-dataset', type=str,
                  default='./dataset/train.csv', help='The train dataset path')
args.add_argument('--test-dataset', type=str,
                  default='./dataset/test.csv', help='The test dataset path')
args.add_argument('--dimension', type=int,
                  default=50, help='The dimension of feature vectors')
args.add_argument('--vectors-file', type=str, default=None,
                  help='The feature vectors file path '
                       '(default: ./glove_6B/glove.6B.<dimension>d.txt)')
# BooleanOptionalAction already produces a bool; combining it with `type=`
# is deprecated since Python 3.12 and rejected from 3.14 on.
args.add_argument('--dropout',
                  default=False, action=BooleanOptionalAction, help='Add dropout layer to network')
args.add_argument('--model-save', type=str,
                  default='best_emojis_classifier.keras', help='The best model path to save')
args.add_argument('--epochs', type=int,
                  default=200, help='The number of epochs to train the model')
args.add_argument('--save-plots',
                  default=True, action=BooleanOptionalAction, help='Save the training information plots')

opt = args.parse_args()
if opt.vectors_file is None:
    # Keep the vectors file in step with --dimension; a mismatch between the
    # two makes the model input size disagree with the sentence vectors.
    opt.vectors_file = f'./glove_6B/glove.6B.{opt.dimension}d.txt'

classifier = EmojiTextClassifier(opt.dimension)
x_train, y_train = EmojiTextClassifier.load_dataset(opt.train_dataset)
x_test, y_test = EmojiTextClassifier.load_dataset(opt.test_dataset)

classifier.load_feature_vectors(opt.vectors_file)
x_train = classifier.convert_sentences_to_vectors(x_train)
x_test = classifier.convert_sentences_to_vectors(x_test)

# The network has 5 outputs; fixing num_classes keeps the one-hot width at 5
# even if a split happens not to contain the highest label.
y_train = to_categorical(y_train, num_classes=5)
y_test = to_categorical(y_test, num_classes=5)

classifier.build_model(opt.dropout)
classifier.compile_model()
history: History = classifier.train_model(
    x_train, y_train, opt.epochs, opt.model_save)

EmojiTextClassifier.evaluate(opt.model_save, x_test, y_test)

if opt.save_plots:
    # subplots() returns (figure, axes); unpack it directly.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    ax1.plot(history.history['accuracy'])
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Accuracy')

    ax2.plot(history.history['loss'])
    ax2.set_xlabel('Epochs')
    ax2.set_ylabel('Loss')
    # Report the dropout setting that was actually used for this run.
    fig.suptitle(f'Emojis Classification With Dropout: {opt.dropout}')
    fig.savefig(f'Emojis_Classification_{opt.epochs}ep_dropout_{opt.dropout}.png')

0 comments on commit a62ec9f

Please sign in to comment.