Merge pull request #134 from stephenhky/develop

stephenhky · web-flow · commit eb166263a5fa · 2022-08-29T23:03:01.000-04:00
10x faster inference of classes
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -36,22 +36,22 @@ jobs:
   py37:
     <<: *shared
     docker:
-      - image: circleci/python:3.7
+      - image: cimg/python:3.7
 
   py38:
     <<: *shared
     docker:
-      - image: circleci/python:3.8
+      - image: cimg/python:3.8
 
   py39:
     <<: *shared
     docker:
-      - image: circleci/python:3.9
+      - image: cimg/python:3.9
 
   py310:
     <<: *shared
     docker:
-      - image: circleci/python:3.10
+      - image: cimg/python:3.10
 
 
 workflows:
diff --git a/README.md b/README.md
@@ -92,6 +92,8 @@ If you would like to contribute, feel free to submit the pull requests. You can
 
 ## News
 
+* 08/29/2022: `shorttext` 1.5.6 released.
+* 05/28/2022: `shorttext` 1.5.5 released.
 * 12/15/2021: `shorttext` 1.5.4 released.
 * 07/11/2021: `shorttext` 1.5.3 released.
 * 07/06/2021: `shorttext` 1.5.2 released.
diff --git a/docs/conf.py b/docs/conf.py
@@ -58,7 +58,7 @@
 # The short X.Y version.
 version = u'1.5'
 # The full version, including alpha/beta/rc tags.
-release = u'1.5.5'
+release = u'1.5.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/news.rst b/docs/news.rst
@@ -1,6 +1,7 @@
 News
 ====
 
+* 08/29/2022: `shorttext` 1.5.6 released.
 * 05/28/2022: `shorttext` 1.5.5 released.
 * 12/15/2021: `shorttext` 1.5.4 released.
 * 07/11/2021: `shorttext` 1.5.3 released.
@@ -74,6 +75,11 @@ News
 What's New
 ----------
 
+Release 1.5.6 (August 29, 2022)
+-------------------------------
+
+* Speeding up inference of `VarNNEmbeddedVecClassifier`. (Acknowledgement: Ritesh Agrawal)
+
 Release 1.5.5 (May 28, 2022)
 -----------------------------
 
diff --git a/setup.py b/setup.py
@@ -35,7 +35,7 @@ def test_requirements():
 
 
 setup(name='shorttext',
-      version='1.5.5',
+      version='1.5.6a1',
       description="Short Text Mining",
       long_description=package_description(),
       long_description_content_type='text/markdown',
diff --git a/shorttext/classifiers/embed/nnlib/VarNNEmbedVecClassification.py b/shorttext/classifiers/embed/nnlib/VarNNEmbedVecClassification.py
@@ -4,11 +4,13 @@
 import warnings
 
 import numpy as np
+import pandas as pd
 
 import shorttext.utils.kerasmodel_io as kerasio
 import shorttext.utils.classification_exceptions as e
 from shorttext.utils import tokenize
 from shorttext.utils.compactmodel_io import CompactIOMachine
+from typing import Union, List, Dict, Any
 
 
 class VarNNEmbeddedVecClassifier(CompactIOMachine):
@@ -208,7 +210,7 @@ def shorttext_to_matrix(self, shorttext):
             matrix[i] = self.word_to_embedvec(tokens[i])
         return matrix
 
-    def score(self, shorttext):
+    def score(self, shorttexts: Union[str, List[str]], model_params: Dict[str, Any] = {}):
         """ Calculate the scores for all the class labels for the given short sentence.
 
         Given a short sentence, calculate the classification scores for all class labels,
@@ -217,25 +219,33 @@ def score(self, shorttext):
         If neither :func:`~train` nor :func:`~loadmodel` was run, it will raise `ModelNotTrainedException`.
 
         :param shorttext: a short sentence
+        :param model_params: additional keyword arguments forwarded to the model's `predict` call
         :return: a dictionary with keys being the class labels, and values being the corresponding classification scores
         :type shorttext: str
         :rtype: dict
         :raise: ModelNotTrainedException
         """
+        is_multiple = True
+        if isinstance(shorttexts, str):
+            is_multiple = False
+            shorttexts = [shorttexts]
+        
         if not self.trained:
             raise e.ModelNotTrainedException()
 
         # retrieve vector
-        matrix = np.array([self.shorttext_to_matrix(shorttext)])
+        matrix = np.array([self.shorttext_to_matrix(shorttext) for shorttext in shorttexts])
 
         # classification using the neural network
-        predictions = self.model.predict(matrix)
+        predictions = self.model.predict(matrix, **model_params)
 
         # wrangle output result
-        scoredict = {classlabel: predictions[0][idx]
-                     for idx, classlabel in zip(range(len(self.classlabels)), self.classlabels)}
-
-        return scoredict
+        df = pd.DataFrame(predictions, columns=self.classlabels)
+        
+        if not is_multiple:
+            return df.to_dict('records')[0]
+        
+        return df.to_dict('records')
 
 
 def load_varnnlibvec_classifier(wvmodel, name, compact=True, vecsize=None):