Vosk model loader (#871) (#941)

* methods get_model_by_name, get_model_by_lang, get_model were added into the model class * importing modules changed to using components; introduced constant MODELS_HOME_DIR; simplified code * added new model folders into init; changed samples and transcriber bin for new model loader * changed back in cli.py lang arg to args.lang * added 3 directories instead of 1 to check for models * cli.py: added 3 args instead of 1 for model; __init__.py: changed script get_model_path for run get_model_by_name/lang inside current directory * deleted default env var * cli.py: changed arg_name; __init__.py: changed const name, changed model loading only for last directory * deleted unused method * changed by_name, by_lang methods, added download_model method * deleted env variable initialization * deleted print() * deleted unused modules * added progress_bar, added folder AppData/Local/vosk for model search * changed download_model method; added my_hook method
alphacep · May 24, 2022 · 4209f3a · 4209f3a
1 parent ff2c80d
commit 4209f3a
Show file tree

Hide file tree

Showing 15 changed files with 97 additions and 119 deletions.
diff --git a/python/example/test_alternatives.py b/python/example/test_alternatives.py
@@ -8,16 +8,12 @@
 
 SetLogLevel(0)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 wf = wave.open(sys.argv[1], "rb")
 if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
     print ("Audio file must be WAV format mono PCM.")
     exit (1)
 
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, wf.getframerate())
 rec.SetMaxAlternatives(10)
 rec.SetWords(True)

diff --git a/python/example/test_empty.py b/python/example/test_empty.py
@@ -4,7 +4,7 @@
 import sys
 import json
 
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, 8000)
 
 res = json.loads(rec.FinalResult())

diff --git a/python/example/test_ffmpeg.py b/python/example/test_ffmpeg.py
@@ -8,12 +8,8 @@
 
 SetLogLevel(0)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 sample_rate=16000
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, sample_rate)
 
 process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i',

diff --git a/python/example/test_microphone.py b/python/example/test_microphone.py
@@ -37,9 +37,6 @@ def callback(indata, frames, time, status):
 parser.add_argument(
     '-f', '--filename', type=str, metavar='FILENAME',
     help='audio file to store recording to')
-parser.add_argument(
-    '-m', '--model', type=str, metavar='MODEL_PATH',
-    help='Path to the model')
 parser.add_argument(
     '-d', '--device', type=int_or_str,
     help='input device (numeric ID or substring)')
@@ -48,18 +45,12 @@ def callback(indata, frames, time, status):
 args = parser.parse_args(remaining)
 
 try:
-    if args.model is None:
-        args.model = "model"
-    if not os.path.exists(args.model):
-        print ("Please download a model for your language from https://alphacephei.com/vosk/models")
-        print ("and unpack as 'model' in the current folder.")
-        parser.exit(0)
     if args.samplerate is None:
         device_info = sd.query_devices(args.device, 'input')
         # soundfile expects an int, sounddevice provides a float:
         args.samplerate = int(device_info['default_samplerate'])
 
-    model = vosk.Model(args.model)
+    model = vosk.Model(lang="en-us")
 
     if args.filename:
         dump_fn = open(args.filename, "wb")

diff --git a/python/example/test_nlsml.py b/python/example/test_nlsml.py
@@ -7,16 +7,12 @@
 
 SetLogLevel(0)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 wf = wave.open(sys.argv[1], "rb")
 if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
     print ("Audio file must be WAV format mono PCM.")
     exit (1)
 
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, wf.getframerate())
 rec.SetMaxAlternatives(10)
 rec.SetNLSML(True)

diff --git a/python/example/test_reset.py b/python/example/test_reset.py
@@ -8,16 +8,12 @@
 
 SetLogLevel(0)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 wf = wave.open(sys.argv[1], "rb")
 if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
     print ("Audio file must be WAV format mono PCM.")
     exit (1)
 
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, wf.getframerate())
 
 while True:

diff --git a/python/example/test_simple.py b/python/example/test_simple.py
@@ -7,16 +7,12 @@
 
 SetLogLevel(0)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 wf = wave.open(sys.argv[1], "rb")
 if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
     print ("Audio file must be WAV format mono PCM.")
     exit (1)
 
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, wf.getframerate())
 rec.SetWords(True)
 rec.SetPartialWords(True)

diff --git a/python/example/test_speaker.py b/python/example/test_speaker.py
@@ -7,13 +7,8 @@
 import os
 import numpy as np
 
-model_path = "model"
 spk_model_path = "model-spk"
 
-if not os.path.exists(model_path):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as {} in the current folder.".format(model_path))
-    exit (1)
-
 if not os.path.exists(spk_model_path):
     print ("Please download the speaker model from https://alphacephei.com/vosk/models and unpack as {} in the current folder.".format(spk_model_path))
     exit (1)
@@ -24,7 +19,7 @@
     exit (1)
 
 # Large vocabulary free form recognition
-model = Model(model_path)
+model = Model(lang="en-us")
 spk_model = SpkModel(spk_model_path)
 #rec = KaldiRecognizer(model, wf.getframerate(), spk_model)
 rec = KaldiRecognizer(model, wf.getframerate())

diff --git a/python/example/test_srt.py b/python/example/test_srt.py
@@ -11,12 +11,8 @@
 
 SetLogLevel(-1)
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 sample_rate=16000
-model = Model("model")
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, sample_rate)
 rec.SetWords(True)
 

diff --git a/python/example/test_text.py b/python/example/test_text.py
@@ -5,12 +5,7 @@
 import json
 import os
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
-
-model = Model("model")
+model = Model(lang="en-us")
 
 # Large vocabulary free form recognition
 rec = KaldiRecognizer(model, 16000)

diff --git a/python/example/test_webvtt.py b/python/example/test_webvtt.py
@@ -10,13 +10,8 @@
 
 SetLogLevel(-1)
 
-if not os.path.exists('model'):
-    print('Please download the model from https://alphacephei.com/vosk/models'
-          ' and unpack as `model` in the current folder.')
-    exit(1)
-
 sample_rate = 16000
-model = Model('model')
+model = Model(lang="en-us")
 rec = KaldiRecognizer(model, sample_rate)
 rec.SetWords(True)
 

diff --git a/python/example/test_words.py b/python/example/test_words.py
@@ -5,16 +5,12 @@
 import os
 import wave
 
-if not os.path.exists("model"):
-    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
-    exit (1)
-
 wf = wave.open(sys.argv[1], "rb")
 if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
     print ("Audio file must be WAV format mono PCM.")
     exit (1)
 
-model = Model("model")
+model = Model(lang="en-us")
 
 # You can also specify the possible word or phrase list as JSON list, the order doesn't have to be strict
 rec = KaldiRecognizer(model, wf.getframerate(), '["oh one two three four five six seven eight nine zero", "[unk]"]')

diff --git a/python/vosk/__init__.py b/python/vosk/__init__.py
@@ -1,7 +1,18 @@
 import os
 import sys
 
+from requests import get
+from urllib.request import urlretrieve
+from zipfile import ZipFile
+from re import match
+from pathlib import Path
 from .vosk_cffi import ffi as _ffi
+from tqdm import tqdm
+
+
+MODEL_PRE_URL = 'https://alphacephei.com/vosk/models/'
+MODEL_LIST_URL = MODEL_PRE_URL + 'model-list.json'
+MODEL_DIRS = [os.getenv('VOSK_MODEL_PATH'), Path('/usr/share/vosk'), Path.home() / 'AppData/Local/vosk', Path.home() / '.cache/vosk']
 
 def open_dll():
     dlldir = os.path.abspath(os.path.dirname(__file__))
@@ -21,10 +32,12 @@ def open_dll():
 _c = open_dll()
 
 class Model(object):
-
-    def __init__(self, model_path):
-        self._handle = _c.vosk_model_new(model_path.encode('utf-8'))
-
+    def __init__(self, model_path=None, model_name=None, lang=None):
+        if model_path != None:
+            self._handle = _c.vosk_model_new(model_path.encode('utf-8'))
+        else:
+            model_path = self.get_model_path(model_name, lang)
+            self._handle = _c.vosk_model_new(model_path.encode('utf-8'))
         if self._handle == _ffi.NULL:
             raise Exception("Failed to create a model")
 
@@ -34,6 +47,68 @@ def __del__(self):
     def vosk_model_find_word(self, word):
         return _c.vosk_model_find_word(self._handle, word.encode('utf-8'))
 
+    def get_model_path(self, model_name, lang):
+        if model_name is None:
+            model_path = self.get_model_by_lang(lang)
+        else:
+            model_path = self.get_model_by_name(model_name)
+        return str(model_path)
+
+    def get_model_by_name(self, model_name):
+        for directory in MODEL_DIRS:
+            if directory is None or not Path(directory).exists():
+                continue
+            model_file_list = os.listdir(directory)
+            model_file = [model for model in model_file_list if model == model_name]
+            if model_file != []:
+                return Path(directory, model_file[0])
+        response = get(MODEL_LIST_URL)
+        result_model = [model['name'] for model in response.json() if model['name'] == model_name]
+        if result_model == []:
+            raise Exception("model name %s does not exist" % (model_name))
+        else:
+            self.download_model(Path(directory, result_model[0]))
+            return Path(directory, result_model[0])
+
+    def get_model_by_lang(self, lang):
+        for directory in MODEL_DIRS:
+            if directory is None or not Path(directory).exists():
+                continue
+            model_file_list = os.listdir(directory)
+            model_file = [model for model in model_file_list if match(f"vosk-model(-small)?-{lang}", model)]
+            if model_file != []:
+                return Path(directory, model_file[0])
+        response = get(MODEL_LIST_URL)
+        result_model = [model['name'] for model in response.json() if model['lang'] == lang and model['type'] == 'small' and model['obsolete'] == 'false']
+        if result_model == []:
+            raise Exception("lang %s does not exist" % (lang))
+        else:
+            self.download_model(Path(directory, result_model[0]))
+            return Path(directory, result_model[0])
+
+    def download_model(self, model_name):
+        if not MODEL_DIRS[3].exists():
+            MODEL_DIRS[3].mkdir()
+        with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1,
+                desc=(MODEL_PRE_URL + str(model_name.name) + '.zip').split('/')[-1]) as t:
+            reporthook = self.my_hook(t)
+            urlretrieve(MODEL_PRE_URL + str(model_name.name) + '.zip', str(model_name) + '.zip', 
+                reporthook=reporthook, data=None)
+            t.total = t.n
+            with ZipFile(str(model_name) + '.zip', 'r') as model_ref:
+                model_ref.extractall(model_name.parent)
+            Path(str(model_name) + '.zip').unlink()
+
+    def my_hook(self, t):
+        last_b = [0]
+        def update_to(b=1, bsize=1, tsize=None):
+            if tsize not in (None, -1):
+                t.total = tsize
+            displayed = t.update((b - last_b[0]) * bsize)
+            last_b[0] = b
+            return displayed
+        return update_to
+
 class SpkModel(object):
 
     def __init__(self, model_path):

diff --git a/python/vosk/transcriber/cli.py b/python/vosk/transcriber/cli.py
@@ -7,6 +7,7 @@
 from vosk.transcriber.transcriber import Transcriber
 from multiprocessing.dummy import Pool
 from pathlib import Path
+from vosk import Model
 
 
 parser = argparse.ArgumentParser(
@@ -21,10 +22,10 @@
         '-list_languages', default=False, action='store_true',
         help='list available languages')
 parser.add_argument(
-        '-model_name',  default='vosk-model-small-en-us-0.15', type=str,
+        '-model_name', type=str,
         help='select model by name')
 parser.add_argument(
-        '-lang', type=str,
+        '-lang', default='en-us', type=str,
         help='select model by language')
 parser.add_argument(
         '-input', type=str,
@@ -64,7 +65,7 @@ def main(args):
     transcriber = Transcriber()
     transcriber.check_args(args)
     if args.input:
-        model = transcriber.get_model(args)
+        model = Model(model_path=args.model, model_name=args.model_name, lang=args.lang)
         if Path(args.input).is_dir() and Path(args.output).is_dir():
             task_list = transcriber.get_task_list(args)
             with Pool() as pool:
@@ -102,4 +103,4 @@ def cli():  # entrypoint used in setup.py
     logging.info(f'''Execution time: {sec} sec, {mcsec} mcsec; xRT: {format(tot_samples / 16000.0 / float(diff_end_start), '.3f')}''')
 
 if __name__ == "__main__":
-    main()
+    main(args)