Skip to content

Commit

Permalink
Vosk model loader(#871) (#941)
Browse files Browse the repository at this point in the history
* methods get_model_by_name, get_model_by_lang, get_model were added into the model class

* importing modules changed to using components; introduced constant MODELS_HOME_DIR; simplified code

* added new model folders into init; changed samples and transcriber bin for new model loader

* changed back in cli.py lang arg to args.lang

* added 3 directories instead of 1 to check for models

* cli.py: added 3 args instead of 1 for model; __init__.py: changed script get_model_path for run get_model_by_name/lang inside current directory

* deleted default env var

* cli.py: changed arg_name; __init__.py: changed const name, changed model loading only for last directory

* deleted unused method

* changed by_name, by_lang methods, added download_model method

* deleted env variable initialization

* deleted print()

* deleted unused modules

* added progress_bar, added folder AppData/Local/vosk for model search

* changed download_model method; added my_hook method
  • Loading branch information
vadimdddd authored May 24, 2022
1 parent ff2c80d commit 4209f3a
Show file tree
Hide file tree
Showing 15 changed files with 97 additions and 119 deletions.
6 changes: 1 addition & 5 deletions python/example/test_alternatives.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@

SetLogLevel(0)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)

model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetMaxAlternatives(10)
rec.SetWords(True)
Expand Down
2 changes: 1 addition & 1 deletion python/example/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys
import json

model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, 8000)

res = json.loads(rec.FinalResult())
Expand Down
6 changes: 1 addition & 5 deletions python/example/test_ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,8 @@

SetLogLevel(0)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

sample_rate=16000
model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, sample_rate)

process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i',
Expand Down
11 changes: 1 addition & 10 deletions python/example/test_microphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ def callback(indata, frames, time, status):
parser.add_argument(
'-f', '--filename', type=str, metavar='FILENAME',
help='audio file to store recording to')
parser.add_argument(
'-m', '--model', type=str, metavar='MODEL_PATH',
help='Path to the model')
parser.add_argument(
'-d', '--device', type=int_or_str,
help='input device (numeric ID or substring)')
Expand All @@ -48,18 +45,12 @@ def callback(indata, frames, time, status):
args = parser.parse_args(remaining)

try:
if args.model is None:
args.model = "model"
if not os.path.exists(args.model):
print ("Please download a model for your language from https://alphacephei.com/vosk/models")
print ("and unpack as 'model' in the current folder.")
parser.exit(0)
if args.samplerate is None:
device_info = sd.query_devices(args.device, 'input')
# soundfile expects an int, sounddevice provides a float:
args.samplerate = int(device_info['default_samplerate'])

model = vosk.Model(args.model)
model = vosk.Model(lang="en-us")

if args.filename:
dump_fn = open(args.filename, "wb")
Expand Down
6 changes: 1 addition & 5 deletions python/example/test_nlsml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@

SetLogLevel(0)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)

model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetMaxAlternatives(10)
rec.SetNLSML(True)
Expand Down
6 changes: 1 addition & 5 deletions python/example/test_reset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@

SetLogLevel(0)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)

model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, wf.getframerate())

while True:
Expand Down
6 changes: 1 addition & 5 deletions python/example/test_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@

SetLogLevel(0)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)

model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetWords(True)
rec.SetPartialWords(True)
Expand Down
7 changes: 1 addition & 6 deletions python/example/test_speaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,8 @@
import os
import numpy as np

model_path = "model"
spk_model_path = "model-spk"

if not os.path.exists(model_path):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as {} in the current folder.".format(model_path))
exit (1)

if not os.path.exists(spk_model_path):
print ("Please download the speaker model from https://alphacephei.com/vosk/models and unpack as {} in the current folder.".format(spk_model_path))
exit (1)
Expand All @@ -24,7 +19,7 @@
exit (1)

# Large vocabulary free form recognition
model = Model(model_path)
model = Model(lang="en-us")
spk_model = SpkModel(spk_model_path)
#rec = KaldiRecognizer(model, wf.getframerate(), spk_model)
rec = KaldiRecognizer(model, wf.getframerate())
Expand Down
6 changes: 1 addition & 5 deletions python/example/test_srt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@

SetLogLevel(-1)

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

sample_rate=16000
model = Model("model")
model = Model(lang="en-us")
rec = KaldiRecognizer(model, sample_rate)
rec.SetWords(True)

Expand Down
7 changes: 1 addition & 6 deletions python/example/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@
import json
import os

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)


model = Model("model")
model = Model(lang="en-us")

# Large vocabulary free form recognition
rec = KaldiRecognizer(model, 16000)
Expand Down
7 changes: 1 addition & 6 deletions python/example/test_webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,8 @@

SetLogLevel(-1)

if not os.path.exists('model'):
print('Please download the model from https://alphacephei.com/vosk/models'
' and unpack as `model` in the current folder.')
exit(1)

sample_rate = 16000
model = Model('model')
model = Model(lang="en-us")
rec = KaldiRecognizer(model, sample_rate)
rec.SetWords(True)

Expand Down
6 changes: 1 addition & 5 deletions python/example/test_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,12 @@
import os
import wave

if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)

wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)

model = Model("model")
model = Model(lang="en-us")

# You can also specify the possible word or phrase list as JSON list, the order doesn't have to be strict
rec = KaldiRecognizer(model, wf.getframerate(), '["oh one two three four five six seven eight nine zero", "[unk]"]')
Expand Down
83 changes: 79 additions & 4 deletions python/vosk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
import os
import sys

from requests import get
from urllib.request import urlretrieve
from zipfile import ZipFile
from re import match
from pathlib import Path
from .vosk_cffi import ffi as _ffi
from tqdm import tqdm


MODEL_PRE_URL = 'https://alphacephei.com/vosk/models/'
MODEL_LIST_URL = MODEL_PRE_URL + 'model-list.json'
MODEL_DIRS = [os.getenv('VOSK_MODEL_PATH'), Path('/usr/share/vosk'), Path.home() / 'AppData/Local/vosk', Path.home() / '.cache/vosk']

def open_dll():
dlldir = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -21,10 +32,12 @@ def open_dll():
_c = open_dll()

class Model(object):

def __init__(self, model_path):
self._handle = _c.vosk_model_new(model_path.encode('utf-8'))

def __init__(self, model_path=None, model_name=None, lang=None):
    """Create a model from an explicit path, a model name, or a language.

    Args:
        model_path: Location of an unpacked model (str or os.PathLike).
            When given, model_name and lang are ignored.
        model_name: Exact model name, resolved via get_model_path().
        lang: Language code such as "en-us", resolved via
            get_model_path() when model_name is None.

    Raises:
        Exception: If the native library returns a NULL model handle.
    """
    if model_path is None:
        model_path = self.get_model_path(model_name, lang)
    # os.fspath() accepts pathlib.Path as well as plain strings.
    self._handle = _c.vosk_model_new(os.fspath(model_path).encode('utf-8'))
    if self._handle == _ffi.NULL:
        raise Exception("Failed to create a model")

Expand All @@ -34,6 +47,68 @@ def __del__(self):
def vosk_model_find_word(self, word):
    """Pass word (UTF-8 encoded) to the native vosk_model_find_word call.

    Returns whatever the native call returns for this model handle.
    """
    handle = self._handle
    encoded_word = word.encode('utf-8')
    return _c.vosk_model_find_word(handle, encoded_word)

def get_model_path(self, model_name, lang):
    """Return the path of the requested model as a string.

    model_name takes precedence; lang is only consulted when model_name
    is None.

    Args:
        model_name: Exact model name, or None to select by language.
        lang: Language code used when model_name is None.

    Returns:
        str: Path produced by get_model_by_name() or get_model_by_lang().

    Raises:
        ValueError: If both model_name and lang are None.
    """
    if model_name is None and lang is None:
        # Fail fast rather than searching (and querying the online model
        # list) for the literal language "None".
        raise ValueError("either model_name or lang must be specified")
    if model_name is None:
        model_path = self.get_model_by_lang(lang)
    else:
        model_path = self.get_model_by_name(model_name)
    return str(model_path)

def get_model_by_name(self, model_name):
    """Locate a model by exact name, downloading it when not installed.

    Every usable directory in MODEL_DIRS is searched for an entry named
    exactly model_name. If none is found, the online model list is
    consulted and the model is downloaded into the last MODEL_DIRS entry.

    Args:
        model_name: Exact name of the model directory.

    Returns:
        pathlib.Path: Location of the (possibly just downloaded) model.

    Raises:
        Exception: If the online model list has no model with this name.
    """
    for directory in MODEL_DIRS:
        # Skip unset entries and anything that cannot be listed (missing
        # paths, or a plain file where a directory was expected).
        if directory is None or not Path(directory).is_dir():
            continue
        if model_name in os.listdir(directory):
            return Path(directory, model_name)
    response = get(MODEL_LIST_URL)
    # Surface HTTP errors directly instead of failing later in .json().
    response.raise_for_status()
    if not any(model['name'] == model_name for model in response.json()):
        raise Exception("model name %s does not exist" % (model_name))
    # Downloads go to the last search directory; name it explicitly
    # instead of relying on the loop variable leaking out of the loop.
    target = Path(MODEL_DIRS[-1], model_name)
    self.download_model(target)
    return target

def get_model_by_lang(self, lang):
    """Locate a model for a language, downloading one when none is installed.

    Every usable directory in MODEL_DIRS is searched for a sub-directory
    whose name starts with "vosk-model-<lang>" or "vosk-model-small-<lang>".
    If none is found, the first non-obsolete small model for the language
    in the online model list is downloaded into the last MODEL_DIRS entry.

    Args:
        lang: Language code, e.g. "en-us".

    Returns:
        pathlib.Path: Location of the (possibly just downloaded) model.

    Raises:
        Exception: If the online model list has no suitable model.
    """
    for directory in MODEL_DIRS:
        # Skip unset entries and anything that cannot be listed (missing
        # paths, or a plain file where a directory was expected).
        if directory is None or not Path(directory).is_dir():
            continue
        for entry in os.listdir(directory):
            # Only unpacked model directories qualify: a leftover
            # "<model>.zip" archive also matches the name pattern.
            if match(f"vosk-model(-small)?-{lang}", entry) and Path(directory, entry).is_dir():
                return Path(directory, entry)
    response = get(MODEL_LIST_URL)
    # Surface HTTP errors directly instead of failing later in .json().
    response.raise_for_status()
    result_model = [
        model['name'] for model in response.json()
        if model['lang'] == lang and model['type'] == 'small' and model['obsolete'] == 'false'
    ]
    if not result_model:
        raise Exception("lang %s does not exist" % (lang))
    # Downloads go to the last search directory; name it explicitly
    # instead of relying on the loop variable leaking out of the loop.
    target = Path(MODEL_DIRS[-1], result_model[0])
    self.download_model(target)
    return target

def download_model(self, model_name):
    """Download a model archive and unpack it next to its target path.

    Args:
        model_name: pathlib.Path of the model directory to create. Its
            final component is the model name used to build the download
            URL; its parent is where the archive is stored and extracted.
    """
    # Create the destination itself (with any missing parents) rather than
    # a hard-coded MODEL_DIRS index; exist_ok avoids a check-then-create race.
    model_name.parent.mkdir(parents=True, exist_ok=True)
    archive_name = model_name.name + '.zip'
    archive_path = Path(str(model_name) + '.zip')
    try:
        with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1,
                  desc=archive_name) as t:
            urlretrieve(MODEL_PRE_URL + archive_name, str(archive_path),
                        reporthook=self.my_hook(t), data=None)
            # Pin the total to the bytes actually counted so the bar ends full.
            t.total = t.n
        with ZipFile(str(archive_path), 'r') as model_ref:
            model_ref.extractall(model_name.parent)
    finally:
        # Remove the archive even after a failed download or extraction so
        # a partial zip is never left behind in the model directory.
        if archive_path.exists():
            archive_path.unlink()

def my_hook(self, t):
    """Build a urlretrieve-style report hook that advances progress bar t.

    The returned callable takes (b, bsize, tsize): the number of blocks
    transferred so far, the block size, and the total size. It sets
    t.total when tsize is known and calls t.update() with the amount
    transferred since the previous call, returning t.update()'s result.
    """
    blocks_seen = 0

    def update_to(b=1, bsize=1, tsize=None):
        nonlocal blocks_seen
        total_known = tsize is not None and tsize != -1
        if total_known:
            t.total = tsize
        new_blocks = b - blocks_seen
        displayed = t.update(new_blocks * bsize)
        blocks_seen = b
        return displayed

    return update_to

class SpkModel(object):

def __init__(self, model_path):
Expand Down
9 changes: 5 additions & 4 deletions python/vosk/transcriber/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from vosk.transcriber.transcriber import Transcriber
from multiprocessing.dummy import Pool
from pathlib import Path
from vosk import Model


parser = argparse.ArgumentParser(
Expand All @@ -21,10 +22,10 @@
'-list_languages', default=False, action='store_true',
help='list available languages')
parser.add_argument(
'-model_name', default='vosk-model-small-en-us-0.15', type=str,
'-model_name', type=str,
help='select model by name')
parser.add_argument(
'-lang', type=str,
'-lang', default='en-us', type=str,
help='select model by language')
parser.add_argument(
'-input', type=str,
Expand Down Expand Up @@ -64,7 +65,7 @@ def main(args):
transcriber = Transcriber()
transcriber.check_args(args)
if args.input:
model = transcriber.get_model(args)
model = Model(model_path=args.model, model_name=args.model_name, lang=args.lang)
if Path(args.input).is_dir() and Path(args.output).is_dir():
task_list = transcriber.get_task_list(args)
with Pool() as pool:
Expand Down Expand Up @@ -102,4 +103,4 @@ def cli(): # entrypoint used in setup.py
logging.info(f'''Execution time: {sec} sec, {mcsec} mcsec; xRT: {format(tot_samples / 16000.0 / float(diff_end_start), '.3f')}''')

if __name__ == "__main__":
main()
main(args)
Loading

0 comments on commit 4209f3a

Please sign in to comment.