Skip to content

Commit

Permalink
Added util function to download model files if they don't already exist
Browse files Browse the repository at this point in the history
  • Loading branch information
dscripka committed Jul 27, 2024
1 parent c9137ec commit 46d9e7e
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ This should work on nearly all operating systems (Windows, macOS, Linux), as the

## Usage

openSpeechToIntent is designed to be simple to use. Simply provide a file/array of audio data and a list of target intents, and the library will return information about potential intent matches.
openSpeechToIntent is designed to be simple to use. Just provide a file/array of audio data and a list of target intents, and the library will return information about potential intent matches.

```python

from openspeechtointent.model import CitrinetModel

# Load model
# Load model (this will also download the model if it is not already present)
mdl = CitrinetModel()

# Define some simple intents
Expand Down
27 changes: 27 additions & 0 deletions openspeechtointent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2024 David Scripka. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os

# Directory containing this package, used to locate bundled resources.
_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))

# Registry of the ONNX models the package needs: the expected on-disk
# location and the release asset URL to fetch it from when missing.
# Built from a (name, filename, repo) table to keep entries consistent.
MODELS = {
    name: {
        "model_path": os.path.join(_PACKAGE_DIR, f"resources/models/{filename}"),
        "download_url": f"{repo}/releases/download/v0.1.0/{filename}",
    }
    for name, filename, repo in (
        ("stft", "torchlibrosa_stft.onnx",
         "https://github.com/dscripka/openSpeechtoIntent"),
        ("citrinet_256", "stt_en_citrinet_256.onnx",
         "https://github.com/dscripka/openWakeWord"),
    )
}
10 changes: 8 additions & 2 deletions openspeechtointent/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
import wave
import difflib
from openspeechtointent.forced_alignment import forced_align_single_sequence, forced_align_multiple_sequence

from openspeechtointent.utils import download_file
from openspeechtointent import MODELS

class TokenSpan(NamedTuple):
"""
Expand All @@ -53,6 +54,11 @@ def __init__(self,
model_path (str): Path to the Citrinet model
ncpu (int): Number of threads to use for inference of the Citrinet model
"""
        # Download models from the GitHub release if they don't already exist
for model in MODELS.keys():
if not os.path.exists(MODELS[model]["model_path"]):
download_file(MODELS[model]["download_url"], os.path.dirname(MODELS[model]["model_path"]))

# limit to specified number of threads
sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = ncpu
Expand All @@ -70,7 +76,7 @@ def __init__(self,
self.filterbank = np.load(filterbank_path)

# Load tokenizer and vocab
tokenizer_path = os.path.join(location, "resources/models/tokenizer.pkl")
tokenizer_path = os.path.join(location, "resources/models/citrinet_tokenizer.pkl")
self.tokenizer = pickle.load(open(tokenizer_path, "rb"))
vocab_path = os.path.join(location, "resources/models/citrinet_vocab.json")
self.vocab = json.load(open(vocab_path, 'r'))
Expand Down
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions openspeechtointent/resources/models/citrinet_vocab.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["<unk>", "s", "\u2581the", "t", "\u2581a", "\u2581i", "'", "\u2581and", "\u2581to", "ed", "d", "\u2581of", "e", "\u2581in", "ing", ".", "\u2581it", "\u2581you", "n", "\u2581that", "m", "y", "er", "\u2581he", "re", "r", "\u2581was", "\u2581is", "\u2581for", "\u2581know", "a", "p", "c", ",", "\u2581be", "o", "\u2581but", "\u2581they", "g", "\u2581so", "ly", "b", "\u2581s", "\u2581yeah", "\u2581we", "\u2581have", "\u2581re", "\u2581like", "l", "\u2581on", "ll", "u", "\u2581with", "\u2581do", "al", "\u2581not", "\u2581are", "or", "ar", "le", "\u2581this", "\u2581as", "es", "\u2581c", "\u2581de", "f", "in", "i", "ve", "\u2581uh", "ent", "\u2581or", "\u2581what", "\u2581me", "\u2581t", "\u2581at", "\u2581my", "\u2581his", "\u2581there", "w", "\u2581all", "\u2581just", "h", "\u2581can", "ri", "il", "k", "ic", "\u2581e", "\u2581", "\u2581um", "\u2581don", "\u2581b", "\u2581had", "ch", "ation", "en", "th", "\u2581no", "\u2581she", "it", "\u2581one", "\u2581think", "\u2581st", "\u2581if", "\u2581from", "ter", "\u2581an", "an", "ur", "\u2581out", "on", "\u2581go", "ck", "\u2581would", "\u2581were", "\u2581w", "\u2581will", "\u2581about", "\u2581right", "ment", "\u2581her", "te", "ion", "\u2581well", "\u2581by", "ce", "\u2581g", "\u2581oh", "\u2581up", "ro", "ra", "\u2581when", "\u2581some", "\u2581also", "\u2581their", "ers", "ow", "\u2581more", "\u2581time", "ate", "\u2581has", "\u2581people", "\u2581see", "\u2581pa", "el", "\u2581get", "\u2581ex", "\u2581mean", "li", "\u2581really", "v", "\u2581ra", "\u2581been", "\u2581said", "-", "la", "ge", "\u2581how", "\u2581po", "ir", "\u2581mo", "\u2581who", "\u2581because", "\u2581co", "\u2581other", "\u2581f", "id", "ol", "\u2581un", "\u2581now", "\u2581work", "ist", "us", "\u2581your", "\u2581them", "ver", "as", "ne", "\u2581ca", "lo", "\u2581fa", "\u2581him", "ng", "\u2581good", "\u2581could", "\u2581pro", "ive", "\u2581con", "de", "un", "age", "\u2581ma", "?", "at", "\u2581ro", "\u2581ba", "\u2581then", "\u2581com", "est", 
"vi", "\u2581dis", "ies", "ance", "\u2581su", "\u2581even", "\u2581any", "ut", "ad", "ul", "\u2581se", "\u2581two", "\u2581bu", "\u2581lo", "\u2581say", "\u2581la", "\u2581fi", "is", "\u2581li", "\u2581over", "\u2581new", "\u2581man", "\u2581sp", "ity", "\u2581did", "\u2581bo", "\u2581very", "x", "end", "\u2581which", "\u2581our", "\u2581after", "\u2581o", "ke", "\u2581p", "im", "\u2581want", "\u2581ha", "\u2581v", "z", "\u2581where", "ard", "um", "\u2581into", "ru", "\u2581di", "\u2581lot", "\u2581dr", "mp", "\u2581day", "ated", "ci", "\u2581these", "\u2581than", "\u2581take", "\u2581kind", "\u2581got", "ight", "\u2581make", "ence", "\u2581pre", "\u2581going", "ish", "\u2581k", "able", "\u2581look", "ti", "per", "\u2581here", "\u2581en", "\u2581ah", "ry", "\u2581too", "\u2581part", "ant", "one", "\u2581ho", "\u2581much", "\u2581way", "\u2581sa", "\u2581something", "mo", "\u2581us", "\u2581th", "\u2581mhm", "\u2581mi", "\u2581off", "pe", "\u2581back", "les", "\u2581cr", "\u2581ri", "\u2581fe", "und", "\u2581fl", "port", "\u2581school", "\u2581ch", "\u2581should", "\u2581first", "\u2581only", "\u2581le", "ot", "tion", "\u2581little", "\u2581da", "\u2581hu", "\u2581d", "me", "ta", "\u2581down", "\u2581okay", "\u2581come", "ain", "ff", "\u2581car", "co", "\u2581need", "ture", "\u2581many", "\u2581things", "\u2581ta", "qu", "man", "ty", "iv", "\u2581year", "he", "\u2581thing", "ho", "\u2581singapore", "po", "\u2581vi", "\u2581sc", "\u2581still", "der", "\u2581hi", "\u2581never", "\u2581qu", "ia", "\u2581fr", "\u2581min", "\u2581most", "om", "ful", "\u2581bi", "\u2581long", "ig", "\u2581years", "ous", "\u2581three", "\u2581play", "\u2581before", "\u2581pi", "ical", "\u2581those", "\u2581comp", "huh", "\u2581live", "tor", "ise", "\u2581old", "am", "rr", "\u2581sta", "\u2581n", "ick", "di", "ma", "ary", "ction", "\u2581friend", "ition", "\u2581gu", "\u2581through", "pp", "for", "ie", "ious", "\u2581sh", "\u2581home", "lu", "\u2581high", "ian", "cu", "\u2581help", 
"\u2581give", "\u2581talk", "\u2581sha", "\u2581such", "\u2581didn", "em", "\u2581may", "\u2581ga", "\u2581'", "\u2581gra", "\u2581guess", "\u2581every", "\u2581app", "tic", "\u2581tra", "\u2581\"", "op", "\u2581made", "\"", "\u2581op", "\u2581own", "\u2581mar", "no", "\u2581ph", "\u2581life", "\u2581y", "ak", "ine", "\u2581pu", "\u2581place", "\u2581always", "\u2581start", "\u2581jo", "\u2581pe", "\u2581let", "\u2581name", "ni", "\u2581same", "\u2581last", "\u2581cl", "ph", "\u2581both", "\u2581pri", "ities", "\u2581another", "and", "\u2581al", "\u2581boy", "ving", "\u2581actually", "\u2581person", "\u2581went", "\u2581yes", "ca", "ally", "\u2581h", "\u2581great", "\u2581thought", "\u2581used", "act", "\u2581feel", "ward", "\u2581different", "\u2581cons", "\u2581show", "\u2581watch", "\u2581being", "\u2581money", "ay", "\u2581try", "\u2581why", "\u2581big", "ens", "\u2581cha", "\u2581find", "\u2581hand", "\u2581real", "\u2581four", "ial", "\u2581ne", "\u2581che", "\u2581read", "\u2581five", "\u2581family", "ag", "\u2581change", "\u2581add", "ha", "\u2581put", "par", "lic", "side", "\u2581came", "\u2581under", "ness", "\u2581per", "j", "\u2581around", "\u2581end", "\u2581house", "if", "\u2581while", "vo", "\u2581act", "\u2581happen", "\u2581plan", "mit", "\u2581far", "\u2581tri", "\u2581ten", "\u2581du", "\u2581win", "\u2581tea", "ze", "\u2581better", "\u2581sure", "\u2581mu", "\u2581use", "\u2581anything", "\u2581love", "\u2581world", "\u2581hard", "ure", "\u2581does", "\u2581war", "\u2581stuff", "\u2581ja", "\u2581must", "min", "gg", "\u2581ru", "\u2581care", "\u2581tell", "\u2581pl", "\u2581doing", "\u2581probably", "\u2581found", "ative", "\u2581point", "ach", "\u2581ju", "ip", "\u2581again", "\u2581interest", "\u2581state", "\u2581week", "na", "\u2581might", "\u2581pretty", "\u2581ki", "\u2581fo", "ber", "\u2581am", "line", "led", "\u2581six", "\u2581acc", "\u2581bri", "\u2581call", "\u2581sw", "\u2581each", "\u2581business", "\u2581keep", "\u2581away", 
"cause", "\u2581pass", "\u2581va", "\u2581children", "\u2581pay", "\u2581count", "\u2581public", "\u2581everything", "land", "\u2581though", "\u2581men", "bo", "\u2581young", "\u2581na", "\u2581move", "ough", "ating", "com", "\u2581month", "ton", "\u2581close", "\u2581few", "!", "\u2581maybe", "\u2581imp", "son", "\u2581grow", "\u2581u", "\u2581turn", "ible", "\u2581em", "\u2581air", "\u2581ever", "our", "\u2581sea", "\u2581fun", "\u2581government", "\u2581miss", "\u2581done", "\u2581next", "\u2581kids", "\u2581cor", "\u2581set", "\u2581run", "way", "\u2581wa", "\u2581getting", "\u2581eight", "\u2581open", "\u2581job", "\u2581problem", "ook", "\u2581night", "\u2581learn", "\u2581book", "ual", "\u2581ti", "\u2581best", "cept", "\u2581during", "\u2581small", "ex", "\u2581without", "\u2581water", "\u2581trans", "\u2581course", "\u2581once", "\u2581sit", "\u2581area", "\u2581country", "\u2581mister", "\u2581nothing", "\u2581whole", "\u2581believe", "\u2581service", "\u2581took", "\u2581face", "\u2581bad", "\u2581later", "\u2581head", "\u2581called", "\u2581seven", "\u2581art", "\u2581since", "\u2581er", "\u2581fact", "\u2581city", "\u2581market", "\u2581hour", "\u2581continue", "ship", "\u2581invest", "\u2581exactly", "\u2581large", "\u2581true", "\u2581nine", "\u2581sub", "\u2581having", "\u2581game", "va", "\u2581lu", "\u2581conf", "\u2581case", "\u2581doesn", "\u2581certain", "\u2581wi", "\u2581law", "\u2581else", "fi", "\u2581left", "\u2581enough", "\u2581second", "\u2581gonna", "\u2581food", "\u2581hope", "\u2581saw", "\u2581between", "\u2581je", "bi", "\u2581girl", "\u2581company", "\u2581able", "\u2581expect", "\u2581told", "\u2581stand", "\u2581group", "\u2581main", "\u2581walk", "\u2581cause", "\u2581however", "\u2581number", "\u2581follow", "\u2581near", "\u2581yet", "\u2581sometimes", "\u2581train", "\u2581lead", "\u2581system", "\u2581remain", "\u2581develop", "gra", "\u2581word", "\u2581exc", "\u2581together", "\u2581consider", "\u2581town", "\u2581less", 
"ator", "\u2581important", "\u2581remember", "\u2581free", "\u2581quite", "\u2581understand", "\u2581bra", "\u2581support", "\u2581idea", "\u2581stop", "\u2581reason", "\u2581nice", "\u2581mm", "\u2581agree", "\u2581low", "\u2581against", "\u2581issue", "\u2581become", "\u2581today", "\u2581side", "\u2581student", "\u2581matter", "\u2581question", "\u2581mother", "\u2581father", "\u2581hundred", "\u2581sort", "\u2581eat", "\u2581already", "\u2581rest", "\u2581line", "\u2581asked", "\u2581include", "\u2581upon", "\u2581office", "\u2581won", "\u2581class", "\u2581wait", "\u2581twenty", "\u2581half", "\u2581light", "\u2581price", "\u2581almost", "ash", "\u2581child", "\u2581sign", "\u2581least", "\u2581several", "press", "\u2581either", "\u2581minute", "\u2581himself", "\u2581parents", "\u2581room", "\u2581whatever", "\u2581general", "\u2581cost", "\u2581among", "\u2581direct", "\u2581computer", "\u2581appear", "\u2581meet", "\u2581ski", "\u2581return", "\u2581couple", "\u2581product", "\u2581suppose", "\u2581definitely", "\u2581america", "\u2581term", "\u2581usually", "\u2581strong", "\u2581current", "\u2581arm", "\u2581speak", "\u2581local", "\u2581south", "\u2581experience", "\u2581full", "\u2581north", "\u2581elect", "\u2581leave", "\u2581provide", "qui", "\u2581power", "\u2581movie", "\u2581everyone", "\u2581making", "\u2581member", "\u2581woman", "\u2581somebody", "\u2581wonder", "\u2581short", "\u2581health", "\u2581police", "\u2581bank", "\u2581until", "\u2581companies", "\u2581everybody", "\u2581knew", "\u2581program", "\u2581music", "\u2581york", "\u2581land", "\u2581doctor", "\u2581answer", "\u2581building", "\u2581employ", "\u2581travel", "\u2581major", "\u2581seems", "\u2581safe", "gue", "\u2581college", "\u2581along", "\u2581clear", "\u2581especially", "\u2581umhu", "\u2581result", "\u2581type", "\u2581court", "\u2581black", "\u2581hold", "\u2581myself", "\u2581education", "\u2581social", "\u2581enjoy", "\u2581became", "\u2581whether", "\u2581morning", 
"\u2581difficult", "\u2581shi", "\u2581felt", "\u2581husband", "\u2581white", "\u2581taking", "\u2581million", "\u2581require", "\u2581early", "ency", "\u2581visit", "\u2581level", "\u2581brother", "\u2581married", "\u2581further", "\u2581affect", "\u2581serve", "\u2581present", "\u2581park", "\u2581effect", "\u2581wife", "\u2581teacher", "\u2581cannot", "\u2581community", "\u2581street", "\u2581period", "\u2581national", "\u2581view", "\u2581future", "\u2581daughter", "\u2581situation", "\u2581grand", "\u2581success", "\u2581perform", "\u2581concern", "\u2581complete", "\u2581example", "ized", "\u2581thousand", "\u2581increase", "\u2581began", "\u2581final", "\u2581east", "\u2581sense", "\u2581charge", "\u2581record", "\u2581born", "\u2581instead", "\u2581receive", "\u2581women", "\u2581across", "\u2581information", "\u2581although", "\u2581process", "\u2581condition", "\u2581security", "\u2581treat", "\u2581funny", "\u2581custom", "\u2581cold", "\u2581behind", "ified", "\u2581ground", "cycl", "\u2581depend", "\u2581themselves", "\u2581design", "\u2581slow", "\u2581third", "\u2581smoke", "\u2581wrong", "\u2581project", "\u2581space", "\u2581drink", "\u2581particular", "\u2581listen", "\u2581thirty", "\u2581special", "ability", "\u2581improve", "\u2581attack", "\u2581happy", "\u2581strange", "\u2581english", "\u2581value", "\u2581brought", "\u2581private", "\u2581account", "\u2581china", "\u2581spoke", "\u2581foreign", "\u2581possible", "\u2581author", "\u2581circ", "\u2581voice", "\u2581figure", "\u2581control", "\u2581according", "\u2581green", "\u2581university", "\u2581language", "\u2581please", "\u2581animal", "\u2581church", "\u2581society", "\u2581dream", "\u2019", "q", ":", ";", "\u2014", "\u2018", "\u201d", "_", "3", "8", "<", ">", "1", "\u2013", "7", "(", ")", "0", "2", "4", "+", "&", "5", "9", "\u00fc", "\u00e9", "/", "\u00e1", "\u00f3", "\u014d", "\u00fa", "]", "\u00e2", "\u00ed", "\u00e3", "\u00f0", "\u0101", "\u0107", "\u010d", "\u0161", "\u00e8", 
"\u00eb", "`", "\u00e7", "\u016b", "\u1ea1", "\u00f8", "=", "\u00e0", "\u0142", "\u03b1", "\u00f4", "\u043a", "}", "\u00e5", "\u0103", "\u0438", "\u012b", "\u03c0", "\u0153", "\\", "[", "\u00f1", "\u00df", "\u00f6", "\u00e4", "6", "\u0437", "\u043d", "\u00fb", "%", "{", "\u00a1", "\u00e6", "\u00ea", "\u00fe", "\u0119", "\u011b", "\u011f", "\u0144", "\u0151", "\u0159", "\u017e", "\u02bb", "\u0432", "\u0435", "\u0439", "\u043b", "\u044c", "\u03c7", "\u201c", ""]
45 changes: 45 additions & 0 deletions openspeechtointent/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2024 David Scripka. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# imports
import os
import urllib.request
import sys

def download_file(url, target_directory, file_size=None):
    """Download a file from a URL with a simple stdout progress indicator,
    using only standard libraries.

    The final path component of ``url`` is used as the local file name.
    The target directory is created (including parents) if it does not
    already exist.

    Args:
        url (str): URL of the file to download.
        target_directory (str): Directory to save the downloaded file into.
        file_size (int, optional): Expected size of the file in bytes. If
            None, the size is read from the response's Content-Length
            header (falling back to 0 when the header is absent).

    Returns:
        str: Full path of the downloaded file.
    """
    local_filename = url.split('/')[-1]
    file_path = os.path.join(target_directory, local_filename)

    # Ensure the destination directory exists (e.g. on a fresh install
    # where the resources/models directory has not been created yet)
    os.makedirs(target_directory, exist_ok=True)

    # Open the URL
    with urllib.request.urlopen(url) as response:
        if file_size is None:
            # Use .headers.get rather than .getheader: the latter only exists
            # on http.client.HTTPResponse, while .headers works for every
            # response type urllib can return (http, https, file, ...)
            file_size = int(response.headers.get('Content-Length', 0) or 0)

        # Announce the download before streaming begins
        print(f"Downloading {local_filename} ({file_size} bytes)")
        downloaded = 0

        with open(file_path, 'wb') as f:
            # Stream in 8 KiB chunks to keep memory usage constant
            while True:
                chunk = response.read(8192)
                if not chunk:
                    break
                f.write(chunk)
                downloaded += len(chunk)

                # Update progress (shows 0% when the size is unknown)
                progress = downloaded / file_size * 100 if file_size else 0
                sys.stdout.write(f"\rProgress: {progress:.2f}%")
                sys.stdout.flush()

    # Terminate the carriage-return progress line so that any later
    # output starts on a fresh line instead of overwriting it
    sys.stdout.write("\n")
    sys.stdout.flush()

    return file_path

0 comments on commit 46d9e7e

Please sign in to comment.