Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for --compiled-grammar. #52

Merged
merged 1 commit into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ There are three options available to provide a grammar:

- The inline grammar option expects a grammar passed inline as a string.
- The grammar URI option expects a URI, either pointing to a built-in grammar or to a grammar that is being hosted externally.
- The compiled grammar expects a filename of the compiled grammar binary.
- The compiled grammar option expects the path to a previously compiled grammar file (a .tar.xz archive).

> **THIS FEATURE IS STILL IN DEVELOPMENT, PLEASE ONLY USE THE GRAMMAR URI OPTION WITH BUILTIN GRAMMARS, OR AN ERROR WILL BE GIVEN.**

Expand Down
28 changes: 17 additions & 11 deletions cli-client/helpers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,15 @@ def parse_csr_commandline() -> RecognizerOptions:
options = RecognizerOptions()
parser = argparse.ArgumentParser(description='Perform speech recognition on an audio file')
parser.add_argument('--audio-file', '-a', help='Path to a .wav audio in 8kHz and PCM16 encoding', required=True)
parser.add_argument('--convert-audio', '-c', help='Convert audio file to from A-LAW to PCM using sox software. Used for internal testing.',
parser.add_argument('--convert-audio', '-c', help='Convert audio file to from A-LAW to PCM using sox software. '
'Used for internal testing.',
required=False, default=False, dest='convert_audio', action='store_true')
topicGroup = parser.add_mutually_exclusive_group(required=True)
topicGroup.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'], help='A valid topic')
topicGroup.add_argument('--inline-grammar', '-I', help='Grammar inline as a string')
topicGroup.add_argument('--grammar-uri', '-G', help='Grammar URI for the recognition (builtin or served externally)')
topicGroup.add_argument('--compiled-grammar', '-C', help='Compiled grammar binary for the recognition')
topic_group = parser.add_mutually_exclusive_group(required=True)
topic_group.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'],
help='A valid topic.')
topic_group.add_argument('--inline-grammar', '-I', help='Grammar inline as a string.')
topic_group.add_argument('--grammar-uri', '-G', help='Builtin grammar URI for the recognition.')
topic_group.add_argument('--compiled-grammar', '-C', help='The compiled grammar file path (an .tar.xz) for the recognition.')
parser.add_argument(
'--language',
'-l',
Expand All @@ -160,21 +162,25 @@ def parse_csr_commandline() -> RecognizerOptions:
help='A Language ID (default: ' + options.language + ')',
default=options.language)
parser.add_argument('--token', '-t', help='File with the authentication token', required=True)
parser.add_argument('--host', '-H', help='The URL of the host trying to reach (default: ' + options.host + ')', required=True)
parser.add_argument('--host', '-H', help='The URL of the host trying to reach (default: ' + options.host + ')',
required=True)
parser.add_argument('--not-secure', '-S', help='Do not use a secure channel. Used for internal testing.',
required=False, default=True, dest='secure', action='store_false')
parser.add_argument('--diarization', '-d', help='', required=False, default=False, action='store_true')
parser.add_argument('--formatting', '-f', help='', required=False, default=False, action='store_true')
parser.add_argument('--inactivity-timeout', '-i', help='Time for stream inactivity after the first valid response', required=False, default=5.0)
parser.add_argument('--inactivity-timeout', '-i', help='Time for stream inactivity after the first valid response',
required=False, default=5.0)
parser.add_argument('--asr-version', choices=['V1', 'V2'], help='Selectable asr version', required=True)
parser.add_argument('--label', help='Label for the request', required=False, default="")

credentialGroup = parser.add_argument_group(
credential_group = parser.add_argument_group(
'credentials',
'''[OPTIONAL] Client authentication credentials used to refresh the token.
You can find your credentials on the dashboard at https://dashboard.speechcenter.verbio.com/access-token''')
credentialGroup.add_argument('--client-id', help='Client id for authentication. MUST be written as --client-id=CLIENT_ID')
credentialGroup.add_argument('--client-secret', help='Client secret for authentication. MUST be written as --client-secret=CLIENT_SECRET')
credential_group.add_argument('--client-id',
help='Client id for authentication. MUST be written as --client-id=CLIENT_ID')
credential_group.add_argument('--client-secret',
help='Client secret for authentication. MUST be written as --client-secret=CLIENT_SECRET')

args = parser.parse_args()
parse_credential_args(args, options)
Expand Down
22 changes: 22 additions & 0 deletions cli-client/helpers/compiled_grammar_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os

# Second-to-last extension a compiled grammar file name must carry
# (check_format compares it against the inner extension).
COMPILED_GRAMMAR_FORMAT = '.tar'
# Final extension a compiled grammar file name must carry
# (check_format compares it against the outermost extension).
COMPILED_GRAMMAR_SUB_FORMAT = '.xz'


def check_format(compiled_grammar: str):
    """Tell whether a path carries the compiled-grammar double extension.

    The outermost extension must equal COMPILED_GRAMMAR_SUB_FORMAT and the
    one right before it must equal COMPILED_GRAMMAR_FORMAT. The comparison
    is purely textual (and case-sensitive); the file itself is not touched.

    Args:
        compiled_grammar: File name or path to inspect.

    Returns:
        True when both extensions match, False otherwise.
    """
    stem, outer_extension = os.path.splitext(compiled_grammar)
    if outer_extension != COMPILED_GRAMMAR_SUB_FORMAT:
        return False
    inner_extension = os.path.splitext(stem)[1]
    return inner_extension == COMPILED_GRAMMAR_FORMAT


def get_compiled_grammar(compiled_grammar: str) -> bytes:
    """Read a compiled grammar file from disk and return its raw bytes.

    Args:
        compiled_grammar: Path to the compiled grammar file. It must point
            to an existing regular file whose name passes check_format.

    Returns:
        The full binary content of the file.

    Raises:
        ValueError: If the path does not exist, is not a regular file
            (e.g. a directory), or does not have the expected extension.
    """
    if not os.path.isfile(compiled_grammar):
        # Keep the original message for the plain "missing" case; a path that
        # exists but is not a file (directory, etc.) would otherwise slip past
        # validation and fail later inside open() with an unrelated error.
        if not os.path.exists(compiled_grammar):
            raise ValueError(f"{compiled_grammar} file does not exist.")
        raise ValueError(f"{compiled_grammar} is not a regular file.")
    if not check_format(compiled_grammar):
        raise ValueError(f"{compiled_grammar} file specified is not {COMPILED_GRAMMAR_FORMAT}"
                         f"{COMPILED_GRAMMAR_SUB_FORMAT}.")

    # Binary mode: the content is sent as-is, with no text decoding.
    with open(compiled_grammar, mode="rb") as grammar_file:
        return grammar_file.read()
6 changes: 3 additions & 3 deletions cli-client/helpers/csr_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import threading
from threading import Timer
from helpers.common import VerbioGrammar, RecognizerOptions
from helpers.compiled_grammar_processing import get_compiled_grammar
from concurrent.futures import ThreadPoolExecutor
from google.protobuf.json_format import MessageToJson
from helpers.common import split_audio
Expand Down Expand Up @@ -101,9 +102,8 @@ def __generate_grammar_resource(self, grammar):
elif grammar.type == VerbioGrammar.URI:
return recognition_streaming_request_pb2.GrammarResource(grammar_uri=grammar.content)
elif grammar.type == VerbioGrammar.COMPILED:
with open(grammar.content, "rb") as grammar_file:
compiled_grammar = grammar_file.read()
return recognition_streaming_request_pb2.GrammarResource(compiled_grammar=compiled_grammar)
return recognition_streaming_request_pb2.GrammarResource(
compiled_grammar=get_compiled_grammar(grammar.content))

raise Exception("Type of grammar not recognized.")

Expand Down
Loading