diff --git a/README.md b/README.md index 101d58c..4079fa9 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ There are three options available to provide a grammar: - The inline grammar option expects a grammar passed inline as a string. - The grammar URI option expects a URI, either pointing to a built-in grammar or to a grammar that is being hosted externally. -- The compiled grammar expects a filename of the compiled grammar binary. +- The compiled grammar option expects the path to a previously compiled grammar file (a .tar.xz archive). > **THIS FEATURE IS STILL IN DEVELOPMENT, PLEASE ONLY USE THE GRAMMAR URI OPTION WITH BUILTIN GRAMMARS, OR AN ERROR WILL BE GIVEN.** diff --git a/cli-client/helpers/common.py b/cli-client/helpers/common.py index f909600..73490f0 100644 --- a/cli-client/helpers/common.py +++ b/cli-client/helpers/common.py @@ -133,13 +133,15 @@ def parse_csr_commandline() -> RecognizerOptions: options = RecognizerOptions() parser = argparse.ArgumentParser(description='Perform speech recognition on an audio file') parser.add_argument('--audio-file', '-a', help='Path to a .wav audio in 8kHz and PCM16 encoding', required=True) - parser.add_argument('--convert-audio', '-c', help='Convert audio file to from A-LAW to PCM using sox software. Used for internal testing.', + parser.add_argument('--convert-audio', '-c', help='Convert audio file to from A-LAW to PCM using sox software. 
' + 'Used for internal testing.', required=False, default=False, dest='convert_audio', action='store_true') - topicGroup = parser.add_mutually_exclusive_group(required=True) - topicGroup.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'], help='A valid topic') - topicGroup.add_argument('--inline-grammar', '-I', help='Grammar inline as a string') - topicGroup.add_argument('--grammar-uri', '-G', help='Grammar URI for the recognition (builtin or served externally)') - topicGroup.add_argument('--compiled-grammar', '-C', help='Compiled grammar binary for the recognition') + topic_group = parser.add_mutually_exclusive_group(required=True) + topic_group.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'], + help='A valid topic.') + topic_group.add_argument('--inline-grammar', '-I', help='Grammar inline as a string.') + topic_group.add_argument('--grammar-uri', '-G', help='Builtin grammar URI for the recognition.') + topic_group.add_argument('--compiled-grammar', '-C', help='The compiled grammar file path (an .tar.xz) for the recognition.') parser.add_argument( '--language', '-l', @@ -160,21 +162,25 @@ def parse_csr_commandline() -> RecognizerOptions: help='A Language ID (default: ' + options.language + ')', default=options.language) parser.add_argument('--token', '-t', help='File with the authentication token', required=True) - parser.add_argument('--host', '-H', help='The URL of the host trying to reach (default: ' + options.host + ')', required=True) + parser.add_argument('--host', '-H', help='The URL of the host trying to reach (default: ' + options.host + ')', + required=True) parser.add_argument('--not-secure', '-S', help='Do not use a secure channel. 
Used for internal testing.', required=False, default=True, dest='secure', action='store_false') parser.add_argument('--diarization', '-d', help='', required=False, default=False, action='store_true') parser.add_argument('--formatting', '-f', help='', required=False, default=False, action='store_true') - parser.add_argument('--inactivity-timeout', '-i', help='Time for stream inactivity after the first valid response', required=False, default=5.0) + parser.add_argument('--inactivity-timeout', '-i', help='Time for stream inactivity after the first valid response', + required=False, default=5.0) parser.add_argument('--asr-version', choices=['V1', 'V2'], help='Selectable asr version', required=True) parser.add_argument('--label', help='Label for the request', required=False, default="") - credentialGroup = parser.add_argument_group( + credential_group = parser.add_argument_group( 'credentials', '''[OPTIONAL] Client authentication credentials used to refresh the token. You can find your credentials on the dashboard at https://dashboard.speechcenter.verbio.com/access-token''') - credentialGroup.add_argument('--client-id', help='Client id for authentication. MUST be written as --client-id=CLIENT_ID') - credentialGroup.add_argument('--client-secret', help='Client secret for authentication. MUST be written as --client-secret=CLIENT_SECRET') + credential_group.add_argument('--client-id', + help='Client id for authentication. MUST be written as --client-id=CLIENT_ID') + credential_group.add_argument('--client-secret', + help='Client secret for authentication. 
MUST be written as --client-secret=CLIENT_SECRET') args = parser.parse_args() parse_credential_args(args, options) diff --git a/cli-client/helpers/compiled_grammar_processing.py b/cli-client/helpers/compiled_grammar_processing.py new file mode 100644 index 0000000..ef9847e --- /dev/null +++ b/cli-client/helpers/compiled_grammar_processing.py @@ -0,0 +1,22 @@ +import os + +COMPILED_GRAMMAR_FORMAT = '.tar' +COMPILED_GRAMMAR_SUB_FORMAT = '.xz' + + +def check_format(compiled_grammar: str): + first_split = os.path.splitext(compiled_grammar) + second_split = os.path.splitext(first_split[0]) + return first_split[1] == COMPILED_GRAMMAR_SUB_FORMAT and second_split[1] == COMPILED_GRAMMAR_FORMAT + + +def get_compiled_grammar(compiled_grammar: str): + if not os.path.exists(compiled_grammar): + raise ValueError(f"{compiled_grammar} file does not exist.") + if not check_format(compiled_grammar): + raise ValueError(f"{compiled_grammar} file specified is not {COMPILED_GRAMMAR_FORMAT}" + f"{COMPILED_GRAMMAR_SUB_FORMAT}.") + + with open(compiled_grammar, mode="rb") as f: + data = f.read() + return data diff --git a/cli-client/helpers/csr_client.py b/cli-client/helpers/csr_client.py index 3b664c3..ee157db 100644 --- a/cli-client/helpers/csr_client.py +++ b/cli-client/helpers/csr_client.py @@ -5,6 +5,7 @@ import threading from threading import Timer from helpers.common import VerbioGrammar, RecognizerOptions +from helpers.compiled_grammar_processing import get_compiled_grammar from concurrent.futures import ThreadPoolExecutor from google.protobuf.json_format import MessageToJson from helpers.common import split_audio @@ -101,9 +102,8 @@ def __generate_grammar_resource(self, grammar): elif grammar.type == VerbioGrammar.URI: return recognition_streaming_request_pb2.GrammarResource(grammar_uri=grammar.content) elif grammar.type == VerbioGrammar.COMPILED: - with open(grammar.content, "rb") as grammar_file: - compiled_grammar = grammar_file.read() - return 
recognition_streaming_request_pb2.GrammarResource(compiled_grammar=compiled_grammar) + return recognition_streaming_request_pb2.GrammarResource( + compiled_grammar=get_compiled_grammar(grammar.content)) raise Exception("Type of grammar not recognized.")