diff --git a/cli-client/helpers/audio_importer.py b/cli-client/helpers/audio_importer.py
index 3e12cd3..f24d3cc 100644
--- a/cli-client/helpers/audio_importer.py
+++ b/cli-client/helpers/audio_importer.py
@@ -14,11 +14,24 @@ def remove_pcm_audio_file(audio_file: str):
 
 
 class AudioImporter:
-    def __init__(self, audio_file: str):
-        tmp_audio_file = preprocess_audio_file_to_pcm(audio_file)
-        with open(tmp_audio_file, "rb") as wav_file:
-            wav_data = wave.open(wav_file)
-            self.sample_rate = wav_data.getframerate()
-            self.audio = wav_data.readframes(wav_data.getnframes())
-            wav_data.close()
-        remove_pcm_audio_file(tmp_audio_file)
+    def __init__(self, audio_file: str, convert_audio: bool):
+        """Load the sample rate and raw frames of a WAV file.
+
+        Args:
+            audio_file: Path to the audio file to read.
+            convert_audio: When true, the file is first passed through
+                preprocess_audio_file_to_pcm and the resulting temporary
+                file is read and then removed; otherwise audio_file is
+                read as-is and left untouched.
+        """
+        tmp_audio_file = audio_file
+        if convert_audio:
+            tmp_audio_file = preprocess_audio_file_to_pcm(audio_file)
+        try:
+            with wave.open(tmp_audio_file, "rb") as wav_data:
+                self.sample_rate = wav_data.getframerate()
+                self.audio = wav_data.readframes(wav_data.getnframes())
+        finally:
+            # Only delete the file we created; never the caller's input.
+            if convert_audio:
+                remove_pcm_audio_file(tmp_audio_file)
diff --git a/cli-client/helpers/common.py b/cli-client/helpers/common.py
index 9672edd..6e8b24a 100644
--- a/cli-client/helpers/common.py
+++ b/cli-client/helpers/common.py
@@ -125,6 +125,8 @@ def parse_csr_commandline() -> RecognizerOptions:
     options = RecognizerOptions()
     parser = argparse.ArgumentParser(description='Perform speech recognition on an audio file')
     parser.add_argument('--audio-file', '-a', help='Path to a .wav audio in 8kHz and PCM16 encoding', required=True)
+    parser.add_argument('--convert-audio', '-c', dest='convert_audio', action='store_true', default=False,
+                        help='Convert audio file from A-LAW to PCM using sox software. Used for internal testing.')
     topicGroup = parser.add_mutually_exclusive_group(required=True)
     topicGroup.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'], help='A valid topic')
     topicGroup.add_argument('--inline-grammar', '-I', help='Grammar inline as a string')
@@ -171,6 +173,7 @@ def parse_csr_commandline() -> RecognizerOptions:
     options.token_file = args.token
     options.host = args.host
     options.audio_file = args.audio_file
+    options.convert_audio = args.convert_audio
     options.language = args.language
     options.secure_channel = args.secure
     options.formatting = args.formatting
diff --git a/cli-client/recognizer_stream.py b/cli-client/recognizer_stream.py
index bc1432b..cb98e0c 100755
--- a/cli-client/recognizer_stream.py
+++ b/cli-client/recognizer_stream.py
@@ -12,7 +12,7 @@
 
 
 def process_recognition(executor: ThreadPoolExecutor, channel: grpc.Channel, options: RecognizerOptions, access_token: str):
-    audio_resource = AudioImporter(options.audio_file)
+    audio_resource = AudioImporter(options.audio_file, options.convert_audio)
     stub = recognition_pb2_grpc.RecognizerStub(channel)
     client = CSRClient(executor, stub, options, audio_resource, access_token)
     client.send_audio()