Skip to content

Commit

Permalink
Merge pull request #49 from verbio-technologies/feature/SC2_1216_add_…
Browse files Browse the repository at this point in the history
…alaw_support

Added support for A-law audio so that audio files from Barrayar can be used.
  • Loading branch information
anikocharyan authored Apr 10, 2024
2 parents 2b9b952 + 7eb5739 commit 1ef42a2
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
22 changes: 19 additions & 3 deletions cli-client/helpers/audio_importer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
import os
import subprocess
import wave

def preprocess_audio_file_to_pcm(audio_file: str):
    """Convert an audio file to signed-integer PCM WAV using the ``sox`` CLI.

    The converted copy is written to the current working directory as
    ``./<basename of audio_file>_tmp.wav``. This function does not delete it;
    the caller is expected to remove it when done.

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        Path of the converted temporary WAV file.

    Raises:
        FileNotFoundError: If the ``sox`` executable cannot be found.
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    tmp_audio_file = "./" + os.path.basename(audio_file) + "_tmp.wav"
    # Pass the arguments as a list instead of splitting a command string, so a
    # path containing whitespace reaches sox as one argument.
    command = ["sox", audio_file, "-e", "signed-integer", tmp_audio_file]
    # check=True reports a failed conversion here rather than later, when the
    # missing or invalid output file would be opened.
    subprocess.run(command, check=True)
    return tmp_audio_file


def remove_pcm_audio_file(audio_file: str):
    """Delete the file at ``audio_file``.

    Used to clean up the temporary converted copy once it has been read.

    Raises:
        OSError: If the path cannot be removed (for example, it does not exist).
    """
    os.unlink(audio_file)


class AudioImporter:
    """Load a WAV file fully into memory.

    Attributes set by the constructor:
        sample_rate: Frame rate read from the WAV header.
        audio: All audio frames of the file as raw bytes.
    """

    def __init__(self, audio_file: str, convert_audio: bool = False):
        """Read ``audio_file``, optionally converting it to PCM first.

        Args:
            audio_file: Path of the audio file to load.
            convert_audio: When true, the file is first converted with
                ``preprocess_audio_file_to_pcm`` and the temporary converted
                copy is read, then deleted. Defaults to ``False`` so existing
                one-argument callers keep working.

        Raises:
            wave.Error: If the file that is read is not a WAV file the
                ``wave`` module can parse.
            OSError: If the file cannot be opened.
        """
        wav_path = audio_file
        if convert_audio:
            wav_path = preprocess_audio_file_to_pcm(audio_file)
        try:
            # Both context managers close their handles even if reading fails.
            with open(wav_path, "rb") as wav_file, wave.open(wav_file, "rb") as wav_data:
                self.sample_rate = wav_data.getframerate()
                self.audio = wav_data.readframes(wav_data.getnframes())
        finally:
            # Remove the temporary copy on failure as well as success. The
            # existence check keeps a missing output from masking the original
            # error.
            if convert_audio and os.path.exists(wav_path):
                remove_pcm_audio_file(wav_path)
3 changes: 3 additions & 0 deletions cli-client/helpers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ def parse_csr_commandline() -> RecognizerOptions:
options = RecognizerOptions()
parser = argparse.ArgumentParser(description='Perform speech recognition on an audio file')
parser.add_argument('--audio-file', '-a', help='Path to a .wav audio in 8kHz and PCM16 encoding', required=True)
parser.add_argument('--convert-audio', '-c', help='Convert audio file to from A-LAW to PCM using sox software. Used for internal testing.',
required=False, default=False, dest='convert_audio', action='store_true')
topicGroup = parser.add_mutually_exclusive_group(required=True)
topicGroup.add_argument('--topic', '-T', choices=['GENERIC', 'TELCO', 'BANKING', 'INSURANCE'], help='A valid topic')
topicGroup.add_argument('--inline-grammar', '-I', help='Grammar inline as a string')
Expand Down Expand Up @@ -172,6 +174,7 @@ def parse_csr_commandline() -> RecognizerOptions:
options.token_file = args.token
options.host = args.host
options.audio_file = args.audio_file
options.convert_audio = args.convert_audio
options.language = args.language
options.secure_channel = args.secure
options.formatting = args.formatting
Expand Down
2 changes: 1 addition & 1 deletion cli-client/recognizer_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def process_recognition(executor: ThreadPoolExecutor, channel: grpc.Channel, options: RecognizerOptions, access_token: str):
audio_resource = AudioImporter(options.audio_file)
audio_resource = AudioImporter(options.audio_file, options.convert_audio)
stub = recognition_pb2_grpc.RecognizerStub(channel)
client = CSRClient(executor, stub, options, audio_resource, access_token)
client.send_audio()
Expand Down

0 comments on commit 1ef42a2

Please sign in to comment.