-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathsettings.ini
73 lines (60 loc) · 4.3 KB
/
settings.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
[general]
# General settings: which corpora to read, which ASR systems to query, and how transcriptions are handled.

# data_folders specifies where the speech files are located.
# Several corpora may be given as a comma-separated list, e.g.: data_folders = ../data/example_dataset1,../data/example_dataset2
# Look at the folder ../data/example_dataset_en to see how a corpus of speech files and their gold transcriptions should be formatted.
# Speech files may have any name. The gold transcription must be in the same folder and use the speech file's base name (without its extension) with `_gold.txt` appended. E.g., if a speech file is named `hello.mp3`, the gold transcription should be in a file named `hello_gold.txt`.
data_folders = ../data/example_dataset_en

# The commented-out line below lists all supported ASRs (comma-separated). google is the only ASR that doesn't require any credentials.
# NOTE(review): the [credentials] section also defines speechmatics_id and speechmatics_token, but speechmatics is not in this list - confirm whether it is supported.
#asr_systems = amazon,google,googlecloud,houndify,ibm,microsoft,wit
asr_systems = google

# NOTE(review): the five settings below are not documented in this file. The descriptions are inferred from the key names only - confirm against the code that reads them.
# overwrite_non_empty_transcriptions / overwrite_empty_transcriptions: presumably whether an already existing predicted transcription (non-empty / empty, respectively) is requested again and replaced.
# transcribe: presumably whether the speech files are sent to the ASR systems.
# evaluate_transcriptions: presumably whether the predicted transcriptions are scored against the gold transcriptions.
# delay_in_seconds_between_transcriptions: presumably a pause, in seconds, between two consecutive transcription requests.
overwrite_non_empty_transcriptions = true
overwrite_empty_transcriptions = true
transcribe = true
evaluate_transcriptions = true
delay_in_seconds_between_transcriptions = 0

# speech_file_type should be auto, flac, ogg, mp3 or wav.
# If you choose flac, ogg, or mp3, you need to install the Python package pydub (https://github.com/jiaaro/pydub).
# auto means that the speech file type is detected automatically: the type that has the most speech files in the data folder is chosen. E.g., if the data folder contains 10 .mp3 and 25 .flac files, flac is chosen.
speech_file_type = auto

# The supported speech languages depend on the ASR APIs. en-US is supported by all ASR APIs.
# See https://cloud.google.com/speech/docs/languages for some examples of what values speech_language may take.
# Formally, speech_language should be a BCP-47 identifier, which is a language code typically of the form language-region, where language is encoded using ISO 639-1 alpha-2 (https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) and region refers to the region where the language is spoken, encoded using ISO 3166-1 alpha-2 (https://datahub.io/core/country-list).
# speech_language is ignored by wit.
speech_language = en-US

# Encoding for the gold and predicted transcriptions.
gold_transcription_encoding = UTF-8
predicted_transcription_encoding = UTF-8

[credentials]
# All ASR APIs except google require credentials for the user to be able to query them.
# Replace the "INSERT ... HERE" placeholders below with your own values. Presumably only the ASRs listed in asr_systems ([general] section) need real credentials - confirm.
# Credentials are stored here in plain text: do not commit or share this file once real values have been filled in.

# Amazon Lex speech recognition API. See https://aws.amazon.com/lex/ to get the credentials.
# To use Amazon Lex, you need to install the Python package boto3: pip install boto3
amazon_bot_name = INSERT BOT NAME HERE
amazon_bot_alias = INSERT BOT ALIAS HERE
amazon_user_id = INSERT USER ID HERE
amazon_access_key_id = INSERT ACCESS KEY ID HERE
amazon_secret_access_key = INSERT SECRET ACCESS KEY HERE
amazon_region = INSERT REGION HERE

# Microsoft Bing Voice Recognition API keys are 32-character lowercase hexadecimal strings.
# Get the Bing key on https://azure.microsoft.com/en-us/services/cognitive-services/speech (demo: https://youtu.be/G4pt76X0vsI)
# NOTE(review): presumably this is the key used when asr_systems contains microsoft - confirm.
bing_key = INSERT BING API KEY HERE

# Path to the JSON credentials file for the Google Cloud Speech API.
# Get the JSON file on https://console.developers.google.com/apis/api/speech.googleapis.com
# You also need to install Google's API Client Library for Python, e.g. by running: pip install google-api-python-client
google_cloud_speech_credentials_filepath = googlecloud.json

# Houndify client IDs are Base64-encoded strings.
# Houndify client keys are Base64-encoded strings.
# Get the ID and the key from https://www.houndify.com/dashboard
houndify_client_id = INSERT HOUNDIFY CLIENT ID HERE
houndify_client_key = INSERT HOUNDIFY CLIENT KEY HERE

# IBM Speech to Text usernames are strings of the form XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
# IBM Speech to Text passwords are mixed-case alphanumeric strings.
# Get the username and password on https://console.bluemix.net/catalog/services/speech-to-text (demo: https://youtu.be/eL1Cif-mNhs)
ibm_username = INSERT IBM API USERNAME HERE
ibm_password = INSERT IBM API PASSWORD HERE

# Speechmatics IDs are integers.
# Speechmatics tokens are mixed-case alphanumeric strings.
# Get the ID and the token on https://www.speechmatics.com/ (demo: https://youtu.be/x7R5xbWROxM)
# NOTE(review): speechmatics is not in the list of supported ASRs given next to asr_systems in the [general] section - confirm whether these two keys are still used.
speechmatics_id = INSERT SPEECHMATICS ID HERE
speechmatics_token = INSERT SPEECHMATICS TOKEN HERE

# Wit.ai keys are 32-character uppercase alphanumeric strings.
# Get the key on https://wit.ai (demo: https://youtu.be/TFPaL9DbQgM)
wit_ai_key = INSERT WIT.AI API KEY HERE