Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove symbols from create spoken forms & migrate to *.talon-list #1638

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3028ae3
Remove symbols from create spoken forms & migrate remaining keys list…
knausj85 Dec 14, 2024
057eb2d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
6bce77c
Update create_spoken_forms.py
knausj85 Dec 14, 2024
f291cbf
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Dec 14, 2024
ee47ffb
Update test_create_spoken_forms.py
knausj85 Dec 14, 2024
eadc481
Update create_spoken_forms.py
knausj85 Dec 14, 2024
47e2ddf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
d01743f
Update create_spoken_forms.py
knausj85 Dec 14, 2024
1e30068
preserve set functionality to remove duplicates, fix unit tests.
knausj85 Dec 14, 2024
0762dfa
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
052047c
Remove duplicates
knausj85 Dec 15, 2024
f8db161
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Dec 15, 2024
3559a73
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Dec 15, 2024
8848939
Update create_spoken_forms.py
knausj85 Jan 19, 2025
2e270a7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 19, 2025
cebcb50
Update test_create_spoken_forms.py
knausj85 Jan 19, 2025
b134361
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Jan 19, 2025
61d69b3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 19, 2025
775115d
Update create_spoken_forms.py
knausj85 Jan 19, 2025
bf534e5
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Jan 19, 2025
cb7829e
Merge branch 'main' into create-spoken-form-remote-symbols
knausj85 Jan 19, 2025
7ec878a
Update symbol_key.talon-list
knausj85 Jan 25, 2025
99c2f7f
Merge branch 'main' into create-spoken-form-remote-symbols
knausj85 Jan 25, 2025
78fbfcc
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Jan 25, 2025
59ff70e
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Jan 28, 2025
a480e62
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Feb 1, 2025
8e5bbad
Update create_spoken_forms.py
knausj85 Feb 8, 2025
f47d795
Update create_spoken_forms.py
knausj85 Feb 8, 2025
719a703
Merge branch 'main' into create-spoken-form-remote-symbols
knausj85 Feb 8, 2025
a3dd1c3
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Feb 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions core/create_spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@

from talon import Module, actions

from .keys.keys import symbol_key_words
from .numbers.numbers import digits_map, scales, teens, tens
from .user_settings import track_csv_list

mod = Module()

# Keep spoken-form support for a handful of symbols that are normally read aloud ("." as "dot", "&" as "and", "@" as "at").
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
# Maps a spoken word to the symbol it stands for.
# NOTE(review): the code that consumes this mapping is outside this view; the
# examples below are taken from the assertions in test_create_spoken_forms.py.
symbol_key_words = {
    "dot": ".",  # e.g. "stupid@test.com" -> "stupid at test dot com"
    "and": "&",  # e.g. "movies & tv" -> "movies and tv"
    "at": "@",  # e.g. "stupid@test.com" -> "stupid at test dot com"
}

DEFAULT_MINIMUM_TERM_LENGTH = 2
EXPLODE_MAX_LEN = 3
FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+"
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -398,7 +404,8 @@ def create_spoken_forms_from_regex(source: str, pattern: re.Pattern):
For numeric pieces detected by the regex, generates both digit-wise and full
spoken forms for the numbers where appropriate.
"""
pieces = list(pattern.finditer(source))
source_without_apostrophes = source.replace("'", "")
pieces = list(pattern.finditer(source_without_apostrophes))
spoken_forms = list(map(lambda x: x.group(0), pieces))

# NOTE: Order is sometimes important
Expand Down
100 changes: 0 additions & 100 deletions core/keys/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,103 +101,3 @@ def keys(m) -> str:
def letters(m) -> str:
"Multiple letter keys"
return "".join(m.letter_list)


ctx = Context()

# `punctuation_words` is for words you want available BOTH in dictation and as key names in command mode.
# `symbol_key_words` is for key names that should be available in command mode, but NOT during dictation.
punctuation_words = {
# TODO: I'm not sure why we need these, I think it has something to do with
# Dragon. Possibly it has been fixed by later improvements to talon? -rntz
"`": "`",
",": ",", # <== these things
"back tick": "`",
"comma": ",",
# Workaround for issue with conformer b-series; see #946
"coma": ",",
"period": ".",
"full stop": ".",
"semicolon": ";",
"colon": ":",
"forward slash": "/",
"question mark": "?",
"exclamation mark": "!",
"exclamation point": "!",
"asterisk": "*",
"hash sign": "#",
"number sign": "#",
"percent sign": "%",
"at sign": "@",
"and sign": "&",
"ampersand": "&",
# Currencies
"dollar sign": "$",
"pound sign": "£",
"hyphen": "-",
"L paren": "(",
"left paren": "(",
"R paren": ")",
"right paren": ")",
}
symbol_key_words = {
"dot": ".",
"point": ".",
"quote": "'",
"question": "?",
"apostrophe": "'",
"L square": "[",
"left square": "[",
"brack": "[",
"bracket": "[",
"left bracket": "[",
"square": "[",
"R square": "]",
"right square": "]",
"r brack": "]",
"r bracket": "]",
"right bracket": "]",
"slash": "/",
"backslash": "\\",
"minus": "-",
"dash": "-",
"equals": "=",
"plus": "+",
"grave": "`",
"tilde": "~",
"bang": "!",
"down score": "_",
"underscore": "_",
"paren": "(",
"brace": "{",
"left brace": "{",
"curly bracket": "{",
"left curly bracket": "{",
"r brace": "}",
"right brace": "}",
"r curly bracket": "}",
"right curly bracket": "}",
"angle": "<",
"left angle": "<",
"less than": "<",
"rangle": ">",
"R angle": ">",
"right angle": ">",
"greater than": ">",
"star": "*",
"hash": "#",
"percent": "%",
"caret": "^",
"amper": "&",
"pipe": "|",
"dub quote": '"',
"double quote": '"',
# Currencies
"dollar": "$",
"pound": "£",
}

# make punctuation words also included in {user.symbol_keys}
symbol_key_words.update(punctuation_words)
ctx.lists["self.punctuation"] = punctuation_words
ctx.lists["self.symbol_key"] = symbol_key_words
28 changes: 28 additions & 0 deletions core/keys/punctuation.talon-list
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# `punctuation` is for words you want available BOTH in dictation and as key names in command mode.
list: user.punctuation
-
back tick: `
comma: ,
coma: ,
period: .
full stop: .
semicolon: ;
colon: :
forward slash: /
question mark: ?
exclamation mark: !
exclamation point: !
asterisk: *
hash sign: #
number sign: #
percent sign: %
at sign: @
and sign: &
ampersand: &
dollar sign: $
pound sign: £
hyphen: -
L paren: (
left paren: (
R paren: )
right paren: )
56 changes: 56 additions & 0 deletions core/keys/symbol_key.talon-list
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# `symbol_key` is for key names that should be available in command mode, but NOT during dictation.
list: user.symbol_key
-
dot: .
point: .
quote: "'"
question: ?
apostrophe: "'"
L square: [
left square: [
brack: [
bracket: [
left bracket: [
square: [
R square: ]
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
right square: ]
r brack: ]
r bracket: ]
right bracket: ]
slash: /
backslash: \
minus: -
dash: -
equals: =
plus: +
grave: `
tilde: ~
bang: !
down score: _
underscore: _
paren: (
brace: {
left brace: {
curly bracket: {
left curly bracket: {
r brace: }
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
right brace: }
r curly bracket: }
right curly bracket: }
angle: <
left angle: <
less than: <
rangle: >
R angle: >
right angle: >
greater than: >
star: *
hash: #
percent: %
caret: ^
amper: &
pipe: |
dub quote: '"'
double quote: '"'
dollar: $
pound: £
24 changes: 22 additions & 2 deletions test/test_create_spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_handles_generate_subsequences():
def test_expands_special_chars():
result = actions.user.create_spoken_forms("hi $world", None, 0, True)

assert "hi dollar sign world" in result
assert "hi world" in result

def test_expands_file_extensions():
result = actions.user.create_spoken_forms("hi .cs", None, 0, True)
Expand Down Expand Up @@ -98,13 +98,33 @@ def test_explode_packed_words():

assert "read me" in result

def test_email():
    """An e-mail address yields a form that speaks "@" as "at" and "." as "dot"."""
    expected_form = "stupid at test dot com"
    spoken_forms = actions.user.create_spoken_forms("stupid@test.com", None, 0, True)
    assert expected_form in spoken_forms

def test_symbol_removal():
    """A source full of symbols still yields a form containing only the plain words."""
    spoken_forms = actions.user.create_spoken_forms(
        "$ this_is_a-'test'", None, 0, True
    )

    assert "this is a test" in spoken_forms

def test_and_symbol():
    """An ampersand yields both a form without it and a form that speaks it as "and"."""
    spoken_forms = actions.user.create_spoken_forms("movies & tv", None, 0, True)

    # Checked in the same order as before: the symbol-free form first,
    # then the form with "&" spoken as "and".
    assert "movies tv" in spoken_forms
    assert "movies and tv" in spoken_forms

def test_apostrophe_stripping():
    """A possessive apostrophe is dropped, so "Sam's" yields the single word "sams"."""
    expected_form = "sams club"
    spoken_forms = actions.user.create_spoken_forms("Sam's club", None, 0, True)

    assert expected_form in spoken_forms

def test_properties():
"""
Throw some random inputs at the function to make sure it behaves itself
"""

def _example_generator():
pieces = ["hi", "world", "$", ".cs", "1900"]
pieces = ["hi", "world", "dollar", ".cs", "1900"]
params = list(
itertools.product(
[None, ["world"], ["dot"]], # Dot is from the expanded ".cs"
Expand Down
Loading