diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 508e8a93ff..7acdbb1a8b 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -6,12 +6,21 @@ from talon import Module, actions -from .keys.keys import symbol_key_words from .numbers.numbers import digits_map, scales, teens, tens from .user_settings import track_csv_list mod = Module() +# Keep support for a handful of symbols. +# We do this since the registry is intended for debug purposes only +# - & appears in application names in windows +# - . and @ are kept for intuitively converting emails into spoken forms +symbol_key_words = { + "dot": ".", + "and": "&", + "at": "@", +} + DEFAULT_MINIMUM_TERM_LENGTH = 2 EXPLODE_MAX_LEN = 3 FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+" @@ -398,7 +407,8 @@ def create_spoken_forms_from_regex(source: str, pattern: re.Pattern): For numeric pieces detected by the regex, generates both digit-wise and full spoken forms for the numbers where appropriate. """ - pieces = list(pattern.finditer(source)) + source_without_apostrophes = source.replace("'", "") + pieces = list(pattern.finditer(source_without_apostrophes)) spoken_forms = list(map(lambda x: x.group(0), pieces)) # NOTE: Order is sometimes important diff --git a/core/keys/keys.py b/core/keys/keys.py index 59855b4e97..f77c3a904d 100644 --- a/core/keys/keys.py +++ b/core/keys/keys.py @@ -101,103 +101,3 @@ def keys(m) -> str: def letters(m) -> str: "Multiple letter keys" return "".join(m.letter_list) - - -ctx = Context() - -# `punctuation_words` is for words you want available BOTH in dictation and as key names in command mode. -# `symbol_key_words` is for key names that should be available in command mode, but NOT during dictation. -punctuation_words = { - # TODO: I'm not sure why we need these, I think it has something to do with - # Dragon. Possibly it has been fixed by later improvements to talon? -rntz - "`": "`", - ",": ",", # <== these things - "back tick": "`", - "comma": ",", - # Workaround for issue with conformer b-series; see #946 - "coma": ",", - "period": ".", - "full stop": ".", - "semicolon": ";", - "colon": ":", - "forward slash": "/", - "question mark": "?", - "exclamation mark": "!", - "exclamation point": "!", - "asterisk": "*", - "hash sign": "#", - "number sign": "#", - "percent sign": "%", - "at sign": "@", - "and sign": "&", - "ampersand": "&", - # Currencies - "dollar sign": "$", - "pound sign": "£", - "hyphen": "-", - "L paren": "(", - "left paren": "(", - "R paren": ")", - "right paren": ")", -} -symbol_key_words = { - "dot": ".", - "point": ".", - "quote": "'", - "question": "?", - "apostrophe": "'", - "L square": "[", - "left square": "[", - "brack": "[", - "bracket": "[", - "left bracket": "[", - "square": "[", - "R square": "]", - "right square": "]", - "r brack": "]", - "r bracket": "]", - "right bracket": "]", - "slash": "/", - "backslash": "\\", - "minus": "-", - "dash": "-", - "equals": "=", - "plus": "+", - "grave": "`", - "tilde": "~", - "bang": "!", - "down score": "_", - "underscore": "_", - "paren": "(", - "brace": "{", - "left brace": "{", - "curly bracket": "{", - "left curly bracket": "{", - "r brace": "}", - "right brace": "}", - "r curly bracket": "}", - "right curly bracket": "}", - "angle": "<", - "left angle": "<", - "less than": "<", - "rangle": ">", - "R angle": ">", - "right angle": ">", - "greater than": ">", - "star": "*", - "hash": "#", - "percent": "%", - "caret": "^", - "amper": "&", - "pipe": "|", - "dub quote": '"', - "double quote": '"', - # Currencies - "dollar": "$", - "pound": "£", -} - -# make punctuation words also included in {user.symbol_keys} -symbol_key_words.update(punctuation_words) -ctx.lists["self.punctuation"] = punctuation_words -ctx.lists["self.symbol_key"] = symbol_key_words diff --git a/core/keys/punctuation.talon-list b/core/keys/punctuation.talon-list new file mode 100644 index 0000000000..104323dace --- /dev/null +++ b/core/keys/punctuation.talon-list @@ -0,0 +1,28 @@ +# `punctuation` is for words you want available BOTH in dictation and as key names in command mode. +list: user.punctuation +- +back tick: ` +comma: , +coma: , +period: . +full stop: . +semicolon: ; +colon: : +forward slash: / +question mark: ? +exclamation mark: ! +exclamation point: ! +asterisk: * +hash sign: # +number sign: # +percent sign: % +at sign: @ +and sign: & +ampersand: & +dollar sign: $ +pound sign: £ +hyphen: - +L paren: ( +left paren: ( +R paren: ) +right paren: ) diff --git a/core/keys/symbol_key.talon-list b/core/keys/symbol_key.talon-list new file mode 100644 index 0000000000..3c93a02de0 --- /dev/null +++ b/core/keys/symbol_key.talon-list @@ -0,0 +1,57 @@ +# symbol_key is for key names that should be available in command mode, but NOT during dictation. +list: user.symbol_key +- +dot: . +point: . +quote: "'" +question: ? +apostrophe: "'" +L square: [ +left square: [ +brack: [ +bracket: [ +left bracket: [ +square: [ +R square: ] +right square: ] +R brack: ] +R bracket: ] +right bracket: ] +slash: / +backslash: \ +minus: - +dash: - +equals: = +plus: + +grave: ` +tilde: ~ +bang: ! +down score: _ +underscore: _ +paren: ( +brace: { +L brace: { +left brace: { +curly bracket: { +left curly bracket: { +R brace: } +right brace: } +R curly bracket: } +right curly bracket: } +angle: < +left angle: < +less than: < +rangle: > +R angle: > +right angle: > +greater than: > +star: * +hash: # +percent: % +caret: ^ +amper: & +pipe: | +dub quote: '"' +double quote: '"' +dollar: $ +pound: £ diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index 609c01b17d..64846cac50 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -64,7 +64,7 @@ def test_handles_generate_subsequences(): def test_expands_special_chars(): result = actions.user.create_spoken_forms("hi $world", None, 0, True) - assert "hi dollar sign world" in result + assert "hi world" in result def test_expands_file_extensions(): result = actions.user.create_spoken_forms("hi .cs", None, 0, True) @@ -98,13 +98,33 @@ def test_explode_packed_words(): assert "read me" in result + def test_email(): + result = actions.user.create_spoken_forms("stupid@test.com", None, 0, True) + assert "stupid at test dot com" in result + + def test_symbol_removal(): + result = actions.user.create_spoken_forms("$ this_is_a-'test'", None, 0, True) + + assert "this is a test" in result + + def test_and_symbol(): + result = actions.user.create_spoken_forms("movies & tv", None, 0, True) + + assert "movies tv" in result + assert "movies and tv" in result + + def test_apostrophe_stripping(): + result = actions.user.create_spoken_forms("Sam's club", None, 0, True) + + assert "sams club" in result + def test_properties(): """ Throw some random inputs at the function to make sure it behaves itself """ def _example_generator(): - pieces = ["hi", "world", "$", ".cs", "1900"] + pieces = ["hi", "world", "dollar", ".cs", "1900"] params = list( itertools.product( [None, ["world"], ["dot"]], # Dot is from the expanded ".cs"