diff --git a/Example/Rules/Braille/definitions.yaml b/Example/Rules/Braille/definitions.yaml deleted file mode 100644 index 5aa5004..0000000 --- a/Example/Rules/Braille/definitions.yaml +++ /dev/null @@ -1,83 +0,0 @@ ---- -# Shared definitions among braille rules - -# some rules are need for canonicalization so need to be picked up by languages also, even though not used in the rules file -# we could pull them out and have a smaller 'definitions-shared.yaml' file, but it doesn't save much and adds more file i/o -- include: "../definitions.yaml" - - # these come from SRE's ts\semantic_tree\semantic_attr.ts file -- NemethComparisonOperators: { - "=", "⁼", "₌", "∼", "∽", "≃", "≅", "≈", "≊", "≋", "≌", "≍", "≎", - "≑", "≒", "≓", "≔", "≕", "≖", "≗", "≘", "≙", "≚", "≛", "≜", "≝", "≞", - "≟", "≡", "≣", "⧤", "⩦", "⩮", "⩯", "⩰", "⩱", "⩲", "⩳", "⩴", "⩵", "⩶", - "⩷", "⩸", "⋕", "⩭", "⩪", "⩫", "⩬", "﹦", "=", "⊜", "∷", - - "<", ">", "≁", "≂", "≄", "≆", "≇", "≉", "≏", "≐", "≠", "≢", "≤", "≥", - "≦", "≧", "≨", "≩", "≪", "≫", "≬", "≭", "≮", "≯", "≰", "≱", "≲", "≳", - "≴", "≵", "≶", "≷", "≸", "≹", "≺", "≻", "≼", "≽", "≾", "≿", "⊀", "⊁", - "⋖", "⋗", "⋘", "⋙", "⋚", "⋛", "⋜", "⋝", "⋞", "⋟", "⋠", "⋡", "⋦", "⋧", - "⋨", "⋩", "⩹", "⩺", "⩻", "⩼", "⩽", "⩾", "⩿", "⪀", "⪁", "⪂", "⪃", "⪄", - "⪅", "⪆", "⪇", "⪈", "⪉", "⪊", "⪋", "⪌", "⪍", "⪎", "⪏", "⪐", "⪑", "⪒", - "⪓", "⪔", "⪕", "⪖", "⪗", "⪘", "⪙", "⪚", "⪛", "⪜", "⪝", "⪞", "⪟", "⪠", - "⪡", "⪢", "⪣", "⪤", "⪥", "⪦", "⪧", "⪨", "⪩", "⪪", "⪫", "⪬", "⪭", "⪮", - "⪯", "⪰", "⪱", "⪲", "⪳", "⪴", "⪵", "⪶", "⪷", "⪸", "⪹", "⪺", "⪻", "⪼", - "⫷", "⫸", "⫹", "⫺", "⧀", "⧁", "﹤", "﹥", "<", ">", - - "⋢", "⋣", "⋤", "⋥", "⊂", "⊃", "⊄", "⊅", "⊆", "⊇", "⊈", "⊉", "⊊", "⊋", - "⊏", "⊐", "⊑", "⊒", "⪽", "⪾", "⪿", "⫀", "⫁", "⫂", "⫃", "⫄", "⫅", "⫆", - "⫇", "⫈", "⫉", "⫊", "⫋", "⫌", "⫍", "⫎", "⫏", "⫐", "⫑", "⫒", "⫓", "⫔", - "⫕", "⫖", "⫗", "⫘", "⋐", "⋑", "⋪", "⋫", "⋬", "⋭", "⊲", "⊳", "⊴", "⊵", - - "∈", "∊", "⋲", "⋳", "⋴", "⋵", "⋶", "⋷", "⋸", "⋹", "⋿", - - "∋", "∍", "⋺", "⋻", "⋼", 
"⋽", "⋾", - - "⊢", "⊣", "⊦", "⊧", "⊨", "⊩", "⊪", "⊫", "⊬", "⊭", "⊮", "⊯", "⫞", "⫟", - "⫠", "⫡", "⫢", "⫣", "⫤", "⫥", "⫦", "⫧", "⫨", "⫩", "⫪", "⫫", "⫬", "⫭", - - "←", "↑", "→", "↓", "↔", "↕", "↖", "↗", "↘", "↙", "↚", "↛", "↜", "↝", "↞", - "↟", "↠", "↡", "↢", "↣", "↤", "↥", "↦", "↧", "↨", "↩", "↪", "↫", "↬", "↭", - "↮", "↯", "↰", "↱", "↲", "↳", "↴", "↵", "↶", "↷", "↸", "↹", "↺", "↻", "⇄", - "⇅", "⇆", "⇇", "⇈", "⇉", "⇊", "⇍", "⇎", "⇏", "⇐", "⇑", "⇒", "⇓", "⇔", "⇕", - "⇖", "⇗", "⇘", "⇙", "⇚", "⇛", "⇜", "⇝", "⇞", "⇟", "⇠", "⇡", "⇢", "⇣", "⇤", - "⇥", "⇦", "⇧", "⇨", "⇩", "⇪", "⇫", "⇬", "⇭", "⇮", "⇯", "⇰", "⇱", "⇲", "⇳", - "⇴", "⇵", "⇶", "⇷", "⇸", "⇹", "⇺", "⇻", "⇼", "⇽", "⇾", "⇿", "⌁", "⌃", "⌄", - "⌤", "⎋", "➔", "➘", "➙", "➚", "➛", "➜", "➝", "➞", "➟", "➠", "➡", "➢", "➣", - "➤", "➥", "➦", "➧", "➨", "➩", "➪", "➫", "➬", "➭", "➮", "➯", "➱", "➲", "➳", - "➴", "➵", "➶", "➷", "➸", "➹", "➺", "➻", "➼", "➽", "➾", "⟰", "⟱", "⟲", "⟳", - "⟴", "⟵", "⟶", "⟷", "⟸", "⟹", "⟺", "⟻", "⟼", "⟽", "⟾", "⟿", "⤀", "⤁", "⤂", - "⤃", "⤄", "⤅", "⤆", "⤇", "⤈", "⤉", "⤊", "⤋", "⤌", "⤍", "⤎", "⤏", "⤐", "⤑", - "⤒", "⤓", "⤔", "⤕", "⤖", "⤗", "⤘", "⤙", "⤚", "⤛", "⤜", "⤝", "⤞", "⤟", "⤠", - "⤡", "⤢", "⤣", "⤤", "⤥", "⤦", "⤧", "⤨", "⤩", "⤪", "⤭", "⤮", "⤯", "⤰", "⤱", - "⤲", "⤳", "⤴", "⤵", "⤶", "⤷", "⤸", "⤹", "⤺", "⤻", "⤼", "⤽", "⤾", "⤿", "⥀", - "⥁", "⥂", "⥃", "⥄", "⥅", "⥆", "⥇", "⥈", "⥉", "⥰", "⥱", "⥲", "⥳", "⥴", "⥵", - "⥶", "⥷", "⥸", "⥹", "⥺", "⥻", "⦳", "⦴", "⦽", "⧪", "⧬", "⧭", "⨗", "⬀", "⬁", - "⬂", "⬃", "⬄", "⬅", "⬆", "⬇", "⬈", "⬉", "⬊", "⬋", "⬌", "⬍", "⬎", "⬏", "⬐", - "⬑", "⬰", "⬱", "⬲", "⬳", "⬴", "⬵", "⬶", "⬷", "⬸", "⬹", "⬺", "⬻", "⬼", "⬽", - "⬾", "⬿", "⭀", "⭁", "⭂", "⭃", "⭄", "⭅", "⭆", "⭇", "⭈", "⭉", "⭊", "⭋", "⭌", - "←", "↑", "→", "↓", - # Harpoons - "↼", "↽", "↾", "↿", "⇀", "⇁", "⇂", "⇃", "⇋", "⇌", "⥊", "⥋", "⥌", "⥍", "⥎", - "⥏", "⥐", "⥑", "⥒", "⥓", "⥔", "⥕", "⥖", "⥗", "⥘", "⥙", "⥚", "⥛", "⥜", "⥝", - "⥞", "⥟", "⥠", "⥡", "⥢", "⥣", "⥤", "⥥", "⥦", "⥧", "⥨", "⥩", "⥪", "⥫", "⥬", - "⥭", "⥮", "⥯", "⥼", "⥽", "⥾", 
"⥿", - - # chars not from SRE - "∝", "∉", "∶", - - "∥", "⟂", "⊥", - } - - - # no space is used when these chars "apply" to ellipsis or long dash (lesson 2.13, differs slightly from rule 42) - # can't do 42(ii) which is braille indicators because they are not input (they are output) -- handle by regexp later - # also used for comparison operators (Rule 151) -- NemethPunctAndOpenBeforeSymbols: { - "‘", "“", ",", # punctuation -- 42(i) - "(", "[", "{", "⟨", "⟦", "⦃", "⟪", "⌈", "⌊", # 42(iii) open - } - -- NemethPunctAndOpenAfterSymbols: { - "'", ":", ",", "!", ".", "?", "’", "”", ";", # punctuation -- 42(i) - ")", "]", "}", "⟩", "⟧", "⦄", "⟫", "⌉", "⌋", # 42(iii) close - } diff --git a/Example/Rules/Intent/general.yaml b/Example/Rules/Intent/general.yaml deleted file mode 100644 index 4d91f14..0000000 --- a/Example/Rules/Intent/general.yaml +++ /dev/null @@ -1,709 +0,0 @@ ---- -- - name: simple-number-set - tag: mi - match: "string-length(text())=1 and translate(text(), 'ℂℕℚℝℤ', '')=''" # tricky way to match any of the letters instead of using 'or' - replace: - - intent: - name: number-sets - children: [x: "text()"] - -- - name: sup-number-set - tag: msup - match: - - "*[1][string-length(text())=1 and translate(., 'ℂℕℚℝℤ', '')=''] and" - - "*[2][string-length(text())=1 and (text()='+' or text()='-' or self::m:mn)]" - replace: - - intent: - name: number-sets - children: - - x: "*[1]" - - x: "*[2]" - -- - name: real-part - tag: mi - match: "(text()='Re' or text()='re' or text()='ℜ') and following-sibling::*[1][text()='\u2061']" # before function apply - replace: - - intent: - name: real-part - children: [] - -- - name: imaginary-part - tag: mi - match: "(text()='Im' or text()='im' or text()='ℑ') and following-sibling::*[1][text()='\u2061']" # before function apply - replace: - - intent: - name: imaginary-part - children: [] - -- - name: roman_numeral - tag: [mi, mn, mtext] - match: "@data-number" - replace: - - intent: - name: mn - children: [x: "@data-number"] - -- - 
name: positive-or-negative - tag: mrow - match: "count(*)=2 and *[1][self::m:mo][text()='+' or text()='-']" - variables: - - AtLeftEdge: "EdgeNode(., 'left', 'mtd')" - - AtLeftEdgeOfMTD: "name($AtLeftEdge) = 'mtd'" - - ContinuedColumn: "$TableProperty='lines' or $TableProperty='system-of-equations' or $TableProperty='piecewise'" - - InContinuedRow: "parent::m:mtd[parent::m:mtr[@data-intent-property[contains(., ':continued-row:')]]]" - - FirstColumn: "$AtLeftEdge/../*[1]=$AtLeftEdge" # first child of mtr... - replace: - - test: - # don't use positive/negative if a continued data cell - if: "$AtLeftEdgeOfMTD and $ContinuedColumn and (not($FirstColumn) or $InContinuedRow)" - then: - - intent: - xpath-name: "IfThenElse(*[1][self::m:mo][text()='+'], 'plus', 'minus')" - children: [x: "*[2]"] - else: - - intent: - xpath-name: "IfThenElse(*[1][self::m:mo][text()='+'], 'positive', 'negative')" - children: [x: "*[2]"] - - -- - # (a,b) has many interpretations; (a, b] (etc) have fewer interpretations. - # as an interval, it represents a set and hence a clue that it is an interval is that a set operator comes - # before or after it. '=' is also common. - # They also commonly stand by themselves, but so does the interpretation as a point or gcd, so we don't include (xxx, yyy) in the inference - name: interval - tag: mrow - match: - - "count(*)=3 and " - # FIX: consider adding ]...[ versions - - "(*[1][text()='(' or text()='['] and *[3][text()=')' or text()=']']) and" # match bracketing - - "(*[2][count(*)=3 and *[2][text()=',']]) and" # inside should have ',' - - "not(ancestor::*[IsBracketed(., '{', '}')]) and " # intervals are not part of set notation (e.g, { (x,y)∈L | ...}) - # FIX: if both the first and third children of *[2] are mn, then make sure first <= third - - "(" - - " not(IsBracketed(., '(', ')')) or " # (.,.) 
is very ambiguous -- need more clues - - " $ClearSpeak_Paren = 'Interval' or " # pref is set - - " *[2]/*[1][contains(., '∞')] or " # starts with infinity - - " *[2]/*[3][contains(., '∞')] or " # ends with infinity - - " preceding-sibling::*[1][self::m:mo and ( text()='=' or IsInDefinition(., 'SubsetOperators') )] or " # context hint - - " following-sibling::*[1][self::m:mo and ( text()='=' or IsInDefinition(., 'SubsetOperators') )]" # context hint - - ")" - replace: - - test: - - if: "*[1][text()='('] and *[3][text()=')']" - then: - - intent: - name: "open-interval" - children: [x: "*[2]/*[1]", x: "*[2]/*[3]"] - - else_if: "*[1][text()='('] and *[3][text()=']']" - then: - - intent: - name: "open-closed-interval" - children: [x: "*[2]/*[1]", x: "*[2]/*[3]"] - - else_if: "*[1][text()='['] and *[3][text()=']']" - then: - - intent: - name: "closed-interval" - children: [x: "*[2]/*[1]", x: "*[2]/*[3]"] - else: - - intent: - name: "closed-open-interval" - children: [x: "*[2]/*[1]", x: "*[2]/*[3]"] - -- - name: binomial-frac - tag: mrow - match: "IsBracketed(., '(', ')') and *[2][self::m:mfrac][translate(@linethickness, 'abcdefghijklmnopqrstuvwxyz', '')=0]" - replace: - - intent: - name: binomial - children: - - x: "*[2]/*[1]" - - x: "*[2]/*[2]" - -- - # C with pre-subscript 'n' and post-subscript 'm'. 
The 'n' can be either in the sub or super position - # See https://en.wikipedia.org/wiki/Binomial_coefficient#History_and_notation - name: binomial-mmultiscripts - tag: mmultiscripts - variables: - - Prescripts: "m:mprescripts/following-sibling::*" - - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]" - match: - - "*[1][self::m:mi and text()='C'] and" - - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and " - - "count($Postscripts)=2 and $Postscripts[2][self::m:none]" - replace: - - intent: - name: binomial - children: - - x: "IfThenElse(name($Prescripts[2])='none', $Prescripts[1], $Prescripts[2])" - - x: "$Postscripts[1]" - - -- - name: binomial-msub # C_{n,k} - tag: msub - match: - - "*[1][self::m:mi and text()='C'] and" - - "*[2][self::m:mrow and count(*)=3 and *[2][.=',']]" - replace: - - intent: - name: binomial - children: - - x: "*[2]/*[1]" - - x: "*[2]/*[3]" - -- - # P with pre-sub or superscript 'n' and post-subscript 'k' (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n) - name: permutation-mmultiscripts - tag: mmultiscripts - variables: - - Prescripts: "m:mprescripts/following-sibling::*" - - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]" - match: - - "*[1][self::m:mi and text()='P'] and" - - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and " - - "count($Postscripts)=2 and $Postscripts[2][self::m:none]" - replace: - - intent: - name: permutation-symbol - children: - - test: - if: "$Prescripts[1][self::m:none]" - then: [x: "$Prescripts[2]"] - else: [x: "$Prescripts[1]"] - - x: "$Postscripts[1]" - -- - # P with superscript 'n' and subscript 'k' (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n) - name: permutation-msubsup - tag: msubsup - match: - - "*[1][self::m:mi and text()='P']" - replace: - - intent: - name: permutation-symbol - children: - - x: "*[3]" - - x: "*[2]" - -# rules on scripted vertical 
bars ('evaluated at') -- - name: evaluated-at-msub - tag: mrow - match: "count(*)=2 and *[2][self::m:msub and *[1][self::m:mo][text()='|']]" - replace: - - intent: - name: "evaluate" - children: - - x: "*[1]" - - x: "*[2]/*[2]" - -- - name: evaluated-at-msubsup - tag: mrow - match: "count(*)=2 and *[2][self::m:msubsup and *[1][self::m:mo][text()='|']]" - replace: - - intent: - name: "evaluate" - children: - - x: "*[1]" - - x: "*[2]/*[2]" - - x: "*[2]/*[3]" - -# it is also used outside of brackets -- - name: bracketed-evaluated-at - tag: msubsup - match: "IsBracketed(*[1], '[', ']')" - replace: - - intent: - name: "evaluate" - children: - - x: "*[1]/*[1]" - - x: "*[2]" - - x: "*[3]" - - -- - # vertical bars otherwise - # could also be cardinality, length, ... - name: absolute-value - tag: mrow - match: "IsBracketed(., '|', '|')" - replace: - - intent: - name: "absolute-value" - children: [x: "*[2]"] - -- - name: default - tag: msqrt - match: "." - replace: - - intent: - name: "square-root" - children: [x: "*[1]"] - - -- - name: sqrt - tag: mroot - match: "*[2][self::m:mn and text()='2']" - replace: - - intent: - name: "square-root" - children: [x: "*[1]"] - -- - name: default - tag: mroot - match: "." 
- replace: - - intent: - name: "root" - children: - - x: "*[1]" - - x: "*[2]" - - -- - name: log-base - tag: msub - match: - - "*[1][self::m:mi][text()='log']" - replace: - - intent: - name: log-base - children: [x: "*[2]"] - -- - name: log-base-power - tag: msubsup - match: - - "*[1][self::m:mi][text()='log']" - replace: - - intent: - name: log-base-power - children: - - x: "*[2]" - - x: "*[3]" - -- - name: bigop - tag: [msub, munder] - match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')" - replace: - - intent: - name: "large-op" # Fix: the name in the spreadsheet needs updating/fixing - children: - - x: "*[1]" - - x: "*[2]" -- - name: limit - tag: [msub, munder] - variables: [NoSpacesBase: "translate(*[1], ' \u00A0\u2004\u2005\u2006\u2007\u2008\u2009\u200A','')"] # Sometimes these have (weird) spaces in them - match: "*[1][$NoSpacesBase='lim' or $NoSpacesBase='limsup' or $NoSpacesBase='liminf']" - replace: - - intent: - name: "limit" - children: - - x: "$NoSpacesBase" - - x: "*[2]" - -- - name: modified-var - tag: mover - # breve, check, dot, double-dot, triple-dot, quadruple-dot, grave, hat, tilde, line/bar - match: "*[1][self::m:mi] and *[2][translate(., '\u0306\u030c.\u00A8\u02D9\u20DB\u20DC`^~→¯_', '')='']" - replace: - - intent: - name: "modified-variable" - children: - - x: "*[1]" - - x: "*[2]" - - -- - # this captures the output for the mhchem's "<=>", "<<=>", and "<=>>" output (there are no Unicode arrows for them) - # this isn't a perfect match, but should be good enough and allows merging all three (see github.com/NSoiffer/MathCAT/issues/60) - name: chemistry-mhchem-equilibrium-arrow - tag: mover - match: - - "*[1][substring(., 1, 1)='↽'] and" - - "*[2][substring(., string-length(), 1)='⇀']" - replace: - - intent: - name: "chemical-arrow-operator" - children: - - test: - if: "*[1][self::m:mrow]" - then_test: - if: "*[2][self::m:mrow]" - then: [t: "\U0001f8d2"] # 8-digit \U escape is required for code points above U+FFFF; this is currently unassigned and may get used by UTC 
at some point (<=>) - else: [t: "\U0001f8d4"] # this is currently unassigned and may get used by UTC at some point (<<=>) - else: [t: "\U0001f8d3"] # this is currently unassigned and may get used by UTC at some point (<=>>) - -- - # need to include the name of the element so the rules know whether to say "sub" or "super" - name: chemistry - tag: [msub, msup] - match: "@data-chem-formula" - replace: - - intent: - name: "chemical-formula" - children: # have to list individual children because "*" results in an internal error (children aren't flat) - - x: "name(.)" - - x: "*[1]" - - x: "*[2]" - - -- - name: chemistry-prescripts - tag: mmultiscripts - match: "@data-chem-formula" - replace: - - test: - - if: "count(*)=4 or count(*)=6" - then: - - intent: - name: "chemical-nuclide" - children: - - x: "*" - else: # FIX: what other cases are there??? - - intent: - name: "chemical-formula" - children: - - x: "*" - - -- - name: chemical-element - tag: [mi, mtext] - match: "@data-chem-element" - replace: - - intent: - name: "chemical-element" - children: - - x: "text()" - -- - name: chemical-formula-op - tag: [mo] - match: "@data-chem-formula-op" - replace: - - intent: - name: "chemical-formula-operator" - children: - - x: "text()" - -- - name: chemical-arrow-op - tag: [mo] - # FIX: this is a duplicate of the list in chemistry.rs. 
Probably should pull out and add to definitions.yaml - match: "@data-chem-equation-op and translate(., '→➔←⟶⟵⤻⇋⇌↿↾⇃⇂⥮⥯⇷⇸⤉⤈⥂⥄', '')=''" - replace: - - intent: - name: "chemical-arrow-operator" - children: - - x: "text()" - -- - name: chemical-equation-op - tag: [mo] - match: "@data-chem-equation-op" - replace: - - intent: - name: "chemical-equation-operator" - children: - - x: "text()" - -- - name: chemistry-state - tag: mrow - match: - - "(@data-chem-formula or @data-chem-equation) and" - - "IsBracketed(., '(', ')') and" - - "*[2][text()='s' or text()='l' or text()='g' or text()='aq']" - replace: - - intent: - name: "chemical-state" - children: - - x: "*[2]" - - -# This needs to be before the simple "x prime" rule -# minutes/seconds or feet/inches -# If ' or " follows a number, then it is not "prime", but is a unit -# Note the ASCII ' and " are converted to prime during canonicalization if in a superscript -# Handles single, double, primes, and also double quote, which don't have to be in an msup -# The rules are: -# 1. If the prime follows a degree sign with a number (in various forms) or letter after it, -# then it is minutes/seconds -# 2. Else, if it follows a *number* (in various forms), then it feet/inches -# 3. Else it is 'prime' -# any ? 
( count(match)==2 && -# (name(match)=="mrow" || name(match)=="msup") && -# (MatchString($1, "mo", "'") || MatchString($1, "mo", "′") || // apostrophe or prime -# MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) && // double quote or double prime -# ( -# // '1': degree sign check -# ( has_previous(match) && -# ( (MatchString($1, "mo", "′") && -# ( name(previous(match))=="msup" || -# (name(previous(match))=="mrow" && count(previous(match))==2)) && -# MatchString(previous(match)[1], "mo", "°")) || -# ( has_previous(previous(match)) && -# (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) && -# ( name(previous(match, 2))=="msup" || -# (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) && -# MatchString(previous(match, 2)[1], "mo", "°")) -# ) -# ) || -# -# // '2': number or numeric fraction in front -# name($0)=="mn" || -# (name($0)=="mfrac" && name($0[0])=="mn" && name($0[1])=="mn") -# ) -# ) -# => structure( -# $0, -# UIWord([$1[0],$1[-1]]) -# {ruleRef="RR_unitsBase"; -# singular=MatchString($0, "mn", "1"); -# // if this follows a degree sign, then it is an angle notation (min/secs) regardless of the pref setting -# // we have to either look at the previous entry for mins or the one prior to that for secs -# pref= ( has_previous(match) && -# ( (MatchString($1, "mo", "′") && -# ( name(previous(match))=="msup" || -# (name(previous(match))=="mrow" && count(previous(match))==2)) && -# MatchString(previous(match)[1], "mo", "°")) || -# ( has_previous(previous(match)) && -# (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) && -# ( name(previous(match, 2))=="msup" || -# (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) && -# MatchString(previous(match, 2)[1], "mo", "°")) -# ) -# ) ? "Angle" : "Length"; -# } -# ); - -# Pseudo-script characters are characters such as "degree sign" ('°') that are raised but in MathML should be in a superscript. 
-# They are not spoken as if in a superscript (e.g "x degrees", not "x superscripts degrees") - -- - name: skip-super - tag: [msup, msubsup] - match: "*[last()][translate(., '′″‴⁗†‡°*', '')='']" - replace: - - intent: - name: "skip-super" - children: [x: "*"] - -- - name: mo-super - tag: [msup, msubsup] - match: "*[last()][self::m:mo]" - replace: - - intent: - name: "say-super" - children: [x: "*"] - -# rules for functions raised to a power -# these could have been written on 'mrow' but putting them on msup seems more specific -# to see if it is a function, we look right to see if the following sibling is apply-function -- - name: function-inverse - tag: msup - match: - - "*[2][self::m:mrow][count(*)=2] and" # exponent is an mrow with children... - - "*[2]/*[1][self::m:mo][text()='-'] and" # '-' - - "*[2]/*[2][self::m:mn][text()=1] and" # and '1' - - "*[1][self::m:mi] and IsInDefinition(*[1], 'TrigFunctionNames') and" # base is a trig function name (e.g, sin, sinh) - - "following-sibling::*[1][self::m:mo][text()='\u2061']" # and msup is followed by invisible function apply - replace: - - intent: - name: "inverse-function" - children: - - x: "*[1]" - - -- - name: bigop - tag: [msubsup, munderover] - match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')" - replace: - - intent: - name: "large-op" # Fix: the name in the spreadsheet needs updating/fixing - children: - - x: "*[1]" - - x: "*[2]" - - x: "*[3]" - -- - name: set - tag: mrow - # sets have { }s and should have at least one of ",∈∉|" (vertical bar is "such that") or be the empty set - match: "IsBracketed(., '{', '}') and - (count(*)=2 or (count(*)=3 and IsNode(*[2], 'leaf')) or . != translate(., ',∈∉|:', ''))" - replace: - - intent: - name: "set" - children: - - test: - if: count(*) = 2 - then: [] - else: [x: "*[2]"] -- - name: default - tag: msub - match: "." 
- replace: - - intent: - name: "particular-value-of" - children: - - x: "*[1]" - - x: "*[2]" - -- - name: default - tag: msup - match: "." - replace: - - intent: - name: "power" - children: - - x: "*[1]" - - x: "*[2]" - - -- - name: default - tag: msubsup - match: "." - replace: - - intent: - name: "power" - children: - - intent: - name: "sub" - children: - - x: "*[1]" - - x: "*[2]" - - x: "*[3]" - -- - name: default - tag: mfrac - match: "." - replace: - - intent: - name: "fraction" - children: - - x: "*[1]" - - x: "*[2]" - - -# generic mtable: treat as multiline equations of some sort -# if a property is set, handle that first - -- - name: "piecewise" - tag: mrow - match: "count(*)=2 and *[1][text()='{'] and *[2][self::m:mtable]" - replace: - - x: "*[2]" - -- - name: mtable-property-is-set - tag: mtable - variables: [MTableProperty: "IfThenElse(contains(@data-intent-property, ':grid:'), - IfThenElse(parent::m:mrow[IsBracketed(., '|', '|')], 'determinant', 'matrix'), - IfThenElse(contains(@data-intent-property, ':piecewise:'), 'piecewise', - IfThenElse(contains(@data-intent-property, ':system-of-equation:'), 'system-of-equation', - IfThenElse(contains(@data-intent-property, ':lines:'), 'lines',''))))"] - match: "$MTableProperty !=''" - replace: - - intent: - xpath-name: "$MTableProperty" - children: - - x: "*" - -- - name: mtable-matrix-property - tag: mtable - match: - - "..[self::m:mrow and (IsBracketed(., '(', ')') or IsBracketed(., '[', ']'))]" - replace: - - with: - variables: - - TableProperty: "'matrix'" - replace: - - intent: - name: "matrix" - children: - - x: "*" - -- - name: mtable-determinant-property - tag: mtable - match: "..[self::m:mrow and IsBracketed(., '|', '|')]" - replace: - - with: - variables: - - TableProperty: "'determinant'" - replace: - - intent: - name: "determinant" - children: - - x: "*" - -- - name: mtable-piecewise-property - tag: mtable - match: - - "..[self::m:mrow and count(*)=2 and *[1][text()='{']]" - replace: - - with: - 
variables: - - TableProperty: "'piecewise'" - replace: - - intent: - name: "piecewise" - children: - - x: "*" - -- - name: mtable-equations-property - tag: mtable - match: "count(*) > 0 and *[1][contains(translate(., '=≠<>≤≥≦≧', '========'), '=')]" - replace: - - with: - variables: - - TableProperty: "'system-of-equations'" - replace: - - intent: - name: "system-of-equations" - children: - - x: "*" - -- - name: mtable-lines-property - tag: mtable - match: "." # default is "lines", so no need to test contains(@data-intent-property, ':lines:') - replace: - - with: - variables: - - TableProperty: "'lines'" - replace: - - intent: - name: "lines" - children: - - x: "*" diff --git a/Example/Rules/Intent/geometry.yaml b/Example/Rules/Intent/geometry.yaml deleted file mode 100644 index 796550f..0000000 --- a/Example/Rules/Intent/geometry.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- -# Geometry related intent inferences -- - name: geometry-line-segment - tag: mover - match: - - "*[2][self::m:mo][text()='¯'] and" - - "*[1][self::m:mrow][count(*)=3 and " - - " *[1][self::m:mi and string-length(text()) and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = ''] and" - - " *[2][self::m:mo and (text()='\u2062' or text()='\u2063')] and" - - " *[3][self::m:mi and string-length(text()) and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = '']" - - " ]" - replace: - - intent: - name: "line-segment" - children: - - x: "*[1]/*[1]" - - x: "*[1]/*[3]" - -- - name: geometry-ray - tag: mover - match: - - "*[2][self::m:mo][text()='→'] and" # u2192 - - "*[1][self::m:mrow][count(*)=3 and " - - " *[1][self::m:mi and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = ''] and" - - " *[2][self::m:mo and (text()='\u2062' or text()='\u2063')] and" - - " *[3][self::m:mi and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = '']" - - " ]" - replace: - - intent: - name: "ray" - children: - - x: "*[1]/*[1]" - - x: "*[1]/*[3]" - -- - name: geometry-arc - tag: mover - match: - - "*[2][self::m:mo][text()='⌒'] and" # u2312 - 
- "*[1][self::m:mrow][count(*)=3 and " - - " *[1][self::m:mi and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = ''] and" - - " *[2][self::m:mo and (text()='\u2062' or text()='\u2063')] and" - - " *[3][self::m:mi and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = '']" - - " ]" - replace: - - intent: - name: "arc" - children: - - x: "*[1]/*[1]" - - x: "*[1]/*[3]" - diff --git a/Example/Rules/Intent/linear-algebra.yaml b/Example/Rules/Intent/linear-algebra.yaml deleted file mode 100644 index 1725641..0000000 --- a/Example/Rules/Intent/linear-algebra.yaml +++ /dev/null @@ -1,130 +0,0 @@ ---- - -- - # vertical bars around a capital letter - name: determinant-scalar - tag: mrow - match: - - "IsBracketed(., '|', '|') and " - - "($SpeechStyle = 'SimpleSpeak' or ($SpeechStyle = 'ClearSpeak' and $ClearSpeak_AbsoluteValue != 'Cardinality')) and " - - "*[2][self::m:mi and translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = '']" - replace: - - intent: - name: "determinant" - children: [x: "*[2]"] - -- - # vertical bars around an mtable - # FIX: could add a check to make sure it was a square mtable - name: determinant - tag: mrow - match: - - "IsBracketed(., '|', '|') and *[2][self::m:mtable]" - replace: - - intent: - name: "intent-wrapper" - children: - - intent: - name: "determinant" - id: "*[2]/@id" - children: [x: "*[2]/*"] - -- - # parens or brackets around an mtable - name: matrix - tag: mrow - match: - - "(IsBracketed(., '(', ')') or IsBracketed(., '[', ']')) and *[2][self::m:mtable]" - replace: - - intent: - name: "intent-wrapper" - children: - - intent: - name: "matrix" - id: "*[2]/@id" - children: [x: "*[2]/*"] - -- - name: norm - tag: mrow - match: - - "IsBracketed(., '∥', '∥')" - replace: - - intent: - name: "norm" - children: [x: "*[2]"] - - -- - name: norm - tag: msub - match: - - "*[1][IsBracketed(., '∥', '∥')]" - replace: - - intent: - name: "subscripted-norm" - children: - - x: "*[1]/*[2]" # grab the contents of the bracketed expr - - x: "*[2]" - -- - name: 
vector-magnitude - tag: mrow - match: "IsBracketed(., '‖', '‖')" - replace: - - intent: - name: "magnitude" - children: [x: "*[2]"] - -- - name: transpose - tag: msup - match: - - "*[2][text()='T']" - replace: - - intent: - name: "transpose" - children: [x: "*[1]"] - -- - name: trace - tag: mrow - match: - - "count(*)=3 and (*[1][text()='tr' or text()='Tr']) and *[2][text()='\u2061']" - replace: - - intent: - name: "trace" - children: [x: "*[3]"] - -- - name: dimension - tag: mrow - match: - - "count(*)=3 and (*[1][text()='dim' or text()='Dim']) and *[2][text()='\u2061']" - replace: - - intent: - name: "dimension" - children: [x: "*[3]"] - -- - name: homomorphism - tag: mrow - match: - - "count(*)=3 and (*[1][text()='hom' or text()='Hom']) and *[2][text()='\u2061']" - replace: - - intent: - name: "homomorphism" - children: [x: "*[3]"] - -- - name: kernel - tag: mrow - match: - - "count(*)=3 and (*[1][text()='ker' or text()='Ker']) and *[2][text()='\u2061']" - replace: - - intent: - name: "kernel" - children: [x: "*[3]"] - - - diff --git a/Example/Rules/Intent/probability.yaml b/Example/Rules/Intent/probability.yaml deleted file mode 100644 index 672804b..0000000 --- a/Example/Rules/Intent/probability.yaml +++ /dev/null @@ -1,18 +0,0 @@ ---- -- - name: probability - tag: mi - match: "text()='P'" - replace: - - intent: - name: "probability" - children: [x: "text()"] - -- - name: given - tag: mo - match: "text()='|' and count(preceding-sibling::*)=1 and count(following-sibling::*)=1 and parent::*/preceding-sibling::*[last()][text()='P']" - replace: - - intent: - name: "given" - children: [x: "text()"] diff --git a/Example/Rules/definitions.yaml b/Example/Rules/definitions.yaml deleted file mode 100644 index 06e58c4..0000000 --- a/Example/Rules/definitions.yaml +++ /dev/null @@ -1,181 +0,0 @@ ---- -# Each definition in this file is of the form -# - name: { "...", "...", "..." } -# For numbers, -# - name: [ "...", "...", "..." 
] - - -- SI_Units: { - "A": "amp", - "cd": "candela", - "K": "kelvin", - "K": "kelvin", - "g": "gram", - "m": "meter", - "mol": "mole", - "s": "second", - "″": "second", - "\"": "second", - "sec": "second", - } - - # ---------------- Large Operators ---------------------------------- -- LargeOperators: { - "⅀", # double struck - "∏", "∐", "∑", "⋀", "⋁", "⋂", "⋃", "⨀", "⨁", "⨂", "⨃", "⨄", "⨅", - "⨆", "⨇", "⨈", "⨉", "⨊", "⨋", "⫼", "⫿", - "∫", "∬", "∭", "∮", "∯", "∰", "∱", "∲", "∳", "⨌", "⨍", "⨎", "⨏", - "⨐", "⨑", "⨒", "⨓", "⨔", "⨕", "⨖", "⨗", "⨘", "⨙", "⨚", "⨛", "⨜" - } - - - # ---------------- Function Names ------------------------------------ - # these are always considered to be function names, so "sin x" will be interpreted as "sin" applied to "x", not times - # Note: the log functions follow the same syntax rules as trig functions, so they are included here -- TrigFunctionNames: { - "cos", "cosec", "cosech", "cosh", "cot", "cotan", "cotanh", "coth", "csc", "csch", - "sec", "sech", "sin", "sinh", "tan", "tang", "tanh", "tg", "ln", "log", "lg" - } - -- AdditionalFunctionNames: { - # the arcXXX functions aren't really trig functions (they are inverse functions) - "arccos", "arccosec", "arccosech", "arccosh", - "arccot", "arccotan", "arccotanh", "arccoth", - "arccsc", "arccsch", "arcsec", "arcsech", "arcsin", "arcsinh", - "arctan", "arctang", "arctanh", - "exp", "Exp", "expt", "arg", "im", "Im", "ℑ", "re", "Re", "ℜ", "Pr", "cis", - "gcd", "lcm", - "min", "max", "deg", "det", - "dim", "hom", "ker", "tr", - "Dim", "Hom", "Ker", "Tr", - "inf", "lim", "liminf", "limsup", "sup", - "injlim", "projlim", "inj lim", "proj lim", - - # these come from the nemeth book (Rule XVII) - "arc", - "amp", "amplitude", - "antilog", "antilogarithm", - "argument", - "colog", "cologarithm", - "cosine", - "hyperbolic cosine", - "cotangent", - "hyperbolic cotangent", - "covers", "coversine", - "cosecant", - "hyperbolic cosecant", - "ctn", - "ctnh", - "determinant", - "erf", "error 
function", - "exponential", - "exsec", "exsecant", - "grad", "gradient", - "hav", "haversine", - "limit", - "upper limit", - "lower limit", - "natural logarithm", - "logarithm", - "maximum", - "minimum", - "real part", - "secant", - "hyperbolic secant", - "sine", - "hyperbolic sine", - "supremum", - "tangent", - "hyperbolic tangent", - "vers", "versine" - } - - # common names of functions along with a capital letter followed more one or more letters - # these will be interpreted as functions when followed by parens as in f(x+y) - # other variables will be interpreted as functions when there is a "," in the parens as in t(x,a) - # FIX: Also useful to define are multi-letter function names that start with a capital letter: "[A-Za-z]+" - # FIX: That's not currently possible, so the canonicalize function builds that in. Should be externally-definable -- LikelyFunctionNames: { - "f", "g", "h", "F", "G", "H" - } - - # probably need to expand, but these are ones that have braille codes and are in the op dict -- GeometryPrefixOperators: { - "∟", "∠", "∡", "∢", "⊾", "⊿", - } - - # note: the u+2500 are shapes, but they should be s that will potentially make use of invisible function application - # so that △ABC makes sense since △ and the other shapes are not in the operator dictionary. 
-- GeometryShapes: { - "■","□","▢","▣","▤","▥","▦","▧","▨","▩","▪","▫","▬","▭","▮","▯", - "▰","▱","▲","△","▴","▵","▶","▷","▸","▹","►","▻","▼","▽","▾","▿", - "◀","◁","◂","◃","◄","◅","◆","◇","◈","◉","◊","○","◌","◍","◎","●", - "◐","◑","◒","◓","◔","◕","◖","◗","◘","◙","◚","◛","◜","◝","◞","◟", - "◠","◡","◢","◣","◤","◥","◧","◨","◩","◪","◫","◬","◭","◮","◯", - "◰","◱","◲","◳","◴","◵","◶","◷","◸","◹","◺","◻","◼","◽","◾", "◿", - } - -- SubsetOperators: { - "⋢", "⋣", "⋤", "⋥", "⊂", "⊃", "⊄", "⊅", "⊆", "⊇", "⊈", "⊉", "⊊", "⊋", - "⊏", "⊐", "⊑", "⊒", "⪽", "⪾", "⪿", "⫀", "⫁", "⫂", "⫃", "⫄", "⫅", "⫆", - "⫇", "⫈", "⫉", "⫊", "⫋", "⫌", "⫍", "⫎", "⫏", "⫐", "⫑", "⫒", "⫓", "⫔", - "⫕", "⫖", "⫗", "⫘", "⋐", "⋑", "⋪", "⋫", "⋬", "⋭", "⊲", "⊳", "⊴", "⊵", - - "∈", "∊", "⋲", "⋳", "⋴", "⋵", "⋶", "⋷", "⋸", "⋹", "⋿", - - "∋", "∍", "⋺", "⋻", "⋼", "⋽", "⋾", - } - -- Arrows: { - "←", "↑", "→", "↓", "↔", "↕", "↖", "↗", "↘", "↙", "↚", "↛", "↜", "↝", "↞", - "↟", "↠", "↡", "↢", "↣", "↤", "↥", "↦", "↧", "↨", "↩", "↪", "↫", "↬", "↭", - "↮", "↯", "↰", "↱", "↲", "↳", "↴", "↵", "↶", "↷", "↸", "↹", "↺", "↻", "⇄", - "⇅", "⇆", "⇇", "⇈", "⇉", "⇊", "⇍", "⇎", "⇏", "⇐", "⇑", "⇒", "⇓", "⇔", "⇕", - "⇖", "⇗", "⇘", "⇙", "⇚", "⇛", "⇜", "⇝", "⇞", "⇟", "⇠", "⇡", "⇢", "⇣", "⇤", - "⇥", "⇦", "⇧", "⇨", "⇩", "⇪", "⇫", "⇬", "⇭", "⇮", "⇯", "⇰", "⇱", "⇲", "⇳", - "⇴", "⇵", "⇶", "⇷", "⇸", "⇹", "⇺", "⇻", "⇼", "⇽", "⇾", "⇿", "⌁", "⌃", "⌄", - "⌤", "⎋", "➔", "➘", "➙", "➚", "➛", "➜", "➝", "➞", "➟", "➠", "➡", "➢", "➣", - "➤", "➥", "➦", "➧", "➨", "➩", "➪", "➫", "➬", "➭", "➮", "➯", "➱", "➲", "➳", - "➴", "➵", "➶", "➷", "➸", "➹", "➺", "➻", "➼", "➽", "➾", "⟰", "⟱", "⟲", "⟳", - "⟴", "⟵", "⟶", "⟷", "⟸", "⟹", "⟺", "⟻", "⟼", "⟽", "⟾", "⟿", "⤀", "⤁", "⤂", - "⤃", "⤄", "⤅", "⤆", "⤇", "⤈", "⤉", "⤊", "⤋", "⤌", "⤍", "⤎", "⤏", "⤐", "⤑", - "⤒", "⤓", "⤔", "⤕", "⤖", "⤗", "⤘", "⤙", "⤚", "⤛", "⤜", "⤝", "⤞", "⤟", "⤠", - "⤡", "⤢", "⤣", "⤤", "⤥", "⤦", "⤧", "⤨", "⤩", "⤪", "⤭", "⤮", "⤯", "⤰", "⤱", - "⤲", "⤳", "⤴", "⤵", "⤶", "⤷", "⤸", "⤹", "⤺", "⤻", "⤼", "⤽", "⤾", 
"⤿", "⥀", - "⥁", "⥂", "⥃", "⥄", "⥅", "⥆", "⥇", "⥈", "⥉", "⥰", "⥱", "⥲", "⥳", "⥴", "⥵", - "⥶", "⥷", "⥸", "⥹", "⥺", "⥻", "⦳", "⦴", "⦽", "⧪", "⧬", "⧭", "⨗", "⬀", "⬁", - "⬂", "⬃", "⬄", "⬅", "⬆", "⬇", "⬈", "⬉", "⬊", "⬋", "⬌", "⬍", "⬎", "⬏", "⬐", - "⬑", "⬰", "⬱", "⬲", "⬳", "⬴", "⬵", "⬶", "⬷", "⬸", "⬹", "⬺", "⬻", "⬼", "⬽", - "⬾", "⬿", "⭀", "⭁", "⭂", "⭃", "⭄", "⭅", "⭆", "⭇", "⭈", "⭉", "⭊", "⭋", "⭌", - "←", "↑", "→", "↓", - # Harpoons - "↼", "↽", "↾", "↿", "⇀", "⇁", "⇂", "⇃", "⇋", "⇌", "⥊", "⥋", "⥌", "⥍", "⥎", - "⥏", "⥐", "⥑", "⥒", "⥓", "⥔", "⥕", "⥖", "⥗", "⥘", "⥙", "⥚", "⥛", "⥜", "⥝", - "⥞", "⥟", "⥠", "⥡", "⥢", "⥣", "⥤", "⥥", "⥦", "⥧", "⥨", "⥩", "⥪", "⥫", "⥬", - "🣒", "🣓", "🣔", # 0x1f8d2 - 0x1f8d4 (Unicode 17 equilibrium arrows - } - - # these come from SRE's ts\semantic_tree\semantic_attr.ts file -- ComparisonOperators: { - "=", "⁼", "₌", "∼", "∽", "≃", "≅", "≈", "≊", "≋", "≌", "≍", "≎", - "≑", "≒", "≓", "≔", "≕", "≖", "≗", "≘", "≙", "≚", "≛", "≜", "≝", "≞", - "≟", "≡", "≣", "⧤", "⩦", "⩮", "⩯", "⩰", "⩱", "⩲", "⩳", "⩴", "⩵", "⩶", - "⩷", "⩸", "⋕", "⩭", "⩪", "⩫", "⩬", "﹦", "=", "⊜", "∷", - - "<", ">", "≁", "≂", "≄", "≆", "≇", "≉", "≏", "≐", "≠", "≢", "≤", "≥", - "≦", "≧", "≨", "≩", "≪", "≫", "≬", "≭", "≮", "≯", "≰", "≱", "≲", "≳", - "≴", "≵", "≶", "≷", "≸", "≹", "≺", "≻", "≼", "≽", "≾", "≿", "⊀", "⊁", - "⋖", "⋗", "⋘", "⋙", "⋚", "⋛", "⋜", "⋝", "⋞", "⋟", "⋠", "⋡", "⋦", "⋧", - "⋨", "⋩", "⩹", "⩺", "⩻", "⩼", "⩽", "⩾", "⩿", "⪀", "⪁", "⪂", "⪃", "⪄", - "⪅", "⪆", "⪇", "⪈", "⪉", "⪊", "⪋", "⪌", "⪍", "⪎", "⪏", "⪐", "⪑", "⪒", - "⪓", "⪔", "⪕", "⪖", "⪗", "⪘", "⪙", "⪚", "⪛", "⪜", "⪝", "⪞", "⪟", "⪠", - "⪡", "⪢", "⪣", "⪤", "⪥", "⪦", "⪧", "⪨", "⪩", "⪪", "⪫", "⪬", "⪭", "⪮", - "⪯", "⪰", "⪱", "⪲", "⪳", "⪴", "⪵", "⪶", "⪷", "⪸", "⪹", "⪺", "⪻", "⪼", - "⫷", "⫸", "⫹", "⫺", "⧀", "⧁", "﹤", "﹥", "<", ">", - - "⋢", "⋣", "⋤", "⋥", "⊂", "⊃", "⊄", "⊅", "⊆", "⊇", "⊈", "⊉", "⊊", "⊋", - "⊏", "⊐", "⊑", "⊒", "⪽", "⪾", "⪿", "⫀", "⫁", "⫂", "⫃", "⫄", "⫅", "⫆", - "⫇", "⫈", "⫉", "⫊", "⫋", "⫌", "⫍", "⫎", "⫏", "⫐", "⫑", 
"⫒", "⫓", "⫔", - "⫕", "⫖", "⫗", "⫘", "⋐", "⋑", "⋪", "⋫", "⋬", "⋭", "⊲", "⊳", "⊴", "⊵", - } - diff --git a/Example/Rules/intent.yaml b/Example/Rules/intent.yaml deleted file mode 100644 index c249bec..0000000 --- a/Example/Rules/intent.yaml +++ /dev/null @@ -1,118 +0,0 @@ ---- -- - name: intent-exists - tag: "!*" # matches any tag -- runs before specific rules - match: "@intent" - replace: [x: "process-intent(.)"] # Warning: this is a special case hack and only "." is allowed - -- - name: turn-off - tag: "!*" # turn off matching -- runs before specific rules - match: "ancestor-or-self::*[contains(@data-intent-property, ':literal:')]" - replace: - - test: - - if: "*" - then: - - intent: - xpath-name: "name(.)" - children: [x: "*"] - - else_if: "text()" - then: - - intent: - xpath-name: "name(.)" - children: [x: "text()"] - else: - - intent: - xpath-name: "name(.)" - children: [] - -- include: "Intent/geometry.yaml" -- include: "Intent/linear-algebra.yaml" -- include: "Intent/calculus.yaml" - -- - # this is last because other rules are/should be(???) more specific - include: "Intent/general.yaml" - -- - # we need to leave "math" here so that there is a parent to all the rules (otherwise we have a special case tests) - name: ignore-punctuation - tag: math - match: "*[1][self::m:mrow and count(*)=2 and *[2][translate(.,'.,;:?', '')='']]" - variables: [TableProperty: "Grid"] - replace: - - test: - if: "*[1]/*[1]" - then: - - intent: - name: "math" - children: [x: "*[1]/*[1]"] - else: [t: " "] # empty math -- don't say anything - - -- - name: default - tag: math - match: "." - variables: [TableProperty: "Grid"] - replace: - - test: - if: "*" - then: - - intent: - name: "math" - children: [x: "*"] - else: [t: " "] # empty math -- don't say anything - - -- - name: default - tag: mstyle - match: "." 
- replace: - - test: - if: "*" - then: [x: "*"] - else: [t: " "] # empty math -- don't say anything - -- - name: semantics - tag: "semantics" - match: "*[@encoding='MathML-Presentation']" - replace: - - test: - if: "parent::m:math and *[@encoding='MathML-Presentation'][self::m:mrow and count(*)=2 and *[2][translate(.,'.,;:?', '')='']]" - then: [x: "*[@encoding='MathML-Presentation']/*[1]"] - else: [x: "*[@encoding='MathML-Presentation']"] - -- - name: semantics-default - tag: "semantics" - match: . - replace: - - test: - if: "parent::m:math and *[1][self::m:mrow and count(*)=2 and *[2][translate(.,'.,;:?', '')='']]" - then: [x: "*[1]/*[1]"] - else: [x: "*[1]"] - -- - # pass uncaught MathML through to the speech rules -- no point to renaming them - name: default - tag: "*" - match: "." - replace: - - test: - - if: "count(*) > 0" - then: - - intent: - xpath-name: "name(.)" - children: [x: "*"] - - else_if: "text()" - then: - - intent: - xpath-name: "name(.)" - children: [x: "text()"] - else: - - intent: - xpath-name: "name(.)" - children: [] diff --git a/Example/Rules/prefs.yaml b/Example/Rules/prefs.yaml deleted file mode 100644 index b755cf7..0000000 --- a/Example/Rules/prefs.yaml +++ /dev/null @@ -1,92 +0,0 @@ ---- - Speech: - Impairment: Blindness # LearningDisability, LowVision, Blindness - Language: Auto # any known language code and sub-code -- could be en-uk, etc - SpeechStyle: ClearSpeak # Any known speech style (falls back to ClearSpeak) - Verbosity: Medium # Terse, Medium, Verbose - MathRate: 100 # Change from text speech rate (%) - PauseFactor: 100 # Change from normal pause length (%) - SpeechSound: None # make a sound when starting/ending math speech -- None, Beep - SubjectArea: General # FIX: still working on this - Chemistry: SpellOut # SpellOut (H 2 0), AsCompound (Water) -- not implemented, Off (H sub 2 O) - - SpeechOverrides: - CapitalLetters: "" # word to say as a prefix/postfix for capital letters; empty string leaves it calling AT with Unicode 
fallback - LeftParen: "" # word used as override (not implemented) - RightParen: "" # word used as override (not implemented) - - ClearSpeak: # see ClearSpeak speak for meanings - CapitalLetters: Auto # SayCaps or use pitch - AbsoluteValue: Auto # AbsEnd, Cardinality, Determinant - Fractions: Auto # Ordinal, Over, FracOver, General, EndFrac, GeneralEndFrac, OverEndFrac, Per - Exponents: Auto # Ordinal, OrdinalPower, AfterPower - Roots: Auto # PosNegSqRoot, RootEnd, PosNegSqRootEnd - Functions: Auto # None - Trig: Auto # TrigInverse, ArcTrig - Log: Auto # LnAsNaturalLog - ImpliedTimes: Auto # MoreImpliedTimes , None - Paren: Auto # Speak, SpeakNestingLevel, Silent, CoordPoint, Interval - Matrix: Auto # SpeakColNum, SilentColNum, EndMatrix, Vector, EndVector, Combinatorics - MultiLineLabel: Auto # Case, Constraint, Equation, Line, None, Row, Step - MultiLineOverview: Auto # None, - MultiLinePausesBetweenColumns: Short # Long - Sets: Auto # woAll, SilentBracket - MultSymbolX: Auto # By, Cross - MultSymbolDot: Auto # Dot - TriangleSymbol: Auto # Delta - Ellipses: Auto # AndSoOn, - VerticalLine: Auto # SuchThat, Divides, Given - SetMemberSymbol: Auto # Belongs, Element, Member - Prime: Auto # Angle, Length - CombinationPermutation: Auto # ChoosePermute - Bar: Auto # Bar, Conjugate, Mean - - MathSpeak: Verbose # Brief, SuperBrief - - Navigation: - NavMode: Enhanced # Enhanced, Simple, Character - ResetNavMode: false # remember previous value and use it - Overview: false # speak the expression or give a description/overview - ResetOverview: true # remember previous value and use it - NavVerbosity: Medium # Terse, Medium, Full (words to say for nav command) - AutoZoomOut: true # Auto zoom out of 2D exprs (use shift-arrow to force zoom out if unchecked) - - Braille: - BrailleCode: "Nemeth" # Any supported braille code (currently Nemeth, UEB) - BrailleNavHighlight: EndPoints # Highlight with dots 7 & 8 the current nav node -- values are Off, FirstChar, EndPoints, All - 
UseSpacesAroundAllOperators: false # true/false - - UEB: - StartMode: "Grade2" # Grade1/Grade2 -- assumed starting mode UEB braille (Grade1 assumes we are in G1 passage mode) - UseSpacesAroundAllOperators: false # true/false - - # UEB Guide to Technical Material (https://iceb.org/Guidelines_for_Technical_Material_2008-10.pdf) - # says to normally treat Fraktur and DoubleStruck as Script - # Here we provide an option to specify a transcriber-defined typeform prefix indicator instead - # Note: here are prefixes for 1st - 5th: "⠈⠼", "⠘⠼", "⠸⠼", "⠐⠼", "⠨⠼" - DoubleStruck: "⠈" # script - Fraktur: "⠈" # script - SansSerif: "⠈⠼" # first transcriber-defined typeform prefix indicator - GreekVariant: "⠨" # default to Greek - - Vietnam: - UseDropNumbers: false # drop digits down a row in simple numeric fractions - # The guideline is being revised -- current guidance is to follow UEB for alternative scripts - # UEB Guide to Technical Material (https://iceb.org/Guidelines_for_Technical_Material_2008-10.pdf) - # says to normally treat Fraktur and DoubleStruck as Script - # Here we provide an option to specify a transcriber-defined typeform prefix indicator instead - # Note: here are prefixes for 1st - 5th: "⠈⠼", "⠘⠼", "⠸⠼", "⠐⠼", "⠨⠼" - DoubleStruck: "⠈" # script - Fraktur: "⠈" # script - SansSerif: "⠈⠼" # first transcriber-defined typeform prefix indicator - GreekVariant: "⠸" # default to Greek - - LaTeX: - UseShortName: false # Use the short form for the latex (e.g., "~a" instead of "\alpha") - - - Other: - CopyAs: "MathML" # MathML, LaTeX, ASCIIMath - DecimalSeparators: "." 
# [default] - BlockSeparators: ", \u00A0\u202F" # [default -- includes two forms of non-breaking spaces] - DecimalSeparator: "Auto" # Auto, '.', ',', Custom \ No newline at end of file diff --git a/Example/mathcat-c.h b/Example/mathcat-c.h deleted file mode 100644 index 309b759..0000000 --- a/Example/mathcat-c.h +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include - -/// `NavigationLocation` is a structure used with Navigation. -/// In many cases, the `id` is enough to uniquely identify the navigation location. -/// However, for a number such as "123" or an identifier such as "sin", there is no `id` representing each character. -/// An `offset` is used to uniquely identify each character. `offset` = 0 is the entire identifier, 1 is the first char, etc. -/// For example, the "i" in `sin` has `id="xyz-123"` and `offset=1`. -/// -/// Note: currently (2/24) offsets are not implemented in MathCAT and will always return 0. This will hopefully be supported by the end of 2024. -typedef struct NavigationLocation { - const char *id; - uint32_t offset; -} NavigationLocation; - -/// Returns the error set by the last call. -/// Calling GetError() will clear the current error. -/// If there is no error, "" (an empty string) will be returned. -const char *GetError(void); - -/// IMPORTANT: For every MathCAT function that returns a string, it must be free'd with this call -/// If this is not called, the memory will be leaked. -void FreeMathCATString(char *str); - -/// The absolute path location of the MathCAT Rules dir. -/// Returns "Ok" or an empty string if there is an error (use GetError()). -/// IMPORTANT: This should be the first call to MathCAT -const char *SetRulesDir(const char *rules_dir_location); - -/// The MathML to be spoken, brailled, or navigated. -/// -/// This will override any previous MathML that was set. 
-/// Returns: the MathML that was set, annotated with 'id' values on each node (if none were present) -/// The 'id' values can be used during navigation for highlighting the current node -const char *SetMathML(const char *mathml_str); - -/// Get the spoken text of the MathML that was set. -/// The speech takes into account any AT or user preferences. -const char *GetMathCATVersion(void); - -/// Get the spoken text of the MathML that was set. -/// The speech takes into account any AT or user preferences. -const char *GetSpokenText(void); - -/// Set an API preference. The preference name should be a known preference name. -/// The value should either be a string or a number (depending upon the preference being set) -/// -/// This function can be called multiple times to set different values. -/// The values are persistent but can be overwritten by setting a preference with the same name and a different value. -const char *SetPreference(const char *name, - const char *value); - -/// Set an API preference. The preference name should be a known preference name. -/// The value should either be a string or a number (depending upon the preference being set) -/// -/// This function can be called multiple times to set different values. -/// The values are persistent but can be overwritten by setting a preference with the same name and a different value. -const char *GetPreference(const char *name); - -/// Get the braille associated with the MathML that was set by [`set_mathml`]. -/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`). -/// -/// If 'nav_node_id' is a non-empty string, the node is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`) -const char *GetBraille(const char *nav_node_id); - -/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`]. -/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`). 
-/// -/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled. -const char *GetNavigationBraille(void); - -/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases). -/// -/// The spoken text for the new current node is returned. -const char *DoNavigateKeyPress(uintptr_t key, - bool shift_key, - bool control_key, - bool alt_key, - bool meta_key); - -/// Given a command, the current node is moved accordingly (or value reported in some cases). -/// -/// The spoken text for the new current node is returned. -/// -/// The list of legal commands are: -/// "MovePrevious", "MoveNext", "MoveStart", "MoveEnd", "MoveLineStart", "MoveLineEnd", -/// "MoveCellPrevious", "MoveCellNext", "MoveCellUp", "MoveCellDown", "MoveColumnStart", "MoveColumnEnd", -/// "ZoomIn", "ZoomOut", "ZoomOutAll", "ZoomInAll", -/// "MoveLastLocation", -/// "ReadPrevious", "ReadNext", "ReadCurrent", "ReadCellCurrent", "ReadStart", "ReadEnd", "ReadLineStart", "ReadLineEnd", -/// "DescribePrevious", "DescribeNext", "DescribeCurrent", -/// "WhereAmI", "WhereAmIAll", -/// "ToggleZoomLockUp", "ToggleZoomLockDown", "ToggleSpeakMode", -/// "Exit", -/// "MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9", -/// "Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9", -/// "Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9", -/// "SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9", -const char *DoNavigateCommand(const char *command); - -/// Return the MathML associated with the current (navigation) node. -const char *GetNavigationMathML(void); - -/// Return the id of the MathML associated with the current (navigation) node. 
-/// Note: this is deprecated -- use GetNavigationLocation() -const char *GetNavigationMathMLId(void); - -/// Return the offset from the MathML node associated with the current (navigation) node. -/// Note: this is deprecated -- use GetNavigationLocation() -uint32_t GetNavigationMathMLOffset(void); - -/// Set the location of the navigation node associated with the current MathML expression. -/// Returns "Ok" or an empty string if there is an error (use GetError()). -const char *SetNavigationLocation(struct NavigationLocation location); - -/// Return the NavigationLocation (id and offset) associated with the current (navigation) node. -/// If there is an error, the id is set to an empty string (use GetError()). -struct NavigationLocation GetNavigationLocation(void); - -/// Return the NavigationLocation (id and offset) associated with braille cursor location (0-based). -/// If there is an error, the id is set to an empty string (use GetError()). -struct NavigationLocation GetNavigationLocationFromBraillePosition(uint32_t position); diff --git a/Example/mathcat.h b/Example/mathcat.h deleted file mode 100644 index 212e4af..0000000 --- a/Example/mathcat.h +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include -#include - -/// `NavigationLocation` is a structure used with Navigation. -/// In many cases, the `id` is enough to uniquely identify the navigation location. -/// However, for a number such as "123" or an identifier such as "sin", there is no `id` representing each character. -/// An `offset` is used to uniquely identify each character. `offset` = 0 is the entire identifier, 1 is the first char, etc. -/// For example, the "i" in `sin` has `id="xyz-123"` and `offset=1`. -/// -/// Note: currently (2/24) offsets are not implemented in MathCAT and will always return 0. This will hopefully be supported by the end of 2024. 
-struct NavigationLocation { - const char *id; - uint32_t offset; -}; - -extern "C" { - -/// Returns the error set by the last call. -/// Calling GetError() will clear the current error. -/// If there is no error, "" (an empty string) will be returned. -const char *GetError(); - -/// IMPORTANT: For every MathCAT function that returns a string, it must be free'd with this call -/// If this is not called, the memory will be leaked. -void FreeMathCATString(char *str); - -/// The absolute path location of the MathCAT Rules dir. -/// Returns "Ok" or an empty string if there is an error (use GetError()). -/// IMPORTANT: This should be the first call to MathCAT -const char *SetRulesDir(const char *rules_dir_location); - -/// The MathML to be spoken, brailled, or navigated. -/// -/// This will override any previous MathML that was set. -/// Returns: the MathML that was set, annotated with 'id' values on each node (if none were present) -/// The 'id' values can be used during navigation for highlighting the current node -const char *SetMathML(const char *mathml_str); - -/// Get the spoken text of the MathML that was set. -/// The speech takes into account any AT or user preferences. -const char *GetMathCATVersion(); - -/// Get the spoken text of the MathML that was set. -/// The speech takes into account any AT or user preferences. -const char *GetSpokenText(); - -/// Set an API preference. The preference name should be a known preference name. -/// The value should either be a string or a number (depending upon the preference being set) -/// -/// This function can be called multiple times to set different values. -/// The values are persistent but can be overwritten by setting a preference with the same name and a different value. -const char *SetPreference(const char *name, - const char *value); - -/// Set an API preference. The preference name should be a known preference name. 
-/// The value should either be a string or a number (depending upon the preference being set) -/// -/// This function can be called multiple times to set different values. -/// The values are persistent but can be overwritten by setting a preference with the same name and a different value. -const char *GetPreference(const char *name); - -/// Get the braille associated with the MathML that was set by [`set_mathml`]. -/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`). -/// -/// If 'nav_node_id' is a non-empty string, the node is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`) -const char *GetBraille(const char *nav_node_id); - -/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`]. -/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`). -/// -/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled. -const char *GetNavigationBraille(); - -/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases). -/// -/// The spoken text for the new current node is returned. -const char *DoNavigateKeyPress(uintptr_t key, - bool shift_key, - bool control_key, - bool alt_key, - bool meta_key); - -/// Given a command, the current node is moved accordingly (or value reported in some cases). -/// -/// The spoken text for the new current node is returned. 
-/// -/// The list of legal commands are: -/// "MovePrevious", "MoveNext", "MoveStart", "MoveEnd", "MoveLineStart", "MoveLineEnd", -/// "MoveCellPrevious", "MoveCellNext", "MoveCellUp", "MoveCellDown", "MoveColumnStart", "MoveColumnEnd", -/// "ZoomIn", "ZoomOut", "ZoomOutAll", "ZoomInAll", -/// "MoveLastLocation", -/// "ReadPrevious", "ReadNext", "ReadCurrent", "ReadCellCurrent", "ReadStart", "ReadEnd", "ReadLineStart", "ReadLineEnd", -/// "DescribePrevious", "DescribeNext", "DescribeCurrent", -/// "WhereAmI", "WhereAmIAll", -/// "ToggleZoomLockUp", "ToggleZoomLockDown", "ToggleSpeakMode", -/// "Exit", -/// "MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9", -/// "Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9", -/// "Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9", -/// "SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9", -const char *DoNavigateCommand(const char *command); - -/// Return the MathML associated with the current (navigation) node. -const char *GetNavigationMathML(); - -/// Return the id of the MathML associated with the current (navigation) node. -/// Note: this is deprecated -- use GetNavigationLocation() -const char *GetNavigationMathMLId(); - -/// Return the offset from the MathML node associated with the current (navigation) node. -/// Note: this is deprecated -- use GetNavigationLocation() -uint32_t GetNavigationMathMLOffset(); - -/// Set the location of the navigation node associated with the current MathML expression. -/// Returns "Ok" or an empty string if there is an error (use GetError()). -const char *SetNavigationLocation(NavigationLocation location); - -/// Return the NavigationLocation (id and offset) associated with the current (navigation) node. 
-/// If there is an error, the id is set to an empty string (use GetError()). -NavigationLocation GetNavigationLocation(); - -/// Return the NavigationLocation (id and offset) associated with braille cursor location (0-based). -/// If there is an error, the id is set to an empty string (use GetError()). -NavigationLocation GetNavigationLocationFromBraillePosition(uint32_t position); - -} // extern "C"