From 62099438eb1411d69f0a95eeb67977d5771563b7 Mon Sep 17 00:00:00 2001 From: postaid Date: Wed, 23 Oct 2024 12:53:15 +0500 Subject: [PATCH 1/6] replace "group" with "math_group" in math_token grammar --- .../tests/parse.test.ts | 46 ++++++++++++++++++- .../grammars/latex.pegjs | 2 +- packages/unified-latex/libs/unified-latex.ts | 6 ++- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/packages/unified-latex-util-parse/tests/parse.test.ts b/packages/unified-latex-util-parse/tests/parse.test.ts index adf3ba88..2bda2779 100644 --- a/packages/unified-latex-util-parse/tests/parse.test.ts +++ b/packages/unified-latex-util-parse/tests/parse.test.ts @@ -5,6 +5,8 @@ import { trimRenderInfo } from "@unified-latex/unified-latex-util-render-info"; import * as Ast from "@unified-latex/unified-latex-types/index"; import { trim } from "@unified-latex/unified-latex-util-trim"; import { processLatexToAstViaUnified } from "@unified-latex/unified-latex"; +import { PluginOptions as ParserPluginOptions } from "../libs/plugin-from-string"; +import * as AstBuilder from "@unified-latex/unified-latex-builder"; /* eslint-env jest */ @@ -18,9 +20,9 @@ describe("unified-latex-util-parse", () => { let value: string | undefined; let file: VFile | undefined; - function strToNodes(str: string) { + function strToNodes(str: string, options?: ParserPluginOptions) { value = str; - file = processLatexToAstViaUnified().processSync({ value }); + file = processLatexToAstViaUnified(options).processSync({ value }); const root = trimRenderInfo(file.result as any) as Ast.Root; return root.content; } @@ -73,4 +75,44 @@ describe("unified-latex-util-parse", () => { trim(ast); expect(ast).toEqual(targetAst); }); + + it("nested math subscripts", () => { + let ast = strToNodes("{1_2}", { + mode: 'math', + }); + expect(ast).toEqual([{ + type: "group", + content: [ + AstBuilder.s("1"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([AstBuilder.s("2")], { + openMark: '{', + closeMark: '}', + }), + ]), { escapeToken: "" }), + ], + }]); + }); + + it("nested math single char arguments", () => { + const ast = strToNodes("{\\frac12}", { + mode: "math", + }); + expect(ast).toEqual([{ + type: "group", + content: [ + AstBuilder.m('frac', AstBuilder.args([ + AstBuilder.arg([AstBuilder.s("1")], { + openMark: '{', + closeMark: '}', + }), + AstBuilder.arg([AstBuilder.s("2")], { + openMark: '{', + closeMark: '}', + }), + ])), + ], + }]); + + }); }); diff --git a/packages/unified-latex-util-pegjs/grammars/latex.pegjs b/packages/unified-latex-util-pegjs/grammars/latex.pegjs index ea703117..aa200bdc 100644 --- a/packages/unified-latex-util-pegjs/grammars/latex.pegjs +++ b/packages/unified-latex-util-pegjs/grammars/latex.pegjs @@ -69,7 +69,7 @@ math_token "math token" = special_macro / macro / full_comment - / whitespace* x:group whitespace* { return x; } + / whitespace* x:math_group whitespace* { return x; } / whitespace* x:alignment_tab whitespace* { return x; } / macro_parameter / whitespace* superscript whitespace* { diff --git a/packages/unified-latex/libs/unified-latex.ts b/packages/unified-latex/libs/unified-latex.ts index f17ffe77..d902280d 100644 --- a/packages/unified-latex/libs/unified-latex.ts +++ b/packages/unified-latex/libs/unified-latex.ts @@ -28,6 +28,8 @@ export const processLatexViaUnified = ( * Use `unified()` to a string to an `Ast.Ast` and then return it. This function * will not print/pretty-print the `Ast.Ast` back to a string. */ -export const processLatexToAstViaUnified = () => { - return unified().use(unifiedLatexFromString).use(unifiedLatexAstComplier); +export const processLatexToAstViaUnified = ( + options?: ParserPluginOptions +) => { + return unified().use(unifiedLatexFromString, options).use(unifiedLatexAstComplier); }; From f7670881ff486df515599168c728c18b574c00c7 Mon Sep 17 00:00:00 2001 From: postaid Date: Thu, 31 Oct 2024 14:05:14 +0500 Subject: [PATCH 2/6] add "math_shift" parsing in math_token grammar --- packages/unified-latex-util-pegjs/grammars/latex.pegjs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/unified-latex-util-pegjs/grammars/latex.pegjs b/packages/unified-latex-util-pegjs/grammars/latex.pegjs index aa200bdc..0c33cb51 100644 --- a/packages/unified-latex-util-pegjs/grammars/latex.pegjs +++ b/packages/unified-latex-util-pegjs/grammars/latex.pegjs @@ -71,6 +71,9 @@ math_token "math token" / full_comment / whitespace* x:math_group whitespace* { return x; } / whitespace* x:alignment_tab whitespace* { return x; } + / math_shift eq:(!math_shift t:math_token { return t; })+ math_shift { + return createNode("inlinemath", { content: eq.flatMap((x) => x) }); + } / macro_parameter / whitespace* superscript whitespace* { return createNode("macro", { content: "^", escapeToken: "" }); @@ -78,6 +81,7 @@ math_token "math token" / whitespace* subscript whitespace* { return createNode("macro", { content: "_", escapeToken: "" }); } + / math_shift / ignore / whitespace / s:. { return createNode("string", { content: s }); } From a43cc7c6b495b71aee2d3c0d9657b7e6f5c2ba80 Mon Sep 17 00:00:00 2001 From: postaid Date: Thu, 31 Oct 2024 19:42:09 +0500 Subject: [PATCH 3/6] fix text content nodes detection inside "unifiedLatexLintNoPlaintextOperators" --- .../index.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/unified-latex-lint/rules/unified-latex-lint-no-plaintext-operators/index.ts b/packages/unified-latex-lint/rules/unified-latex-lint-no-plaintext-operators/index.ts index 525d6708..9954e74d 100644 --- a/packages/unified-latex-lint/rules/unified-latex-lint-no-plaintext-operators/index.ts +++ b/packages/unified-latex-lint/rules/unified-latex-lint-no-plaintext-operators/index.ts @@ -65,6 +65,13 @@ const OPERATOR_NAMES = [ // `$` should never be a string in math mode. const prefixTree = Trie(OPERATOR_NAMES); +// Macro that has only text content +const TEXT_CONTENT_MACRO = [ + "operatorname", +]; + +const textContentMacroTree = Trie(TEXT_CONTENT_MACRO); + /** * If the sequence starting at `pos` is a sequence of single character strings * matching one of the `OPERATOR_NAMES`, then the matching operator name is returned. @@ -113,6 +120,10 @@ Avoid writing operators in plaintext. For example, instead of \`$sin(2)$\` write ChkTeX Warning 35 `; +function nodeIsTextMacro(node: Ast.Node | Ast.Argument): boolean { + return node.type === "macro" && textContentMacroTree.hasWord(node.content); +} + export const unifiedLatexLintNoPlaintextOperators = lintRule< Ast.Root, PluginOptions @@ -122,7 +133,7 @@ export const unifiedLatexLintNoPlaintextOperators = lintRule< visit( tree, (nodes, info) => { - if (!info.context.inMathMode) { + if (!info.context.inMathMode || info.parents.some(nodeIsTextMacro)) { return; } From 535eebf715bc4158a6527dafd954cdd665f051c5 Mon Sep 17 00:00:00 2001 From: postaid Date: Fri, 1 Nov 2024 09:51:11 +0500 Subject: [PATCH 4/6] add additional test for nested subscript --- .../tests/parse.test.ts | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/packages/unified-latex-util-parse/tests/parse.test.ts b/packages/unified-latex-util-parse/tests/parse.test.ts index 2bda2779..a345fa62 100644 --- a/packages/unified-latex-util-parse/tests/parse.test.ts +++ b/packages/unified-latex-util-parse/tests/parse.test.ts @@ -27,6 +27,16 @@ describe("unified-latex-util-parse", () => { return root.content; } + function textToStringNodes(text: string) { + return Array.from(text).map((s) => { + if (/\s/.test(s)) { + return AstBuilder.SP; + } else { + return AstBuilder.s(s); + } + }); + } + it("trims whitespace/parbreaks in math environments", () => { // Display math let targetAst = strToNodes("\\[\\]"); @@ -92,6 +102,60 @@ describe("unified-latex-util-parse", () => { ]), { escapeToken: "" }), ], }]); + + ast = strToNodes("$x_{y_{\\text{hello there $p_q_r$}}}$"); + expect(ast).toEqual([{ + type: "inlinemath", + content:[ + AstBuilder.s('x'), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("y"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.m("text", AstBuilder.args([ + AstBuilder.arg([ + ...textToStringNodes("hello there "), + { + type: "inlinemath", + content: [ + AstBuilder.s("p"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("q"), + ], { + openMark: '{', + closeMark: '}', + }), + ]), { escapeToken: "" }), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("r"), + ], { + openMark: '{', + closeMark: '}', + }), + ]), { escapeToken: "" }), + ] + }, + ], { + openMark: '{', + closeMark: '}', + }) + ])), + ], { + openMark: '{', + closeMark: '}', + }) + ]), { escapeToken: "" }), + ], { + openMark: '{', + closeMark: '}', + }) + ]), { escapeToken: "" }), + ] + }]); + }); it("nested math single char arguments", () => { From 295eb9fc75ac6ee4f81bd7fbbbae53f74c09506d Mon Sep 17 00:00:00 2001 From: postaid Date: Tue, 12 Nov 2024 12:29:21 +0500 Subject: [PATCH 5/6] fix "text" macro process inside math context --- .../tests/parse.test.ts | 82 ++++++------------- .../grammars/latex.pegjs | 8 +- 2 files changed, 33 insertions(+), 57 deletions(-) diff --git a/packages/unified-latex-util-parse/tests/parse.test.ts b/packages/unified-latex-util-parse/tests/parse.test.ts index a345fa62..32f55c1b 100644 --- a/packages/unified-latex-util-parse/tests/parse.test.ts +++ b/packages/unified-latex-util-parse/tests/parse.test.ts @@ -27,16 +27,6 @@ describe("unified-latex-util-parse", () => { return root.content; } - function textToStringNodes(text: string) { - return Array.from(text).map((s) => { - if (/\s/.test(s)) { - return AstBuilder.SP; - } else { - return AstBuilder.s(s); - } - }); - } - it("trims whitespace/parbreaks in math environments", () => { // Display math let targetAst = strToNodes("\\[\\]"); @@ -103,57 +93,37 @@ describe("unified-latex-util-parse", () => { ], }]); - ast = strToNodes("$x_{y_{\\text{hello there $p_q_r$}}}$"); + ast = strToNodes("$O_O\\text{T_T$U_U$}$"); expect(ast).toEqual([{ type: "inlinemath", - content:[ - AstBuilder.s('x'), + content: [ + AstBuilder.s("O"), AstBuilder.m("_", AstBuilder.args([ + AstBuilder.s("O"), + ]), { escapeToken: "" }), + AstBuilder.m("text", AstBuilder.args([ AstBuilder.arg([ - AstBuilder.s("y"), - AstBuilder.m("_", AstBuilder.args([ - AstBuilder.arg([ - AstBuilder.m("text", AstBuilder.args([ - AstBuilder.arg([ - ...textToStringNodes("hello there "), - { - type: "inlinemath", - content: [ - AstBuilder.s("p"), - AstBuilder.m("_", AstBuilder.args([ - AstBuilder.arg([ - AstBuilder.s("q"), - ], { - openMark: '{', - closeMark: '}', - }), - ]), { escapeToken: "" }), - AstBuilder.m("_", AstBuilder.args([ - AstBuilder.arg([ - AstBuilder.s("r"), - ], { - openMark: '{', - closeMark: '}', - }), - ]), { escapeToken: "" }), - ] - }, - ], { - openMark: '{', - closeMark: '}', - }) - ])), - ], { - openMark: '{', - closeMark: '}', - }) - ]), { escapeToken: "" }), + AstBuilder.s("T_T"), + { + type: "inlinemath", + content: [ + AstBuilder.s("U"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("U"), + ], { + openMark: "{", + closeMark: "}", + }), + ]), { escapeToken: "" }), + ], + }, ], { - openMark: '{', - closeMark: '}', - }) - ]), { escapeToken: "" }), - ] + openMark: "{", + closeMark: "}", + }), + ])), + ], }]); }); diff --git a/packages/unified-latex-util-pegjs/grammars/latex.pegjs b/packages/unified-latex-util-pegjs/grammars/latex.pegjs index 0c33cb51..2b3f2cb9 100644 --- a/packages/unified-latex-util-pegjs/grammars/latex.pegjs +++ b/packages/unified-latex-util-pegjs/grammars/latex.pegjs @@ -67,6 +67,7 @@ parbreak "parbreak" math_token "math token" = special_macro + / whitespace* x:text_macro whitespace* y:group whitespace* { return [x, y].flatMap((x) => x); } / macro / full_comment / whitespace* x:math_group whitespace* { return x; } @@ -290,6 +291,11 @@ macro "macro" return createNode("macro", { content: m }); } +text_macro "text macro" + = m:(escape n:"text" { return n; }) { + return createNode("macro", { content: m }); + } + group "group" = begin_group x:(!end_group c:token { return c; })* end_group { return createNode("group", { content: x.flatMap((x) => x) }); @@ -345,7 +351,7 @@ math_environment "math environment" }); } -// group that assumes you're in math mode. If you use "\text{}" this isn't a good idea.... +// group that assumes you're in math mode. math_group "math group" = begin_group x:(!end_group c:math_token { return c; })* end_group { return createNode("group", { content: x.flatMap((x) => x) }); From f47e6769519c7fd1ffc874b4abd38cbca8291e08 Mon Sep 17 00:00:00 2001 From: postaid Date: Thu, 21 Nov 2024 10:35:33 +0500 Subject: [PATCH 6/6] Revert "fix "text" macro process inside math context" This reverts commit 295eb9fc75ac6ee4f81bd7fbbbae53f74c09506d. --- .../tests/parse.test.ts | 82 +++++++++++++------ .../grammars/latex.pegjs | 8 +- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/packages/unified-latex-util-parse/tests/parse.test.ts b/packages/unified-latex-util-parse/tests/parse.test.ts index 32f55c1b..a345fa62 100644 --- a/packages/unified-latex-util-parse/tests/parse.test.ts +++ b/packages/unified-latex-util-parse/tests/parse.test.ts @@ -27,6 +27,16 @@ describe("unified-latex-util-parse", () => { return root.content; } + function textToStringNodes(text: string) { + return Array.from(text).map((s) => { + if (/\s/.test(s)) { + return AstBuilder.SP; + } else { + return AstBuilder.s(s); + } + }); + } + it("trims whitespace/parbreaks in math environments", () => { // Display math let targetAst = strToNodes("\\[\\]"); @@ -93,37 +103,57 @@ describe("unified-latex-util-parse", () => { ], }]); - ast = strToNodes("$O_O\\text{T_T$U_U$}$"); + ast = strToNodes("$x_{y_{\\text{hello there $p_q_r$}}}$"); expect(ast).toEqual([{ type: "inlinemath", - content: [ - AstBuilder.s("O"), + content:[ + AstBuilder.s('x'), AstBuilder.m("_", AstBuilder.args([ - AstBuilder.s("O"), - ]), { escapeToken: "" }), - AstBuilder.m("text", AstBuilder.args([ AstBuilder.arg([ - AstBuilder.s("T_T"), - { - type: "inlinemath", - content: [ - AstBuilder.s("U"), - AstBuilder.m("_", AstBuilder.args([ - AstBuilder.arg([ - AstBuilder.s("U"), - ], { - openMark: "{", - closeMark: "}", - }), - ]), { escapeToken: "" }), - ], - }, + AstBuilder.s("y"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.m("text", AstBuilder.args([ + AstBuilder.arg([ + ...textToStringNodes("hello there "), + { + type: "inlinemath", + content: [ + AstBuilder.s("p"), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("q"), + ], { + openMark: '{', + closeMark: '}', + }), + ]), { escapeToken: "" }), + AstBuilder.m("_", AstBuilder.args([ + AstBuilder.arg([ + AstBuilder.s("r"), + ], { + openMark: '{', + closeMark: '}', + }), + ]), { escapeToken: "" }), + ] + }, + ], { + openMark: '{', + closeMark: '}', + }) + ])), + ], { + openMark: '{', + closeMark: '}', + }) + ]), { escapeToken: "" }), ], { - openMark: "{", - closeMark: "}", - }), - ])), - ], + openMark: '{', + closeMark: '}', + }) + ]), { escapeToken: "" }), + ] }]); }); diff --git a/packages/unified-latex-util-pegjs/grammars/latex.pegjs b/packages/unified-latex-util-pegjs/grammars/latex.pegjs index 2b3f2cb9..0c33cb51 100644 --- a/packages/unified-latex-util-pegjs/grammars/latex.pegjs +++ b/packages/unified-latex-util-pegjs/grammars/latex.pegjs @@ -67,7 +67,6 @@ parbreak "parbreak" math_token "math token" = special_macro - / whitespace* x:text_macro whitespace* y:group whitespace* { return [x, y].flatMap((x) => x); } / macro / full_comment / whitespace* x:math_group whitespace* { return x; } @@ -291,11 +290,6 @@ macro "macro" return createNode("macro", { content: m }); } -text_macro "text macro" - = m:(escape n:"text" { return n; }) { - return createNode("macro", { content: m }); - } - group "group" = begin_group x:(!end_group c:token { return c; })* end_group { return createNode("group", { content: x.flatMap((x) => x) }); @@ -351,7 +345,7 @@ math_environment "math environment" }); } -// group that assumes you're in math mode. +// group that assumes you're in math mode. If you use "\text{}" this isn't a good idea.... math_group "math group" = begin_group x:(!end_group c:math_token { return c; })* end_group { return createNode("group", { content: x.flatMap((x) => x) });