Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix nested subscripts parsing #114 #117

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ const OPERATOR_NAMES = [
// `$` should never be a string in math mode.
const prefixTree = Trie(OPERATOR_NAMES);

// Names of macros that have only text content. The lint rule below returns
// early for any node that has one of these macros among its parents.
const TEXT_CONTENT_MACRO = [
"operatorname",
];

// Trie built from `TEXT_CONTENT_MACRO`; queried with `hasWord` to test whether
// a macro's name is one of the text-content macros.
const textContentMacroTree = Trie(TEXT_CONTENT_MACRO);

/**
* If the sequence starting at `pos` is a sequence of single character strings
* matching one of the `OPERATOR_NAMES`, then the matching operator name is returned.
Expand Down Expand Up @@ -113,6 +120,10 @@ Avoid writing operators in plaintext. For example, instead of \`$sin(2)$\` write
ChkTeX Warning 35
`;

/**
 * Report whether `node` is a macro whose name is found in
 * `textContentMacroTree`.
 *
 * @param node - An AST node or argument to inspect.
 * @returns `false` for anything that is not a macro; otherwise the result of
 * looking up the macro's name in the trie.
 */
function nodeIsTextMacro(node: Ast.Node | Ast.Argument): boolean {
    // Only macros carry a name that can be looked up.
    if (node.type !== "macro") {
        return false;
    }
    return textContentMacroTree.hasWord(node.content);
}

export const unifiedLatexLintNoPlaintextOperators = lintRule<
Ast.Root,
PluginOptions
Expand All @@ -122,7 +133,7 @@ export const unifiedLatexLintNoPlaintextOperators = lintRule<
visit(
tree,
(nodes, info) => {
if (!info.context.inMathMode) {
if (!info.context.inMathMode || info.parents.some(nodeIsTextMacro)) {
return;
}

Expand Down
110 changes: 108 additions & 2 deletions packages/unified-latex-util-parse/tests/parse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { trimRenderInfo } from "@unified-latex/unified-latex-util-render-info";
import * as Ast from "@unified-latex/unified-latex-types/index";
import { trim } from "@unified-latex/unified-latex-util-trim";
import { processLatexToAstViaUnified } from "@unified-latex/unified-latex";
import { PluginOptions as ParserPluginOptions } from "../libs/plugin-from-string";
import * as AstBuilder from "@unified-latex/unified-latex-builder";

/* eslint-env jest */

Expand All @@ -18,13 +20,23 @@ describe("unified-latex-util-parse", () => {
let value: string | undefined;
let file: VFile | undefined;

function strToNodes(str: string) {
function strToNodes(str: string, options?: ParserPluginOptions) {
value = str;
file = processLatexToAstViaUnified().processSync({ value });
file = processLatexToAstViaUnified(options).processSync({ value });
const root = trimRenderInfo(file.result as any) as Ast.Root;
return root.content;
}

/**
 * Build the list of nodes for a run of plain text, one node per character
 * produced by iterating `text`.
 *
 * Characters matching `/\s/` become `AstBuilder.SP`; every other character
 * becomes `AstBuilder.s(char)`.
 */
function textToStringNodes(text: string) {
    const isWhitespace = (ch: string) => /\s/.test(ch);
    return Array.from(text, (ch) =>
        isWhitespace(ch) ? AstBuilder.SP : AstBuilder.s(ch)
    );
}

it("trims whitespace/parbreaks in math environments", () => {
// Display math
let targetAst = strToNodes("\\[\\]");
Expand Down Expand Up @@ -73,4 +85,98 @@ describe("unified-latex-util-parse", () => {
trim(ast);
expect(ast).toEqual(targetAst);
});

it("nested math subscripts", () => {
let ast = strToNodes("{1_2}", {
mode: 'math',
});
expect(ast).toEqual([{
type: "group",
content: [
AstBuilder.s("1"),
AstBuilder.m("_", AstBuilder.args([
AstBuilder.arg([AstBuilder.s("2")], {
openMark: '{',
closeMark: '}',
}),
]), { escapeToken: "" }),
],
}]);

ast = strToNodes("$x_{y_{\\text{hello there $p_q_r$}}}$");
expect(ast).toEqual([{
type: "inlinemath",
content:[
AstBuilder.s('x'),
AstBuilder.m("_", AstBuilder.args([
AstBuilder.arg([
AstBuilder.s("y"),
AstBuilder.m("_", AstBuilder.args([
AstBuilder.arg([
AstBuilder.m("text", AstBuilder.args([
AstBuilder.arg([
...textToStringNodes("hello there "),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This concerns me, because it means that we aren't parsing as text at this point, but instead as math mode.

If you do $\text{\url{my_url}}$ does it reprint as $\text{\url{my_{u}rl}}$?

{
type: "inlinemath",
content: [
AstBuilder.s("p"),
AstBuilder.m("_", AstBuilder.args([
AstBuilder.arg([
AstBuilder.s("q"),
], {
openMark: '{',
closeMark: '}',
}),
]), { escapeToken: "" }),
AstBuilder.m("_", AstBuilder.args([
AstBuilder.arg([
AstBuilder.s("r"),
], {
openMark: '{',
closeMark: '}',
}),
]), { escapeToken: "" }),
]
},
], {
openMark: '{',
closeMark: '}',
})
])),
], {
openMark: '{',
closeMark: '}',
})
]), { escapeToken: "" }),
], {
openMark: '{',
closeMark: '}',
})
]), { escapeToken: "" }),
]
}]);

});

it("nested math single char arguments", () => {
    // Parse `\frac12` inside a group while in math mode.
    const parsed = strToNodes("{\\frac12}", { mode: "math" });

    // Each single character after `\frac` is expected to be its own
    // brace-delimited argument of the macro.
    const expectedFrac = AstBuilder.m(
        "frac",
        AstBuilder.args([
            AstBuilder.arg([AstBuilder.s("1")], {
                openMark: "{",
                closeMark: "}",
            }),
            AstBuilder.arg([AstBuilder.s("2")], {
                openMark: "{",
                closeMark: "}",
            }),
        ])
    );
    const expected = [
        {
            type: "group",
            content: [expectedFrac],
        },
    ];

    expect(parsed).toEqual(expected);
});
});
6 changes: 5 additions & 1 deletion packages/unified-latex-util-pegjs/grammars/latex.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,19 @@ math_token "math token"
= special_macro
/ macro
/ full_comment
/ whitespace* x:group whitespace* { return x; }
/ whitespace* x:math_group whitespace* { return x; }
/ whitespace* x:alignment_tab whitespace* { return x; }
/ math_shift eq:(!math_shift t:math_token { return t; })+ math_shift {
return createNode("inlinemath", { content: eq.flatMap((x) => x) });
}
/ macro_parameter
/ whitespace* superscript whitespace* {
return createNode("macro", { content: "^", escapeToken: "" });
}
/ whitespace* subscript whitespace* {
return createNode("macro", { content: "_", escapeToken: "" });
}
/ math_shift
/ ignore
/ whitespace
/ s:. { return createNode("string", { content: s }); }
Expand Down
6 changes: 4 additions & 2 deletions packages/unified-latex/libs/unified-latex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ export const processLatexViaUnified = (
* Use `unified()` to parse a string to an `Ast.Ast` and then return it. This function
* will not print/pretty-print the `Ast.Ast` back to a string.
*/
export const processLatexToAstViaUnified = () => {
return unified().use(unifiedLatexFromString).use(unifiedLatexAstComplier);
export const processLatexToAstViaUnified = (
options?: ParserPluginOptions
) => {
return unified().use(unifiedLatexFromString, options).use(unifiedLatexAstComplier);
};