Skip to content

Commit

Permalink
add unicode escaping sequences
Browse files — browse the repository at this point in the history
  • Loading branch information
Gusarich committed Mar 28, 2024
1 parent 40983d9 commit 1bb6f5a
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 20 deletions.
17 changes: 16 additions & 1 deletion src/generator/writers/writeExpression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export function writeExpression(f: ASTExpression, ctx: WriterContext): string {
//

if (f.kind === 'string') {
const s = f.value.replace(/\\\\|\\"|\\n|\\r|\\t|\\b|\\f/g, (match) => {
const s = f.value.replace(/\\\\|\\"|\\n|\\r|\\t|\\b|\\f|\\u{([0-9A-Fa-f]+)}|\\u([0-9A-Fa-f]{4})|\\x([0-9A-Fa-f]{2})/g, (match, unicodeCodePoint, unicodeEscape, hexEscape) => {
switch (match) {
case '\\\\':
return '\\';
Expand All @@ -149,6 +149,21 @@ export function writeExpression(f: ASTExpression, ctx: WriterContext): string {
case '\\f':
return '\f';
default:
// Handle Unicode code point escape
if (unicodeCodePoint) {
const codePoint = parseInt(unicodeCodePoint, 16);
return String.fromCodePoint(codePoint);
}
// Handle Unicode escape
if (unicodeEscape) {
const codeUnit = parseInt(unicodeEscape, 16);
return String.fromCharCode(codeUnit);
}
// Handle hex escape
if (hexEscape) {
const hexValue = parseInt(hexEscape, 16);
return String.fromCharCode(hexValue);
}
return match;
}
});
Expand Down
17 changes: 10 additions & 7 deletions src/grammar/grammar.ohm
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,16 @@ Tact {
// String literal
stringLiteral = "\"" (nonQuoteOrBackslashChar | escapeSequence)* "\""
nonQuoteOrBackslashChar = ~("\"" | "\\") any
escapeSequence = "\\" "\\" -- backslash
| "\\" "\"" -- doubleQuote
| "\\" "n" -- newline
| "\\" "r" -- carriageReturn
| "\\" "t" -- tab
| "\\" "b" -- backspace
| "\\" "f" -- formFeed
// Escape sequences accepted inside a string literal. Every alternative starts
// with a backslash and is followed by one of:
//   - another backslash or a double quote,
//   - one of the letters n, r, t, b, f,
//   - `u{` + 1 to 6 hex digits + `}`   (unicodeCodePoint),
//   - `u`  + exactly 4 hex digits      (unicodeEscape),
//   - `x`  + exactly 2 hex digits      (hexEscape).
// NOTE(review): 1 to 6 hex digits admits values above 10FFFF (e.g. \u{FFFFFF}).
// writeExpression.ts feeds the parsed number to String.fromCodePoint, which
// throws a RangeError for values above 0x10FFFF -- confirm such literals are
// rejected with a proper compile error before they reach the writer.
// NOTE(review): the unescaping regex in writeExpression.ts uses `[0-9A-Fa-f]+`
// for the braced form, so the 1-6 digit limit appears to be enforced only by
// this rule -- verify.
escapeSequence = "\\\\" -- backslash
| "\\\"" -- doubleQuote
| "\\n" -- newline
| "\\r" -- carriageReturn
| "\\t" -- tab
| "\\b" -- backspace
| "\\f" -- formFeed
| "\\u{" hexDigit hexDigit? hexDigit? hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
| "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
| "\\x" hexDigit hexDigit -- hexEscape

// Keywords
// NOTE Order is important
Expand Down
17 changes: 10 additions & 7 deletions src/grammar/grammar.ohm-bundle.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,16 @@ export interface TactActionDict<T> extends ActionDict<T> {
boolLiteral?: (this: NonterminalNode, arg0: TerminalNode) => T;
stringLiteral?: (this: NonterminalNode, arg0: TerminalNode, arg1: IterationNode, arg2: TerminalNode) => T;
nonQuoteOrBackslashChar?: (this: NonterminalNode, arg0: NonterminalNode) => T;
escapeSequence_backslash?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_doubleQuote?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_newline?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_carriageReturn?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_tab?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_backspace?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_formFeed?: (this: NonterminalNode, arg0: TerminalNode, arg1: TerminalNode) => T;
escapeSequence_backslash?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_doubleQuote?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_newline?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_carriageReturn?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_tab?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_backspace?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_formFeed?: (this: NonterminalNode, arg0: TerminalNode) => T;
escapeSequence_unicodeCodePoint?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: IterationNode, arg3: IterationNode, arg4: IterationNode, arg5: IterationNode, arg6: IterationNode, arg7: TerminalNode) => T;
escapeSequence_unicodeEscape?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: NonterminalNode, arg3: NonterminalNode, arg4: NonterminalNode) => T;
escapeSequence_hexEscape?: (this: NonterminalNode, arg0: TerminalNode, arg1: NonterminalNode, arg2: NonterminalNode) => T;
escapeSequence?: (this: NonterminalNode, arg0: NonterminalNode) => T;
keyword?: (this: NonterminalNode, arg0: NonterminalNode) => T;
contract?: (this: NonterminalNode, arg0: TerminalNode) => T;
Expand Down
2 changes: 1 addition & 1 deletion src/grammar/grammar.ohm-bundle.js

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions src/test/feature-strings.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,16 @@ describe('feature-strings', () => {
}

expect(await contract.getStringWithEscapedChars1()).toBe(
'test \n \n \\ \\\n "string"'
"test \n \n \\ \\\n \"string\""
);
expect(await contract.getStringWithEscapedChars2()).toEqual(
'test \n test \t test \r test \b test \f test " test \' test \\ \\\\ "_" "" test'
"test \n test \t test \r test \b test \f test \" test ' test \\ \\\\ \"_\" \"\" test"
);
expect(await contract.getStringWithEscapedChars3()).toEqual(
'test \\n test \\t test \\r test \\b test \\f test \\" test \\\' test \\\\ \\\\\\\\ \\"_\\" \\"\\" test'
"test \\n test \\t test \\r test \\\\b\b test \\f test \\\" test \\' test \\\\ \\\\\\\\ \\\"_\\\" \\\"\\\" test"
);
expect(await contract.getStringWithEscapedChars4()).toEqual(
"\u{2028}\u{2029} \u0044 \x41\x42\x43"
);
});
});
6 changes: 5 additions & 1 deletion src/test/features/strings.tact
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ contract StringsTester {
}

get fun stringWithEscapedChars3(): String {
return "test \\n test \\t test \\r test \\b test \\f test \\\" test \\' test \\\\ \\\\\\\\ \\\"_\\\" \\\"\\\" test";
return "test \\n test \\t test \\r test \\\\b\b test \\f test \\\" test \\' test \\\\ \\\\\\\\ \\\"_\\\" \\\"\\\" test";
}

// Getter exercising the three numeric escape forms in one literal: braced
// code points (\u{2028}, \u{2029}), a four-digit \u escape (\u0044) and
// two-digit \x escapes (\x41, \x42, \x43). feature-strings.spec.ts compares
// the returned value against the same literal written as a JavaScript string.
get fun stringWithEscapedChars4(): String {
return "\u{2028}\u{2029} \u0044 \x41\x42\x43";
}
}

0 comments on commit 1bb6f5a

Please sign in to comment.