diff --git a/llamafile/high.cpp b/llamafile/high.cpp index 9c2e9d78f0..3fe1cd0c48 100644 --- a/llamafile/high.cpp +++ b/llamafile/high.cpp @@ -48,40 +48,7 @@ static std::string extname(const std::string_view path) { return std::string(name.substr(dot_pos + 1)); } -int main(int argc, char *argv[]) { - - // process flags - int opt; - int infd = 0; - int outfd = 1; - const char *lang = nullptr; - const char *inpath = nullptr; - while ((opt = getopt(argc, argv, "hl:o:")) != -1) { - switch (opt) { - case 'h': - printf("usage: %s [-l LANG] [-o OUTFILE] [INFILE]\n", argv[0]); - exit(0); - case 'l': - lang = optarg; - break; - case 'o': - if ((outfd = creat(optarg, 0644)) == -1) { - perror(optarg); - exit(1); - } - break; - default: - exit(1); - } - } - if (optind < argc) { - inpath = argv[optind]; - if ((infd = open(inpath, O_RDONLY)) == -1) { - perror(inpath); - exit(1); - } - } - +static void highlight(int infd, int outfd, const char *lang, const char *inpath) { // create syntax highlighter Highlight *h; const char *ext; @@ -123,3 +90,44 @@ int main(int argc, char *argv[]) { H.flush(&res); write(outfd, res.data(), res.size()); } + +int main(int argc, char *argv[]) { + + // process flags + int opt; + int outfd = 1; + const char *lang = nullptr; + while ((opt = getopt(argc, argv, "hl:o:")) != -1) { + switch (opt) { + case 'h': + printf("usage: %s [-l LANG] [-o OUTFILE] [INFILE]\n", argv[0]); + exit(0); + case 'l': + lang = optarg; + break; + case 'o': + if ((outfd = creat(optarg, 0644)) == -1) { + perror(optarg); + exit(1); + } + break; + default: + exit(1); + } + } + + // process files + if (optind == argc) { + highlight(0, outfd, lang, 0); + } else { + for (int i = optind; i < argc; ++i) { + int infd; + const char *inpath = argv[i]; + if ((infd = open(inpath, O_RDONLY)) == -1) { + perror(inpath); + exit(1); + } + highlight(infd, outfd, lang, inpath); + } + } +} diff --git a/llamafile/highlight.h b/llamafile/highlight.h index 54d2e286fb..3595780890 100644 --- a/llamafile/highlight.h +++ b/llamafile/highlight.h @@ -471,8 +471,11 @@ class HighlightShell : public Highlight { int u_ = 0; int t_ = 0; int i_ = 0; + int curl_ = 0; + int last_ = 0; bool pending_heredoc_ = false; bool indented_heredoc_ = false; + bool no_interpolation_ = false; std::string word_; std::string heredoc_; }; diff --git a/llamafile/highlight_shell.cpp b/llamafile/highlight_shell.cpp index 79ec29fcb8..b86e13a101 100644 --- a/llamafile/highlight_shell.cpp +++ b/llamafile/highlight_shell.cpp @@ -25,11 +25,17 @@ enum { WORD, QUOTE, DQUOTE, + DQUOTE_VAR, + DQUOTE_VAR2, + DQUOTE_CURL, + DQUOTE_CURL_BACKSLASH, DQUOTE_BACKSLASH, TICK, TICK_BACKSLASH, VAR, VAR2, + CURL, + CURL_BACKSLASH, COMMENT, LT, LT_LT, @@ -37,6 +43,10 @@ enum { LT_LT_QNAME, HEREDOC_BOL, HEREDOC, + HEREDOC_VAR, + HEREDOC_VAR2, + HEREDOC_CURL, + HEREDOC_CURL_BACKSLASH, BACKSLASH, }; @@ -50,9 +60,10 @@ void HighlightShell::feed(std::string *r, std::string_view input) { for (size_t i = 0; i < input.size(); ++i) { wchar_t c; int b = input[i] & 255; + last_ = c_; if (!u_) { if (b < 0300) { - c = b; + c_ = c = b; } else { c_ = ThomPikeByte(b); u_ = ThomPikeLen(b) - 1; @@ -97,7 +108,7 @@ void HighlightShell::feed(std::string *r, std::string_view input) { } else if (c == '<') { t_ = LT; *r += '<'; - } else if (c == '#') { + } else if (c == '#' && (!last_ || isspace(last_))) { *r += HI_COMMENT; *r += '#'; t_ = COMMENT; @@ -157,9 +168,10 @@ void HighlightShell::feed(std::string *r, std::string_view input) { t_ = NORMAL; break; } else if (c == '{') { - append_wchar(r, c); + *r += '{'; *r += HI_VAR; - t_ = VAR2; + t_ = CURL; + curl_ = 1; break; } else { *r += HI_VAR; @@ -177,6 +189,37 @@ void HighlightShell::feed(std::string *r, std::string_view input) { } break; + case CURL: + if (c == '\\') { + t_ = CURL_BACKSLASH; + *r += HI_RESET; + *r += HI_ESCAPE; + *r += '\\'; + } else if (c == '{') { + *r += HI_RESET; + *r += '{'; + *r += HI_VAR; + ++curl_; + } else if (c == '}') { + *r += HI_RESET; + *r += '}'; + if (!--curl_) { + t_ = NORMAL; + } + } else if (ispunct(c)) { + *r += HI_RESET; + append_wchar(r, c); + } else { + append_wchar(r, c); + } + break; + + case CURL_BACKSLASH: + append_wchar(r, c); + *r += HI_RESET; + t_ = CURL; + break; + case COMMENT: append_wchar(r, c); if (c == '\n') { @@ -193,13 +236,19 @@ void HighlightShell::feed(std::string *r, std::string_view input) { } break; + Dquote: case DQUOTE: - append_wchar(r, c); if (c == '"') { + append_wchar(r, c); *r += HI_RESET; t_ = NORMAL; } else if (c == '\\') { + append_wchar(r, c); t_ = DQUOTE_BACKSLASH; + } else if (c == '$') { + t_ = DQUOTE_VAR; + } else { + append_wchar(r, c); } break; @@ -208,6 +257,73 @@ void HighlightShell::feed(std::string *r, std::string_view input) { t_ = DQUOTE; break; + case DQUOTE_VAR: + if (c == '!' || // + c == '#' || // + c == '$' || // + c == '*' || // + c == '-' || // + c == '?' || // + c == '@' || // + c == '\\' || // + c == '^') { + *r += HI_BOLD; + *r += '$'; + append_wchar(r, c); + *r += HI_UNBOLD; + t_ = DQUOTE; + break; + } else if (c == '{') { + *r += HI_BOLD; + *r += "${"; + t_ = DQUOTE_CURL; + curl_ = 1; + break; + } else if (c == '(') { + *r += '$'; + t_ = DQUOTE_VAR2; + } else { + *r += HI_BOLD; + *r += '$'; + t_ = DQUOTE_VAR2; + } + // fallthrough + + case DQUOTE_VAR2: + if (!isascii(c) || isalnum(c) || c == '_') { + append_wchar(r, c); + } else { + *r += HI_UNBOLD; + t_ = DQUOTE; + goto Dquote; + } + break; + + case DQUOTE_CURL: + if (c == '\\') { + t_ = DQUOTE_CURL_BACKSLASH; + *r += '\\'; + } else if (c == '{') { + *r += '{'; + ++curl_; + } else if (c == '}') { + *r += '}'; + if (!--curl_) { + *r += HI_UNBOLD; + t_ = DQUOTE; + } + } else if (ispunct(c)) { + append_wchar(r, c); + } else { + append_wchar(r, c); + } + break; + + case DQUOTE_CURL_BACKSLASH: + append_wchar(r, c); + t_ = DQUOTE_CURL; + break; + case TICK: append_wchar(r, c); if (c == '`') { @@ -230,6 +346,7 @@ void HighlightShell::feed(std::string *r, std::string_view input) { heredoc_.clear(); pending_heredoc_ = false; indented_heredoc_ = false; + no_interpolation_ = false; } else { t_ = NORMAL; goto Normal; @@ -246,6 +363,7 @@ void HighlightShell::feed(std::string *r, std::string_view input) { t_ = LT_LT_QNAME; *r += HI_STRING; append_wchar(r, c); + no_interpolation_ = true; } else if (isalpha(c) || c == '_') { t_ = LT_LT_NAME; append_wchar(&heredoc_, c); @@ -302,10 +420,83 @@ void HighlightShell::feed(std::string *r, std::string_view input) { } break; + Heredoc: case HEREDOC: - append_wchar(r, c); - if (c == '\n') + if (c == '\n') { + *r += '\n'; t_ = HEREDOC_BOL; + } else if (c == '$' && !no_interpolation_) { + t_ = HEREDOC_VAR; + } else { + append_wchar(r, c); + } + break; + + case HEREDOC_VAR: + if (c == '!' || // + c == '#' || // + c == '$' || // + c == '*' || // + c == '-' || // + c == '?' || // + c == '@' || // + c == '\\' || // + c == '^') { + *r += HI_BOLD; + *r += '$'; + append_wchar(r, c); + *r += HI_UNBOLD; + t_ = HEREDOC; + break; + } else if (c == '{') { + *r += HI_BOLD; + *r += "${"; + t_ = HEREDOC_CURL; + curl_ = 1; + break; + } else if (c == '(') { + *r += '$'; + t_ = HEREDOC_VAR2; + } else { + *r += HI_BOLD; + *r += '$'; + t_ = HEREDOC_VAR2; + } + // fallthrough + + case HEREDOC_VAR2: + if (!isascii(c) || isalnum(c) || c == '_') { + append_wchar(r, c); + } else { + *r += HI_UNBOLD; + t_ = HEREDOC; + goto Heredoc; + } + break; + + case HEREDOC_CURL: + if (c == '\\') { + t_ = HEREDOC_CURL_BACKSLASH; + *r += '\\'; + } else if (c == '{') { + *r += '{'; + ++curl_; + } else if (c == '}') { + *r += '}'; + if (!--curl_) { + *r += HI_UNBOLD; + t_ = HEREDOC; + } + } else if (ispunct(c)) { + append_wchar(r, c); + } else { + append_wchar(r, c); + } + break; + + case HEREDOC_CURL_BACKSLASH: + append_wchar(r, c); + t_ = HEREDOC_CURL; break; default: @@ -330,15 +521,31 @@ void HighlightShell::flush(std::string *r) { } word_.clear(); break; + case DQUOTE_VAR: + *r += '$'; + *r += HI_RESET; + break; + case HEREDOC_VAR: + *r += '$'; + *r += HI_RESET; + break; case VAR2: + case CURL: + case CURL_BACKSLASH: case TICK: case TICK_BACKSLASH: case QUOTE: case DQUOTE: + case DQUOTE_VAR2: + case DQUOTE_CURL: + case DQUOTE_CURL_BACKSLASH: case DQUOTE_BACKSLASH: case COMMENT: case HEREDOC_BOL: case HEREDOC: + case HEREDOC_VAR2: + case HEREDOC_CURL: + case HEREDOC_CURL_BACKSLASH: case LT_LT_QNAME: case BACKSLASH: *r += HI_RESET; @@ -349,4 +556,5 @@ void HighlightShell::flush(std::string *r) { c_ = 0; u_ = 0; t_ = NORMAL; + last_ = 0; } diff --git a/llamafile/highlight_typescript.cpp b/llamafile/highlight_typescript.cpp index 5680e76bfa..0bdde0be35 100644 --- a/llamafile/highlight_typescript.cpp +++ b/llamafile/highlight_typescript.cpp @@ -259,9 +259,9 @@ void HighlightTypescript::feed(std::string *r, std::string_view input) { case TICK_DOLLAR: if (c == '{' && nesti_ < sizeof(nest_)) { - // this is how the typescript playground highlights - *r += HI_RESET; + *r += HI_BOLD; *r += '$'; + *r += HI_UNBOLD; *r += HI_STRING; *r += '{'; *r += HI_RESET; diff --git a/llamafile/is_keyword_typescript.gperf b/llamafile/is_keyword_typescript.gperf index 99f7426db3..c9b3f30c47 100644 --- a/llamafile/is_keyword_typescript.gperf +++ b/llamafile/is_keyword_typescript.gperf @@ -54,6 +54,7 @@ set static switch target +this throw try type