From 636840a23b5b1bed5dcab77caa34920b50a52f6f Mon Sep 17 00:00:00 2001 From: Stephen Hurwitz Date: Mon, 16 Oct 2023 17:56:26 -0700 Subject: [PATCH 1/4] Implements Scanner type for tokenizing nginx configs Implemented `crossplane.Scanner` that follows the example of other "scanner" types implemented in the Go stdlib. The existing `Lex` uses concurrency to make tokens available to the caller while managing "state". I think this design cue was taken from Rob Pike's 2011 talk on [Lexical Scanning in Go](https://go.dev/talks/2011/lex.slide). If you look at examples from the Go stdlib-- such as `bufio.Scanner` that `Lex` depends on-- you'd find that this isn't the strategy being employed; instead there is a struct that manages the state of the scanner and a method that is used by the caller to advance the scanner to obtain tokens. After a bit of Internet archeology, I found [this](https://groups.google.com/g/golang-nuts/c/q--5t2cxv78/m/Vkr9bNuhP5sJ) post on `golang-nuts` from Rob Pike himself: > That talk was about a lexer, but the deeper purpose was to demonstrate > how concurrency can make programs nice even without obvious parallelism > in the problem. And like many such uses of concurrency, the code is > pretty but not necessarily fast. > > I think it's a fine approach to a lexer if you don't care about > performance. It is significantly slower than some other approaches but > is very easy to adapt. I used it in ivy, for example, but just so you > know, I'm probably going to replace the one in ivy with a more > traditional model to avoid some issues with the lexer accessing global > state. You don't care about that for your application, I'm sure. > So: It's pretty and nice to work on, but you'd probably not choose that > approach for a production compiler. An implementation of a "scanner" using the more "traditional" model-- much of the logic is the same or very close to `Lex`-- seems to support the above statement. ``` go test -benchmem -run=^$ -bench "^BenchmarkScan|BenchmarkLex$" github.com/nginxinc/nginx-go-crossplane -count=1 -v goos: darwin goarch: arm64 pkg: github.com/nginxinc/nginx-go-crossplane BenchmarkLex BenchmarkLex/simple BenchmarkLex/simple-10 70982 16581 ns/op 102857 B/op 37 allocs/op BenchmarkLex/with-comments BenchmarkLex/with-comments-10 64125 18366 ns/op 102921 B/op 43 allocs/op BenchmarkLex/messy BenchmarkLex/messy-10 28171 42697 ns/op 104208 B/op 166 allocs/op BenchmarkLex/quote-behavior BenchmarkLex/quote-behavior-10 83667 14154 ns/op 102768 B/op 24 allocs/op BenchmarkLex/quoted-right-brace BenchmarkLex/quoted-right-brace-10 48022 24799 ns/op 103369 B/op 52 allocs/op BenchmarkScan BenchmarkScan/simple BenchmarkScan/simple-10 179712 6660 ns/op 4544 B/op 34 allocs/op BenchmarkScan/with-comments BenchmarkScan/with-comments-10 133178 7628 ns/op 4608 B/op 40 allocs/op BenchmarkScan/messy BenchmarkScan/messy-10 49251 24106 ns/op 5896 B/op 163 allocs/op BenchmarkScan/quote-behavior BenchmarkScan/quote-behavior-10 240026 4854 ns/op 4456 B/op 21 allocs/op BenchmarkScan/quoted-right-brace BenchmarkScan/quoted-right-brace-10 87468 13534 ns/op 5056 B/op 49 allocs/op PASS ok github.com/nginxinc/nginx-go-crossplane 13.676s ``` This alternative to `Lex` is probably a micro-optimization for many use cases. Still, as the size and number of NGINX configurations that need to be analyzed grows, the optimization becomes more valuable, as does an API that feels familiar to Go developers who might use this tool for their own purposes. Next steps: - Use `Scanner` to "parse" NGINX configurations. 
I think this should be done in place so that the existing API works as is, but we should also expose a way to allow the caller to provide the scanner. - Deprecate `Lex` in favor of `Scanner`. If we leave `Lex` in place then I don't think we would need a `v2` of the crossplane package (yet). --- lex_test.go | 57 +++++++++--- scanner.go | 230 ++++++++++++++++++++++++++++++++++++++++++++++++ scanner_test.go | 110 +++++++++++++++++++++++ util.go | 2 + 4 files changed, 386 insertions(+), 13 deletions(-) create mode 100644 scanner.go create mode 100644 scanner_test.go diff --git a/lex_test.go b/lex_test.go index cb3c5148..6bfc0bb0 100644 --- a/lex_test.go +++ b/lex_test.go @@ -446,22 +446,53 @@ func TestLex(t *testing.T) { } } -func TestLex_unhappy(t *testing.T) { - t.Parallel() +var lexToken NgxToken //nolint: gochecknoglobals // trying to avoid return value being optimzed away + +func BenchmarkLex(b *testing.B) { + var t NgxToken - testcases := map[string]string{ - "unbalanced open brance": `http {{}`, - "unbalanced closing brace": `http {}}`, - "multiple open braces": `http {{server {}}`, - "multiple closing braces after block end": `http {server {}}}`, - "multiple semicolons": `server { listen 80;; }`, - "semicolon afer closing brace": `server { listen 80; };`, - "open brace after semicolon": `server { listen 80; {}`, - "braces with no directive": `http{}{}`, - "missing final brace": `http{`, + for _, bm := range lexFixtures { + b.Run(bm.name, func(b *testing.B) { + path := getTestConfigPath(bm.name, "nginx.conf") + file, err := os.Open(path) + if err != nil { + b.Fatal(err) + } + defer file.Close() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + if _, err := file.Seek(0, 0); err != nil { + b.Fatal(err) + } + + for tok := range Lex(file) { + t = tok + } + } + }) } - for name, c := range testcases { + lexToken = t +} + +//nolint:gochecknoglobals +var unhappyFixtures = map[string]string{ + "unbalanced open brance": `http {{}`, + "unbalanced closing brace": `http {}}`, + "multiple open braces": `http {{server {}}`, + "multiple closing braces after block end": `http {server {}}}`, + "multiple semicolons": `server { listen 80;; }`, + "semicolon afer closing brace": `server { listen 80; };`, + "open brace after semicolon": `server { listen 80; {}`, + "braces with no directive": `http{}{}`, + "missing final brace": `http{`, +} + +func TestLex_unhappy(t *testing.T) { + t.Parallel() + + for name, c := range unhappyFixtures { c := c t.Run(name, func(t *testing.T) { t.Parallel() diff --git a/scanner.go b/scanner.go new file mode 100644 index 00000000..cae32bf0 --- /dev/null +++ b/scanner.go @@ -0,0 +1,230 @@ +package crossplane + +import ( + "bufio" + "errors" + "fmt" + "io" + "strings" +) + +// Token is a lexical token of the NGINX configuration syntax. +type Token struct { + // Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token + // sequence in order to support defining directives in modules other than the core NGINX module set. + Text string + // Line is the source starting line number the token within a file. + Line int + // IsQuoted signifies if the token is wrapped by quotes (", '). Quotes are not usually necessary in an NGINX + // configuration and mostly serve to help make the config less ambiguous. 
+ IsQuoted bool +} + +type scannerError struct { + msg string + line int +} + +func (e *scannerError) Error() string { return e.msg } +func (e *scannerError) Line() int { return e.line } + +func newScannerErrf(line int, format string, a ...any) *scannerError { + return &scannerError{line: line, msg: fmt.Sprintf(format, a...)} +} + +// LineNumber reports the line on which the error occurred by finding the first error in +// the errs chain that returns a line number. Otherwise, it returns 0, false. +// +// An error type should provide a Line() int method to return a line number. +func LineNumber(err error) (int, bool) { + var e interface{ Line() int } + if !errors.As(err, &e) { + return 0, false + } + + return e.Line(), true +} + +// Scanner provides an interface for tokenizing an NGINX configuration. Successive calls to the Scane method will step +// through the 'tokens; of an NGINX configuration. +// +// Scanning stops unrecoverably at EOF, the first I/O error, or an unexpected token. +// +// Use NewScanner to construct a Scanner. +type Scanner struct { + scanner *bufio.Scanner + lineno int + tokenStartLine int + tokenDepth int + repeateSpecialChar bool // only '}' can be repeated + prev string +} + +// NewScanner returns a new Scanner to read from r. +func NewScanner(r io.Reader) *Scanner { + s := &Scanner{ + scanner: bufio.NewScanner(r), + lineno: 1, + tokenStartLine: 1, + tokenDepth: 0, + repeateSpecialChar: false, + } + + s.scanner.Split(bufio.ScanRunes) + + return s +} + +// Scan reads the next token from source and returns it.. It returns io.EOF at the end of the source. Scanner errors are +// returned when encountered. +func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo + var tok strings.Builder + + lexState := skipSpace + newToken := false + readNext := true + esc := false + + var r, quote string + + for { + switch { + case s.prev != "": + r = s.prev + s.prev = "" + case readNext: + if !s.scanner.Scan() { + if tok.Len() > 0 { + return Token{Text: tok.String(), Line: s.tokenStartLine, IsQuoted: lexState == inQuote}, nil + } + + if s.tokenDepth > 0 { + return Token{}, &scannerError{line: s.tokenStartLine, msg: "unexpected end of file, expecting }"} + } + + return Token{}, io.EOF + } + + nextRune := s.scanner.Text() + r = nextRune + if isEOL(r) { + s.lineno++ + } + default: + readNext = true + } + + // skip CRs + if r == "\r" || r == "\\\r" { + continue + } + + if r == "\\" && !esc { + esc = true + continue + } + + if esc { + esc = false + r = "\\" + r + } + + switch lexState { + case skipSpace: + if !isSpace(r) { + lexState = inWord + newToken = true + readNext = false // re-eval + s.tokenStartLine = s.lineno + } + continue + + case inWord: + if newToken { + newToken = false + if r == "#" { + tok.WriteString(r) + lexState = inComment + s.tokenStartLine = s.lineno + continue + } + } + + if isSpace(r) { + return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + } + + // parameter expansion syntax (ex: "${var[@]}") + if tok.Len() > 0 && strings.HasSuffix(tok.String(), "$") && r == "{" { + tok.WriteString(r) + lexState = inVar + s.repeateSpecialChar = false + continue + } + + // add entire quoted string to the token buffer + if r == `"` || r == "'" { + if tok.Len() > 0 { + // if a quote is inside a token, treat it like any other char + tok.WriteString(r) + } else { + quote = r + lexState = inQuote + s.tokenStartLine = s.lineno + } + s.repeateSpecialChar = false + continue + } + + // special characters treated as full tokens + if isSpecialChar(r) { + if 
tok.Len() > 0 { + s.prev = r + return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + } + + // only } can be repeated + if s.repeateSpecialChar && r != "}" { + return Token{}, newScannerErrf(s.tokenStartLine, "unxpected %q", r) + } + + s.repeateSpecialChar = true + if r == "{" { + s.tokenDepth++ + } + + if r == "}" { + s.tokenDepth-- + if s.tokenDepth < 0 { + return Token{}, &scannerError{line: s.tokenStartLine, msg: `unexpected "}"`} + } + } + + tok.WriteString(r) + return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + } + + s.repeateSpecialChar = false + tok.WriteString(r) + case inComment: + if isEOL(r) { + return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + } + tok.WriteString(r) + case inVar: + tok.WriteString(r) + if r != "}" && !isSpace(r) { + continue + } + lexState = inWord + case inQuote: + if r == quote { + return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + } + if r == "\\"+quote { + r = quote + } + tok.WriteString(r) + } + } +} diff --git a/scanner_test.go b/scanner_test.go new file mode 100644 index 00000000..30d06bbe --- /dev/null +++ b/scanner_test.go @@ -0,0 +1,110 @@ +package crossplane + +import ( + "io" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestScanner(t *testing.T) { + t.Parallel() + + for _, f := range lexFixtures { + f := f + t.Run(f.name, func(t *testing.T) { + t.Parallel() + + path := getTestConfigPath(f.name, "nginx.conf") + file, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer file.Close() + + s := NewScanner(file) + + i := 0 + for { + got, err := s.Scan() + if err == io.EOF { + if i < len(f.tokens)-1 { + t.Fatal("unexpected end of file") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + want := f.tokens[i] + require.Equal(t, want.value, got.Text) + require.Equal(t, want.line, got.Line) + i++ + } + }) + } +} + +func TestScanner_unhappy(t *testing.T) { + t.Parallel() + + for name, c := range unhappyFixtures { + c := c + t.Run(name, func(t *testing.T) { + t.Parallel() + + s := NewScanner(strings.NewReader(c)) + for { + _, err := s.Scan() + if err == io.EOF { + t.Fatal("reached end of string") + } + + if err != nil { + t.Logf("got error: %v", err) + return + } + } + }) + } +} + +var t Token //nolint: gochecknoglobals // trying to avoid return value being optimzed away + +func BenchmarkScan(b *testing.B) { + for _, bm := range lexFixtures { + b.Run(bm.name, func(b *testing.B) { + path := getTestConfigPath(bm.name, "nginx.conf") + file, err := os.Open(path) + if err != nil { + b.Fatal(err) + } + defer file.Close() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + if _, err := file.Seek(0, 0); err != nil { + b.Fatal(err) + } + + s := NewScanner(file) + + for { + tok, err := s.Scan() + if err == io.EOF { + break + } + if err != nil { + b.Fatal(err) + } + t = tok + } + } + }) + } +} diff --git a/util.go b/util.go index d2e84ade..186afeb5 100644 --- a/util.go +++ b/util.go @@ -35,6 +35,8 @@ func isEOL(s string) bool { return strings.HasSuffix(s, "\n") } +func isSpecialChar(s string) bool { return s == "{" || s == "}" || s == ";" } + func repr(s string) string { q := fmt.Sprintf("%q", s) for _, char := range s { From c07b1beddf5e53a407d0cd5e084e88b0b32bc782 Mon Sep 17 00:00:00 2001 From: Stephen Hurwitz Date: Wed, 31 Jan 2024 09:22:17 -0800 Subject: [PATCH 2/4] Adds Err() method to scanner and checks error in Scan() Stores the first error encountered by Scan() and checks it to make sure scanning stops unrecoverably. 
The Err() method can use used to fetch the last non-EOF error. --- scanner.go | 34 ++++++++++++++++++++++++++++------ scanner_test.go | 12 +++++++++++- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/scanner.go b/scanner.go index cae32bf0..2a3a25b8 100644 --- a/scanner.go +++ b/scanner.go @@ -58,6 +58,7 @@ type Scanner struct { tokenDepth int repeateSpecialChar bool // only '}' can be repeated prev string + err error } // NewScanner returns a new Scanner to read from r. @@ -75,6 +76,20 @@ func NewScanner(r io.Reader) *Scanner { return s } +// Err returns the first non-EOF error that was encountered by the Scanner. +func (s *Scanner) Err() error { + if s.err == io.EOF { + return nil + } + return s.err +} + +func (s *Scanner) setErr(err error) { + if s.err == nil || s.err != io.EOF { + s.err = err + } +} + // Scan reads the next token from source and returns it.. It returns io.EOF at the end of the source. Scanner errors are // returned when encountered. func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo @@ -88,10 +103,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo var r, quote string for { + if s.err != nil { + return Token{}, s.err + } + switch { case s.prev != "": - r = s.prev - s.prev = "" + r, s.prev = s.prev, "" case readNext: if !s.scanner.Scan() { if tok.Len() > 0 { @@ -99,10 +117,12 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo } if s.tokenDepth > 0 { - return Token{}, &scannerError{line: s.tokenStartLine, msg: "unexpected end of file, expecting }"} + s.setErr(&scannerError{line: s.tokenStartLine, msg: "unexpected end of file, expecting }"}) + return Token{}, s.err } - return Token{}, io.EOF + s.setErr(io.EOF) + return Token{}, s.err } nextRune := s.scanner.Text() @@ -185,7 +205,8 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo // only } can be repeated if s.repeateSpecialChar && r != "}" { - return Token{}, newScannerErrf(s.tokenStartLine, "unxpected %q", r) + s.setErr(newScannerErrf(s.tokenStartLine, "unxpected %q", r)) + return Token{}, s.err } s.repeateSpecialChar = true @@ -196,7 +217,8 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo if r == "}" { s.tokenDepth-- if s.tokenDepth < 0 { - return Token{}, &scannerError{line: s.tokenStartLine, msg: `unexpected "}"`} + s.setErr(&scannerError{line: s.tokenStartLine, msg: `unexpected "}"`}) + return Token{}, s.err } } diff --git a/scanner_test.go b/scanner_test.go index 30d06bbe..0774a50a 100644 --- a/scanner_test.go +++ b/scanner_test.go @@ -1,6 +1,7 @@ package crossplane import ( + "errors" "io" "os" "strings" @@ -66,7 +67,16 @@ func TestScanner_unhappy(t *testing.T) { if err != nil { t.Logf("got error: %v", err) - return + + if gotErr := s.Err(); !errors.Is(gotErr, err) { + t.Fatalf("error do not match: have=%+v, want=%+v", gotErr, err) + } + + if _, gotErr := s.Scan(); !errors.Is(gotErr, err) { + t.Fatalf("error after scan does not match: have=%+v, want=%+v", gotErr, err) + } + + break } } }) From cc657b18c93b73f48cade75c8ce12c6926a057b7 Mon Sep 17 00:00:00 2001 From: Stephen Hurwitz Date: Thu, 4 Apr 2024 10:11:34 -0700 Subject: [PATCH 3/4] Fixes issue parsing comments in args with quote Fixed bug where the quoted token did not have `IsQuoted` set to `true`. I added an additional lex fixture which shows both the existing lexer and new scanner handle the case correctly. 
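For reviewers, here is a minimal caller-side sketch of the behavior this patch fixes. The config literal and printed fields are illustrative only; `NewScanner`, `Scan`, and the `Token` fields are the API added in this series.

```go
package main

import (
	"fmt"
	"io"
	"strings"

	crossplane "github.com/nginxinc/nginx-go-crossplane"
)

func main() {
	// A quoted argument followed by a comment on the same line.
	conf := `log_format main '$remote_addr - $remote_user'; # trailing comment`

	s := crossplane.NewScanner(strings.NewReader(conf))
	for {
		tok, err := s.Scan()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		// With this fix the quoted argument is reported with IsQuoted=true,
		// and the trailing comment is still emitted as a single "#..." token.
		fmt.Printf("line=%d quoted=%t text=%q\n", tok.Line, tok.IsQuoted, tok.Text)
	}
}
```
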
--- lex_test.go | 14 ++++++++++++++ scanner.go | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lex_test.go b/lex_test.go index 6bfc0bb0..520be852 100644 --- a/lex_test.go +++ b/lex_test.go @@ -415,6 +415,20 @@ var lexFixtures = []lexFixture{ {"}", 20}, {"}", 21}, }}, + {"comments-between-args", []tokenLine{ + {"http", 1}, + {"{", 1}, + {"#comment 1", 1}, + {"log_format", 2}, + {"#comment 2", 2}, + {"\\#arg\\ 1", 3}, + {"#comment 3", 3}, + {"#arg 2", 4}, + {"#comment 4", 4}, + {"#comment 5", 5}, + {";", 6}, + {"}", 7}, + }}, } func TestLex(t *testing.T) { diff --git a/scanner.go b/scanner.go index 2a3a25b8..683d1cb3 100644 --- a/scanner.go +++ b/scanner.go @@ -241,7 +241,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo lexState = inWord case inQuote: if r == quote { - return Token{Text: tok.String(), Line: s.tokenStartLine}, nil + return Token{Text: tok.String(), Line: s.tokenStartLine, IsQuoted: true}, nil } if r == "\\"+quote { r = quote From fe04b93474273b67c11e57b70f56efeef509f31c Mon Sep 17 00:00:00 2001 From: Stephen Hurwitz Date: Fri, 5 Jul 2024 14:52:46 -0700 Subject: [PATCH 4/4] Updates scanner to support Lua extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed up the Scanner logic to mirror changes made to support Lua extension in Lex. Added a compat layer so that the existing Lua type can be used with `Scanner` vs trying to refactor the implementation to remove the channel. Doing so I think would result in further gains. Benchmarks: ``` ❯ go test -benchmem -run=^$ -bench "^(BenchmarkLex|BenchmarkLexWithLua|BenchmarkScanner|BenchmarkScannerWithLua)$" github.com/nginxinc/nginx-go-crossplane -count=1 goos: darwin goarch: arm64 pkg: github.com/nginxinc/nginx-go-crossplane BenchmarkLex/simple-10 57963 17756 ns/op 103049 B/op 39 allocs/op BenchmarkLex/with-comments-10 60025 20067 ns/op 103112 B/op 45 allocs/op BenchmarkLex/messy-10 26170 47822 ns/op 104400 B/op 168 allocs/op BenchmarkLex/quote-behavior-10 74510 17693 ns/op 102961 B/op 26 allocs/op BenchmarkLex/quoted-right-brace-10 43134 27752 ns/op 103560 B/op 54 allocs/op BenchmarkLex/comments-between-args-10 78271 14866 ns/op 102937 B/op 27 allocs/op BenchmarkLexWithLua/lua-basic-10 46273 26012 ns/op 105499 B/op 53 allocs/op BenchmarkLexWithLua/lua-block-simple-10 22514 54149 ns/op 108556 B/op 143 allocs/op BenchmarkLexWithLua/lua-block-larger-10 25983 46605 ns/op 108403 B/op 59 allocs/op BenchmarkLexWithLua/lua-block-tricky-10 33756 35067 ns/op 106684 B/op 66 allocs/op BenchmarkScanner/simple-10 163138 7084 ns/op 4648 B/op 36 allocs/op BenchmarkScanner/with-comments-10 144558 8100 ns/op 4712 B/op 42 allocs/op BenchmarkScanner/messy-10 47570 25026 ns/op 6000 B/op 165 allocs/op BenchmarkScanner/quote-behavior-10 222280 5083 ns/op 4560 B/op 23 allocs/op BenchmarkScanner/quoted-right-brace-10 82656 14281 ns/op 5160 B/op 51 allocs/op BenchmarkScanner/comments-between-args-10 225475 4872 ns/op 4536 B/op 24 allocs/op BenchmarkScannerWithLua/lua-basic-10 93081 12833 ns/op 7866 B/op 66 allocs/op BenchmarkScannerWithLua/lua-block-simple-10 31426 37989 ns/op 10924 B/op 156 allocs/op BenchmarkScannerWithLua/lua-block-larger-10 37148 30723 ns/op 10770 B/op 72 allocs/op BenchmarkScannerWithLua/lua-block-tricky-10 54890 22383 ns/op 9050 B/op 79 allocs/op PASS ok github.com/nginxinc/nginx-go-crossplane 29.969s ``` --- lex.go | 60 +++++++++++++++++++++-- lex_test.go | 57 ++++++++++++++-------- scanner.go | 125 
++++++++++++++++++++++++++++++++++++++++++------ scanner_test.go | 80 ++++++++++++++++++++----------- 4 files changed, 258 insertions(+), 64 deletions(-) diff --git a/lex.go b/lex.go index 4b69cbea..a39b8a08 100644 --- a/lex.go +++ b/lex.go @@ -65,6 +65,7 @@ type LexOptions struct { // RegisterLexer is an option that cna be used to add a lexer to tokenize external NGINX tokens. type RegisterLexer interface { applyLexOptions(options *LexOptions) + applyScannerOptions(options *scannerOptions) } type registerLexer struct { @@ -82,6 +83,16 @@ func (rl registerLexer) applyLexOptions(o *LexOptions) { } } +func (rl registerLexer) applyScannerOptions(o *scannerOptions) { + if o.extensions == nil { + o.extensions = make(map[string]ScannerExt) + } + + for _, s := range rl.stringTokens { + o.extensions[s] = &LexerScanner{lexer: rl.l} + } +} + // LexWithLexer registers a Lexer that implements tokenization of an NGINX configuration after one of the given // stringTokens is encountered by Lex. func LexWithLexer(l Lexer, stringTokens ...string) RegisterLexer { //nolint:ireturn @@ -106,12 +117,38 @@ func Lex(reader io.Reader) chan NgxToken { // SubScanner provides an interface for scanning alternative grammars within NGINX configuration data. type SubScanner struct { scanner *bufio.Scanner + parent *Scanner tokenLine int } // Scan advances the scanner to the next token which will be available though the Text method. It returns false // when the scan stops by reaching the end of input. func (e *SubScanner) Scan() bool { + if e.scanner != nil { + return e.lexScan() + } + + if e.parent.err != nil { + return false + } + + if !e.parent.scanner.Scan() { + if err := e.parent.scanner.Err(); err != nil { + e.parent.setErr(err) + } + return false + } + + // e.parent.prev = e.parent.scanner.Text() + // if isEOL(e.parent.prev) { + if t := e.parent.scanner.Text(); isEOL(t) { + e.parent.lineno++ + } + + return true +} + +func (e *SubScanner) lexScan() bool { if !e.scanner.Scan() { return false } @@ -122,13 +159,30 @@ func (e *SubScanner) Scan() bool { } // Err returns the fist non-EOF error encountered by the Scanner. -func (e *SubScanner) Err() error { return e.scanner.Err() } +func (e *SubScanner) Err() error { + if e.scanner != nil { + return e.scanner.Err() + } + return e.parent.Err() +} // Text returns the most recent token generated by a call to Scan. -func (e *SubScanner) Text() string { return e.scanner.Text() } +func (e *SubScanner) Text() string { + if e.scanner != nil { + return e.scanner.Text() + } + // return e.parent.prev + return e.parent.scanner.Text() +} // Line returns the line number of the most recent token generated by a call to Scan. 
-func (e *SubScanner) Line() int { return e.tokenLine } +func (e *SubScanner) Line() int { + if e.scanner != nil { + return e.tokenLine + } + + return e.parent.lineno +} //nolint:gocyclo,funlen,gocognit,maintidx func tokenize(reader io.Reader, tokenCh chan NgxToken, options LexOptions) { diff --git a/lex_test.go b/lex_test.go index 520be852..d7348089 100644 --- a/lex_test.go +++ b/lex_test.go @@ -460,34 +460,53 @@ func TestLex(t *testing.T) { } } -var lexToken NgxToken //nolint: gochecknoglobals // trying to avoid return value being optimzed away - -func BenchmarkLex(b *testing.B) { +func benchmarkLex(b *testing.B, path string, options LexOptions) { var t NgxToken + file, err := os.Open(path) + if err != nil { + b.Fatal(err) + } + defer file.Close() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + if _, err := file.Seek(0, 0); err != nil { + b.Fatal(err) + } + + for tok := range LexWithOptions(file, options) { + t = tok + } + } + + _ = t +} + +func BenchmarkLex(b *testing.B) { for _, bm := range lexFixtures { + if strings.HasPrefix(bm.name, "lua") { + continue + } + b.Run(bm.name, func(b *testing.B) { path := getTestConfigPath(bm.name, "nginx.conf") - file, err := os.Open(path) - if err != nil { - b.Fatal(err) - } - defer file.Close() - b.ResetTimer() + benchmarkLex(b, path, LexOptions{}) + }) + } +} - for i := 0; i < b.N; i++ { - if _, err := file.Seek(0, 0); err != nil { - b.Fatal(err) - } +func BenchmarkLexWithLua(b *testing.B) { + for _, bm := range lexFixtures { + if !strings.HasPrefix(bm.name, "lua") { + continue + } - for tok := range Lex(file) { - t = tok - } - } + b.Run(bm.name, func(b *testing.B) { + path := getTestConfigPath(bm.name, "nginx.conf") + benchmarkLex(b, path, LexOptions{Lexers: []RegisterLexer{lua.RegisterLexer()}}) }) } - - lexToken = t } //nolint:gochecknoglobals diff --git a/scanner.go b/scanner.go index 683d1cb3..b688f078 100644 --- a/scanner.go +++ b/scanner.go @@ -8,6 +8,14 @@ import ( "strings" ) +type scannerOptions struct { + extensions map[string]ScannerExt +} + +type ScannerOption interface { + applyScannerOptions(options *scannerOptions) +} + // Token is a lexical token of the NGINX configuration syntax. type Token struct { // Text is the string corresponding to the token. It could be a directive or symbol. The value is the actual token @@ -20,6 +28,8 @@ type Token struct { IsQuoted bool } +func (t Token) String() string { return fmt.Sprintf("{%d, %s, %t}", t.Line, t.Text, t.IsQuoted) } + type scannerError struct { msg string line int @@ -52,23 +62,33 @@ func LineNumber(err error) (int, bool) { // // Use NewScanner to construct a Scanner. type Scanner struct { - scanner *bufio.Scanner - lineno int - tokenStartLine int - tokenDepth int - repeateSpecialChar bool // only '}' can be repeated - prev string - err error + scanner *bufio.Scanner + lineno int + tokenStartLine int + tokenDepth int + repeateSpecialChar bool // only '}' can be repeated + nextTokenIsDirective bool + prev string + err error + options *scannerOptions + ext Tokenizer } // NewScanner returns a new Scanner to read from r. 
-func NewScanner(r io.Reader) *Scanner { +func NewScanner(r io.Reader, options ...ScannerOption) *Scanner { + opts := &scannerOptions{} + for _, opt := range options { + opt.applyScannerOptions(opts) + } + s := &Scanner{ - scanner: bufio.NewScanner(r), - lineno: 1, - tokenStartLine: 1, - tokenDepth: 0, - repeateSpecialChar: false, + scanner: bufio.NewScanner(r), + lineno: 1, + tokenStartLine: 1, + tokenDepth: 0, + repeateSpecialChar: false, + nextTokenIsDirective: true, + options: opts, } s.scanner.Split(bufio.ScanRunes) @@ -92,7 +112,21 @@ func (s *Scanner) setErr(err error) { // Scan reads the next token from source and returns it.. It returns io.EOF at the end of the source. Scanner errors are // returned when encountered. -func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo +func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo, maintidx // sorry + if s.ext != nil { + t, err := s.ext.Next() + if err != nil { + if !errors.Is(err, ErrTokenizerDone) { + s.setErr(err) + return Token{}, s.err + } + + s.ext = nil + } else { + return t, nil + } + } + var tok strings.Builder lexState := skipSpace @@ -129,6 +163,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo r = nextRune if isEOL(r) { s.lineno++ + s.nextTokenIsDirective = true } default: readNext = true @@ -149,6 +184,16 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo r = "\\" + r } + if tok.Len() > 0 { + t := tok.String() + if s.nextTokenIsDirective { + if ext, ok := s.options.extensions[t]; ok { + s.ext = ext.Tokenizer(&SubScanner{parent: s, tokenLine: s.tokenStartLine}, t) + return Token{Text: t, Line: s.tokenStartLine}, nil + } + } + } + switch lexState { case skipSpace: if !isSpace(r) { @@ -166,11 +211,13 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo tok.WriteString(r) lexState = inComment s.tokenStartLine = s.lineno + s.nextTokenIsDirective = false continue } } if isSpace(r) { + s.nextTokenIsDirective = false return Token{Text: tok.String(), Line: s.tokenStartLine}, nil } @@ -179,6 +226,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo tok.WriteString(r) lexState = inVar s.repeateSpecialChar = false + s.nextTokenIsDirective = false continue } @@ -223,6 +271,7 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo } tok.WriteString(r) + s.nextTokenIsDirective = true return Token{Text: tok.String(), Line: s.tokenStartLine}, nil } @@ -250,3 +299,51 @@ func (s *Scanner) Scan() (Token, error) { //nolint: funlen, gocognit, gocyclo } } } + +// ScannerExt is the interface that describes an extension for the [Scanner]. Scanner extensions enable scanning of +// configurations that contain syntaxes that do not follow the usual grammar. +type ScannerExt interface { + Tokenizer(s *SubScanner, matchedToken string) Tokenizer +} + +// ErrTokenizerDone is returned by [Tokenizer] when tokenization is complete. +var ErrTokenizerDone = errors.New("done") + +// Tokenizer is the interface that wraps the Next method. +// +// Next returns the next token scanned from the NGINX configuration or an error if the configuration cannot be +// tokenized. Return the special error, [ErrTokenizerDone] when finished tokenizing. +type Tokenizer interface { + Next() (Token, error) +} + +// LexerScanner is a compatibility layer between Lexers and Scanner. 
+type LexerScanner struct { + lexer Lexer + scanner *SubScanner + matchedToken string + ch <-chan NgxToken +} + +func (s *LexerScanner) Tokenizer(scanner *SubScanner, matchedtoken string) Tokenizer { + s.scanner = scanner + s.matchedToken = matchedtoken + return s +} + +func (s *LexerScanner) Next() (Token, error) { + if s.ch == nil { + s.ch = s.lexer.Lex(s.scanner, s.matchedToken) + } + + ngxTok, ok := <-s.ch + if !ok { + return Token{}, ErrTokenizerDone + } + + if ngxTok.Error != nil { + return Token{}, newScannerErrf(ngxTok.Line, ngxTok.Error.Error()) + } + + return Token{Text: ngxTok.Value, Line: ngxTok.Line, IsQuoted: ngxTok.IsQuoted}, nil +} diff --git a/scanner_test.go b/scanner_test.go index 0774a50a..7f2f5b1e 100644 --- a/scanner_test.go +++ b/scanner_test.go @@ -15,6 +15,7 @@ func TestScanner(t *testing.T) { for _, f := range lexFixtures { f := f + t.Run(f.name, func(t *testing.T) { t.Parallel() @@ -25,7 +26,7 @@ func TestScanner(t *testing.T) { } defer file.Close() - s := NewScanner(file) + s := NewScanner(file, lua.RegisterLexer()) i := 0 for { @@ -42,8 +43,8 @@ func TestScanner(t *testing.T) { } want := f.tokens[i] - require.Equal(t, want.value, got.Text) - require.Equal(t, want.line, got.Line) + require.Equal(t, want.value, got.Text, "got=%s", got) + require.Equal(t, want.line, got.Line, "got=%s", got) i++ } }) @@ -58,7 +59,7 @@ func TestScanner_unhappy(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - s := NewScanner(strings.NewReader(c)) + s := NewScanner(strings.NewReader(c), lua.RegisterLexer()) for { _, err := s.Scan() if err == io.EOF { @@ -83,38 +84,61 @@ func TestScanner_unhappy(t *testing.T) { } } -var t Token //nolint: gochecknoglobals // trying to avoid return value being optimzed away +func benchmarkScanner(b *testing.B, path string, options ...ScannerOption) { + var t Token -func BenchmarkScan(b *testing.B) { - for _, bm := range lexFixtures { - b.Run(bm.name, func(b *testing.B) { - path := getTestConfigPath(bm.name, "nginx.conf") - file, err := os.Open(path) + file, err := os.Open(path) + if err != nil { + b.Fatal(err) + } + defer file.Close() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + if _, err := file.Seek(0, 0); err != nil { + b.Fatal(err) + } + + s := NewScanner(file, options...) + + for { + tok, err := s.Scan() + if err == io.EOF { + break + } if err != nil { b.Fatal(err) } - defer file.Close() + t = tok + } + } - b.ResetTimer() + _ = t +} - for i := 0; i < b.N; i++ { - if _, err := file.Seek(0, 0); err != nil { - b.Fatal(err) - } +func BenchmarkScanner(b *testing.B) { + for _, bm := range lexFixtures { + if strings.HasPrefix(bm.name, "lua") { + continue + } - s := NewScanner(file) + b.Run(bm.name, func(b *testing.B) { + path := getTestConfigPath(bm.name, "nginx.conf") + benchmarkScanner(b, path) + }) + } +} - for { - tok, err := s.Scan() - if err == io.EOF { - break - } - if err != nil { - b.Fatal(err) - } - t = tok - } - } +func BenchmarkScannerWithLua(b *testing.B) { + for _, bm := range lexFixtures { + if !strings.HasPrefix(bm.name, "lua") { + continue + } + + b.Run(bm.name, func(b *testing.B) { + path := getTestConfigPath(bm.name, "nginx.conf") + benchmarkScanner(b, path, lua.RegisterLexer()) }) } }
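
As a closing note for reviewers, below is a rough sketch of how the pieces added across this series compose for a caller: `Scan` drives tokenization, `LineNumber` recovers the source line from a scanner error, and `Err` reports the first non-EOF error after the loop ends. The config literal and the error handling shown are illustrative assumptions, not part of the patches themselves.

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"log"
	"strings"

	crossplane "github.com/nginxinc/nginx-go-crossplane"
)

func main() {
	conf := "http { server { listen 80; } }"

	// Patch 4 also allows wiring in external grammars via ScannerOption,
	// e.g. NewScanner(file, lua.RegisterLexer()) as used in scanner_test.go.
	s := crossplane.NewScanner(strings.NewReader(conf))
	for {
		tok, err := s.Scan()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			// Scanner errors carry a Line() method; LineNumber finds it in the chain.
			if line, ok := crossplane.LineNumber(err); ok {
				log.Fatalf("%v on line %d", err, line)
			}
			log.Fatal(err)
		}
		fmt.Printf("%d: %s\n", tok.Line, tok.Text)
	}

	// Err returns nil after a clean EOF, otherwise the first non-EOF error.
	if err := s.Err(); err != nil {
		log.Fatal(err)
	}
}
```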