From fb9d874fa91c796fdb1c6cb32ab3246e63d38af9 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Mon, 20 Jul 2020 15:55:51 -0400 Subject: caddyfile: Export Tokenize function for lexing (#3549) --- caddyconfig/caddyfile/lexer.go | 19 +++++++++ caddyconfig/caddyfile/lexer_test.go | 84 ++++++++++++++++--------------------- caddyconfig/caddyfile/parse.go | 8 +--- 3 files changed, 57 insertions(+), 54 deletions(-) (limited to 'caddyconfig') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 568d15c..188ef06 100755 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -16,6 +16,7 @@ package caddyfile import ( "bufio" + "bytes" "io" "unicode" ) @@ -168,3 +169,21 @@ func (l *lexer) next() bool { val = append(val, ch) } } + +// Tokenize takes bytes as input and lexes it into +// a list of tokens that can be parsed as a Caddyfile. +// Also takes a filename to fill the token's File as +// the source of the tokens, which is important to +// determine relative paths for `import` directives. +func Tokenize(input []byte, filename string) ([]Token, error) { + l := lexer{} + if err := l.load(bytes.NewReader(input)); err != nil { + return nil, err + } + var tokens []Token + for l.next() { + l.token.File = filename + tokens = append(tokens, l.token) + } + return tokens, nil +} diff --git a/caddyconfig/caddyfile/lexer_test.go b/caddyconfig/caddyfile/lexer_test.go index c8c941a..57df16c 100755 --- a/caddyconfig/caddyfile/lexer_test.go +++ b/caddyconfig/caddyfile/lexer_test.go @@ -15,37 +15,35 @@ package caddyfile import ( - "log" - "strings" "testing" ) type lexerTestCase struct { - input string + input []byte expected []Token } func TestLexer(t *testing.T) { testCases := []lexerTestCase{ { - input: `host:123`, + input: []byte(`host:123`), expected: []Token{ {Line: 1, Text: "host:123"}, }, }, { - input: `host:123 + input: []byte(`host:123 - directive`, + directive`), expected: []Token{ {Line: 1, Text: "host:123"}, {Line: 3, Text: "directive"}, }, }, { - input: `host:123 { + input: []byte(`host:123 { directive - }`, + }`), expected: []Token{ {Line: 1, Text: "host:123"}, {Line: 1, Text: "{"}, @@ -54,7 +52,7 @@ func TestLexer(t *testing.T) { }, }, { - input: `host:123 { directive }`, + input: []byte(`host:123 { directive }`), expected: []Token{ {Line: 1, Text: "host:123"}, {Line: 1, Text: "{"}, @@ -63,12 +61,12 @@ func TestLexer(t *testing.T) { }, }, { - input: `host:123 { + input: []byte(`host:123 { #comment directive # comment foobar # another comment - }`, + }`), expected: []Token{ {Line: 1, Text: "host:123"}, {Line: 1, Text: "{"}, @@ -78,10 +76,10 @@ func TestLexer(t *testing.T) { }, }, { - input: `host:123 { + input: []byte(`host:123 { # hash inside string is not a comment redir / /some/#/path - }`, + }`), expected: []Token{ {Line: 1, Text: "host:123"}, {Line: 1, Text: "{"}, @@ -92,14 +90,14 @@ func TestLexer(t *testing.T) { }, }, { - input: "# comment at beginning of file\n# comment at beginning of line\nhost:123", + input: []byte("# comment at beginning of file\n# comment at beginning of line\nhost:123"), expected: []Token{ {Line: 3, Text: "host:123"}, }, }, { - input: `a "quoted value" b - foobar`, + input: []byte(`a "quoted value" b + foobar`), expected: []Token{ {Line: 1, Text: "a"}, {Line: 1, Text: "quoted value"}, @@ -108,7 +106,7 @@ func TestLexer(t *testing.T) { }, }, { - input: `A "quoted \"value\" inside" B`, + input: []byte(`A "quoted \"value\" inside" B`), expected: []Token{ {Line: 1, Text: "A"}, {Line: 1, Text: `quoted "value" inside`}, @@ -116,7 +114,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "An escaped \"newline\\\ninside\" quotes", + input: []byte("An escaped \"newline\\\ninside\" quotes"), expected: []Token{ {Line: 1, Text: "An"}, {Line: 1, Text: "escaped"}, @@ -125,7 +123,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "An escaped newline\\\noutside quotes", + input: []byte("An escaped newline\\\noutside quotes"), expected: []Token{ {Line: 1, Text: "An"}, {Line: 1, Text: "escaped"}, @@ -135,7 +133,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "line1\\\nescaped\nline2\nline3", + input: []byte("line1\\\nescaped\nline2\nline3"), expected: []Token{ {Line: 1, Text: "line1"}, {Line: 1, Text: "escaped"}, @@ -144,7 +142,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "line1\\\nescaped1\\\nescaped2\nline4\nline5", + input: []byte("line1\\\nescaped1\\\nescaped2\nline4\nline5"), expected: []Token{ {Line: 1, Text: "line1"}, {Line: 1, Text: "escaped1"}, @@ -154,34 +152,34 @@ func TestLexer(t *testing.T) { }, }, { - input: `"unescapable\ in quotes"`, + input: []byte(`"unescapable\ in quotes"`), expected: []Token{ {Line: 1, Text: `unescapable\ in quotes`}, }, }, { - input: `"don't\escape"`, + input: []byte(`"don't\escape"`), expected: []Token{ {Line: 1, Text: `don't\escape`}, }, }, { - input: `"don't\\escape"`, + input: []byte(`"don't\\escape"`), expected: []Token{ {Line: 1, Text: `don't\\escape`}, }, }, { - input: `un\escapable`, + input: []byte(`un\escapable`), expected: []Token{ {Line: 1, Text: `un\escapable`}, }, }, { - input: `A "quoted value with line + input: []byte(`A "quoted value with line break inside" { foobar - }`, + }`), expected: []Token{ {Line: 1, Text: "A"}, {Line: 1, Text: "quoted value with line\n\t\t\t\t\tbreak inside"}, @@ -191,13 +189,13 @@ func TestLexer(t *testing.T) { }, }, { - input: `"C:\php\php-cgi.exe"`, + input: []byte(`"C:\php\php-cgi.exe"`), expected: []Token{ {Line: 1, Text: `C:\php\php-cgi.exe`}, }, }, { - input: `empty "" string`, + input: []byte(`empty "" string`), expected: []Token{ {Line: 1, Text: `empty`}, {Line: 1, Text: ``}, @@ -205,7 +203,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "skip those\r\nCR characters", + input: []byte("skip those\r\nCR characters"), expected: []Token{ {Line: 1, Text: "skip"}, {Line: 1, Text: "those"}, @@ -214,13 +212,13 @@ func TestLexer(t *testing.T) { }, }, { - input: "\xEF\xBB\xBF:8080", // test with leading byte order mark + input: []byte("\xEF\xBB\xBF:8080"), // test with leading byte order mark expected: []Token{ {Line: 1, Text: ":8080"}, }, }, { - input: "simple `backtick quoted` string", + input: []byte("simple `backtick quoted` string"), expected: []Token{ {Line: 1, Text: `simple`}, {Line: 1, Text: `backtick quoted`}, @@ -228,7 +226,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "multiline `backtick\nquoted\n` string", + input: []byte("multiline `backtick\nquoted\n` string"), expected: []Token{ {Line: 1, Text: `multiline`}, {Line: 1, Text: "backtick\nquoted\n"}, @@ -236,7 +234,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "nested `\"quotes inside\" backticks` string", + input: []byte("nested `\"quotes inside\" backticks` string"), expected: []Token{ {Line: 1, Text: `nested`}, {Line: 1, Text: `"quotes inside" backticks`}, @@ -244,7 +242,7 @@ func TestLexer(t *testing.T) { }, }, { - input: "reverse-nested \"`backticks` inside\" quotes", + input: []byte("reverse-nested \"`backticks` inside\" quotes"), expected: []Token{ {Line: 1, Text: `reverse-nested`}, {Line: 1, Text: "`backticks` inside"}, @@ -254,22 +252,14 @@ func TestLexer(t *testing.T) { } for i, testCase := range testCases { - actual := tokenize(testCase.input) + actual, err := Tokenize(testCase.input, "") + if err != nil { + t.Errorf("%v", err) + } lexerCompare(t, i, testCase.expected, actual) } } -func tokenize(input string) (tokens []Token) { - l := lexer{} - if err := l.load(strings.NewReader(input)); err != nil { - log.Printf("[ERROR] load failed: %v", err) - } - for l.next() { - tokens = append(tokens, l.token) - } - return -} - func lexerCompare(t *testing.T, n int, expected, actual []Token) { if len(expected) != len(actual) { t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual)) diff --git a/caddyconfig/caddyfile/parse.go b/caddyconfig/caddyfile/parse.go index 5cb5b68..4ae98ca 100755 --- a/caddyconfig/caddyfile/parse.go +++ b/caddyconfig/caddyfile/parse.go @@ -87,16 +87,10 @@ func allTokens(filename string, input []byte) ([]Token, error) { if err != nil { return nil, err } - l := new(lexer) - err = l.load(bytes.NewReader(input)) + tokens, err := Tokenize(input, filename) if err != nil { return nil, err } - var tokens []Token - for l.next() { - l.token.File = filename - tokens = append(tokens, l.token) - } return tokens, nil } -- cgit v1.2.3