diff options
Diffstat (limited to 'caddyconfig/caddyfile')
-rw-r--r-- | caddyconfig/caddyfile/adapter.go | 2 | ||||
-rw-r--r-- | caddyconfig/caddyfile/dispenser.go | 90 | ||||
-rw-r--r-- | caddyconfig/caddyfile/importargs.go | 153 | ||||
-rw-r--r-- | caddyconfig/caddyfile/importgraph.go | 3 | ||||
-rw-r--r-- | caddyconfig/caddyfile/lexer.go | 246 | ||||
-rw-r--r-- | caddyconfig/caddyfile/lexer_test.go | 227 | ||||
-rw-r--r-- | caddyconfig/caddyfile/parse.go | 148 | ||||
-rw-r--r-- | caddyconfig/caddyfile/parse_test.go | 120 | ||||
-rw-r--r-- | caddyconfig/caddyfile/testdata/import_args0.txt | 2 | ||||
-rw-r--r-- | caddyconfig/caddyfile/testdata/import_args1.txt | 2 |
10 files changed, 858 insertions, 135 deletions
diff --git a/caddyconfig/caddyfile/adapter.go b/caddyconfig/caddyfile/adapter.go index b924325..d6ef602 100644 --- a/caddyconfig/caddyfile/adapter.go +++ b/caddyconfig/caddyfile/adapter.go @@ -88,7 +88,7 @@ func FormattingDifference(filename string, body []byte) (caddyconfig.Warning, bo return caddyconfig.Warning{ File: filename, Line: line, - Message: "Caddyfile input is not formatted; run the 'caddy fmt' command to fix inconsistencies", + Message: "Caddyfile input is not formatted; run 'caddy fmt --overwrite' to fix inconsistencies", }, true } diff --git a/caddyconfig/caddyfile/dispenser.go b/caddyconfig/caddyfile/dispenser.go index 91bd9a5..215a164 100644 --- a/caddyconfig/caddyfile/dispenser.go +++ b/caddyconfig/caddyfile/dispenser.go @@ -101,12 +101,12 @@ func (d *Dispenser) nextOnSameLine() bool { d.cursor++ return true } - if d.cursor >= len(d.tokens) { + if d.cursor >= len(d.tokens)-1 { return false } - if d.cursor < len(d.tokens)-1 && - d.tokens[d.cursor].File == d.tokens[d.cursor+1].File && - d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) == d.tokens[d.cursor+1].Line { + curr := d.tokens[d.cursor] + next := d.tokens[d.cursor+1] + if !isNextOnNewLine(curr, next) { d.cursor++ return true } @@ -122,12 +122,12 @@ func (d *Dispenser) NextLine() bool { d.cursor++ return true } - if d.cursor >= len(d.tokens) { + if d.cursor >= len(d.tokens)-1 { return false } - if d.cursor < len(d.tokens)-1 && - (d.tokens[d.cursor].File != d.tokens[d.cursor+1].File || - d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) < d.tokens[d.cursor+1].Line) { + curr := d.tokens[d.cursor] + next := d.tokens[d.cursor+1] + if isNextOnNewLine(curr, next) { d.cursor++ return true } @@ -203,14 +203,17 @@ func (d *Dispenser) Val() string { } // ValRaw gets the raw text of the current token (including quotes). +// If the token was a heredoc, then the delimiter is not included, +// because that is not relevant to any unmarshaling logic at this time. // If there is no token loaded, it returns empty string. func (d *Dispenser) ValRaw() string { if d.cursor < 0 || d.cursor >= len(d.tokens) { return "" } quote := d.tokens[d.cursor].wasQuoted - if quote > 0 { - return string(quote) + d.tokens[d.cursor].Text + string(quote) // string literal + if quote > 0 && quote != '<' { + // string literal + return string(quote) + d.tokens[d.cursor].Text + string(quote) } return d.tokens[d.cursor].Text } @@ -388,22 +391,22 @@ func (d *Dispenser) Reset() { // an argument. func (d *Dispenser) ArgErr() error { if d.Val() == "{" { - return d.Err("Unexpected token '{', expecting argument") + return d.Err("unexpected token '{', expecting argument") } - return d.Errf("Wrong argument count or unexpected line ending after '%s'", d.Val()) + return d.Errf("wrong argument count or unexpected line ending after '%s'", d.Val()) } // SyntaxErr creates a generic syntax error which explains what was // found and what was expected. func (d *Dispenser) SyntaxErr(expected string) error { - msg := fmt.Sprintf("%s:%d - Syntax error: Unexpected token '%s', expecting '%s'", d.File(), d.Line(), d.Val(), expected) + msg := fmt.Sprintf("syntax error: unexpected token '%s', expecting '%s', at %s:%d import chain: ['%s']", d.Val(), expected, d.File(), d.Line(), strings.Join(d.Token().imports, "','")) return errors.New(msg) } // EOFErr returns an error indicating that the dispenser reached // the end of the input when searching for the next token. func (d *Dispenser) EOFErr() error { - return d.Errf("Unexpected EOF") + return d.Errf("unexpected EOF") } // Err generates a custom parse-time error with a message of msg. @@ -418,7 +421,10 @@ func (d *Dispenser) Errf(format string, args ...any) error { // WrapErr takes an existing error and adds the Caddyfile file and line number. func (d *Dispenser) WrapErr(err error) error { - return fmt.Errorf("%s:%d - Error during parsing: %w", d.File(), d.Line(), err) + if len(d.Token().imports) > 0 { + return fmt.Errorf("%w, at %s:%d import chain ['%s']", err, d.File(), d.Line(), strings.Join(d.Token().imports, "','")) + } + return fmt.Errorf("%w, at %s:%d", err, d.File(), d.Line()) } // Delete deletes the current token and returns the updated slice @@ -438,14 +444,14 @@ func (d *Dispenser) Delete() []Token { return d.tokens } -// numLineBreaks counts how many line breaks are in the token -// value given by the token index tknIdx. It returns 0 if the -// token does not exist or there are no line breaks. -func (d *Dispenser) numLineBreaks(tknIdx int) int { - if tknIdx < 0 || tknIdx >= len(d.tokens) { - return 0 +// DeleteN is the same as Delete, but can delete many tokens at once. +// If there aren't N tokens available to delete, none are deleted. +func (d *Dispenser) DeleteN(amount int) []Token { + if amount > 0 && d.cursor >= (amount-1) && d.cursor <= len(d.tokens)-1 { + d.tokens = append(d.tokens[:d.cursor-(amount-1)], d.tokens[d.cursor+1:]...) + d.cursor -= amount } - return strings.Count(d.tokens[tknIdx].Text, "\n") + return d.tokens } // isNewLine determines whether the current token is on a different @@ -461,25 +467,7 @@ func (d *Dispenser) isNewLine() bool { prev := d.tokens[d.cursor-1] curr := d.tokens[d.cursor] - - // If the previous token is from a different file, - // we can assume it's from a different line - if prev.File != curr.File { - return true - } - - // The previous token may contain line breaks if - // it was quoted and spanned multiple lines. e.g: - // - // dir "foo - // bar - // baz" - prevLineBreaks := d.numLineBreaks(d.cursor - 1) - - // If the previous token (incl line breaks) ends - // on a line earlier than the current token, - // then the current token is on a new line - return prev.Line+prevLineBreaks < curr.Line + return isNextOnNewLine(prev, curr) } // isNextOnNewLine determines whether the current token is on a different @@ -495,23 +483,5 @@ func (d *Dispenser) isNextOnNewLine() bool { curr := d.tokens[d.cursor] next := d.tokens[d.cursor+1] - - // If the next token is from a different file, - // we can assume it's from a different line - if curr.File != next.File { - return true - } - - // The current token may contain line breaks if - // it was quoted and spanned multiple lines. e.g: - // - // dir "foo - // bar - // baz" - currLineBreaks := d.numLineBreaks(d.cursor) - - // If the current token (incl line breaks) ends - // on a line earlier than the next token, - // then the next token is on a new line - return curr.Line+currLineBreaks < next.Line + return isNextOnNewLine(curr, next) } diff --git a/caddyconfig/caddyfile/importargs.go b/caddyconfig/caddyfile/importargs.go new file mode 100644 index 0000000..2e21a36 --- /dev/null +++ b/caddyconfig/caddyfile/importargs.go @@ -0,0 +1,153 @@ +// Copyright 2015 Matthew Holt and The Caddy Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package caddyfile + +import ( + "regexp" + "strconv" + "strings" + + "go.uber.org/zap" + + "github.com/caddyserver/caddy/v2" +) + +// parseVariadic determines if the token is a variadic placeholder, +// and if so, determines the index range (start/end) of args to use. +// Returns a boolean signaling whether a variadic placeholder was found, +// and the start and end indices. +func parseVariadic(token Token, argCount int) (bool, int, int) { + if !strings.HasPrefix(token.Text, "{args[") { + return false, 0, 0 + } + if !strings.HasSuffix(token.Text, "]}") { + return false, 0, 0 + } + + argRange := strings.TrimSuffix(strings.TrimPrefix(token.Text, "{args["), "]}") + if argRange == "" { + caddy.Log().Named("caddyfile").Warn( + "Placeholder "+token.Text+" cannot have an empty index", + zap.String("file", token.File+":"+strconv.Itoa(token.Line)), zap.Strings("import_chain", token.imports)) + return false, 0, 0 + } + + start, end, found := strings.Cut(argRange, ":") + + // If no ":" delimiter is found, this is not a variadic. + // The replacer will pick this up. + if !found { + return false, 0, 0 + } + + var ( + startIndex = 0 + endIndex = argCount + err error + ) + if start != "" { + startIndex, err = strconv.Atoi(start) + if err != nil { + caddy.Log().Named("caddyfile").Warn( + "Variadic placeholder "+token.Text+" has an invalid start index", + zap.String("file", token.File+":"+strconv.Itoa(token.Line)), zap.Strings("import_chain", token.imports)) + return false, 0, 0 + } + } + if end != "" { + endIndex, err = strconv.Atoi(end) + if err != nil { + caddy.Log().Named("caddyfile").Warn( + "Variadic placeholder "+token.Text+" has an invalid end index", + zap.String("file", token.File+":"+strconv.Itoa(token.Line)), zap.Strings("import_chain", token.imports)) + return false, 0, 0 + } + } + + // bound check + if startIndex < 0 || startIndex > endIndex || endIndex > argCount { + caddy.Log().Named("caddyfile").Warn( + "Variadic placeholder "+token.Text+" indices are out of bounds, only "+strconv.Itoa(argCount)+" argument(s) exist", + zap.String("file", token.File+":"+strconv.Itoa(token.Line)), zap.Strings("import_chain", token.imports)) + return false, 0, 0 + } + return true, startIndex, endIndex +} + +// makeArgsReplacer prepares a Replacer which can replace +// non-variadic args placeholders in imported tokens. +func makeArgsReplacer(args []string) *caddy.Replacer { + repl := caddy.NewEmptyReplacer() + repl.Map(func(key string) (any, bool) { + // TODO: Remove the deprecated {args.*} placeholder + // support at some point in the future + if matches := argsRegexpIndexDeprecated.FindStringSubmatch(key); len(matches) > 0 { + // What's matched may be a substring of the key + if matches[0] != key { + return nil, false + } + + value, err := strconv.Atoi(matches[1]) + if err != nil { + caddy.Log().Named("caddyfile").Warn( + "Placeholder {args." + matches[1] + "} has an invalid index") + return nil, false + } + if value >= len(args) { + caddy.Log().Named("caddyfile").Warn( + "Placeholder {args." + matches[1] + "} index is out of bounds, only " + strconv.Itoa(len(args)) + " argument(s) exist") + return nil, false + } + caddy.Log().Named("caddyfile").Warn( + "Placeholder {args." + matches[1] + "} deprecated, use {args[" + matches[1] + "]} instead") + return args[value], true + } + + // Handle args[*] form + if matches := argsRegexpIndex.FindStringSubmatch(key); len(matches) > 0 { + // What's matched may be a substring of the key + if matches[0] != key { + return nil, false + } + + if strings.Contains(matches[1], ":") { + caddy.Log().Named("caddyfile").Warn( + "Variadic placeholder {args[" + matches[1] + "]} must be a token on its own") + return nil, false + } + value, err := strconv.Atoi(matches[1]) + if err != nil { + caddy.Log().Named("caddyfile").Warn( + "Placeholder {args[" + matches[1] + "]} has an invalid index") + return nil, false + } + if value >= len(args) { + caddy.Log().Named("caddyfile").Warn( + "Placeholder {args[" + matches[1] + "]} index is out of bounds, only " + strconv.Itoa(len(args)) + " argument(s) exist") + return nil, false + } + return args[value], true + } + + // Not an args placeholder, ignore + return nil, false + }) + return repl +} + +var ( + argsRegexpIndexDeprecated = regexp.MustCompile(`args\.(.+)`) + argsRegexpIndex = regexp.MustCompile(`args\[(.+)]`) +) diff --git a/caddyconfig/caddyfile/importgraph.go b/caddyconfig/caddyfile/importgraph.go index 659c368..d27f471 100644 --- a/caddyconfig/caddyfile/importgraph.go +++ b/caddyconfig/caddyfile/importgraph.go @@ -34,6 +34,7 @@ func (i *importGraph) addNode(name string) { } i.nodes[name] = true } + func (i *importGraph) addNodes(names []string) { for _, name := range names { i.addNode(name) @@ -43,6 +44,7 @@ func (i *importGraph) addNodes(names []string) { func (i *importGraph) removeNode(name string) { delete(i.nodes, name) } + func (i *importGraph) removeNodes(names []string) { for _, name := range names { i.removeNode(name) @@ -73,6 +75,7 @@ func (i *importGraph) addEdge(from, to string) error { i.edges[from] = append(i.edges[from], to) return nil } + func (i *importGraph) addEdges(from string, tos []string) error { for _, to := range tos { err := i.addEdge(from, to) diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 5605a6a..bfd6c0f 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -17,7 +17,10 @@ package caddyfile import ( "bufio" "bytes" + "fmt" "io" + "regexp" + "strings" "unicode" ) @@ -35,15 +38,41 @@ type ( // Token represents a single parsable unit. Token struct { - File string - Line int - Text string - wasQuoted rune // enclosing quote character, if any - inSnippet bool - snippetName string + File string + imports []string + Line int + Text string + wasQuoted rune // enclosing quote character, if any + heredocMarker string + snippetName string } ) +// Tokenize takes bytes as input and lexes it into +// a list of tokens that can be parsed as a Caddyfile. +// Also takes a filename to fill the token's File as +// the source of the tokens, which is important to +// determine relative paths for `import` directives. +func Tokenize(input []byte, filename string) ([]Token, error) { + l := lexer{} + if err := l.load(bytes.NewReader(input)); err != nil { + return nil, err + } + var tokens []Token + for { + found, err := l.next() + if err != nil { + return nil, err + } + if !found { + break + } + l.token.File = filename + tokens = append(tokens, l.token) + } + return tokens, nil +} + // load prepares the lexer to scan an input for tokens. // It discards any leading byte order mark. func (l *lexer) load(input io.Reader) error { @@ -75,28 +104,107 @@ func (l *lexer) load(input io.Reader) error { // may be escaped. The rest of the line is skipped // if a "#" character is read in. Returns true if // a token was loaded; false otherwise. -func (l *lexer) next() bool { +func (l *lexer) next() (bool, error) { var val []rune - var comment, quoted, btQuoted, escaped bool + var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool + var heredocMarker string makeToken := func(quoted rune) bool { l.token.Text = string(val) l.token.wasQuoted = quoted + l.token.heredocMarker = heredocMarker return true } for { + // Read a character in; if err then if we had + // read some characters, make a token. If we + // reached EOF, then no more tokens to read. + // If no EOF, then we had a problem. ch, _, err := l.reader.ReadRune() if err != nil { if len(val) > 0 { - return makeToken(0) + if inHeredoc { + return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker) + } + + return makeToken(0), nil } if err == io.EOF { - return false + return false, nil + } + return false, err + } + + // detect whether we have the start of a heredoc + if !(quoted || btQuoted) && !(inHeredoc || heredocEscaped) && + len(val) > 1 && string(val[:2]) == "<<" { + // a space means it's just a regular token and not a heredoc + if ch == ' ' { + return makeToken(0), nil + } + + // skip CR, we only care about LF + if ch == '\r' { + continue + } + + // after hitting a newline, we know that the heredoc marker + // is the characters after the two << and the newline. + // we reset the val because the heredoc is syntax we don't + // want to keep. + if ch == '\n' { + if len(val) == 2 { + return false, fmt.Errorf("missing opening heredoc marker on line #%d; must contain only alpha-numeric characters, dashes and underscores; got empty string", l.line) + } + + // check if there's too many < + if string(val[:3]) == "<<<" { + return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line) + } + + heredocMarker = string(val[2:]) + if !heredocMarkerRegexp.Match([]byte(heredocMarker)) { + return false, fmt.Errorf("heredoc marker on line #%d must contain only alpha-numeric characters, dashes and underscores; got '%s'", l.line, heredocMarker) + } + + inHeredoc = true + l.skippedLines++ + val = nil + continue + } + val = append(val, ch) + continue + } + + // if we're in a heredoc, all characters are read as-is + if inHeredoc { + val = append(val, ch) + + if ch == '\n' { + l.skippedLines++ + } + + // check if we're done, i.e. that the last few characters are the marker + if len(val) > len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) { + // set the final value + val, err = l.finalizeHeredoc(val, heredocMarker) + if err != nil { + return false, err + } + + // set the line counter, and make the token + l.line += l.skippedLines + l.skippedLines = 0 + return makeToken('<'), nil } - panic(err) + + // stay in the heredoc until we find the ending marker + continue } + // track whether we found an escape '\' for the next + // iteration to be contextually aware if !escaped && !btQuoted && ch == '\\' { escaped = true continue @@ -111,26 +219,29 @@ func (l *lexer) next() bool { } escaped = false } else { - if quoted && ch == '"' { - return makeToken('"') - } - if btQuoted && ch == '`' { - return makeToken('`') + if (quoted && ch == '"') || (btQuoted && ch == '`') { + return makeToken(ch), nil } } + // allow quoted text to wrap continue on multiple lines if ch == '\n' { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // collect this character as part of the quoted token val = append(val, ch) continue } if unicode.IsSpace(ch) { + // ignore CR altogether, we only actually care about LF (\n) if ch == '\r' { continue } + // end of the line if ch == '\n' { + // newlines can be escaped to chain arguments + // onto multiple lines; else, increment the line count if escaped { l.skippedLines++ escaped = false @@ -138,14 +249,18 @@ func (l *lexer) next() bool { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // comments (#) are single-line only comment = false } + // any kind of space means we're at the end of this token if len(val) > 0 { - return makeToken(0) + return makeToken(0), nil } continue } + // comments must be at the start of a token, + // in other words, preceded by space or newline if ch == '#' && len(val) == 0 { comment = true } @@ -166,7 +281,12 @@ func (l *lexer) next() bool { } if escaped { - val = append(val, '\\') + // allow escaping the first < to skip the heredoc syntax + if ch == '<' { + heredocEscaped = true + } else { + val = append(val, '\\') + } escaped = false } @@ -174,24 +294,86 @@ func (l *lexer) next() bool { } } -// Tokenize takes bytes as input and lexes it into -// a list of tokens that can be parsed as a Caddyfile. -// Also takes a filename to fill the token's File as -// the source of the tokens, which is important to -// determine relative paths for `import` directives. -func Tokenize(input []byte, filename string) ([]Token, error) { - l := lexer{} - if err := l.load(bytes.NewReader(input)); err != nil { - return nil, err +// finalizeHeredoc takes the runes read as the heredoc text and the marker, +// and processes the text to strip leading whitespace, returning the final +// value without the leading whitespace. +func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { + stringVal := string(val) + + // find the last newline of the heredoc, which is where the contents end + lastNewline := strings.LastIndex(stringVal, "\n") + + // collapse the content, then split into separate lines + lines := strings.Split(stringVal[:lastNewline+1], "\n") + + // figure out how much whitespace we need to strip from the front of every line + // by getting the string that precedes the marker, on the last line + paddingToStrip := stringVal[lastNewline+1 : len(stringVal)-len(marker)] + + // iterate over each line and strip the whitespace from the front + var out string + for lineNum, lineText := range lines[:len(lines)-1] { + // find an exact match for the padding + index := strings.Index(lineText, paddingToStrip) + + // if the padding doesn't match exactly at the start then we can't safely strip + if index != 0 { + return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip) + } + + // strip, then append the line, with the newline, to the output. + // also removes all "\r" because Windows. + out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "") } - var tokens []Token - for l.next() { - l.token.File = filename - tokens = append(tokens, l.token) + + // Remove the trailing newline from the loop + if len(out) > 0 && out[len(out)-1] == '\n' { + out = out[:len(out)-1] } - return tokens, nil + + // return the final value + return []rune(out), nil } func (t Token) Quoted() bool { return t.wasQuoted > 0 } + +// NumLineBreaks counts how many line breaks are in the token text. +func (t Token) NumLineBreaks() int { + lineBreaks := strings.Count(t.Text, "\n") + if t.wasQuoted == '<' { + // heredocs have an extra linebreak because the opening + // delimiter is on its own line and is not included in the + // token Text itself, and the trailing newline is removed. + lineBreaks += 2 + } + return lineBreaks +} + +var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$") + +// isNextOnNewLine tests whether t2 is on a different line from t1 +func isNextOnNewLine(t1, t2 Token) bool { + // If the second token is from a different file, + // we can assume it's from a different line + if t1.File != t2.File { + return true + } + + // If the second token is from a different import chain, + // we can assume it's from a different line + if len(t1.imports) != len(t2.imports) { + return true + } + for i, im := range t1.imports { + if im != t2.imports[i] { + return true + } + } + + // If the first token (incl line breaks) ends + // on a line earlier than the next token, + // then the second token is on a new line + return t1.Line+t1.NumLineBreaks() < t2.Line +} diff --git a/caddyconfig/caddyfile/lexer_test.go b/caddyconfig/caddyfile/lexer_test.go index 30ee0f6..92acc4d 100644 --- a/caddyconfig/caddyfile/lexer_test.go +++ b/caddyconfig/caddyfile/lexer_test.go @@ -18,13 +18,13 @@ import ( "testing" ) -type lexerTestCase struct { - input []byte - expected []Token -} - func TestLexer(t *testing.T) { - testCases := []lexerTestCase{ + testCases := []struct { + input []byte + expected []Token + expectErr bool + errorMessage string + }{ { input: []byte(`host:123`), expected: []Token{ @@ -249,12 +249,219 @@ func TestLexer(t *testing.T) { {Line: 1, Text: `quotes`}, }, }, + { + input: []byte(`heredoc <<EOF +content +EOF same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: "content"}, + {Line: 3, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`heredoc <<VERY-LONG-MARKER +content +VERY-LONG-MARKER same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: "content"}, + {Line: 3, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`heredoc <<EOF +extra-newline + +EOF same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: "extra-newline\n"}, + {Line: 4, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`heredoc <<EOF + EOF same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: ""}, + {Line: 2, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`heredoc <<EOF + content + EOF same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: "content"}, + {Line: 3, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`prev-line + heredoc <<EOF + multi + line + content + EOF same-line-arg + next-line + `), + expected: []Token{ + {Line: 1, Text: `prev-line`}, + {Line: 2, Text: `heredoc`}, + {Line: 2, Text: "\tmulti\n\tline\n\tcontent"}, + {Line: 6, Text: `same-line-arg`}, + {Line: 7, Text: `next-line`}, + }, + }, + { + input: []byte(`escaped-heredoc \<< >>`), + expected: []Token{ + {Line: 1, Text: `escaped-heredoc`}, + {Line: 1, Text: `<<`}, + {Line: 1, Text: `>>`}, + }, + }, + { + input: []byte(`not-a-heredoc <EOF + content + `), + expected: []Token{ + {Line: 1, Text: `not-a-heredoc`}, + {Line: 1, Text: `<EOF`}, + {Line: 2, Text: `content`}, + }, + }, + { + input: []byte(`not-a-heredoc <<<EOF content`), + expected: []Token{ + {Line: 1, Text: `not-a-heredoc`}, + {Line: 1, Text: `<<<EOF`}, + {Line: 1, Text: `content`}, + }, + }, + { + input: []byte(`not-a-heredoc "<<" ">>"`), + expected: []Token{ + {Line: 1, Text: `not-a-heredoc`}, + {Line: 1, Text: `<<`}, + {Line: 1, Text: `>>`}, + }, + }, + { + input: []byte(`not-a-heredoc << >>`), + expected: []Token{ + {Line: 1, Text: `not-a-heredoc`}, + {Line: 1, Text: `<<`}, + {Line: 1, Text: `>>`}, + }, + }, + { + input: []byte(`not-a-heredoc <<HERE SAME LINE + content + HERE same-line-arg + `), + expected: []Token{ + {Line: 1, Text: `not-a-heredoc`}, + {Line: 1, Text: `<<HERE`}, + {Line: 1, Text: `SAME`}, + {Line: 1, Text: `LINE`}, + {Line: 2, Text: `content`}, + {Line: 3, Text: `HERE`}, + {Line: 3, Text: `same-line-arg`}, + }, + }, + { + input: []byte(`heredoc <<s + � + s + `), + expected: []Token{ + {Line: 1, Text: `heredoc`}, + {Line: 1, Text: "�"}, + }, + }, + { + input: []byte("\u000Aheredoc \u003C\u003C\u0073\u0073\u000A\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F\u000A\u0073\u0073\u000A\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F\u000A\u00BF\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F"), + expected: []Token{ + { + Line: 2, + Text: "heredoc", + }, + { + Line: 2, + Text: "\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F", + }, + { + Line: 5, + Text: "\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F", + }, + { + Line: 6, + Text: "\u00BF\u00BF\u0057\u0001\u0000\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u00FF\u003D\u001F", + }, + }, + }, + { + input: []byte("not-a-heredoc <<\n"), + expectErr: true, + errorMessage: "missing opening heredoc marker on line #1; must contain only alpha-numeric characters, dashes and underscores; got empty string", + }, + { + input: []byte(`heredoc <<<EOF + content + EOF same-line-arg + `), + expectErr: true, + errorMessage: "too many '<' for heredoc on line #1; only use two, for example <<END", + }, + { + input: []byte(`heredoc <<EOF + content + `), + expectErr: true, + errorMessage: "incomplete heredoc <<EOF on line #3, expected ending marker EOF", + }, + { + input: []byte(`heredoc <<EOF + content + EOF + `), + expectErr: true, + errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [\tcontent], expected whitespace [\t\t] to match the closing marker", + }, + { + input: []byte(`heredoc <<EOF + content + EOF + `), + expectErr: true, + errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [ content], expected whitespace [\t\t] to match the closing marker", + }, } for i, testCase := range testCases { actual, err := Tokenize(testCase.input, "") + if testCase.expectErr { + if err == nil { + t.Fatalf("expected error, got actual: %v", actual) + continue + } + if err.Error() != testCase.errorMessage { + t.Fatalf("expected error '%v', got: %v", testCase.errorMessage, err) + } + continue + } + if err != nil { - t.Errorf("%v", err) + t.Fatalf("%v", err) } lexerCompare(t, i, testCase.expected, actual) } @@ -262,17 +469,17 @@ func TestLexer(t *testing.T) { func lexerCompare(t *testing.T, n int, expected, actual []Token) { if len(expected) != len(actual) { - t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual)) + t.Fatalf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual)) } for i := 0; i < len(actual) && i < len(expected); i++ { if actual[i].Line != expected[i].Line { - t.Errorf("Test case %d token %d ('%s'): expected line %d but was line %d", + t.Fatalf("Test case %d token %d ('%s'): expected line %d but was line %d", n, i, expected[i].Text, expected[i].Line, actual[i].Line) break } if actual[i].Text != expected[i].Text { - t.Errorf("Test case %d token %d: expected text '%s' but was '%s'", + t.Fatalf("Test case %d token %d: expected text '%s' but was '%s'", n, i, expected[i].Text, actual[i].Text) break } diff --git a/caddyconfig/caddyfile/parse.go b/caddyconfig/caddyfile/parse.go index edc86f2..65d6ee9 100644 --- a/caddyconfig/caddyfile/parse.go +++ b/caddyconfig/caddyfile/parse.go @@ -20,11 +20,11 @@ import ( "io" "os" "path/filepath" - "strconv" "strings" - "github.com/caddyserver/caddy/v2" "go.uber.org/zap" + + "github.com/caddyserver/caddy/v2" ) // Parse parses the input just enough to group tokens, in @@ -149,7 +149,6 @@ func (p *parser) begin() error { } err := p.addresses() - if err != nil { return err } @@ -160,6 +159,25 @@ func (p *parser) begin() error { return nil } + if ok, name := p.isNamedRoute(); ok { + // named routes only have one key, the route name + p.block.Keys = []string{name} + p.block.IsNamedRoute = true + + // we just need a dummy leading token to ease parsing later + nameToken := p.Token() + nameToken.Text = name + + // get all the tokens from the block, including the braces + tokens, err := p.blockTokens(true) + if err != nil { + return err + } + tokens = append([]Token{nameToken}, tokens...) + p.block.Segments = []Segment{tokens} + return nil + } + if ok, name := p.isSnippet(); ok { if p.definedSnippets == nil { p.definedSnippets = map[string][]Token{} @@ -168,16 +186,15 @@ func (p *parser) begin() error { return p.Errf("redeclaration of previously declared snippet %s", name) } // consume all tokens til matched close brace - tokens, err := p.snippetTokens() + tokens, err := p.blockTokens(false) if err != nil { return err } // Just as we need to track which file the token comes from, we need to - // keep track of which snippets do the tokens come from. This is helpful - // in tracking import cycles across files/snippets by namespacing them. Without - // this we end up with false-positives in cycle-detection. + // keep track of which snippet the token comes from. This is helpful + // in tracking import cycles across files/snippets by namespacing them. + // Without this, we end up with false-positives in cycle-detection. for k, v := range tokens { - v.inSnippet = true v.snippetName = name tokens[k] = v } @@ -198,7 +215,7 @@ func (p *parser) addresses() error { // special case: import directive replaces tokens during parse-time if tkn == "import" && p.isNewLine() { - err := p.doImport() + err := p.doImport(0) if err != nil { return err } @@ -298,7 +315,7 @@ func (p *parser) directives() error { // special case: import directive replaces tokens during parse-time if p.Val() == "import" { - err := p.doImport() + err := p.doImport(1) if err != nil { return err } @@ -324,7 +341,7 @@ func (p *parser) directives() error { // is on the token before where the import directive was. In // other words, call Next() to access the first token that was // imported. -func (p *parser) doImport() error { +func (p *parser) doImport(nesting int) error { // syntax checks if !p.NextArg() { return p.ArgErr() @@ -337,11 +354,8 @@ func (p *parser) doImport() error { // grab remaining args as placeholder replacements args := p.RemainingArgs() - // add args to the replacer - repl := caddy.NewEmptyReplacer() - for index, arg := range args { - repl.Set("args."+strconv.Itoa(index), arg) - } + // set up a replacer for non-variadic args replacement + repl := makeArgsReplacer(args) // splice out the import directive and its arguments // (2 tokens, plus the length of args) @@ -417,7 +431,7 @@ func (p *parser) doImport() error { } nodeName := p.File() - if p.Token().inSnippet { + if p.Token().snippetName != "" { nodeName += fmt.Sprintf(":%s", p.Token().snippetName) } p.importGraph.addNode(nodeName) @@ -428,13 +442,69 @@ func (p *parser) doImport() error { } // copy the tokens so we don't overwrite p.definedSnippets - tokensCopy := make([]Token, len(importedTokens)) - copy(tokensCopy, importedTokens) + tokensCopy := make([]Token, 0, len(importedTokens)) + + var ( + maybeSnippet bool + maybeSnippetId bool + index int + ) // run the argument replacer on the tokens - for index, token := range tokensCopy { - token.Text = repl.ReplaceKnown(token.Text, "") - tokensCopy[index] = token + // golang for range slice return a copy of value + // similarly, append also copy value + for i, token := range importedTokens { + // update the token's imports to refer to import directive filename, line number and snippet name if there is one + if token.snippetName != "" { + token.imports = append(token.imports, fmt.Sprintf("%s:%d (import %s)", p.File(), p.Line(), token.snippetName)) + } else { + token.imports = append(token.imports, fmt.Sprintf("%s:%d (import)", p.File(), p.Line())) + } + + // naive way of determine snippets, as snippets definition can only follow name + block + // format, won't check for nesting correctness or any other error, that's what parser does. + if !maybeSnippet && nesting == 0 { + // first of the line + if i == 0 || isNextOnNewLine(tokensCopy[i-1], token) { + index = 0 + } else { + index++ + } + + if index == 0 && len(token.Text) >= 3 && strings.HasPrefix(token.Text, "(") && strings.HasSuffix(token.Text, ")") { + maybeSnippetId = true + } + } + + switch token.Text { + case "{": + nesting++ + if index == 1 && maybeSnippetId && nesting == 1 { + maybeSnippet = true + maybeSnippetId = false + } + case "}": + nesting-- + if nesting == 0 && maybeSnippet { + maybeSnippet = false + } + } + + if maybeSnippet { + tokensCopy = append(tokensCopy, token) + continue + } + + foundVariadic, startIndex, endIndex := parseVariadic(token, len(args)) + if foundVariadic { + for _, arg := range args[startIndex:endIndex] { + token.Text = arg + tokensCopy = append(tokensCopy, token) + } + } else { + token.Text = repl.ReplaceKnown(token.Text, "") + tokensCopy = append(tokensCopy, token) + } } // splice the imported tokens in the place of the import statement @@ -496,7 +566,6 @@ func (p *parser) doSingleImport(importFile string) ([]Token, error) { // are loaded into the current server block for later use // by directive setup functions. func (p *parser) directive() error { - // a segment is a list of tokens associated with this directive var segment Segment @@ -509,6 +578,9 @@ func (p *parser) directive() error { if !p.isNextOnNewLine() && p.Token().wasQuoted == 0 { return p.Err("Unexpected next token after '{' on same line") } + if p.isNewLine() { + return p.Err("Unexpected '{' on a new line; did you mean to place the '{' on the previous line?") + } } else if p.Val() == "{}" { if p.isNextOnNewLine() && p.Token().wasQuoted == 0 { return p.Err("Unexpected '{}' at end of line") @@ -521,7 +593,7 @@ func (p *parser) directive() error { } else if p.Val() == "}" && p.nesting == 0 { return p.Err("Unexpected '}' because no matching opening brace") } else if p.Val() == "import" && p.isNewLine() { - if err := p.doImport(); err != nil { + if err := p.doImport(1); err != nil { return err } p.cursor-- // cursor is advanced when we continue, so roll back one more @@ -562,6 +634,15 @@ func (p *parser) closeCurlyBrace() error { return nil } +func (p *parser) isNamedRoute() (bool, string) { + keys := p.block.Keys + // A named route block is a single key with parens, prefixed with &. + if len(keys) == 1 && strings.HasPrefix(keys[0], "&(") && strings.HasSuffix(keys[0], ")") { + return true, strings.TrimSuffix(keys[0][2:], ")") + } + return false, "" +} + func (p *parser) isSnippet() (bool, string) { keys := p.block.Keys // A snippet block is a single key with parens. Nothing else qualifies. @@ -572,18 +653,24 @@ func (p *parser) isSnippet() (bool, string) { } // read and store everything in a block for later replay. -func (p *parser) snippetTokens() ([]Token, error) { - // snippet must have curlies. +func (p *parser) blockTokens(retainCurlies bool) ([]Token, error) { + // block must have curlies. err := p.openCurlyBrace() if err != nil { return nil, err } - nesting := 1 // count our own nesting in snippets + nesting := 1 // count our own nesting tokens := []Token{} + if retainCurlies { + tokens = append(tokens, p.Token()) + } for p.Next() { if p.Val() == "}" { nesting-- if nesting == 0 { + if retainCurlies { + tokens = append(tokens, p.Token()) + } break } } @@ -603,9 +690,10 @@ func (p *parser) snippetTokens() ([]Token, error) { // head of the server block with tokens, which are // grouped by segments. type ServerBlock struct { - HasBraces bool - Keys []string - Segments []Segment + HasBraces bool + Keys []string + Segments []Segment + IsNamedRoute bool } // DispenseDirective returns a dispenser that contains diff --git a/caddyconfig/caddyfile/parse_test.go b/caddyconfig/caddyfile/parse_test.go index 4d18cc4..b1104ed 100644 --- a/caddyconfig/caddyfile/parse_test.go +++ b/caddyconfig/caddyfile/parse_test.go @@ -21,6 +21,88 @@ import ( "testing" ) +func TestParseVariadic(t *testing.T) { + var args = make([]string, 10) + for i, tc := range []struct { + input string + result bool + }{ + { + input: "", + result: false, + }, + { + input: "{args[1", + result: false, + }, + { + input: "1]}", + result: false, + }, + { + input: "{args[:]}aaaaa", + result: false, + }, + { + input: "aaaaa{args[:]}", + result: false, + }, + { + input: "{args.}", + result: false, + }, + { + input: "{args.1}", + result: false, + }, + { + input: "{args[]}", + result: false, + }, + { + input: "{args[:]}", + result: true, + }, + { + input: "{args[:]}", + result: true, + }, + { + input: "{args[0:]}", + result: true, + }, + { + input: "{args[:0]}", + result: true, + }, + { + input: "{args[-1:]}", + result: false, + }, + { + input: "{args[:11]}", + result: false, + }, + { + input: "{args[10:0]}", + result: false, + }, + { + input: "{args[0:10]}", + result: true, + }, + } { + token := Token{ + File: "test", + Line: 1, + Text: tc.input, + } + if v, _, _ := parseVariadic(token, len(args)); v != tc.result { + t.Errorf("Test %d error expectation failed Expected: %t, got %t", i, tc.result, v) + } + } +} + func TestAllTokens(t *testing.T) { input := []byte("a b c\nd e") expected := []string{"a", "b", "c", "d", "e"} @@ -211,6 +293,14 @@ func TestParseOneAndImport(t *testing.T) { // Unexpected next token after '{' on same line {`localhost dir1 { a b }`, true, []string{"localhost"}, []int{}}, + + // Unexpected '{' on a new line + {`localhost + dir1 + { + a b + }`, true, []string{"localhost"}, []int{}}, + // Workaround with quotes {`localhost dir1 "{" a b "}"`, false, []string{"localhost"}, []int{5}}, @@ -628,6 +718,36 @@ func TestEnvironmentReplacement(t *testing.T) { } } +func TestImportReplacementInJSONWithBrace(t *testing.T) { + for i, test := range []struct { + args []string + input string + expect string + }{ + { + args: []string{"123"}, + input: "{args[0]}", + expect: "123", + }, + { + args: []string{"123"}, + input: `{"key":"{args[0]}"}`, + expect: `{"key":"123"}`, + }, + { + args: []string{"123", "123"}, + input: `{"key":[{args[0]},{args[1]}]}`, + expect: `{"key":[123,123]}`, + }, + } { + repl := makeArgsReplacer(test.args) + actual := repl.ReplaceKnown(test.input, "") + if actual != test.expect { + t.Errorf("Test %d: Expected: '%s' but got '%s'", i, test.expect, actual) + } + } +} + func TestSnippets(t *testing.T) { p := testParser(` (common) { diff --git a/caddyconfig/caddyfile/testdata/import_args0.txt b/caddyconfig/caddyfile/testdata/import_args0.txt index af946fe..add211e 100644 --- a/caddyconfig/caddyfile/testdata/import_args0.txt +++ b/caddyconfig/caddyfile/testdata/import_args0.txt @@ -1 +1 @@ -{args.0}
\ No newline at end of file +{args[0]}
\ No newline at end of file diff --git a/caddyconfig/caddyfile/testdata/import_args1.txt b/caddyconfig/caddyfile/testdata/import_args1.txt index 519a92d..422692a 100644 --- a/caddyconfig/caddyfile/testdata/import_args1.txt +++ b/caddyconfig/caddyfile/testdata/import_args1.txt @@ -1 +1 @@ -{args.0} {args.1}
\ No newline at end of file +{args[0]} {args[1]}
\ No newline at end of file |