summary | refs | log | tree | commit | diff
path: root/caddyconfig
diff options
context:
space:
mode:
author Francis Lavoie <lavofr@gmail.com> 2023-02-25 19:34:27 -0500
committer GitHub <noreply@github.com> 2023-02-26 00:34:27 +0000
commit 960150bb034dc9a549ee7289b1a4eb4abafeb30a (patch)
tree a3608546db0b154a75afc3dae2d52d44b92ceac9 /caddyconfig
parent 9e6919550be5689628d0020ec14e90ea6f527716 (diff)
caddyfile: Implement heredoc support (#5385)
Diffstat (limited to 'caddyconfig')
-rw-r--r-- caddyconfig/caddyfile/dispenser.go 58
-rw-r--r-- caddyconfig/caddyfile/lexer.go 226
-rw-r--r-- caddyconfig/caddyfile/lexer_test.go 125
-rw-r--r-- caddyconfig/caddyfile/testdata/import_args0.txt 2
-rw-r--r-- caddyconfig/caddyfile/testdata/import_args1.txt 2
5 files changed, 324 insertions, 89 deletions
diff --git a/caddyconfig/caddyfile/dispenser.go b/caddyconfig/caddyfile/dispenser.go
index 91bd9a5..0daed3c 100644
--- a/caddyconfig/caddyfile/dispenser.go
+++ b/caddyconfig/caddyfile/dispenser.go
@@ -20,7 +20,6 @@ import (
"io"
"log"
"strconv"
- "strings"
)
// Dispenser is a type that dispenses tokens, similarly to a lexer,
@@ -101,12 +100,12 @@ func (d *Dispenser) nextOnSameLine() bool {
d.cursor++
return true
}
- if d.cursor >= len(d.tokens) {
+ if d.cursor >= len(d.tokens)-1 {
return false
}
- if d.cursor < len(d.tokens)-1 &&
- d.tokens[d.cursor].File == d.tokens[d.cursor+1].File &&
- d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) == d.tokens[d.cursor+1].Line {
+ curr := d.tokens[d.cursor]
+ next := d.tokens[d.cursor+1]
+ if curr.File == next.File && curr.Line+curr.NumLineBreaks() == next.Line {
d.cursor++
return true
}
@@ -122,12 +121,12 @@ func (d *Dispenser) NextLine() bool {
d.cursor++
return true
}
- if d.cursor >= len(d.tokens) {
+ if d.cursor >= len(d.tokens)-1 {
return false
}
- if d.cursor < len(d.tokens)-1 &&
- (d.tokens[d.cursor].File != d.tokens[d.cursor+1].File ||
- d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) < d.tokens[d.cursor+1].Line) {
+ curr := d.tokens[d.cursor]
+ next := d.tokens[d.cursor+1]
+ if curr.File != next.File || curr.Line+curr.NumLineBreaks() < next.Line {
d.cursor++
return true
}
@@ -203,14 +202,17 @@ func (d *Dispenser) Val() string {
}
// ValRaw gets the raw text of the current token (including quotes).
+// If the token was a heredoc, then the delimiter is not included,
+// because that is not relevant to any unmarshaling logic at this time.
// If there is no token loaded, it returns empty string.
func (d *Dispenser) ValRaw() string {
if d.cursor < 0 || d.cursor >= len(d.tokens) {
return ""
}
quote := d.tokens[d.cursor].wasQuoted
- if quote > 0 {
- return string(quote) + d.tokens[d.cursor].Text + string(quote) // string literal
+ if quote > 0 && quote != '<' {
+ // string literal
+ return string(quote) + d.tokens[d.cursor].Text + string(quote)
}
return d.tokens[d.cursor].Text
}
@@ -438,14 +440,14 @@ func (d *Dispenser) Delete() []Token {
return d.tokens
}
-// numLineBreaks counts how many line breaks are in the token
-// value given by the token index tknIdx. It returns 0 if the
-// token does not exist or there are no line breaks.
-func (d *Dispenser) numLineBreaks(tknIdx int) int {
- if tknIdx < 0 || tknIdx >= len(d.tokens) {
- return 0
+// DeleteN is the same as Delete, but can delete many tokens at once.
+// If there aren't N tokens available to delete, none are deleted.
+func (d *Dispenser) DeleteN(amount int) []Token {
+ if amount > 0 && d.cursor >= (amount-1) && d.cursor <= len(d.tokens)-1 {
+ d.tokens = append(d.tokens[:d.cursor-(amount-1)], d.tokens[d.cursor+1:]...)
+ d.cursor -= amount
}
- return strings.Count(d.tokens[tknIdx].Text, "\n")
+ return d.tokens
}
// isNewLine determines whether the current token is on a different
@@ -468,18 +470,10 @@ func (d *Dispenser) isNewLine() bool {
return true
}
- // The previous token may contain line breaks if
- // it was quoted and spanned multiple lines. e.g:
- //
- // dir "foo
- // bar
- // baz"
- prevLineBreaks := d.numLineBreaks(d.cursor - 1)
-
// If the previous token (incl line breaks) ends
// on a line earlier than the current token,
// then the current token is on a new line
- return prev.Line+prevLineBreaks < curr.Line
+ return prev.Line+prev.NumLineBreaks() < curr.Line
}
// isNextOnNewLine determines whether the current token is on a different
@@ -502,16 +496,8 @@ func (d *Dispenser) isNextOnNewLine() bool {
return true
}
- // The current token may contain line breaks if
- // it was quoted and spanned multiple lines. e.g:
- //
- // dir "foo
- // bar
- // baz"
- currLineBreaks := d.numLineBreaks(d.cursor)
-
// If the current token (incl line breaks) ends
// on a line earlier than the next token,
// then the next token is on a new line
- return curr.Line+currLineBreaks < next.Line
+ return curr.Line+curr.NumLineBreaks() < next.Line
}
diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go
index 09b04c1..ba8b879 100644
--- a/caddyconfig/caddyfile/lexer.go
+++ b/caddyconfig/caddyfile/lexer.go
@@ -17,7 +17,10 @@ package caddyfile
import (
"bufio"
"bytes"
+ "fmt"
"io"
+ "regexp"
+ "strings"
"unicode"
)
@@ -35,30 +38,39 @@ type (
// Token represents a single parsable unit.
Token struct {
- File string
- origFile string
- Line int
- Text string
- wasQuoted rune // enclosing quote character, if any
- snippetName string
+ File string
+ origFile string
+ Line int
+ Text string
+ wasQuoted rune // enclosing quote character, if any
+ heredocMarker string
+ snippetName string
}
)
-// originalFile gets original filename before import modification.
-func (t Token) originalFile() string {
- if t.origFile != "" {
- return t.origFile
+// Tokenize takes bytes as input and lexes it into
+// a list of tokens that can be parsed as a Caddyfile.
+// Also takes a filename to fill the token's File as
+// the source of the tokens, which is important to
+// determine relative paths for `import` directives.
+func Tokenize(input []byte, filename string) ([]Token, error) {
+ l := lexer{}
+ if err := l.load(bytes.NewReader(input)); err != nil {
+ return nil, err
}
- return t.File
-}
-
-// updateFile updates the token's source filename for error display
-// and remembers the original filename. Used during "import" processing.
-func (t *Token) updateFile(file string) {
- if t.origFile == "" {
- t.origFile = t.File
+ var tokens []Token
+ for {
+ found, err := l.next()
+ if err != nil {
+ return nil, err
+ }
+ if !found {
+ break
+ }
+ l.token.File = filename
+ tokens = append(tokens, l.token)
}
- t.File = file
+ return tokens, nil
}
// load prepares the lexer to scan an input for tokens.
@@ -92,28 +104,93 @@ func (l *lexer) load(input io.Reader) error {
// may be escaped. The rest of the line is skipped
// if a "#" character is read in. Returns true if
// a token was loaded; false otherwise.
-func (l *lexer) next() bool {
+func (l *lexer) next() (bool, error) {
var val []rune
- var comment, quoted, btQuoted, escaped bool
+ var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool
+ var heredocMarker string
makeToken := func(quoted rune) bool {
l.token.Text = string(val)
l.token.wasQuoted = quoted
+ l.token.heredocMarker = heredocMarker
return true
}
for {
+ // Read a character in; if err then if we had
+ // read some characters, make a token. If we
+ // reached EOF, then no more tokens to read.
+ // If no EOF, then we had a problem.
ch, _, err := l.reader.ReadRune()
if err != nil {
if len(val) > 0 {
- return makeToken(0)
+ if inHeredoc {
+ return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker)
+ }
+
+ return makeToken(0), nil
}
if err == io.EOF {
- return false
+ return false, nil
}
- panic(err)
+ return false, err
}
+ // detect whether we have the start of a heredoc
+ if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" {
+ if ch == '<' {
+ return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line)
+ }
+ if ch == '\r' {
+ continue
+ }
+ // after hitting a newline, we know that the heredoc marker
+ // is the characters after the two << and the newline.
+ // we reset the val because the heredoc is syntax we don't
+ // want to keep.
+ if ch == '\n' {
+ heredocMarker = string(val[2:])
+ if !heredocMarkerRegexp.Match([]byte(heredocMarker)) {
+ return false, fmt.Errorf("heredoc marker on line #%d must contain only alpha-numeric characters, dashes and underscores; got '%s'", l.line, heredocMarker)
+ }
+
+ inHeredoc = true
+ l.skippedLines++
+ val = nil
+ continue
+ }
+ val = append(val, ch)
+ continue
+ }
+
+ // if we're in a heredoc, all characters are read as-is
+ if inHeredoc {
+ val = append(val, ch)
+
+ if ch == '\n' {
+ l.skippedLines++
+ }
+
+ // check if we're done, i.e. that the last few characters are the marker
+ if len(val) > len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) {
+ // set the final value
+ val, err = l.finalizeHeredoc(val, heredocMarker)
+ if err != nil {
+ return false, err
+ }
+
+ // set the line counter, and make the token
+ l.line += l.skippedLines
+ l.skippedLines = 0
+ return makeToken('<'), nil
+ }
+
+ // stay in the heredoc until we find the ending marker
+ continue
+ }
+
+ // track whether we found an escape '\' for the next
+ // iteration to be contextually aware
if !escaped && !btQuoted && ch == '\\' {
escaped = true
continue
@@ -128,26 +205,29 @@ func (l *lexer) next() bool {
}
escaped = false
} else {
- if quoted && ch == '"' {
- return makeToken('"')
- }
- if btQuoted && ch == '`' {
- return makeToken('`')
+ if (quoted && ch == '"') || (btQuoted && ch == '`') {
+ return makeToken(ch), nil
}
}
+ // allow quoted text to wrap continue on multiple lines
if ch == '\n' {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
+ // collect this character as part of the quoted token
val = append(val, ch)
continue
}
if unicode.IsSpace(ch) {
+ // ignore CR altogether, we only actually care about LF (\n)
if ch == '\r' {
continue
}
+ // end of the line
if ch == '\n' {
+ // newlines can be escaped to chain arguments
+ // onto multiple lines; else, increment the line count
if escaped {
l.skippedLines++
escaped = false
@@ -155,14 +235,18 @@ func (l *lexer) next() bool {
l.line += 1 + l.skippedLines
l.skippedLines = 0
}
+ // comments (#) are single-line only
comment = false
}
+ // any kind of space means we're at the end of this token
if len(val) > 0 {
- return makeToken(0)
+ return makeToken(0), nil
}
continue
}
+ // comments must be at the start of a token,
+ // in other words, preceded by space or newline
if ch == '#' && len(val) == 0 {
comment = true
}
@@ -183,7 +267,12 @@ func (l *lexer) next() bool {
}
if escaped {
- val = append(val, '\\')
+ // allow escaping the first < to skip the heredoc syntax
+ if ch == '<' {
+ heredocEscaped = true
+ } else {
+ val = append(val, '\\')
+ }
escaped = false
}
@@ -191,24 +280,71 @@ func (l *lexer) next() bool {
}
}
-// Tokenize takes bytes as input and lexes it into
-// a list of tokens that can be parsed as a Caddyfile.
-// Also takes a filename to fill the token's File as
-// the source of the tokens, which is important to
-// determine relative paths for `import` directives.
-func Tokenize(input []byte, filename string) ([]Token, error) {
- l := lexer{}
- if err := l.load(bytes.NewReader(input)); err != nil {
- return nil, err
+// finalizeHeredoc takes the runes read as the heredoc text and the marker,
+// and processes the text to strip leading whitespace, returning the final
+// value without the leading whitespace.
+func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) {
+ // find the last newline of the heredoc, which is where the contents end
+ lastNewline := strings.LastIndex(string(val), "\n")
+
+ // collapse the content, then split into separate lines
+ lines := strings.Split(string(val[:lastNewline+1]), "\n")
+
+ // figure out how much whitespace we need to strip from the front of every line
+ // by getting the string that precedes the marker, on the last line
+ paddingToStrip := string(val[lastNewline+1 : len(val)-len(marker)])
+
+ // iterate over each line and strip the whitespace from the front
+ var out string
+ for lineNum, lineText := range lines[:len(lines)-1] {
+ // find an exact match for the padding
+ index := strings.Index(lineText, paddingToStrip)
+
+ // if the padding doesn't match exactly at the start then we can't safely strip
+ if index != 0 {
+ return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip)
+ }
+
+ // strip, then append the line, with the newline, to the output.
+ // also removes all "\r" because Windows.
+ out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "")
}
- var tokens []Token
- for l.next() {
- l.token.File = filename
- tokens = append(tokens, l.token)
+
+ // return the final value
+ return []rune(out), nil
+}
+
+// originalFile gets original filename before import modification.
+func (t Token) originalFile() string {
+ if t.origFile != "" {
+ return t.origFile
}
- return tokens, nil
+ return t.File
+}
+
+// updateFile updates the token's source filename for error display
+// and remembers the original filename. Used during "import" processing.
+func (t *Token) updateFile(file string) {
+ if t.origFile == "" {
+ t.origFile = t.File
+ }
+ t.File = file
}
func (t Token) Quoted() bool {
return t.wasQuoted > 0
}
+
+// NumLineBreaks counts how many line breaks are in the token text.
+func (t Token) NumLineBreaks() int {
+ lineBreaks := strings.Count(t.Text, "\n")
+ if t.wasQuoted == '<' {
+ // heredocs have an extra linebreak because the opening
+ // delimiter is on its own line and is not included in
+ // the token Text itself
+ lineBreaks++
+ }
+ return lineBreaks
+}
+
+var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$")
diff --git a/caddyconfig/caddyfile/lexer_test.go b/caddyconfig/caddyfile/lexer_test.go
index 30ee0f6..3c7e157 100644
--- a/caddyconfig/caddyfile/lexer_test.go
+++ b/caddyconfig/caddyfile/lexer_test.go
@@ -18,13 +18,13 @@ import (
"testing"
)
-type lexerTestCase struct {
- input []byte
- expected []Token
-}
-
func TestLexer(t *testing.T) {
- testCases := []lexerTestCase{
+ testCases := []struct {
+ input []byte
+ expected []Token
+ expectErr bool
+ errorMessage string
+ }{
{
input: []byte(`host:123`),
expected: []Token{
@@ -249,10 +249,123 @@ func TestLexer(t *testing.T) {
{Line: 1, Text: `quotes`},
},
},
+ {
+ input: []byte(`heredoc <<EOF
+content
+EOF same-line-arg
+ `),
+ expected: []Token{
+ {Line: 1, Text: `heredoc`},
+ {Line: 1, Text: "content\n"},
+ {Line: 3, Text: `same-line-arg`},
+ },
+ },
+ {
+ input: []byte(`heredoc <<VERY-LONG-MARKER
+content
+VERY-LONG-MARKER same-line-arg
+ `),
+ expected: []Token{
+ {Line: 1, Text: `heredoc`},
+ {Line: 1, Text: "content\n"},
+ {Line: 3, Text: `same-line-arg`},
+ },
+ },
+ {
+ input: []byte(`heredoc <<EOF
+ content
+ EOF same-line-arg
+ `),
+ expected: []Token{
+ {Line: 1, Text: `heredoc`},
+ {Line: 1, Text: "content\n"},
+ {Line: 3, Text: `same-line-arg`},
+ },
+ },
+ {
+ input: []byte(`prev-line
+ heredoc <<EOF
+ multi
+ line
+ content
+ EOF same-line-arg
+ next-line
+ `),
+ expected: []Token{
+ {Line: 1, Text: `prev-line`},
+ {Line: 2, Text: `heredoc`},
+ {Line: 2, Text: "\tmulti\n\tline\n\tcontent\n"},
+ {Line: 6, Text: `same-line-arg`},
+ {Line: 7, Text: `next-line`},
+ },
+ },
+ {
+ input: []byte(`heredoc <EOF
+ content
+ EOF same-line-arg
+ `),
+ expected: []Token{
+ {Line: 1, Text: `heredoc`},
+ {Line: 1, Text: `<EOF`},
+ {Line: 2, Text: `content`},
+ {Line: 3, Text: `EOF`},
+ {Line: 3, Text: `same-line-arg`},
+ },
+ },
+ {
+ input: []byte(`heredoc <<HERE SAME LINE
+ content
+ HERE same-line-arg
+ `),
+ expectErr: true,
+ errorMessage: "heredoc marker on line #1 must contain only alpha-numeric characters, dashes and underscores; got 'HERE SAME LINE'",
+ },
+ {
+ input: []byte(`heredoc <<<EOF
+ content
+ EOF same-line-arg
+ `),
+ expectErr: true,
+ errorMessage: "too many '<' for heredoc on line #1; only use two, for example <<END",
+ },
+ {
+ input: []byte(`heredoc <<EOF
+ content
+ `),
+ expectErr: true,
+ errorMessage: "incomplete heredoc <<EOF on line #3, expected ending marker EOF",
+ },
+ {
+ input: []byte(`heredoc <<EOF
+ content
+ EOF
+ `),
+ expectErr: true,
+ errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [\tcontent], expected whitespace [\t\t] to match the closing marker",
+ },
+ {
+ input: []byte(`heredoc <<EOF
+ content
+ EOF
+ `),
+ expectErr: true,
+ errorMessage: "mismatched leading whitespace in heredoc <<EOF on line #2 [ content], expected whitespace [\t\t] to match the closing marker",
+ },
}
for i, testCase := range testCases {
actual, err := Tokenize(testCase.input, "")
+ if testCase.expectErr {
+ if err == nil {
+ t.Errorf("expected error, got actual: %v", actual)
+ continue
+ }
+ if err.Error() != testCase.errorMessage {
+ t.Errorf("expected error '%v', got: %v", testCase.errorMessage, err)
+ }
+ continue
+ }
+
if err != nil {
t.Errorf("%v", err)
}
diff --git a/caddyconfig/caddyfile/testdata/import_args0.txt b/caddyconfig/caddyfile/testdata/import_args0.txt
index af946fe..add211e 100644
--- a/caddyconfig/caddyfile/testdata/import_args0.txt
+++ b/caddyconfig/caddyfile/testdata/import_args0.txt
@@ -1 +1 @@
-{args.0}
\ No newline at end of file
+{args[0]}
\ No newline at end of file
diff --git a/caddyconfig/caddyfile/testdata/import_args1.txt b/caddyconfig/caddyfile/testdata/import_args1.txt
index 519a92d..422692a 100644
--- a/caddyconfig/caddyfile/testdata/import_args1.txt
+++ b/caddyconfig/caddyfile/testdata/import_args1.txt
@@ -1 +1 @@
-{args.0} {args.1}
\ No newline at end of file
+{args[0]} {args[1]}
\ No newline at end of file