From 8bc05e598da0068358a2876c45f92f2c77455341 Mon Sep 17 00:00:00 2001 From: WeidiDeng Date: Fri, 17 Feb 2023 08:08:36 +0800 Subject: caddyfile: Implement variadics for import args placeholders (#5249) * implement variadic placeholders imported snippets reflect actual lines in file * add import directive line number for imported snippets add tests for parsing * add realfile field to help debug import cycle detection. * use file field to reflect import chain * Switch syntax, deprecate old syntax, refactoring - Moved the import args handling to a separate file - Using {args[0:1]} syntax now - Deprecate {args.*} syntax - Use a replacer map for better control over the parsing - Add plenty of warnings when invalid placeholders are detected - Renaming variables, cleanup comments for readability - More tests to cover edgecases I could think of - Minor cleanup to snippet tracking in tokens, drop a redundant boolean field in tokens --------- Co-authored-by: Francis Lavoie --- caddyconfig/caddyfile/lexer.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 5605a6a..09b04c1 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -36,14 +36,31 @@ type ( // Token represents a single parsable unit. Token struct { File string + origFile string Line int Text string wasQuoted rune // enclosing quote character, if any - inSnippet bool snippetName string } ) +// originalFile gets original filename before import modification. +func (t Token) originalFile() string { + if t.origFile != "" { + return t.origFile + } + return t.File +} + +// updateFile updates the token's source filename for error display +// and remembers the original filename. Used during "import" processing. +func (t *Token) updateFile(file string) { + if t.origFile == "" { + t.origFile = t.File + } + t.File = file +} + // load prepares the lexer to scan an input for tokens. // It discards any leading byte order mark. func (l *lexer) load(input io.Reader) error { -- cgit v1.2.3 From 960150bb034dc9a549ee7289b1a4eb4abafeb30a Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 25 Feb 2023 19:34:27 -0500 Subject: caddyfile: Implement heredoc support (#5385) --- caddyconfig/caddyfile/lexer.go | 226 +++++++++++++++++++++++++++++++++-------- 1 file changed, 181 insertions(+), 45 deletions(-) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 09b04c1..ba8b879 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -17,7 +17,10 @@ package caddyfile import ( "bufio" "bytes" + "fmt" "io" + "regexp" + "strings" "unicode" ) @@ -35,30 +38,39 @@ type ( // Token represents a single parsable unit. Token struct { - File string - origFile string - Line int - Text string - wasQuoted rune // enclosing quote character, if any - snippetName string + File string + origFile string + Line int + Text string + wasQuoted rune // enclosing quote character, if any + heredocMarker string + snippetName string } ) -// originalFile gets original filename before import modification. -func (t Token) originalFile() string { - if t.origFile != "" { - return t.origFile +// Tokenize takes bytes as input and lexes it into +// a list of tokens that can be parsed as a Caddyfile. +// Also takes a filename to fill the token's File as +// the source of the tokens, which is important to +// determine relative paths for `import` directives. +func Tokenize(input []byte, filename string) ([]Token, error) { + l := lexer{} + if err := l.load(bytes.NewReader(input)); err != nil { + return nil, err } - return t.File -} - -// updateFile updates the token's source filename for error display -// and remembers the original filename. Used during "import" processing. -func (t *Token) updateFile(file string) { - if t.origFile == "" { - t.origFile = t.File + var tokens []Token + for { + found, err := l.next() + if err != nil { + return nil, err + } + if !found { + break + } + l.token.File = filename + tokens = append(tokens, l.token) } - t.File = file + return tokens, nil } // load prepares the lexer to scan an input for tokens. @@ -92,28 +104,93 @@ func (l *lexer) load(input io.Reader) error { // may be escaped. The rest of the line is skipped // if a "#" character is read in. Returns true if // a token was loaded; false otherwise. -func (l *lexer) next() bool { +func (l *lexer) next() (bool, error) { var val []rune - var comment, quoted, btQuoted, escaped bool + var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool + var heredocMarker string makeToken := func(quoted rune) bool { l.token.Text = string(val) l.token.wasQuoted = quoted + l.token.heredocMarker = heredocMarker return true } for { + // Read a character in; if err then if we had + // read some characters, make a token. If we + // reached EOF, then no more tokens to read. + // If no EOF, then we had a problem. ch, _, err := l.reader.ReadRune() if err != nil { if len(val) > 0 { - return makeToken(0) + if inHeredoc { + return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker) + } + + return makeToken(0), nil } if err == io.EOF { - return false + return false, nil } - panic(err) + return false, err } + // detect whether we have the start of a heredoc + if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" { + if ch == '<' { + return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example < len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) { + // set the final value + val, err = l.finalizeHeredoc(val, heredocMarker) + if err != nil { + return false, err + } + + // set the line counter, and make the token + l.line += l.skippedLines + l.skippedLines = 0 + return makeToken('<'), nil + } + + // stay in the heredoc until we find the ending marker + continue + } + + // track whether we found an escape '\' for the next + // iteration to be contextually aware if !escaped && !btQuoted && ch == '\\' { escaped = true continue @@ -128,26 +205,29 @@ func (l *lexer) next() bool { } escaped = false } else { - if quoted && ch == '"' { - return makeToken('"') - } - if btQuoted && ch == '`' { - return makeToken('`') + if (quoted && ch == '"') || (btQuoted && ch == '`') { + return makeToken(ch), nil } } + // allow quoted text to wrap continue on multiple lines if ch == '\n' { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // collect this character as part of the quoted token val = append(val, ch) continue } if unicode.IsSpace(ch) { + // ignore CR altogether, we only actually care about LF (\n) if ch == '\r' { continue } + // end of the line if ch == '\n' { + // newlines can be escaped to chain arguments + // onto multiple lines; else, increment the line count if escaped { l.skippedLines++ escaped = false @@ -155,14 +235,18 @@ func (l *lexer) next() bool { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // comments (#) are single-line only comment = false } + // any kind of space means we're at the end of this token if len(val) > 0 { - return makeToken(0) + return makeToken(0), nil } continue } + // comments must be at the start of a token, + // in other words, preceded by space or newline if ch == '#' && len(val) == 0 { comment = true } @@ -183,7 +267,12 @@ func (l *lexer) next() bool { } if escaped { - val = append(val, '\\') + // allow escaping the first < to skip the heredoc syntax + if ch == '<' { + heredocEscaped = true + } else { + val = append(val, '\\') + } escaped = false } @@ -191,24 +280,71 @@ func (l *lexer) next() bool { } } -// Tokenize takes bytes as input and lexes it into -// a list of tokens that can be parsed as a Caddyfile. -// Also takes a filename to fill the token's File as -// the source of the tokens, which is important to -// determine relative paths for `import` directives. -func Tokenize(input []byte, filename string) ([]Token, error) { - l := lexer{} - if err := l.load(bytes.NewReader(input)); err != nil { - return nil, err +// finalizeHeredoc takes the runes read as the heredoc text and the marker, +// and processes the text to strip leading whitespace, returning the final +// value without the leading whitespace. +func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { + // find the last newline of the heredoc, which is where the contents end + lastNewline := strings.LastIndex(string(val), "\n") + + // collapse the content, then split into separate lines + lines := strings.Split(string(val[:lastNewline+1]), "\n") + + // figure out how much whitespace we need to strip from the front of every line + // by getting the string that precedes the marker, on the last line + paddingToStrip := string(val[lastNewline+1 : len(val)-len(marker)]) + + // iterate over each line and strip the whitespace from the front + var out string + for lineNum, lineText := range lines[:len(lines)-1] { + // find an exact match for the padding + index := strings.Index(lineText, paddingToStrip) + + // if the padding doesn't match exactly at the start then we can't safely strip + if index != 0 { + return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip) + } + + // strip, then append the line, with the newline, to the output. + // also removes all "\r" because Windows. + out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "") } - var tokens []Token - for l.next() { - l.token.File = filename - tokens = append(tokens, l.token) + + // return the final value + return []rune(out), nil +} + +// originalFile gets original filename before import modification. +func (t Token) originalFile() string { + if t.origFile != "" { + return t.origFile } - return tokens, nil + return t.File +} + +// updateFile updates the token's source filename for error display +// and remembers the original filename. Used during "import" processing. +func (t *Token) updateFile(file string) { + if t.origFile == "" { + t.origFile = t.File + } + t.File = file } func (t Token) Quoted() bool { return t.wasQuoted > 0 } + +// NumLineBreaks counts how many line breaks are in the token text. +func (t Token) NumLineBreaks() int { + lineBreaks := strings.Count(t.Text, "\n") + if t.wasQuoted == '<' { + // heredocs have an extra linebreak because the opening + // delimiter is on its own line and is not included in + // the token Text itself + lineBreaks++ + } + return lineBreaks +} + +var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$") -- cgit v1.2.3 From f3379f650a3de0e83903e3aa34d39b4ec647ee50 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sun, 26 Feb 2023 16:56:48 -0500 Subject: caddyfile: Fix heredoc fuzz crasher, drop trailing newline (#5404) Co-authored-by: Mohammed Al Sahaf --- caddyconfig/caddyfile/lexer.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index ba8b879..1f531f4 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -284,15 +284,17 @@ func (l *lexer) next() (bool, error) { // and processes the text to strip leading whitespace, returning the final // value without the leading whitespace. func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { + stringVal := string(val) + // find the last newline of the heredoc, which is where the contents end - lastNewline := strings.LastIndex(string(val), "\n") + lastNewline := strings.LastIndex(stringVal, "\n") // collapse the content, then split into separate lines - lines := strings.Split(string(val[:lastNewline+1]), "\n") + lines := strings.Split(stringVal[:lastNewline+1], "\n") // figure out how much whitespace we need to strip from the front of every line // by getting the string that precedes the marker, on the last line - paddingToStrip := string(val[lastNewline+1 : len(val)-len(marker)]) + paddingToStrip := stringVal[lastNewline+1 : len(stringVal)-len(marker)] // iterate over each line and strip the whitespace from the front var out string @@ -310,6 +312,11 @@ func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "") } + // Remove the trailing newline from the loop + if len(out) > 0 && out[len(out)-1] == '\n' { + out = out[:len(out)-1] + } + // return the final value return []rune(out), nil } @@ -340,9 +347,9 @@ func (t Token) NumLineBreaks() int { lineBreaks := strings.Count(t.Text, "\n") if t.wasQuoted == '<' { // heredocs have an extra linebreak because the opening - // delimiter is on its own line and is not included in - // the token Text itself - lineBreaks++ + // delimiter is on its own line and is not included in the + // token Text itself, and the trailing newline is removed. + lineBreaks += 2 } return lineBreaks } -- cgit v1.2.3 From 9cde71552576dcc724eff38dee42879c927b72ec Mon Sep 17 00:00:00 2001 From: WeidiDeng Date: Fri, 26 May 2023 03:05:00 +0800 Subject: caddyfile: Track import name instead of modifying filename (#5540) * Merge branch 'master' into import_file_stack * remove space in log key --- caddyconfig/caddyfile/lexer.go | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 1f531f4..d1fbfbb 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -39,7 +39,7 @@ type ( // Token represents a single parsable unit. Token struct { File string - origFile string + imports []string Line int Text string wasQuoted rune // enclosing quote character, if any @@ -321,23 +321,6 @@ func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { return []rune(out), nil } -// originalFile gets original filename before import modification. -func (t Token) originalFile() string { - if t.origFile != "" { - return t.origFile - } - return t.File -} - -// updateFile updates the token's source filename for error display -// and remembers the original filename. Used during "import" processing. -func (t *Token) updateFile(file string) { - if t.origFile == "" { - t.origFile = t.File - } - t.File = file -} - func (t Token) Quoted() bool { return t.wasQuoted > 0 } -- cgit v1.2.3 From bbe1952a59a196b7598c6488beb770ec38a70dfc Mon Sep 17 00:00:00 2001 From: WeidiDeng Date: Thu, 13 Jul 2023 04:32:22 +0800 Subject: caddyfile: Fix comparing if two tokens are on the same line (#5626) * fix comparing if two tokens are on the same line * compare tokens from copies when importing --- caddyconfig/caddyfile/lexer.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index d1fbfbb..47aaa03 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -338,3 +338,28 @@ func (t Token) NumLineBreaks() int { } var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$") + +// isNextOnNewLine tests whether t2 is on a different line from t1 +func isNextOnNewLine(t1, t2 Token) bool { + // If the second token is from a different file, + // we can assume it's from a different line + if t1.File != t2.File { + return true + } + + // If the second token is from a different import chain, + // we can assume it's from a different line + if len(t1.imports) != len(t2.imports) { + return true + } + for i, im := range t1.imports { + if im != t2.imports[i] { + return true + } + } + + // If the first token (incl line breaks) ends + // on a line earlier than the next token, + // then the second token is on a new line + return t1.Line+t1.NumLineBreaks() < t2.Line +} -- cgit v1.2.3 From 10053f7570b113d41c5a5ba40e6c457ab0784ef5 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 19 Aug 2023 06:32:32 -0400 Subject: caddyfile: Loosen heredoc parsing (#5761) --- caddyconfig/caddyfile/lexer.go | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 47aaa03..6f72b5d 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -137,18 +137,28 @@ func (l *lexer) next() (bool, error) { } // detect whether we have the start of a heredoc - if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" { - if ch == '<' { - return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example < 1 && string(val[:2]) == "<<" { + // a space means it's just a regular token and not a heredoc + if ch == ' ' { + return makeToken(0), nil } + + // skip CR, we only care about LF if ch == '\r' { continue } + // after hitting a newline, we know that the heredoc marker // is the characters after the two << and the newline. // we reset the val because the heredoc is syntax we don't // want to keep. if ch == '\n' { + // check if there's too many < + if string(val[:3]) == "<<<" { + return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example < Date: Wed, 23 Aug 2023 23:27:57 -0400 Subject: caddyfile: Fix case where heredoc marker is empty after newline (#5769) Fixes `panic: runtime error: slice bounds out of range [:3] with capacity 2` Co-authored-by: Matt Holt --- caddyconfig/caddyfile/lexer.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'caddyconfig/caddyfile/lexer.go') diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 6f72b5d..bfd6c0f 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -154,6 +154,10 @@ func (l *lexer) next() (bool, error) { // we reset the val because the heredoc is syntax we don't // want to keep. if ch == '\n' { + if len(val) == 2 { + return false, fmt.Errorf("missing opening heredoc marker on line #%d; must contain only alpha-numeric characters, dashes and underscores; got empty string", l.line) + } + // check if there's too many < if string(val[:3]) == "<<<" { return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <