From c12bf4054c37b88eb37f7c59631d55ae512bcd29 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Sat, 28 Sep 2019 21:18:36 -0600 Subject: caddyfile: Fix lexer behavior with regards to escaped newlines Newlines (\n) can be escaped outside of quoted areas and the newline will be treated as whitespace but not as an actual line break. Escaping newlines inside a quoted area is not necessary, and because quotes trigger literal interpretation of the contents, the escaping backslash will be parsed as a literal backslash, and the newline will not be escaped. Caveat: When a newline is escaped, tokens after it until an unescaped newline will appear to the parser to be on the same line as the initial token after the last unescaped newline. This may technically lead to some false line numbers if errors are given, but escaped newlines are counted so that the next token after an unescaped newline is correct. See #2766 --- caddyconfig/caddyfile/lexer.go | 45 ++++++++++++++++++++++--------------- caddyconfig/caddyfile/lexer_test.go | 42 +++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index efe648d..c0b6e1d 100755 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -26,9 +26,10 @@ type ( // are separated by whitespace. A word can be enclosed // in quotes if it contains whitespace. lexer struct { - reader *bufio.Reader - token Token - line int + reader *bufio.Reader + token Token + line int + skippedLines int } // Token represents a single parsable unit. 
@@ -91,27 +92,30 @@ func (l *lexer) next() bool { panic(err) } + if !escaped && ch == '\\' { + escaped = true + continue + } + if quoted { - if !escaped { - if ch == '\\' { - escaped = true - continue - } else if ch == '"' { + if escaped { + // all is literal in quoted area, + // so only escape quotes + if ch != '"' { + val = append(val, '\\') + } + escaped = false + } else { + if ch == '"' { quoted = false return makeToken() } } if ch == '\n' { - l.line++ - } - if escaped { - // only escape quotes and newlines - if ch != '"' && ch != '\n' { - val = append(val, '\\') - } + l.line += 1 + l.skippedLines + l.skippedLines = 0 } val = append(val, ch) - escaped = false continue } @@ -120,7 +124,13 @@ func (l *lexer) next() bool { continue } if ch == '\n' { - l.line++ + if escaped { + l.skippedLines++ + escaped = false + } else { + l.line += 1 + l.skippedLines + l.skippedLines = 0 + } comment = false } if len(val) > 0 { @@ -132,7 +142,6 @@ func (l *lexer) next() bool { if ch == '#' { comment = true } - if comment { continue } diff --git a/caddyconfig/caddyfile/lexer_test.go b/caddyconfig/caddyfile/lexer_test.go index f9a843c..ce2e147 100755 --- a/caddyconfig/caddyfile/lexer_test.go +++ b/caddyconfig/caddyfile/lexer_test.go @@ -96,13 +96,49 @@ func TestLexer(t *testing.T) { }, }, { - input: "A \"newline \\\ninside\" quotes", + input: "An escaped \"newline\\\ninside\" quotes", expected: []Token{ - {Line: 1, Text: "A"}, - {Line: 1, Text: "newline \ninside"}, + {Line: 1, Text: "An"}, + {Line: 1, Text: "escaped"}, + {Line: 1, Text: "newline\\\ninside"}, {Line: 2, Text: "quotes"}, }, }, + { + input: "An escaped newline\\\noutside quotes", + expected: []Token{ + {Line: 1, Text: "An"}, + {Line: 1, Text: "escaped"}, + {Line: 1, Text: "newline"}, + {Line: 1, Text: "outside"}, + {Line: 1, Text: "quotes"}, + }, + }, + { + input: "line1\\\nescaped\nline2\nline3", + expected: []Token{ + {Line: 1, Text: "line1"}, + {Line: 1, Text: "escaped"}, + {Line: 3, Text: "line2"}, + {Line: 4, 
Text: "line3"}, + }, + }, + { + input: "line1\\\nescaped1\\\nescaped2\nline4\nline5", + expected: []Token{ + {Line: 1, Text: "line1"}, + {Line: 1, Text: "escaped1"}, + {Line: 1, Text: "escaped2"}, + {Line: 4, Text: "line4"}, + {Line: 5, Text: "line5"}, + }, + }, + { + input: `"unescapable\ in quotes"`, + expected: []Token{ + {Line: 1, Text: `unescapable\ in quotes`}, + }, + }, { input: `"don't\escape"`, expected: []Token{ -- cgit v1.2.3