From d5ea43fb4b549dda6040b31ce472e7843dfc5077 Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Mon, 5 Sep 2022 13:53:41 -0600 Subject: fileserver: Support glob expansion in file matcher (#4993) * fileserver: Support glob expansion in file matcher * Fix tests * Fix bugs and tests * Attempt Windows fix, sigh * debug Windows, WIP * Continue debugging Windows * Another attempt at Windows * Plz Windows * Cmon... * Clean up, hope I didn't break anything --- modules/caddyhttp/fileserver/caddyfile.go | 61 ++++-- modules/caddyhttp/fileserver/matcher.go | 230 ++++++++++++++------- modules/caddyhttp/fileserver/matcher_test.go | 29 ++- modules/caddyhttp/fileserver/staticfiles.go | 13 +- .../caddyhttp/fileserver/testdata/foodir/bar.txt | 1 + 5 files changed, 223 insertions(+), 111 deletions(-) create mode 100644 modules/caddyhttp/fileserver/testdata/foodir/bar.txt (limited to 'modules/caddyhttp/fileserver') diff --git a/modules/caddyhttp/fileserver/caddyfile.go b/modules/caddyhttp/fileserver/caddyfile.go index a3cf9e4..1a0424c 100644 --- a/modules/caddyhttp/fileserver/caddyfile.go +++ b/modules/caddyhttp/fileserver/caddyfile.go @@ -36,17 +36,16 @@ func init() { // parseCaddyfile parses the file_server directive. It enables the static file // server and configures it with this syntax: // -// file_server [] [browse] { -// fs -// root -// hide -// index -// browse [] -// precompressed -// status -// disable_canonical_uris -// } -// +// file_server [] [browse] { +// fs +// root +// hide +// index +// browse [] +// precompressed +// status +// disable_canonical_uris +// } func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { var fsrv FileServer @@ -177,22 +176,23 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) // with a rewrite directive, so this is not a standard handler directive. // A try_files directive has this syntax (notice no matcher tokens accepted): // -// try_files +// try_files { +// policy first_exist|smallest_size|largest_size|most_recently_modified +// } // // and is basically shorthand for: // -// @try_files { -// file { -// try_files -// } -// } -// rewrite @try_files {http.matchers.file.relative} +// @try_files file { +// try_files +// policy first_exist|smallest_size|largest_size|most_recently_modified +// } +// rewrite @try_files {http.matchers.file.relative} // // This directive rewrites request paths only, preserving any other part // of the URI, unless the part is explicitly given in the file list. For // example, if any of the files in the list have a query string: // -// try_files {path} index.php?{query}&p={path} +// try_files {path} index.php?{query}&p={path} // // then the query string will not be treated as part of the file name; and // if that file matches, the given query string will replace any query string @@ -207,6 +207,27 @@ func parseTryFiles(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) return nil, h.ArgErr() } + // parse out the optional try policy + var tryPolicy string + for nesting := h.Nesting(); h.NextBlock(nesting); { + switch h.Val() { + case "policy": + if tryPolicy != "" { + return nil, h.Err("try policy already configured") + } + if !h.NextArg() { + return nil, h.ArgErr() + } + tryPolicy = h.Val() + + switch tryPolicy { + case tryPolicyFirstExist, tryPolicyLargestSize, tryPolicySmallestSize, tryPolicyMostRecentlyMod: + default: + return nil, h.Errf("unrecognized try policy: %s", tryPolicy) + } + } + } + // makeRoute returns a route that tries the files listed in try // and then rewrites to the matched file; userQueryString is // appended to the rewrite rule. @@ -215,7 +236,7 @@ func parseTryFiles(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) URI: "{http.matchers.file.relative}" + userQueryString, } matcherSet := caddy.ModuleMap{ - "file": h.JSON(MatchFile{TryFiles: try}), + "file": h.JSON(MatchFile{TryFiles: try, TryPolicy: tryPolicy}), } return h.NewRoute(matcherSet, handler) } diff --git a/modules/caddyhttp/fileserver/matcher.go b/modules/caddyhttp/fileserver/matcher.go index 5cade25..a5dab6e 100644 --- a/modules/caddyhttp/fileserver/matcher.go +++ b/modules/caddyhttp/fileserver/matcher.go @@ -21,9 +21,10 @@ import ( "net/http" "os" "path" + "path/filepath" + "runtime" "strconv" "strings" - "time" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" @@ -33,6 +34,7 @@ import ( "github.com/google/cel-go/common/operators" "github.com/google/cel-go/common/types/ref" "github.com/google/cel-go/parser" + "go.uber.org/zap" exprpb "google.golang.org/genproto/googleapis/api/expr/v1alpha1" ) @@ -55,6 +57,9 @@ func init() { // the matched file is a directory, "file" otherwise. // - `{http.matchers.file.remainder}` Set to the remainder // of the path if the path was split by `split_path`. +// +// Even though file matching may depend on the OS path +// separator, the placeholder values always use /. type MatchFile struct { // The file system implementation to use. By default, the // local disk file system will be used. @@ -101,6 +106,8 @@ type MatchFile struct { // Each delimiter must appear at the end of a URI path // component in order to be used as a split delimiter. SplitPath []string `json:"split_path,omitempty"` + + logger *zap.Logger } // CaddyModule returns the Caddy module information. @@ -113,12 +120,11 @@ func (MatchFile) CaddyModule() caddy.ModuleInfo { // UnmarshalCaddyfile sets up the matcher from Caddyfile tokens. Syntax: // -// file { -// root -// try_files -// try_policy first_exist|smallest_size|largest_size|most_recently_modified -// } -// +// file { +// root +// try_files +// try_policy first_exist|smallest_size|largest_size|most_recently_modified +// } func (m *MatchFile) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { m.TryFiles = append(m.TryFiles, d.RemainingArgs()...) @@ -156,7 +162,8 @@ func (m *MatchFile) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { // expression matchers. // // Example: -// expression file({'root': '/srv', 'try_files': [{http.request.uri.path}, '/index.php'], 'try_policy': 'first_exist', 'split_path': ['.php']}) +// +// expression file({'root': '/srv', 'try_files': [{http.request.uri.path}, '/index.php'], 'try_policy': 'first_exist', 'split_path': ['.php']}) func (MatchFile) CELLibrary(ctx caddy.Context) (cel.Library, error) { requestType := cel.ObjectType("http.Request") @@ -249,6 +256,8 @@ func celFileMatcherMacroExpander() parser.MacroExpander { // Provision sets up m's defaults. func (m *MatchFile) Provision(ctx caddy.Context) error { + m.logger = ctx.Logger(m) + // establish the file system to use if len(m.FileSystemRaw) > 0 { mod, err := ctx.LoadModule(m, "FileSystemRaw") @@ -290,10 +299,10 @@ func (m MatchFile) Validate() error { // Match returns true if r matches m. Returns true // if a file was matched. If so, four placeholders // will be available: -// - http.matchers.file.relative -// - http.matchers.file.absolute -// - http.matchers.file.type -// - http.matchers.file.remainder +// - http.matchers.file.relative: Path to file relative to site root +// - http.matchers.file.absolute: Path to file including site root +// - http.matchers.file.type: file or directory +// - http.matchers.file.remainder: Portion remaining after splitting file path (if configured) func (m MatchFile) Match(r *http.Request) bool { return m.selectFile(r) } @@ -303,23 +312,80 @@ func (m MatchFile) Match(r *http.Request) bool { func (m MatchFile) selectFile(r *http.Request) (matched bool) { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) - root := repl.ReplaceAll(m.Root, ".") + root := filepath.Clean(repl.ReplaceAll(m.Root, ".")) + + type matchCandidate struct { + fullpath, relative, splitRemainder string + } - // common preparation of the file into parts - prepareFilePath := func(file string) (suffix, fullpath, remainder string) { - suffix, remainder = m.firstSplit(path.Clean(repl.ReplaceAll(file, ""))) + // makeCandidates evaluates placeholders in file and expands any glob expressions + // to build a list of file candidates. Special glob characters are escaped in + // placeholder replacements so globs cannot be expanded from placeholders, and + // globs are not evaluated on Windows because of its path separator character: + // escaping is not supported so we can't safely glob on Windows, or we can't + // support placeholders on Windows (pick one). (Actually, evaluating untrusted + // globs is not the end of the world since the file server will still hide any + // hidden files, it just might lead to unexpected behavior.) + makeCandidates := func(file string) []matchCandidate { + // first, evaluate placeholders in the file pattern + expandedFile, err := repl.ReplaceFunc(file, func(variable string, val any) (any, error) { + if runtime.GOOS == "windows" { + return val, nil + } + switch v := val.(type) { + case string: + return globSafeRepl.Replace(v), nil + case fmt.Stringer: + return globSafeRepl.Replace(v.String()), nil + } + return val, nil + }) + if err != nil { + m.logger.Error("evaluating placeholders", zap.Error(err)) + expandedFile = file // "oh well," I guess? + } + + // clean the path and split, if configured -- we must split before + // globbing so that the file system doesn't include the remainder + // ("afterSplit") in the filename; be sure to restore trailing slash + beforeSplit, afterSplit := m.firstSplit(path.Clean(expandedFile)) if strings.HasSuffix(file, "/") { - suffix += "/" + beforeSplit += "/" + } + + // create the full path to the file by prepending the site root + fullPattern := caddyhttp.SanitizedPathJoin(root, beforeSplit) + + // expand glob expressions, but not on Windows because Glob() doesn't + // support escaping on Windows due to path separator) + var globResults []string + if runtime.GOOS == "windows" { + globResults = []string{fullPattern} // precious Windows + } else { + globResults, err = fs.Glob(m.fileSystem, fullPattern) + if err != nil { + m.logger.Error("expanding glob", zap.Error(err)) + } } - fullpath = caddyhttp.SanitizedPathJoin(root, suffix) - return + + // for each glob result, combine all the forms of the path + var candidates []matchCandidate + for _, result := range globResults { + candidates = append(candidates, matchCandidate{ + fullpath: result, + relative: strings.TrimPrefix(result, root), + splitRemainder: afterSplit, + }) + } + + return candidates } - // sets up the placeholders for the matched file - setPlaceholders := func(info os.FileInfo, rel string, abs string, remainder string) { - repl.Set("http.matchers.file.relative", rel) - repl.Set("http.matchers.file.absolute", abs) - repl.Set("http.matchers.file.remainder", remainder) + // setPlaceholders creates the placeholders for the matched file + setPlaceholders := func(candidate matchCandidate, info fs.FileInfo) { + repl.Set("http.matchers.file.relative", filepath.ToSlash(candidate.relative)) + repl.Set("http.matchers.file.absolute", filepath.ToSlash(candidate.fullpath)) + repl.Set("http.matchers.file.remainder", filepath.ToSlash(candidate.splitRemainder)) fileType := "file" if info.IsDir() { @@ -328,76 +394,83 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) { repl.Set("http.matchers.file.type", fileType) } + // match file according to the configured policy switch m.TryPolicy { case "", tryPolicyFirstExist: - for _, f := range m.TryFiles { - if err := parseErrorCode(f); err != nil { + for _, pattern := range m.TryFiles { + if err := parseErrorCode(pattern); err != nil { caddyhttp.SetVar(r.Context(), caddyhttp.MatcherErrorVarKey, err) return } - suffix, fullpath, remainder := prepareFilePath(f) - if info, exists := m.strictFileExists(fullpath); exists { - setPlaceholders(info, suffix, fullpath, remainder) - return true + candidates := makeCandidates(pattern) + for _, c := range candidates { + if info, exists := m.strictFileExists(c.fullpath); exists { + setPlaceholders(c, info) + return true + } } } case tryPolicyLargestSize: var largestSize int64 - var largestFilename string - var largestSuffix string - var remainder string - var info os.FileInfo - for _, f := range m.TryFiles { - suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := m.fileSystem.Stat(fullpath) - if err == nil && info.Size() > largestSize { - largestSize = info.Size() - largestFilename = fullpath - largestSuffix = suffix - remainder = splitRemainder + var largest matchCandidate + var largestInfo os.FileInfo + for _, pattern := range m.TryFiles { + candidates := makeCandidates(pattern) + for _, c := range candidates { + info, err := m.fileSystem.Stat(c.fullpath) + if err == nil && info.Size() > largestSize { + largestSize = info.Size() + largest = c + largestInfo = info + } } } - setPlaceholders(info, largestSuffix, largestFilename, remainder) + if largestInfo == nil { + return false + } + setPlaceholders(largest, largestInfo) return true case tryPolicySmallestSize: var smallestSize int64 - var smallestFilename string - var smallestSuffix string - var remainder string - var info os.FileInfo - for _, f := range m.TryFiles { - suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := m.fileSystem.Stat(fullpath) - if err == nil && (smallestSize == 0 || info.Size() < smallestSize) { - smallestSize = info.Size() - smallestFilename = fullpath - smallestSuffix = suffix - remainder = splitRemainder + var smallest matchCandidate + var smallestInfo os.FileInfo + for _, pattern := range m.TryFiles { + candidates := makeCandidates(pattern) + for _, c := range candidates { + info, err := m.fileSystem.Stat(c.fullpath) + if err == nil && (smallestSize == 0 || info.Size() < smallestSize) { + smallestSize = info.Size() + smallest = c + smallestInfo = info + } } } - setPlaceholders(info, smallestSuffix, smallestFilename, remainder) + if smallestInfo == nil { + return false + } + setPlaceholders(smallest, smallestInfo) return true case tryPolicyMostRecentlyMod: - var recentDate time.Time - var recentFilename string - var recentSuffix string - var remainder string - var info os.FileInfo - for _, f := range m.TryFiles { - suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := m.fileSystem.Stat(fullpath) - if err == nil && - (recentDate.IsZero() || info.ModTime().After(recentDate)) { - recentDate = info.ModTime() - recentFilename = fullpath - recentSuffix = suffix - remainder = splitRemainder + var recent matchCandidate + var recentInfo os.FileInfo + for _, pattern := range m.TryFiles { + candidates := makeCandidates(pattern) + for _, c := range candidates { + info, err := m.fileSystem.Stat(c.fullpath) + if err == nil && + (recentInfo == nil || info.ModTime().After(recentInfo.ModTime())) { + recent = c + recentInfo = info + } } } - setPlaceholders(info, recentSuffix, recentFilename, remainder) + if recentInfo == nil { + return false + } + setPlaceholders(recent, recentInfo) return true } @@ -425,7 +498,7 @@ func parseErrorCode(input string) error { // NOT end in a forward slash, the file must NOT // be a directory. func (m MatchFile) strictFileExists(file string) (os.FileInfo, bool) { - stat, err := m.fileSystem.Stat(file) + info, err := m.fileSystem.Stat(file) if err != nil { // in reality, this can be any error // such as permission or even obscure @@ -440,11 +513,11 @@ func (m MatchFile) strictFileExists(file string) (os.FileInfo, bool) { if strings.HasSuffix(file, separator) { // by convention, file paths ending // in a path separator must be a directory - return stat, stat.IsDir() + return info, info.IsDir() } // by convention, file paths NOT ending // in a path separator must NOT be a directory - return stat, !stat.IsDir() + return info, !info.IsDir() } // firstSplit returns the first result where the path @@ -581,6 +654,15 @@ func isCELStringListLiteral(e *exprpb.Expr) bool { return false } +// globSafeRepl replaces special glob characters with escaped +// equivalents. Note that the filepath godoc states that +// escaping is not done on Windows because of the separator. +var globSafeRepl = strings.NewReplacer( + "*", "\\*", + "[", "\\[", + "?", "\\?", +) + const ( tryPolicyFirstExist = "first_exist" tryPolicyLargestSize = "largest_size" diff --git a/modules/caddyhttp/fileserver/matcher_test.go b/modules/caddyhttp/fileserver/matcher_test.go index 7734bf1..0f8c6bb 100644 --- a/modules/caddyhttp/fileserver/matcher_test.go +++ b/modules/caddyhttp/fileserver/matcher_test.go @@ -28,7 +28,6 @@ import ( ) func TestFileMatcher(t *testing.T) { - // Windows doesn't like colons in files names isWindows := runtime.GOOS == "windows" if !isWindows { @@ -87,25 +86,25 @@ func TestFileMatcher(t *testing.T) { }, { path: "ملف.txt", // the path file name is not escaped - expectedPath: "ملف.txt", + expectedPath: "/ملف.txt", expectedType: "file", matched: true, }, { path: url.PathEscape("ملف.txt"), // singly-escaped path - expectedPath: "ملف.txt", + expectedPath: "/ملف.txt", expectedType: "file", matched: true, }, { path: url.PathEscape(url.PathEscape("ملف.txt")), // doubly-escaped path - expectedPath: "%D9%85%D9%84%D9%81.txt", + expectedPath: "/%D9%85%D9%84%D9%81.txt", expectedType: "file", matched: true, }, { path: "./with:in-name.txt", // browsers send the request with the path as such - expectedPath: "with:in-name.txt", + expectedPath: "/with:in-name.txt", expectedType: "file", matched: !isWindows, }, @@ -118,7 +117,7 @@ func TestFileMatcher(t *testing.T) { u, err := url.Parse(tc.path) if err != nil { - t.Fatalf("Test %d: parsing path: %v", i, err) + t.Errorf("Test %d: parsing path: %v", i, err) } req := &http.Request{URL: u} @@ -126,24 +125,24 @@ func TestFileMatcher(t *testing.T) { result := m.Match(req) if result != tc.matched { - t.Fatalf("Test %d: expected match=%t, got %t", i, tc.matched, result) + t.Errorf("Test %d: expected match=%t, got %t", i, tc.matched, result) } rel, ok := repl.Get("http.matchers.file.relative") if !ok && result { - t.Fatalf("Test %d: expected replacer value", i) + t.Errorf("Test %d: expected replacer value", i) } if !result { continue } if rel != tc.expectedPath { - t.Fatalf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath) + t.Errorf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath) } fileType, _ := repl.Get("http.matchers.file.type") if fileType != tc.expectedType { - t.Fatalf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType) + t.Errorf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType) } } } @@ -222,7 +221,7 @@ func TestPHPFileMatcher(t *testing.T) { u, err := url.Parse(tc.path) if err != nil { - t.Fatalf("Test %d: parsing path: %v", i, err) + t.Errorf("Test %d: parsing path: %v", i, err) } req := &http.Request{URL: u} @@ -230,24 +229,24 @@ func TestPHPFileMatcher(t *testing.T) { result := m.Match(req) if result != tc.matched { - t.Fatalf("Test %d: expected match=%t, got %t", i, tc.matched, result) + t.Errorf("Test %d: expected match=%t, got %t", i, tc.matched, result) } rel, ok := repl.Get("http.matchers.file.relative") if !ok && result { - t.Fatalf("Test %d: expected replacer value", i) + t.Errorf("Test %d: expected replacer value", i) } if !result { continue } if rel != tc.expectedPath { - t.Fatalf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath) + t.Errorf("Test %d: actual path: %v, expected: %v", i, rel, tc.expectedPath) } fileType, _ := repl.Get("http.matchers.file.type") if fileType != tc.expectedType { - t.Fatalf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType) + t.Errorf("Test %d: actual file type: %v, expected: %v", i, fileType, tc.expectedType) } } } diff --git a/modules/caddyhttp/fileserver/staticfiles.go b/modules/caddyhttp/fileserver/staticfiles.go index 554f8d3..25bcf5a 100644 --- a/modules/caddyhttp/fileserver/staticfiles.go +++ b/modules/caddyhttp/fileserver/staticfiles.go @@ -618,10 +618,15 @@ func (wr statusOverrideResponseWriter) WriteHeader(int) { // rooting or path prefixing without being constrained to a single // root folder. The standard os.DirFS implementation is problematic // since roots can be dynamic in our application.) +// +// osFS also implements fs.GlobFS, fs.ReadDirFS, and fs.ReadFileFS. type osFS struct{} -func (osFS) Open(name string) (fs.File, error) { return os.Open(name) } -func (osFS) Stat(name string) (fs.FileInfo, error) { return os.Stat(name) } +func (osFS) Open(name string) (fs.File, error) { return os.Open(name) } +func (osFS) Stat(name string) (fs.FileInfo, error) { return os.Stat(name) } +func (osFS) Glob(pattern string) ([]string, error) { return filepath.Glob(pattern) } +func (osFS) ReadDir(name string) ([]fs.DirEntry, error) { return os.ReadDir(name) } +func (osFS) ReadFile(name string) ([]byte, error) { return os.ReadFile(name) } var defaultIndexNames = []string{"index.html", "index.txt"} @@ -634,4 +639,8 @@ const ( var ( _ caddy.Provisioner = (*FileServer)(nil) _ caddyhttp.MiddlewareHandler = (*FileServer)(nil) + + _ fs.GlobFS = (*osFS)(nil) + _ fs.ReadDirFS = (*osFS)(nil) + _ fs.ReadFileFS = (*osFS)(nil) ) diff --git a/modules/caddyhttp/fileserver/testdata/foodir/bar.txt b/modules/caddyhttp/fileserver/testdata/foodir/bar.txt new file mode 100644 index 0000000..df34bd2 --- /dev/null +++ b/modules/caddyhttp/fileserver/testdata/foodir/bar.txt @@ -0,0 +1 @@ +foodir/bar.txt \ No newline at end of file -- cgit v1.2.3