From 6668271661857b2cc43143ff65edf1071013e67e Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Sat, 30 Jul 2022 13:07:44 -0600 Subject: fileserver: Support virtual file systems (#4909) * fileserver: Support virtual file systems (close #3720) This change replaces the hard-coded use of os.Open() and os.Stat() with the use of the new (Go 1.16) io/fs APIs, enabling virtual file systems. It introduces a new module namespace, caddy.fs, for such file systems. Also improve documentation for the file server. I realized it was one of the first modules written for Caddy 2, and the docs hadn't really been updated since! * Virtualize FS for file matcher; minor tweaks * Fix tests and rename dirFS -> osFS (Since we do not use a root directory, it is dynamic.) --- modules/caddyhttp/fileserver/browse.go | 21 ++-- modules/caddyhttp/fileserver/browsetplcontext.go | 30 ++++-- modules/caddyhttp/fileserver/matcher.go | 34 +++++-- modules/caddyhttp/fileserver/matcher_test.go | 12 ++- modules/caddyhttp/fileserver/staticfiles.go | 124 ++++++++++++++++++----- 5 files changed, 166 insertions(+), 55 deletions(-) (limited to 'modules/caddyhttp/fileserver') diff --git a/modules/caddyhttp/fileserver/browse.go b/modules/caddyhttp/fileserver/browse.go index d59010d..bdddc23 100644 --- a/modules/caddyhttp/fileserver/browse.go +++ b/modules/caddyhttp/fileserver/browse.go @@ -19,6 +19,7 @@ import ( _ "embed" "encoding/json" "fmt" + "io/fs" "net/http" "os" "path" @@ -80,7 +81,7 @@ func (fsrv *FileServer) serveBrowse(root, dirPath string, w http.ResponseWriter, repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) // calling path.Clean here prevents weird breadcrumbs when URL paths are sketchy like /%2e%2e%2f - listing, err := fsrv.loadDirectoryContents(dir, root, path.Clean(r.URL.Path), repl) + listing, err := fsrv.loadDirectoryContents(dir.(fs.ReadDirFile), root, path.Clean(r.URL.Path), repl) switch { case os.IsPermission(err): return caddyhttp.Error(http.StatusForbidden, err) @@ -133,8 
+134,8 @@ func (fsrv *FileServer) serveBrowse(root, dirPath string, w http.ResponseWriter, return nil } -func (fsrv *FileServer) loadDirectoryContents(dir *os.File, root, urlPath string, repl *caddy.Replacer) (browseTemplateContext, error) { - files, err := dir.Readdir(-1) +func (fsrv *FileServer) loadDirectoryContents(dir fs.ReadDirFile, root, urlPath string, repl *caddy.Replacer) (browseTemplateContext, error) { + files, err := dir.ReadDir(10000) // TODO: this limit should probably be configurable if err != nil { return browseTemplateContext{}, err } @@ -201,25 +202,25 @@ func (fsrv *FileServer) makeBrowseTemplate(tplCtx *templateContext) (*template.T return tpl, nil } -// isSymlink return true if f is a symbolic link -func isSymlink(f os.FileInfo) bool { - return f.Mode()&os.ModeSymlink != 0 -} - // isSymlinkTargetDir returns true if f's symbolic link target // is a directory. -func isSymlinkTargetDir(f os.FileInfo, root, urlPath string) bool { +func (fsrv *FileServer) isSymlinkTargetDir(f fs.FileInfo, root, urlPath string) bool { if !isSymlink(f) { return false } target := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, f.Name())) - targetInfo, err := os.Stat(target) + targetInfo, err := fsrv.fileSystem.Stat(target) if err != nil { return false } return targetInfo.IsDir() } +// isSymlink return true if f is a symbolic link. +func isSymlink(f fs.FileInfo) bool { + return f.Mode()&os.ModeSymlink != 0 +} + // templateContext powers the context used when evaluating the browse template. // It combines browse-specific features with the standard templates handler // features. 
diff --git a/modules/caddyhttp/fileserver/browsetplcontext.go b/modules/caddyhttp/fileserver/browsetplcontext.go index 87156d4..49788ee 100644 --- a/modules/caddyhttp/fileserver/browsetplcontext.go +++ b/modules/caddyhttp/fileserver/browsetplcontext.go @@ -15,6 +15,7 @@ package fileserver import ( + "io/fs" "net/url" "os" "path" @@ -26,22 +27,31 @@ import ( "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/modules/caddyhttp" "github.com/dustin/go-humanize" + "go.uber.org/zap" ) -func (fsrv *FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root, urlPath string, repl *caddy.Replacer) browseTemplateContext { +func (fsrv *FileServer) directoryListing(entries []fs.DirEntry, canGoUp bool, root, urlPath string, repl *caddy.Replacer) browseTemplateContext { filesToHide := fsrv.transformHidePaths(repl) var dirCount, fileCount int fileInfos := []fileInfo{} - for _, f := range files { - name := f.Name() + for _, entry := range entries { + name := entry.Name() if fileHidden(name, filesToHide) { continue } - isDir := f.IsDir() || isSymlinkTargetDir(f, root, urlPath) + info, err := entry.Info() + if err != nil { + fsrv.logger.Error("could not get info about directory entry", + zap.String("name", entry.Name()), + zap.String("root", root)) + continue + } + + isDir := entry.IsDir() || fsrv.isSymlinkTargetDir(info, root, urlPath) // add the slash after the escape of path to avoid escaping the slash as well if isDir { @@ -51,11 +61,11 @@ func (fsrv *FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root fileCount++ } - size := f.Size() - fileIsSymlink := isSymlink(f) + size := info.Size() + fileIsSymlink := isSymlink(info) if fileIsSymlink { - path := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, f.Name())) - fileInfo, err := os.Stat(path) + path := caddyhttp.SanitizedPathJoin(root, path.Join(urlPath, info.Name())) + fileInfo, err := fsrv.fileSystem.Stat(path) if err == nil { size = fileInfo.Size() } @@ -73,8 +83,8 @@ func (fsrv 
*FileServer) directoryListing(files []os.FileInfo, canGoUp bool, root Name: name, Size: size, URL: u.String(), - ModTime: f.ModTime().UTC(), - Mode: f.Mode(), + ModTime: info.ModTime().UTC(), + Mode: info.Mode(), }) } name, _ := url.PathUnescape(urlPath) diff --git a/modules/caddyhttp/fileserver/matcher.go b/modules/caddyhttp/fileserver/matcher.go index 3ef3e47..5cade25 100644 --- a/modules/caddyhttp/fileserver/matcher.go +++ b/modules/caddyhttp/fileserver/matcher.go @@ -15,7 +15,9 @@ package fileserver import ( + "encoding/json" "fmt" + "io/fs" "net/http" "os" "path" @@ -54,6 +56,11 @@ func init() { // - `{http.matchers.file.remainder}` Set to the remainder // of the path if the path was split by `split_path`. type MatchFile struct { + // The file system implementation to use. By default, the + // local disk file system will be used. + FileSystemRaw json.RawMessage `json:"file_system,omitempty" caddy:"namespace=caddy.fs inline_key=backend"` + fileSystem fs.StatFS + // The root directory, used for creating absolute // file paths, and required when working with // relative paths; if not specified, `{http.vars.root}` @@ -241,10 +248,23 @@ func celFileMatcherMacroExpander() parser.MacroExpander { } // Provision sets up m's defaults. 
-func (m *MatchFile) Provision(_ caddy.Context) error { +func (m *MatchFile) Provision(ctx caddy.Context) error { + // establish the file system to use + if len(m.FileSystemRaw) > 0 { + mod, err := ctx.LoadModule(m, "FileSystemRaw") + if err != nil { + return fmt.Errorf("loading file system module: %v", err) + } + m.fileSystem = mod.(fs.StatFS) + } + if m.fileSystem == nil { + m.fileSystem = osFS{} + } + if m.Root == "" { m.Root = "{http.vars.root}" } + // if list of files to try was omitted entirely, assume URL path // (use placeholder instead of r.URL.Path; see issue #4146) if m.TryFiles == nil { @@ -316,7 +336,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) { return } suffix, fullpath, remainder := prepareFilePath(f) - if info, exists := strictFileExists(fullpath); exists { + if info, exists := m.strictFileExists(fullpath); exists { setPlaceholders(info, suffix, fullpath, remainder) return true } @@ -330,7 +350,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) { var info os.FileInfo for _, f := range m.TryFiles { suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := os.Stat(fullpath) + info, err := m.fileSystem.Stat(fullpath) if err == nil && info.Size() > largestSize { largestSize = info.Size() largestFilename = fullpath @@ -349,7 +369,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) { var info os.FileInfo for _, f := range m.TryFiles { suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := os.Stat(fullpath) + info, err := m.fileSystem.Stat(fullpath) if err == nil && (smallestSize == 0 || info.Size() < smallestSize) { smallestSize = info.Size() smallestFilename = fullpath @@ -368,7 +388,7 @@ func (m MatchFile) selectFile(r *http.Request) (matched bool) { var info os.FileInfo for _, f := range m.TryFiles { suffix, fullpath, splitRemainder := prepareFilePath(f) - info, err := os.Stat(fullpath) + info, err := m.fileSystem.Stat(fullpath) if err == nil && (recentDate.IsZero() || 
info.ModTime().After(recentDate)) { recentDate = info.ModTime() @@ -404,8 +424,8 @@ func parseErrorCode(input string) error { // the file must also be a directory; if it does // NOT end in a forward slash, the file must NOT // be a directory. -func strictFileExists(file string) (os.FileInfo, bool) { - stat, err := os.Stat(file) +func (m MatchFile) strictFileExists(file string) (os.FileInfo, bool) { + stat, err := m.fileSystem.Stat(file) if err != nil { // in reality, this can be any error // such as permission or even obscure diff --git a/modules/caddyhttp/fileserver/matcher_test.go b/modules/caddyhttp/fileserver/matcher_test.go index fd109e6..7734bf1 100644 --- a/modules/caddyhttp/fileserver/matcher_test.go +++ b/modules/caddyhttp/fileserver/matcher_test.go @@ -111,8 +111,9 @@ func TestFileMatcher(t *testing.T) { }, } { m := &MatchFile{ - Root: "./testdata", - TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/"}, + fileSystem: osFS{}, + Root: "./testdata", + TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/"}, } u, err := url.Parse(tc.path) @@ -213,9 +214,10 @@ func TestPHPFileMatcher(t *testing.T) { }, } { m := &MatchFile{ - Root: "./testdata", - TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/index.php"}, - SplitPath: []string{".php"}, + fileSystem: osFS{}, + Root: "./testdata", + TryFiles: []string{"{http.request.uri.path}", "{http.request.uri.path}/index.php"}, + SplitPath: []string{".php"}, } u, err := url.Parse(tc.path) diff --git a/modules/caddyhttp/fileserver/staticfiles.go b/modules/caddyhttp/fileserver/staticfiles.go index 9a2bc6e..6bdb5af 100644 --- a/modules/caddyhttp/fileserver/staticfiles.go +++ b/modules/caddyhttp/fileserver/staticfiles.go @@ -15,7 +15,11 @@ package fileserver import ( + "encoding/json" + "errors" "fmt" + "io" + "io/fs" weakrand "math/rand" "mime" "net/http" @@ -39,10 +43,63 @@ func init() { caddy.RegisterModule(FileServer{}) } -// FileServer implements a static file 
server responder for Caddy. +// FileServer implements a handler that serves static files. +// +// The path of the file to serve is constructed by joining the site root +// and the sanitized request path. Any and all files within the root and +// links with targets outside the site root may therefore be accessed. +// For example, with a site root of `/www`, requests to `/foo/bar.txt` +// will serve the file at `/www/foo/bar.txt`. +// +// The request path is sanitized using the Go standard library's +// path.Clean() function (https://pkg.go.dev/path#Clean) before being +// joined to the root. Request paths must be valid and well-formed. +// +// For requests that access directories instead of regular files, +// Caddy will attempt to serve an index file if present. For example, +// a request to `/dir/` will attempt to serve `/dir/index.html` if +// it exists. The index file names to try are configurable. If a +// requested directory does not have an index file, Caddy writes a +// 404 response. Alternatively, file browsing can be enabled with +// the "browse" parameter which shows a list of files when directories +// are requested if no index file is present. +// +// By default, this handler will canonicalize URIs so that requests to +// directories end with a slash, but requests to regular files do not. +// This is enforced with HTTP redirects automatically and can be disabled. +// Canonicalization redirects are not issued, however, if a URI rewrite +// modified the last component of the path (the filename). +// +// This handler sets the Etag and Last-Modified headers for static files. +// It does not perform MIME sniffing to determine Content-Type based on +// contents, but does use the extension (if known); see the Go docs for +// details: https://pkg.go.dev/mime#TypeByExtension +// +// The file server properly handles requests with If-Match, +// If-Unmodified-Since, If-Modified-Since, If-None-Match, Range, and +// If-Range headers. 
It includes the file's modification time in the +// Last-Modified header of the response. type FileServer struct { + // The file system implementation to use. By default, Caddy uses the local + // disk file system. + // + // File system modules used here must adhere to the following requirements: + // - Implement fs.StatFS interface. + // - Support seeking on opened files; i.e., returned fs.File values must + // implement the io.Seeker interface. This is required for determining + // Content-Length and satisfying Range requests. + // - fs.File values that represent directories must implement the + // fs.ReadDirFile interface so that directory listings can be procured. + FileSystemRaw json.RawMessage `json:"file_system,omitempty" caddy:"namespace=caddy.fs inline_key=backend"` + fileSystem fs.StatFS + // The path to the root of the site. Default is `{http.vars.root}` if set, - // or current working directory otherwise. + // or current working directory otherwise. This should be a trusted value. + // + // Note that a site root is not a sandbox. Although the file server does + // sanitize the request URI to prevent directory traversal, files (including + // links) within the site root may be directly accessed based on the request + // path. Files and folders within the root should be secure and trustworthy. Root string `json:"root,omitempty"` // A list of files or folders to hide; the file server will pretend as if @@ -63,6 +120,7 @@ type FileServer struct { Hide []string `json:"hide,omitempty"` // The names of files to try as index files if a folder is requested. + // Default: index.html, index.txt.
IndexNames []string `json:"index_names,omitempty"` // Enables file listings if a directory was requested and no index @@ -95,8 +153,7 @@ type FileServer struct { // If no order specified here, the first encoding from the Accept-Encoding header // that both client and server support is used PrecompressedOrder []string `json:"precompressed_order,omitempty"` - - precompressors map[string]encode.Precompressed + precompressors map[string]encode.Precompressed logger *zap.Logger } @@ -113,6 +170,18 @@ func (FileServer) CaddyModule() caddy.ModuleInfo { func (fsrv *FileServer) Provision(ctx caddy.Context) error { fsrv.logger = ctx.Logger(fsrv) + // establish which file system (possibly a virtual one) we'll be using + if len(fsrv.FileSystemRaw) > 0 { + mod, err := ctx.LoadModule(fsrv, "FileSystemRaw") + if err != nil { + return fmt.Errorf("loading file system module: %v", err) + } + fsrv.fileSystem = mod.(fs.StatFS) + } + if fsrv.fileSystem == nil { + fsrv.fileSystem = osFS{} + } + if fsrv.Root == "" { fsrv.Root = "{http.vars.root}" } @@ -131,6 +200,7 @@ func (fsrv *FileServer) Provision(ctx caddy.Context) error { } } + // support precompressed sidecar files mods, err := ctx.LoadModule(fsrv, "PrecompressedRaw") if err != nil { return fmt.Errorf("loading encoder modules: %v", err) @@ -184,12 +254,12 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c zap.String("result", filename)) // get information about the file - info, err := os.Stat(filename) + info, err := fsrv.fileSystem.Stat(filename) if err != nil { - err = mapDirOpenError(err, filename) - if os.IsNotExist(err) { + err = fsrv.mapDirOpenError(err, filename) + if errors.Is(err, fs.ErrNotExist) { return fsrv.notFound(w, r, next) - } else if os.IsPermission(err) { + } else if errors.Is(err, fs.ErrPermission) { return caddyhttp.Error(http.StatusForbidden, err) } return caddyhttp.Error(http.StatusInternalServerError, err) @@ -210,7 +280,7 @@ func (fsrv *FileServer) ServeHTTP(w 
http.ResponseWriter, r *http.Request, next c continue } - indexInfo, err := os.Stat(indexPath) + indexInfo, err := fsrv.fileSystem.Stat(indexPath) if err != nil { continue } @@ -280,7 +350,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c } } - var file *os.File + var file fs.File // check for precompressed files for _, ae := range encode.AcceptedEncodings(r, fsrv.PrecompressedOrder) { @@ -289,7 +359,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c continue } compressedFilename := filename + precompress.Suffix() - compressedInfo, err := os.Stat(compressedFilename) + compressedInfo, err := fsrv.fileSystem.Stat(compressedFilename) if err != nil || compressedInfo.IsDir() { fsrv.logger.Debug("precompressed file not accessible", zap.String("filename", compressedFilename), zap.Error(err)) continue @@ -328,14 +398,12 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c // set the ETag - note that a conditional If-None-Match request is handled // by http.ServeContent below, which checks against this ETag value - w.Header().Set("ETag", calculateEtag(info)) + w.Header().Set("Etag", calculateEtag(info)) if w.Header().Get("Content-Type") == "" { mtyp := mime.TypeByExtension(filepath.Ext(filename)) if mtyp == "" { - // do not allow Go to sniff the content-type; see - // https://www.youtube.com/watch?v=8t8JYpt0egE - // TODO: If we want a Content-Type, consider writing a default of application/octet-stream - this is secure but violates spec + // do not allow Go to sniff the content-type; see https://www.youtube.com/watch?v=8t8JYpt0egE w.Header()["Content-Type"] = nil } else { w.Header().Set("Content-Type", mtyp) @@ -375,7 +443,7 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c // that errors generated by ServeContent are written immediately // to the response, so we cannot handle them (but errors there // are rare) - http.ServeContent(w, r, 
info.Name(), info.ModTime(), file) + http.ServeContent(w, r, info.Name(), info.ModTime(), file.(io.ReadSeeker)) return nil } @@ -384,10 +452,10 @@ func (fsrv *FileServer) ServeHTTP(w http.ResponseWriter, r *http.Request, next c // the response is configured to inform the client how to best handle it // and a well-described handler error is returned (do not wrap the // returned error value). -func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (*os.File, error) { - file, err := os.Open(filename) +func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (fs.File, error) { + file, err := fsrv.fileSystem.Open(filename) if err != nil { - err = mapDirOpenError(err, filename) + err = fsrv.mapDirOpenError(err, filename) if os.IsNotExist(err) { fsrv.logger.Debug("file not found", zap.String("filename", filename), zap.Error(err)) return nil, caddyhttp.Error(http.StatusNotFound, err) @@ -412,8 +480,8 @@ func (fsrv *FileServer) openFile(filename string, w http.ResponseWriter) (*os.Fi // Adapted from the Go standard library; originally written by Nathaniel Caza. 
// https://go-review.googlesource.com/c/go/+/36635/ // https://go-review.googlesource.com/c/go/+/36804/ -func mapDirOpenError(originalErr error, name string) error { - if os.IsNotExist(originalErr) || os.IsPermission(originalErr) { +func (fsrv *FileServer) mapDirOpenError(originalErr error, name string) error { + if errors.Is(originalErr, fs.ErrNotExist) || errors.Is(originalErr, fs.ErrPermission) { return originalErr } @@ -422,12 +490,12 @@ func mapDirOpenError(originalErr error, name string) error { if parts[i] == "" { continue } - fi, err := os.Stat(strings.Join(parts[:i+1], separator)) + fi, err := fsrv.fileSystem.Stat(strings.Join(parts[:i+1], separator)) if err != nil { return originalErr } if !fi.IsDir() { - return os.ErrNotExist + return fs.ErrNotExist } } @@ -545,6 +613,16 @@ func (wr statusOverrideResponseWriter) WriteHeader(int) { wr.ResponseWriter.WriteHeader(wr.code) } +// osFS is a simple fs.StatFS implementation that uses the local +// file system. (We do not use os.DirFS because we do our own +// rooting or path prefixing without being constrained to a single +// root folder. The standard os.DirFS implementation is problematic +// since roots can be dynamic in our application.) +type osFS struct{} + +func (osFS) Open(name string) (fs.File, error) { return os.Open(name) } +func (osFS) Stat(name string) (fs.FileInfo, error) { return os.Stat(name) } + var defaultIndexNames = []string{"index.html", "index.txt"} const ( -- cgit v1.2.3