From ad8d01cb66316cf04ea49ec277316d6f83b9abb6 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Mon, 1 Mar 2021 18:27:59 -0700 Subject: rewrite: Implement regex path replacements https://caddy.community/t/collapsing-multiple-forward-slashes-in-path-only/11626 --- modules/caddyhttp/rewrite/caddyfile.go | 17 ++++-- modules/caddyhttp/rewrite/rewrite.go | 89 ++++++++++++++++++++++++------- modules/caddyhttp/rewrite/rewrite_test.go | 23 ++++++-- 3 files changed, 103 insertions(+), 26 deletions(-) (limited to 'modules/caddyhttp') diff --git a/modules/caddyhttp/rewrite/caddyfile.go b/modules/caddyhttp/rewrite/caddyfile.go index 950119d..9621af1 100644 --- a/modules/caddyhttp/rewrite/caddyfile.go +++ b/modules/caddyhttp/rewrite/caddyfile.go @@ -54,12 +54,14 @@ func parseCaddyfileRewrite(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, // parseCaddyfileURI sets up a handler for manipulating (but not "rewriting") the // URI from Caddyfile tokens. Syntax: // -// uri [<matcher>] strip_prefix|strip_suffix|replace <target> [<replacement> [<limit>]] +// uri [<matcher>] strip_prefix|strip_suffix|replace|path_regexp <target> [<replacement> [<limit>]] // // If strip_prefix or strip_suffix are used, then <target> will be stripped // only if it is the beginning or the end, respectively, of the URI path. If // replace is used, then <target> will be replaced with <replacement> across -// the whole URI, up to <limit> times (or unlimited if unspecified). +// the whole URI, up to <limit> times (or unlimited if unspecified). If +// path_regexp is used, then regular expression replacements will be performed +// on the path portion of the URI (and a limit cannot be set). 
func parseCaddyfileURI(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { var rewr Rewrite for h.Next() { @@ -103,11 +105,20 @@ func parseCaddyfileURI(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, err } } - rewr.URISubstring = append(rewr.URISubstring, replacer{ + rewr.URISubstring = append(rewr.URISubstring, substrReplacer{ Find: find, Replace: replace, Limit: limInt, }) + case "path_regexp": + if len(args) != 3 { + return nil, h.ArgErr() + } + find, replace := args[1], args[2] + rewr.PathRegexp = append(rewr.PathRegexp, &regexReplacer{ + Find: find, + Replace: replace, + }) default: return nil, h.Errf("unrecognized URI manipulation '%s'", args[0]) } diff --git a/modules/caddyhttp/rewrite/rewrite.go b/modules/caddyhttp/rewrite/rewrite.go index ad1c470..fbf655c 100644 --- a/modules/caddyhttp/rewrite/rewrite.go +++ b/modules/caddyhttp/rewrite/rewrite.go @@ -18,6 +18,7 @@ import ( "fmt" "net/http" "net/url" + "regexp" "strconv" "strings" @@ -64,7 +65,10 @@ type Rewrite struct { StripPathSuffix string `json:"strip_path_suffix,omitempty"` // Performs substring replacements on the URI. - URISubstring []replacer `json:"uri_substring,omitempty"` + URISubstring []substrReplacer `json:"uri_substring,omitempty"` + + // Performs regular expression replacements on the URI path. + PathRegexp []*regexReplacer `json:"path_regexp,omitempty"` logger *zap.Logger } @@ -80,6 +84,18 @@ func (Rewrite) CaddyModule() caddy.ModuleInfo { // Provision sets up rewr. 
func (rewr *Rewrite) Provision(ctx caddy.Context) error { rewr.logger = ctx.Logger(rewr) + + for i, rep := range rewr.PathRegexp { + if rep.Find == "" { + return fmt.Errorf("path_regexp find cannot be empty") + } + re, err := regexp.Compile(rep.Find) + if err != nil { + return fmt.Errorf("compiling regular expression %d: %v", i, err) + } + rep.re = re + } + return nil } @@ -200,12 +216,9 @@ func (rewr Rewrite) rewrite(r *http.Request, repl *caddy.Replacer, logger *zap.L } if rewr.StripPathSuffix != "" { suffix := repl.ReplaceAll(rewr.StripPathSuffix, "") - r.URL.RawPath = strings.TrimSuffix(r.URL.RawPath, suffix) - if p, err := url.PathUnescape(r.URL.RawPath); err == nil && p != "" { - r.URL.Path = p - } else { - r.URL.Path = strings.TrimSuffix(r.URL.Path, suffix) - } + changePath(r, func(pathOrRawPath string) string { + return strings.TrimSuffix(pathOrRawPath, suffix) + }) } // substring replacements in URI @@ -213,6 +226,11 @@ func (rewr Rewrite) rewrite(r *http.Request, repl *caddy.Replacer, logger *zap.L rep.do(r, repl) } + // regular expression replacements on the path + for _, rep := range rewr.PathRegexp { + rep.do(r, repl) + } + // update the encoded copy of the URI r.RequestURI = r.URL.RequestURI() @@ -286,12 +304,12 @@ func buildQueryString(qs string, repl *caddy.Replacer) string { return sb.String() } -// replacer describes a simple and fast substring replacement. -type replacer struct { - // The substring to find. Supports placeholders. +// substrReplacer describes either a simple and fast substring replacement. +type substrReplacer struct { + // A substring to find. Supports placeholders. Find string `json:"find,omitempty"` - // The substring to replace. Supports placeholders. + // The substring to replace with. Supports placeholders. Replace string `json:"replace,omitempty"` // Maximum number of replacements per string. @@ -299,9 +317,9 @@ type replacer struct { Limit int `json:"limit,omitempty"` } -// do performs the replacement on r. 
-func (rep replacer) do(r *http.Request, repl *caddy.Replacer) { - if rep.Find == "" || rep.Replace == "" { +// do performs the substring replacement on r. +func (rep substrReplacer) do(r *http.Request, repl *caddy.Replacer) { + if rep.Find == "" { return } @@ -313,15 +331,46 @@ func (rep replacer) do(r *http.Request, repl *caddy.Replacer) { find := repl.ReplaceAll(rep.Find, "") replace := repl.ReplaceAll(rep.Replace, "") - r.URL.RawPath = strings.Replace(r.URL.RawPath, find, replace, lim) - if p, err := url.PathUnescape(r.URL.RawPath); err == nil && p != "" { - r.URL.Path = p - } else { - r.URL.Path = strings.Replace(r.URL.Path, find, replace, lim) - } + changePath(r, func(pathOrRawPath string) string { + return strings.Replace(pathOrRawPath, find, replace, lim) + }) r.URL.RawQuery = strings.Replace(r.URL.RawQuery, find, replace, lim) } +// regexReplacer describes a replacement using a regular expression. +type regexReplacer struct { + // The regular expression to find. + Find string `json:"find,omitempty"` + + // The substring to replace with. Supports placeholders and + // regular expression capture groups. + Replace string `json:"replace,omitempty"` + + re *regexp.Regexp +} + +func (rep regexReplacer) do(r *http.Request, repl *caddy.Replacer) { + if rep.Find == "" || rep.re == nil { + return + } + replace := repl.ReplaceAll(rep.Replace, "") + changePath(r, func(pathOrRawPath string) string { + return rep.re.ReplaceAllString(pathOrRawPath, replace) + }) +} + +// changePath updates the path on the request URL. It first executes newVal on +// req.URL.RawPath, and if the result is a valid escaping, it will be copied +// into req.URL.Path; otherwise newVal is evaluated only on req.URL.Path. 
+func changePath(req *http.Request, newVal func(pathOrRawPath string) string) { + req.URL.RawPath = newVal(req.URL.RawPath) + if p, err := url.PathUnescape(req.URL.RawPath); err == nil && p != "" { + req.URL.Path = p + } else { + req.URL.Path = newVal(req.URL.Path) + } +} + // Interface guard var _ caddyhttp.MiddlewareHandler = (*Rewrite)(nil) diff --git a/modules/caddyhttp/rewrite/rewrite_test.go b/modules/caddyhttp/rewrite/rewrite_test.go index 9329a04..4d595e2 100644 --- a/modules/caddyhttp/rewrite/rewrite_test.go +++ b/modules/caddyhttp/rewrite/rewrite_test.go @@ -16,6 +16,7 @@ package rewrite import ( "net/http" + "regexp" "testing" "github.com/caddyserver/caddy/v2" @@ -232,20 +233,26 @@ func TestRewrite(t *testing.T) { }, { - rule: Rewrite{URISubstring: []replacer{{Find: "findme", Replace: "replaced"}}}, + rule: Rewrite{URISubstring: []substrReplacer{{Find: "findme", Replace: "replaced"}}}, input: newRequest(t, "GET", "/foo/bar"), expect: newRequest(t, "GET", "/foo/bar"), }, { - rule: Rewrite{URISubstring: []replacer{{Find: "findme", Replace: "replaced"}}}, + rule: Rewrite{URISubstring: []substrReplacer{{Find: "findme", Replace: "replaced"}}}, input: newRequest(t, "GET", "/foo/findme/bar"), expect: newRequest(t, "GET", "/foo/replaced/bar"), }, { - rule: Rewrite{URISubstring: []replacer{{Find: "findme", Replace: "replaced"}}}, + rule: Rewrite{URISubstring: []substrReplacer{{Find: "findme", Replace: "replaced"}}}, input: newRequest(t, "GET", "/foo/findme%2Fbar"), expect: newRequest(t, "GET", "/foo/replaced%2Fbar"), }, + + { + rule: Rewrite{PathRegexp: []*regexReplacer{{Find: "/{2,}", Replace: "/"}}}, + input: newRequest(t, "GET", "/foo//bar///baz?a=b//c"), + expect: newRequest(t, "GET", "/foo/bar/baz?a=b//c"), + }, } { // copy the original input just enough so that we can // compare it after the rewrite to see if it changed @@ -260,6 +267,16 @@ func TestRewrite(t *testing.T) { repl.Set("http.request.uri.path", tc.input.URL.Path) 
repl.Set("http.request.uri.query", tc.input.URL.RawQuery) + // we can't directly call Provision() without a valid caddy.Context + // (TODO: fix that) so here we ad-hoc compile the regex + for _, rep := range tc.rule.PathRegexp { + re, err := regexp.Compile(rep.Find) + if err != nil { + t.Fatal(err) + } + rep.re = re + } + changed := tc.rule.rewrite(tc.input, repl, nil) if expected, actual := !reqEqual(originalInput, tc.input), changed; expected != actual { -- cgit v1.2.3