summaryrefslogtreecommitdiff
path: root/modules/caddyhttp/rewrite
diff options
context:
space:
mode:
authorMatt Holt <mholt@users.noreply.github.com>2022-08-16 08:48:57 -0600
committerGitHub <noreply@github.com>2022-08-16 08:48:57 -0600
commita479943acd70068c4b80d3a8f4b8dd7ab93ca2ba (patch)
treef50a45b3b0c8c8475b783583967c1175f5e6673c /modules/caddyhttp/rewrite
parentdc62d468e9645f52a5e1b4f6093dff65137ab3fe (diff)
caddyhttp: Smarter path matching and rewriting (#4948)
Co-authored-by: RussellLuo <luopeng.he@gmail.com>
Diffstat (limited to 'modules/caddyhttp/rewrite')
-rw-r--r--modules/caddyhttp/rewrite/rewrite.go110
-rw-r--r--modules/caddyhttp/rewrite/rewrite_test.go41
2 files changed, 133 insertions, 18 deletions
diff --git a/modules/caddyhttp/rewrite/rewrite.go b/modules/caddyhttp/rewrite/rewrite.go
index 7da2327..4316b5a 100644
--- a/modules/caddyhttp/rewrite/rewrite.go
+++ b/modules/caddyhttp/rewrite/rewrite.go
@@ -31,13 +31,25 @@ func init() {
caddy.RegisterModule(Rewrite{})
}
-// Rewrite is a middleware which can rewrite HTTP requests.
+// Rewrite is a middleware which can rewrite/mutate HTTP requests.
//
-// The Method and URI properties are "setters": the request URI
-// will be set to the given values. Other properties are "modifiers":
-// they modify existing files but do not explicitly specify what the
-// result will be. It is atypical to combine the use of setters and
+// The Method and URI properties are "setters" (the request URI
+// will be overwritten with the given values). Other properties are
+// "modifiers" (they modify existing values in a differentiable
+// way). It is atypical to combine the use of setters and
// modifiers in a single rewrite.
+//
+// To ensure consistent behavior, prefix and suffix stripping is
+// performed in the URL-decoded (unescaped, normalized) space by
+// default except for the specific bytes where an escape sequence
+// is used in the prefix or suffix pattern.
+//
+// For all modifiers, paths are cleaned before being modified so that
+// multiple, consecutive slashes are collapsed into a single slash,
+// and dot elements are resolved and removed. In the special case
+// of a prefix, suffix, or substring containing "//" (repeated slashes),
+// slashes will not be merged while cleaning the path so that
+// the rewrite can be interpreted literally.
type Rewrite struct {
// Changes the request's HTTP verb.
Method string `json:"method,omitempty"`
@@ -59,9 +71,15 @@ type Rewrite struct {
URI string `json:"uri,omitempty"`
// Strips the given prefix from the beginning of the URI path.
+ // The prefix should be written in normalized (unescaped) form,
+ // but if an escaping (`%xx`) is used, the path will be required
+ // to have that same escape at that position in order to match.
StripPathPrefix string `json:"strip_path_prefix,omitempty"`
// Strips the given suffix from the end of the URI path.
+ // The suffix should be written in normalized (unescaped) form,
+ // but if an escaping (`%xx`) is used, the path will be required
+ // to have that same escape at that position in order to match.
StripPathSuffix string `json:"strip_path_suffix,omitempty"`
// Performs substring replacements on the URI.
@@ -227,17 +245,18 @@ func (rewr Rewrite) Rewrite(r *http.Request, repl *caddy.Replacer) bool {
// strip path prefix or suffix
if rewr.StripPathPrefix != "" {
prefix := repl.ReplaceAll(rewr.StripPathPrefix, "")
- r.URL.RawPath = strings.TrimPrefix(r.URL.RawPath, prefix)
- if p, err := url.PathUnescape(r.URL.RawPath); err == nil && p != "" {
- r.URL.Path = p
- } else {
- r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
- }
+ mergeSlashes := !strings.Contains(prefix, "//")
+ changePath(r, func(escapedPath string) string {
+ escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
+ return trimPathPrefix(escapedPath, prefix)
+ })
}
if rewr.StripPathSuffix != "" {
suffix := repl.ReplaceAll(rewr.StripPathSuffix, "")
- changePath(r, func(pathOrRawPath string) string {
- return strings.TrimSuffix(pathOrRawPath, suffix)
+ mergeSlashes := !strings.Contains(suffix, "//")
+ changePath(r, func(escapedPath string) string {
+ escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
+ return reverse(trimPathPrefix(reverse(escapedPath), reverse(suffix)))
})
}
@@ -324,6 +343,58 @@ func buildQueryString(qs string, repl *caddy.Replacer) string {
return sb.String()
}
+// trimPathPrefix is like strings.TrimPrefix, but customized for advanced URI
+// path prefix matching. The string prefix will be trimmed from the beginning
+// of escapedPath if escapedPath starts with prefix. Rather than a naive 1:1
+// comparison of each byte to determine if escapedPath starts with prefix,
+// both strings are iterated in lock-step, and if prefix has a '%' encoding
+// at a particular position, escapedPath must also have the same encoding
+// representation for that character. In other words, if the prefix string
+// uses the escaped form for a character, escapedPath must literally use the
+// same escape at that position. Otherwise, all character comparisons are
+// performed in normalized/unescaped space.
+func trimPathPrefix(escapedPath, prefix string) string {
+ var iPath, iPrefix int
+ for {
+ if iPath >= len(escapedPath) || iPrefix >= len(prefix) {
+ break
+ }
+
+ prefixCh := prefix[iPrefix]
+ ch := string(escapedPath[iPath])
+
+ if ch == "%" && prefixCh != '%' && len(escapedPath) >= iPath+3 {
+ var err error
+ ch, err = url.PathUnescape(escapedPath[iPath : iPath+3])
+ if err != nil {
+ // should be impossible unless EscapedPath() is returning invalid values!
+ return escapedPath
+ }
+ iPath += 2
+ }
+
+ // prefix comparisons are case-insensitive to consistency with
+ // path matcher, which is case-insensitive for good reasons
+ if !strings.EqualFold(ch, string(prefixCh)) {
+ return escapedPath
+ }
+
+ iPath++
+ iPrefix++
+ }
+
+ // found matching prefix, trim it
+ return escapedPath[iPath:]
+}
+
+func reverse(s string) string {
+ r := []rune(s)
+ for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 {
+ r[i], r[j] = r[j], r[i]
+ }
+ return string(r)
+}
+
// substrReplacer describes either a simple and fast substring replacement.
type substrReplacer struct {
// A substring to find. Supports placeholders.
@@ -351,8 +422,10 @@ func (rep substrReplacer) do(r *http.Request, repl *caddy.Replacer) {
find := repl.ReplaceAll(rep.Find, "")
replace := repl.ReplaceAll(rep.Replace, "")
+ mergeSlashes := !strings.Contains(rep.Find, "//")
+
changePath(r, func(pathOrRawPath string) string {
- return strings.Replace(pathOrRawPath, find, replace, lim)
+ return strings.Replace(caddyhttp.CleanPath(pathOrRawPath, mergeSlashes), find, replace, lim)
})
r.URL.RawQuery = strings.Replace(r.URL.RawQuery, find, replace, lim)
@@ -380,16 +453,17 @@ func (rep regexReplacer) do(r *http.Request, repl *caddy.Replacer) {
})
}
-// changePath updates the path on the request URL. It first executes newVal on
-// req.URL.RawPath, and if the result is a valid escaping, it will be copied
-// into req.URL.Path; otherwise newVal is evaluated only on req.URL.Path.
func changePath(req *http.Request, newVal func(pathOrRawPath string) string) {
- req.URL.RawPath = newVal(req.URL.RawPath)
+ req.URL.RawPath = newVal(req.URL.EscapedPath())
if p, err := url.PathUnescape(req.URL.RawPath); err == nil && p != "" {
req.URL.Path = p
} else {
req.URL.Path = newVal(req.URL.Path)
}
+ // RawPath is only set if it's different from the normalized Path (std lib)
+ if req.URL.RawPath == req.URL.Path {
+ req.URL.RawPath = ""
+ }
}
// Interface guard
diff --git a/modules/caddyhttp/rewrite/rewrite_test.go b/modules/caddyhttp/rewrite/rewrite_test.go
index 84dce95..bc20c85 100644
--- a/modules/caddyhttp/rewrite/rewrite_test.go
+++ b/modules/caddyhttp/rewrite/rewrite_test.go
@@ -235,6 +235,42 @@ func TestRewrite(t *testing.T) {
input: newRequest(t, "GET", "/foo/prefix/bar"),
expect: newRequest(t, "GET", "/foo/prefix/bar"),
},
+ {
+ rule: Rewrite{StripPathPrefix: "//prefix"},
+ // scheme and host needed for URL parser to succeed in setting up test
+ input: newRequest(t, "GET", "http://host//prefix/foo/bar"),
+ expect: newRequest(t, "GET", "http://host/foo/bar"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "//prefix"},
+ input: newRequest(t, "GET", "/prefix/foo/bar"),
+ expect: newRequest(t, "GET", "/prefix/foo/bar"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "/a%2Fb/c"},
+ input: newRequest(t, "GET", "/a%2Fb/c/d"),
+ expect: newRequest(t, "GET", "/d"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "/a%2Fb/c"},
+ input: newRequest(t, "GET", "/a%2fb/c/d"),
+ expect: newRequest(t, "GET", "/d"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "/a/b/c"},
+ input: newRequest(t, "GET", "/a%2Fb/c/d"),
+ expect: newRequest(t, "GET", "/d"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "/a%2Fb/c"},
+ input: newRequest(t, "GET", "/a/b/c/d"),
+ expect: newRequest(t, "GET", "/a/b/c/d"),
+ },
+ {
+ rule: Rewrite{StripPathPrefix: "//a%2Fb/c"},
+ input: newRequest(t, "GET", "/a/b/c/d"),
+ expect: newRequest(t, "GET", "/a/b/c/d"),
+ },
{
rule: Rewrite{StripPathSuffix: "/suffix"},
@@ -252,6 +288,11 @@ func TestRewrite(t *testing.T) {
expect: newRequest(t, "GET", "/foo%2Fbar/"),
},
{
+ rule: Rewrite{StripPathSuffix: "%2fsuffix"},
+ input: newRequest(t, "GET", "/foo%2Fbar%2fsuffix"),
+ expect: newRequest(t, "GET", "/foo%2Fbar"),
+ },
+ {
rule: Rewrite{StripPathSuffix: "/suffix"},
input: newRequest(t, "GET", "/foo/suffix/bar"),
expect: newRequest(t, "GET", "/foo/suffix/bar"),