1 files changed, 92 insertions, 18 deletions
diff --git a/modules/caddyhttp/rewrite/rewrite.go b/modules/caddyhttp/rewrite/rewrite.go
index 7da2327..4316b5a 100644
--- a/modules/caddyhttp/rewrite/rewrite.go
+++ b/modules/caddyhttp/rewrite/rewrite.go
@@ -31,13 +31,25 @@ func init() {
 	caddy.RegisterModule(Rewrite{})
 }
 
-// Rewrite is a middleware which can rewrite HTTP requests.
+// Rewrite is a middleware which can rewrite/mutate HTTP requests.
 //
-// The Method and URI properties are "setters": the request URI
-// will be set to the given values. Other properties are "modifiers":
-// they modify existing files but do not explicitly specify what the
-// result will be. It is atypical to combine the use of setters and
+// The Method and URI properties are "setters" (the request URI
+// will be overwritten with the given values). Other properties are
+// "modifiers" (they modify existing values in a differentiable
+// way). It is atypical to combine the use of setters and
 // modifiers in a single rewrite.
+//
+// To ensure consistent behavior, prefix and suffix stripping is
+// performed in the URL-decoded (unescaped, normalized) space by
+// default except for the specific bytes where an escape sequence
+// is used in the prefix or suffix pattern.
+//
+// For all modifiers, paths are cleaned before being modified so that
+// multiple, consecutive slashes are collapsed into a single slash,
+// and dot elements are resolved and removed. In the special case
+// of a prefix, suffix, or substring containing "//" (repeated slashes),
+// slashes will not be merged while cleaning the path so that
+// the rewrite can be interpreted literally.
 type Rewrite struct {
 	// Changes the request's HTTP verb.
 	Method string `json:"method,omitempty"`
@@ -59,9 +71,15 @@ type Rewrite struct {
 	URI string `json:"uri,omitempty"`
 
 	// Strips the given prefix from the beginning of the URI path.
+	// The prefix should be written in normalized (unescaped) form,
+	// but if an escaping (`%xx`) is used, the path will be required
+	// to have that same escape at that position in order to match.
 	StripPathPrefix string `json:"strip_path_prefix,omitempty"`
 
 	// Strips the given suffix from the end of the URI path.
+	// The suffix should be written in normalized (unescaped) form,
+	// but if an escaping (`%xx`) is used, the path will be required
+	// to have that same escape at that position in order to match.
 	StripPathSuffix string `json:"strip_path_suffix,omitempty"`
 
 	// Performs substring replacements on the URI.
@@ -227,17 +245,18 @@ func (rewr Rewrite) Rewrite(r *http.Request, repl *caddy.Replacer) bool {
 	// strip path prefix or suffix
 	if rewr.StripPathPrefix != "" {
 		prefix := repl.ReplaceAll(rewr.StripPathPrefix, "")
-		r.URL.RawPath = strings.TrimPrefix(r.URL.RawPath, prefix)
-		if p, err := url.PathUnescape(r.URL.RawPath); err == nil && p != "" {
-			r.URL.Path = p
-		} else {
-			r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
-		}
+		mergeSlashes := !strings.Contains(prefix, "//")
+		changePath(r, func(escapedPath string) string {
+			escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
+			return trimPathPrefix(escapedPath, prefix)
+		})
 	}
 	if rewr.StripPathSuffix != "" {
 		suffix := repl.ReplaceAll(rewr.StripPathSuffix, "")
-		changePath(r, func(pathOrRawPath string) string {
-			return strings.TrimSuffix(pathOrRawPath, suffix)
+		mergeSlashes := !strings.Contains(suffix, "//")
+		changePath(r, func(escapedPath string) string {
+			escapedPath = caddyhttp.CleanPath(escapedPath, mergeSlashes)
+			return reverse(trimPathPrefix(reverse(escapedPath), reverse(suffix)))
 		})
 	}
 
@@ -324,6 +343,58 @@ func buildQueryString(qs string, repl *caddy.Replacer) string {
 	return sb.String()
 }
 
+// trimPathPrefix is like strings.TrimPrefix, but customized for advanced URI
+// path prefix matching. The string prefix will be trimmed from the beginning
+// of escapedPath if escapedPath starts with prefix. Rather than a naive 1:1
+// comparison of each byte to determine if escapedPath starts with prefix,
+// both strings are iterated in lock-step, and if prefix has a '%' encoding
+// at a particular position, escapedPath must also have the same encoding
+// representation for that character. In other words, if the prefix string
+// uses the escaped form for a character, escapedPath must literally use the
+// same escape at that position. Otherwise, all character comparisons are
+// performed in normalized/unescaped space.
+func trimPathPrefix(escapedPath, prefix string) string {
+	var iPath, iPrefix int
+	for {
+		if iPath >= len(escapedPath) || iPrefix >= len(prefix) {
+			break
+		}
+
+		prefixCh := prefix[iPrefix]
+		ch := string(escapedPath[iPath])
+
+		if ch == "%" && prefixCh != '%' && len(escapedPath) >= iPath+3 {
+			var err error
+			ch, err = url.PathUnescape(escapedPath[iPath : iPath+3])
+			if err != nil {
+				// should be impossible unless EscapedPath() is returning invalid values!
+				return escapedPath
+			}
+			iPath += 2
+		}
+
+		// prefix comparisons are case-insensitive to consistency with
+		// path matcher, which is case-insensitive for good reasons
+		if !strings.EqualFold(ch, string(prefixCh)) {
+			return escapedPath
+		}
+
+		iPath++
+		iPrefix++
+	}
+
+	// found matching prefix, trim it
+	return escapedPath[iPath:]
+}
+
+func reverse(s string) string {
+	r := []rune(s)
+	for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 {
+		r[i], r[j] = r[j], r[i]
+	}
+	return string(r)
+}
+
 // substrReplacer describes either a simple and fast substring replacement.
 type substrReplacer struct {
 	// A substring to find. Supports placeholders.
@@ -351,8 +422,10 @@ func (rep substrReplacer) do(r *http.Request, repl *caddy.Replacer) {
 	find := repl.ReplaceAll(rep.Find, "")
 	replace := repl.ReplaceAll(rep.Replace, "")
 
+	mergeSlashes := !strings.Contains(rep.Find, "//")
+
 	changePath(r, func(pathOrRawPath string) string {
-		return strings.Replace(pathOrRawPath, find, replace, lim)
+		return strings.Replace(caddyhttp.CleanPath(pathOrRawPath, mergeSlashes), find, replace, lim)
 	})
 
 	r.URL.RawQuery = strings.Replace(r.URL.RawQuery, find, replace, lim)
@@ -380,16 +453,17 @@ func (rep regexReplacer) do(r *http.Request, repl *caddy.Replacer) {
 	})
 }
 
-// changePath updates the path on the request URL. It first executes newVal on
-// req.URL.RawPath, and if the result is a valid escaping, it will be copied
-// into req.URL.Path; otherwise newVal is evaluated only on req.URL.Path.
 func changePath(req *http.Request, newVal func(pathOrRawPath string) string) {
-	req.URL.RawPath = newVal(req.URL.RawPath)
+	req.URL.RawPath = newVal(req.URL.EscapedPath())
 	if p, err := url.PathUnescape(req.URL.RawPath); err == nil && p != "" {
 		req.URL.Path = p
 	} else {
 		req.URL.Path = newVal(req.URL.Path)
 	}
+	// RawPath is only set if it's different from the normalized Path (std lib)
+	if req.URL.RawPath == req.URL.Path {
+		req.URL.RawPath = ""
+	}
 }
 
 // Interface guard