// Copyright 2015 Matthew Holt and The Caddy Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package caddyhttp import ( "encoding/json" "errors" "fmt" "net" "net/http" "net/netip" "net/textproto" "net/url" "path" "reflect" "regexp" "sort" "strconv" "strings" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types/ref" "go.uber.org/zap" ) type ( // MatchHost matches requests by the Host value (case-insensitive). // // When used in a top-level HTTP route, // [qualifying domain names](/docs/automatic-https#hostname-requirements) // may trigger [automatic HTTPS](/docs/automatic-https), which automatically // provisions and renews certificates for you. Before doing this, you // should ensure that DNS records for these domains are properly configured, // especially A/AAAA pointed at your server. // // Automatic HTTPS can be // [customized or disabled](/docs/modules/http#servers/automatic_https). // // Wildcards (`*`) may be used to represent exactly one label of the // hostname, in accordance with RFC 1034 (because host matchers are also // used for automatic HTTPS which influences TLS certificates). Thus, // a host of `*` matches hosts like `localhost` or `internal` but not // `example.com`. To catch all hosts, omit the host matcher entirely. // // The wildcard can be useful for matching all subdomains, for example: // `*.example.com` matches `foo.example.com` but not `foo.bar.example.com`. // // Duplicate entries will return an error. MatchHost []string // MatchPath case-insensitively matches requests by the URI's path. Path // matching is exact, not prefix-based, giving you more control and clarity // over matching. Wildcards (`*`) may be used: // // - At the end only, for a prefix match (`/prefix/*`) // - At the beginning only, for a suffix match (`*.suffix`) // - On both sides only, for a substring match (`*/contains/*`) // - In the middle, for a globular match (`/accounts/*/info`) // // Slashes are significant; i.e. `/foo*` matches `/foo`, `/foo/`, `/foo/bar`, // and `/foobar`; but `/foo/*` does not match `/foo` or `/foobar`. Valid // paths start with a slash `/`. // // Because there are, in general, multiple possible escaped forms of any // path, path matchers operate in unescaped space; that is, path matchers // should be written in their unescaped form to prevent ambiguities and // possible security issues, as all request paths will be normalized to // their unescaped forms before matcher evaluation. // // However, escape sequences in a match pattern are supported; they are // compared with the request's raw/escaped path for those bytes only. // In other words, a matcher of `/foo%2Fbar` will match a request path // of precisely `/foo%2Fbar`, but not `/foo/bar`. It follows that matching // the literal percent sign (%) in normalized space can be done using the // escaped form, `%25`. // // Even though wildcards (`*`) operate in the normalized space, the special // escaped wildcard (`%*`), which is not a valid escape sequence, may be // used in place of a span that should NOT be decoded; that is, `/bands/%*` // will match `/bands/AC%2fDC` whereas `/bands/*` will not. // // Even though path matching is done in normalized space, the special // wildcard `%*` may be used in place of a span that should NOT be decoded; // that is, `/bands/%*/` will match `/bands/AC%2fDC/` whereas `/bands/*/` // will not. // // This matcher is fast, so it does not support regular expressions or // capture groups. For slower but more powerful matching, use the // path_regexp matcher. (Note that due to the special treatment of // escape sequences in matcher patterns, they may perform slightly slower // in high-traffic environments.) MatchPath []string // MatchPathRE matches requests by a regular expression on the URI's path. // Path matching is performed in the unescaped (decoded) form of the path. // // Upon a match, it adds placeholders to the request: `{http.regexp.name.capture_group}` // where `name` is the regular expression's name, and `capture_group` is either // the named or positional capture group from the expression itself. If no name // is given, then the placeholder omits the name: `{http.regexp.capture_group}` // (potentially leading to collisions). MatchPathRE struct{ MatchRegexp } // MatchMethod matches requests by the method. MatchMethod []string // MatchQuery matches requests by the URI's query string. It takes a JSON object // keyed by the query keys, with an array of string values to match for that key. // Query key matches are exact, but wildcards may be used for value matches. Both // keys and values may be placeholders. // // An example of the structure to match `?key=value&topic=api&query=something` is: // // ```json // { // "key": ["value"], // "topic": ["api"], // "query": ["*"] // } // ``` // // Invalid query strings, including those with bad escapings or illegal characters // like semicolons, will fail to parse and thus fail to match. // // **NOTE:** Notice that query string values are arrays, not singular values. This is // because repeated keys are valid in query strings, and each one may have a // different value. This matcher will match for a key if any one of its configured // values is assigned in the query string. Backend applications relying on query // strings MUST take into consideration that query string values are arrays and can // have multiple values. MatchQuery url.Values // MatchHeader matches requests by header fields. The key is the field // name and the array is the list of field values. It performs fast, // exact string comparisons of the field values. Fast prefix, suffix, // and substring matches can also be done by suffixing, prefixing, or // surrounding the value with the wildcard `*` character, respectively. // If a list is null, the header must not exist. If the list is empty, // the field must simply exist, regardless of its value. // // **NOTE:** Notice that header values are arrays, not singular values. This is // because repeated fields are valid in headers, and each one may have a // different value. This matcher will match for a field if any one of its configured // values matches in the header. Backend applications relying on headers MUST take // into consideration that header field values are arrays and can have multiple // values. MatchHeader http.Header // MatchHeaderRE matches requests by a regular expression on header fields. // // Upon a match, it adds placeholders to the request: `{http.regexp.name.capture_group}` // where `name` is the regular expression's name, and `capture_group` is either // the named or positional capture group from the expression itself. If no name // is given, then the placeholder omits the name: `{http.regexp.capture_group}` // (potentially leading to collisions). MatchHeaderRE map[string]*MatchRegexp // MatchProtocol matches requests by protocol. Recognized values are // "http", "https", and "grpc" for broad protocol matches, or specific // HTTP versions can be specified like so: "http/1", "http/1.1", // "http/2", "http/3", or minimum versions: "http/2+", etc. MatchProtocol string // MatchRemoteIP matches requests by client IP (or CIDR range). MatchRemoteIP struct { // The IPs or CIDR ranges to match. Ranges []string `json:"ranges,omitempty"` // If true, prefer the first IP in the request's X-Forwarded-For // header, if present, rather than the immediate peer's IP, as // the reference IP against which to match. Note that it is easy // to spoof request headers. Default: false Forwarded bool `json:"forwarded,omitempty"` // cidrs and zones vars should aligned always in the same // length and indexes for matching later cidrs []*netip.Prefix zones []string logger *zap.Logger } // MatchNot matches requests by negating the results of its matcher // sets. A single "not" matcher takes one or more matcher sets. Each // matcher set is OR'ed; in other words, if any matcher set returns // true, the final result of the "not" matcher is false. Individual // matchers within a set work the same (i.e. different matchers in // the same set are AND'ed). // // NOTE: The generated docs which describe the structure of this // module are wrong because of how this type unmarshals JSON in a // custom way. The correct structure is: // // ```json // [ // {}, // {} // ] // ``` // // where each of the array elements is a matcher set, i.e. an // object keyed by matcher name. MatchNot struct { MatcherSetsRaw []caddy.ModuleMap `json:"-" caddy:"namespace=http.matchers"` MatcherSets []MatcherSet `json:"-"` } ) func init() { caddy.RegisterModule(MatchHost{}) caddy.RegisterModule(MatchPath{}) caddy.RegisterModule(MatchPathRE{}) caddy.RegisterModule(MatchMethod{}) caddy.RegisterModule(MatchQuery{}) caddy.RegisterModule(MatchHeader{}) caddy.RegisterModule(MatchHeaderRE{}) caddy.RegisterModule(new(MatchProtocol)) caddy.RegisterModule(MatchRemoteIP{}) caddy.RegisterModule(MatchNot{}) } // CaddyModule returns the Caddy module information. func (MatchHost) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.host", New: func() caddy.Module { return new(MatchHost) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchHost) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { *m = append(*m, d.RemainingArgs()...) if d.NextBlock(0) { return d.Err("malformed host matcher: blocks are not supported") } } return nil } // Provision sets up and validates m, including making it more efficient for large lists. func (m MatchHost) Provision(_ caddy.Context) error { // check for duplicates; they are nonsensical and reduce efficiency // (we could just remove them, but the user should know their config is erroneous) seen := make(map[string]int) for i, h := range m { h = strings.ToLower(h) if firstI, ok := seen[h]; ok { return fmt.Errorf("host at index %d is repeated at index %d: %s", firstI, i, h) } seen[h] = i } if m.large() { // sort the slice lexicographically, grouping "fuzzy" entries (wildcards and placeholders) // at the front of the list; this allows us to use binary search for exact matches, which // we have seen from experience is the most common kind of value in large lists; and any // other kinds of values (wildcards and placeholders) are grouped in front so the linear // search should find a match fairly quickly sort.Slice(m, func(i, j int) bool { iInexact, jInexact := m.fuzzy(m[i]), m.fuzzy(m[j]) if iInexact && !jInexact { return true } if !iInexact && jInexact { return false } return m[i] < m[j] }) } return nil } // Match returns true if r matches m. func (m MatchHost) Match(r *http.Request) bool { reqHost, _, err := net.SplitHostPort(r.Host) if err != nil { // OK; probably didn't have a port reqHost = r.Host // make sure we strip the brackets from IPv6 addresses reqHost = strings.TrimPrefix(reqHost, "[") reqHost = strings.TrimSuffix(reqHost, "]") } if m.large() { // fast path: locate exact match using binary search (about 100-1000x faster for large lists) pos := sort.Search(len(m), func(i int) bool { if m.fuzzy(m[i]) { return false } return m[i] >= reqHost }) if pos < len(m) && m[pos] == reqHost { return true } } repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) outer: for _, host := range m { // fast path: if matcher is large, we already know we don't have an exact // match, so we're only looking for fuzzy match now, which should be at the // front of the list; if we have reached a value that is not fuzzy, there // will be no match and we can short-circuit for efficiency if m.large() && !m.fuzzy(host) { break } host = repl.ReplaceAll(host, "") if strings.Contains(host, "*") { patternParts := strings.Split(host, ".") incomingParts := strings.Split(reqHost, ".") if len(patternParts) != len(incomingParts) { continue } for i := range patternParts { if patternParts[i] == "*" { continue } if !strings.EqualFold(patternParts[i], incomingParts[i]) { continue outer } } return true } else if strings.EqualFold(reqHost, host) { return true } } return false } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression host('localhost') func (MatchHost) CELLibrary(ctx caddy.Context) (cel.Library, error) { return CELMatcherImpl( "host", "host_match_request_list", []*cel.Type{cel.ListType(cel.StringType)}, func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) strList, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } matcher := MatchHost(strList.([]string)) err = matcher.Provision(ctx) return matcher, err }, ) } // fuzzy returns true if the given hostname h is not a specific // hostname, e.g. has placeholders or wildcards. func (MatchHost) fuzzy(h string) bool { return strings.ContainsAny(h, "{*") } // large returns true if m is considered to be large. Optimizing // the matcher for smaller lists has diminishing returns. // See related benchmark function in test file to conduct experiments. func (m MatchHost) large() bool { return len(m) > 100 } // CaddyModule returns the Caddy module information. func (MatchPath) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.path", New: func() caddy.Module { return new(MatchPath) }, } } // Provision lower-cases the paths in m to ensure case-insensitive matching. func (m MatchPath) Provision(_ caddy.Context) error { for i := range m { if m[i] == "*" && i > 0 { // will always match, so just put it first m[0] = m[i] break } m[i] = strings.ToLower(m[i]) } return nil } // Match returns true if r matches m. func (m MatchPath) Match(r *http.Request) bool { // Even though RFC 9110 says that path matching is case-sensitive // (https://www.rfc-editor.org/rfc/rfc9110.html#section-4.2.3), // we do case-insensitive matching to mitigate security issues // related to differences between operating systems, applications, // etc; if case-sensitive matching is needed, the regex matcher // can be used instead. reqPath := strings.ToLower(r.URL.Path) // See #2917; Windows ignores trailing dots and spaces // when accessing files (sigh), potentially causing a // security risk (cry) if PHP files end up being served // as static files, exposing the source code, instead of // being matched by *.php to be treated as PHP scripts. reqPath = strings.TrimRight(reqPath, ". ") repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) for _, matchPattern := range m { matchPattern = repl.ReplaceAll(matchPattern, "") // special case: whole path is wildcard; this is unnecessary // as it matches all requests, which is the same as no matcher if matchPattern == "*" { return true } // Clean the path, merge doubled slashes, etc. // This ensures maliciously crafted requests can't bypass // the path matcher. See #4407. Good security posture // requires that we should do all we can to reduce any // funny-looking paths into "normalized" forms such that // weird variants can't sneak by. // // How we clean the path depends on the kind of pattern: // we either merge slashes or we don't. If the pattern // has double slashes, we preserve them in the path. // // TODO: Despite the fact that the *vast* majority of path // matchers have only 1 pattern, a possible optimization is // to remember the cleaned form of the path for future // iterations; it's just that the way we clean depends on // the kind of pattern. mergeSlashes := !strings.Contains(matchPattern, "//") // if '%' appears in the match pattern, we interpret that to mean // the intent is to compare that part of the path in raw/escaped // space; i.e. "%40"=="%40", not "@", and "%2F"=="%2F", not "/" if strings.Contains(matchPattern, "%") { reqPathForPattern := CleanPath(r.URL.EscapedPath(), mergeSlashes) if m.matchPatternWithEscapeSequence(reqPathForPattern, matchPattern) { return true } // doing prefix/suffix/substring matches doesn't make sense continue } reqPathForPattern := CleanPath(reqPath, mergeSlashes) // for substring, prefix, and suffix matching, only perform those // special, fast matches if they are the only wildcards in the pattern; // otherwise we assume a globular match if any * appears in the middle // special case: first and last characters are wildcard, // treat it as a fast substring match if strings.Count(matchPattern, "*") == 2 && strings.HasPrefix(matchPattern, "*") && strings.HasSuffix(matchPattern, "*") && strings.Count(matchPattern, "*") == 2 { if strings.Contains(reqPathForPattern, matchPattern[1:len(matchPattern)-1]) { return true } continue } // only perform prefix/suffix match if it is the only wildcard... // I think that is more correct most of the time if strings.Count(matchPattern, "*") == 1 { // special case: first character is a wildcard, // treat it as a fast suffix match if strings.HasPrefix(matchPattern, "*") { if strings.HasSuffix(reqPathForPattern, matchPattern[1:]) { return true } continue } // special case: last character is a wildcard, // treat it as a fast prefix match if strings.HasSuffix(matchPattern, "*") { if strings.HasPrefix(reqPathForPattern, matchPattern[:len(matchPattern)-1]) { return true } continue } } // at last, use globular matching, which also is exact matching // if there are no glob/wildcard chars; we ignore the error here // because we can't handle it anyway matches, _ := path.Match(matchPattern, reqPathForPattern) if matches { return true } } return false } func (MatchPath) matchPatternWithEscapeSequence(escapedPath, matchPath string) bool { // We would just compare the pattern against r.URL.Path, // but the pattern contains %, indicating that we should // compare at least some part of the path in raw/escaped // space, not normalized space; so we build the string we // will compare against by adding the normalized parts // of the path, then switching to the escaped parts where // the pattern hints to us wherever % is present. var sb strings.Builder // iterate the pattern and escaped path in lock-step; // increment iPattern every time we consume a char from the pattern, // increment iPath every time we consume a char from the path; // iPattern and iPath are our cursors/iterator positions for each string var iPattern, iPath int for { if iPattern >= len(matchPath) || iPath >= len(escapedPath) { break } // get the next character from the request path pathCh := string(escapedPath[iPath]) var escapedPathCh string // normalize (decode) escape sequences if pathCh == "%" && len(escapedPath) >= iPath+3 { // hold onto this in case we find out the intent is to match in escaped space here; // we lowercase it even though technically the spec says: "For consistency, URI // producers and normalizers should use uppercase hexadecimal digits for all percent- // encodings" (RFC 3986 section 2.1) - we lowercased the matcher pattern earlier in // provisioning so we do the same here to gain case-insensitivity in equivalence; // besides, this string is never shown visibly escapedPathCh = strings.ToLower(escapedPath[iPath : iPath+3]) var err error pathCh, err = url.PathUnescape(escapedPathCh) if err != nil { // should be impossible unless EscapedPath() is giving us an invalid sequence! return false } iPath += 2 // escape sequence is 2 bytes longer than normal char } // now get the next character from the pattern normalize := true switch matchPath[iPattern] { case '%': // escape sequence // if not a wildcard ("%*"), compare literally; consume next two bytes of pattern if len(matchPath) >= iPattern+3 && matchPath[iPattern+1] != '*' { sb.WriteString(escapedPathCh) iPath++ iPattern += 2 break } // escaped wildcard sequence; consume next byte only ('*') iPattern++ normalize = false fallthrough case '*': // wildcard, so consume until next matching character remaining := escapedPath[iPath:] until := len(escapedPath) - iPath // go until end of string... if iPattern < len(matchPath)-1 { // ...unless the * is not at the end nextCh := matchPath[iPattern+1] until = strings.IndexByte(remaining, nextCh) if until == -1 { // terminating char of wildcard span not found, so definitely no match return false } } if until == 0 { // empty span; nothing to add on this iteration break } next := remaining[:until] if normalize { var err error next, err = url.PathUnescape(next) if err != nil { return false // should be impossible anyway } } sb.WriteString(next) iPath += until default: sb.WriteString(pathCh) iPath++ } iPattern++ } // we can now treat rawpath globs (%*) as regular globs (*) matchPath = strings.ReplaceAll(matchPath, "%*", "*") // ignore error here because we can't handle it anyway= matches, _ := path.Match(matchPath, sb.String()) return matches } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression path('*substring*', '*suffix') func (MatchPath) CELLibrary(ctx caddy.Context) (cel.Library, error) { return CELMatcherImpl( // name of the macro, this is the function name that users see when writing expressions. "path", // name of the function that the macro will be rewritten to call. "path_match_request_list", // internal data type of the MatchPath value. []*cel.Type{cel.ListType(cel.StringType)}, // function to convert a constant list of strings to a MatchPath instance. func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) strList, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } matcher := MatchPath(strList.([]string)) err = matcher.Provision(ctx) return matcher, err }, ) } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchPath) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { *m = append(*m, d.RemainingArgs()...) if d.NextBlock(0) { return d.Err("malformed path matcher: blocks are not supported") } } return nil } // CaddyModule returns the Caddy module information. func (MatchPathRE) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.path_regexp", New: func() caddy.Module { return new(MatchPathRE) }, } } // Match returns true if r matches m. func (m MatchPathRE) Match(r *http.Request) bool { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) // Clean the path, merges doubled slashes, etc. // This ensures maliciously crafted requests can't bypass // the path matcher. See #4407 cleanedPath := cleanPath(r.URL.Path) return m.MatchRegexp.Match(cleanedPath, repl) } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression path_regexp('^/bar') func (MatchPathRE) CELLibrary(ctx caddy.Context) (cel.Library, error) { unnamedPattern, err := CELMatcherImpl( "path_regexp", "path_regexp_request_string", []*cel.Type{cel.StringType}, func(data ref.Val) (RequestMatcher, error) { pattern := data.(types.String) matcher := MatchPathRE{MatchRegexp{Pattern: string(pattern)}} err := matcher.Provision(ctx) return matcher, err }, ) if err != nil { return nil, err } namedPattern, err := CELMatcherImpl( "path_regexp", "path_regexp_request_string_string", []*cel.Type{cel.StringType, cel.StringType}, func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) params, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } strParams := params.([]string) matcher := MatchPathRE{MatchRegexp{Name: strParams[0], Pattern: strParams[1]}} err = matcher.Provision(ctx) return matcher, err }, ) if err != nil { return nil, err } envOpts := append(unnamedPattern.CompileOptions(), namedPattern.CompileOptions()...) prgOpts := append(unnamedPattern.ProgramOptions(), namedPattern.ProgramOptions()...) return NewMatcherCELLibrary(envOpts, prgOpts), nil } // CaddyModule returns the Caddy module information. func (MatchMethod) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.method", New: func() caddy.Module { return new(MatchMethod) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchMethod) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { *m = append(*m, d.RemainingArgs()...) if d.NextBlock(0) { return d.Err("malformed method matcher: blocks are not supported") } } return nil } // Match returns true if r matches m. func (m MatchMethod) Match(r *http.Request) bool { for _, method := range m { if r.Method == method { return true } } return false } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression method('PUT', 'POST') func (MatchMethod) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "method", "method_request_list", []*cel.Type{cel.ListType(cel.StringType)}, func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) strList, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } return MatchMethod(strList.([]string)), nil }, ) } // CaddyModule returns the Caddy module information. func (MatchQuery) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.query", New: func() caddy.Module { return new(MatchQuery) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchQuery) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if *m == nil { *m = make(map[string][]string) } for d.Next() { for _, query := range d.RemainingArgs() { if query == "" { continue } before, after, found := strings.Cut(query, "=") if !found { return d.Errf("malformed query matcher token: %s; must be in param=val format", d.Val()) } url.Values(*m).Add(before, after) } if d.NextBlock(0) { return d.Err("malformed query matcher: blocks are not supported") } } return nil } // Match returns true if r matches m. An empty m matches an empty query string. func (m MatchQuery) Match(r *http.Request) bool { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) // parse query string just once, for efficiency parsed, err := url.ParseQuery(r.URL.RawQuery) if err != nil { // Illegal query string. Likely bad escape sequence or unescaped literals. // Note that semicolons in query string have a controversial history. Summaries: // - https://github.com/golang/go/issues/50034 // - https://github.com/golang/go/issues/25192 // Despite the URL WHATWG spec mandating the use of & separators for query strings, // every URL parser implementation is different, and Filippo Valsorda rightly wrote: // "Relying on parser alignment for security is doomed." Overall conclusion is that // splitting on & and rejecting ; in key=value pairs is safer than accepting raw ;. // We regard the Go team's decision as sound and thus reject malformed query strings. return false } for param, vals := range m { param = repl.ReplaceAll(param, "") paramVal, found := parsed[param] if found { for _, v := range vals { v = repl.ReplaceAll(v, "") if paramVal[0] == v || v == "*" { return true } } } } return len(m) == 0 && len(r.URL.Query()) == 0 } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression query({'sort': 'asc'}) || query({'foo': ['*bar*', 'baz']}) func (MatchQuery) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "query", "query_matcher_request_map", []*cel.Type{CELTypeJSON}, func(data ref.Val) (RequestMatcher, error) { mapStrListStr, err := CELValueToMapStrList(data) if err != nil { return nil, err } return MatchQuery(url.Values(mapStrListStr)), nil }, ) } // CaddyModule returns the Caddy module information. func (MatchHeader) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.header", New: func() caddy.Module { return new(MatchHeader) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchHeader) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if *m == nil { *m = make(map[string][]string) } for d.Next() { var field, val string if !d.Args(&field) { return d.Errf("malformed header matcher: expected field") } if strings.HasPrefix(field, "!") { if len(field) == 1 { return d.Errf("malformed header matcher: must have field name following ! character") } field = field[1:] headers := *m headers[field] = nil m = &headers if d.NextArg() { return d.Errf("malformed header matcher: null matching headers cannot have a field value") } } else { if !d.NextArg() { return d.Errf("malformed header matcher: expected both field and value") } // If multiple header matchers with the same header field are defined, // we want to add the existing to the list of headers (will be OR'ed) val = d.Val() http.Header(*m).Add(field, val) } if d.NextBlock(0) { return d.Err("malformed header matcher: blocks are not supported") } } return nil } // Match returns true if r matches m. func (m MatchHeader) Match(r *http.Request) bool { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) return matchHeaders(r.Header, http.Header(m), r.Host, repl) } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression header({'content-type': 'image/png'}) // expression header({'foo': ['bar', 'baz']}) // match bar or baz func (MatchHeader) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "header", "header_matcher_request_map", []*cel.Type{CELTypeJSON}, func(data ref.Val) (RequestMatcher, error) { mapStrListStr, err := CELValueToMapStrList(data) if err != nil { return nil, err } return MatchHeader(http.Header(mapStrListStr)), nil }, ) } // getHeaderFieldVals returns the field values for the given fieldName from input. // The host parameter should be obtained from the http.Request.Host field since // net/http removes it from the header map. func getHeaderFieldVals(input http.Header, fieldName, host string) []string { fieldName = textproto.CanonicalMIMEHeaderKey(fieldName) if fieldName == "Host" && host != "" { return []string{host} } return input[fieldName] } // matchHeaders returns true if input matches the criteria in against without regex. // The host parameter should be obtained from the http.Request.Host field since // net/http removes it from the header map. func matchHeaders(input, against http.Header, host string, repl *caddy.Replacer) bool { for field, allowedFieldVals := range against { actualFieldVals := getHeaderFieldVals(input, field, host) if allowedFieldVals != nil && len(allowedFieldVals) == 0 && actualFieldVals != nil { // a non-nil but empty list of allowed values means // match if the header field exists at all continue } if allowedFieldVals == nil && actualFieldVals == nil { // a nil list means match if the header does not exist at all continue } var match bool fieldVals: for _, actualFieldVal := range actualFieldVals { for _, allowedFieldVal := range allowedFieldVals { if repl != nil { allowedFieldVal = repl.ReplaceAll(allowedFieldVal, "") } switch { case allowedFieldVal == "*": match = true case strings.HasPrefix(allowedFieldVal, "*") && strings.HasSuffix(allowedFieldVal, "*"): match = strings.Contains(actualFieldVal, allowedFieldVal[1:len(allowedFieldVal)-1]) case strings.HasPrefix(allowedFieldVal, "*"): match = strings.HasSuffix(actualFieldVal, allowedFieldVal[1:]) case strings.HasSuffix(allowedFieldVal, "*"): match = strings.HasPrefix(actualFieldVal, allowedFieldVal[:len(allowedFieldVal)-1]) default: match = actualFieldVal == allowedFieldVal } if match { break fieldVals } } } if !match { return false } } return true } // CaddyModule returns the Caddy module information. func (MatchHeaderRE) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.header_regexp", New: func() caddy.Module { return new(MatchHeaderRE) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchHeaderRE) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if *m == nil { *m = make(map[string]*MatchRegexp) } for d.Next() { var first, second, third string if !d.Args(&first, &second) { return d.ArgErr() } var name, field, val string if d.Args(&third) { name = first field = second val = third } else { field = first val = second } // If there's already a pattern for this field // then we would end up overwriting the old one if (*m)[field] != nil { return d.Errf("header_regexp matcher can only be used once per named matcher, per header field: %s", field) } (*m)[field] = &MatchRegexp{Pattern: val, Name: name} if d.NextBlock(0) { return d.Err("malformed header_regexp matcher: blocks are not supported") } } return nil } // Match returns true if r matches m. func (m MatchHeaderRE) Match(r *http.Request) bool { for field, rm := range m { actualFieldVals := getHeaderFieldVals(r.Header, field, r.Host) match := false fieldVal: for _, actualFieldVal := range actualFieldVals { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) if rm.Match(actualFieldVal, repl) { match = true break fieldVal } } if !match { return false } } return true } // Provision compiles m's regular expressions. func (m MatchHeaderRE) Provision(ctx caddy.Context) error { for _, rm := range m { err := rm.Provision(ctx) if err != nil { return err } } return nil } // Validate validates m's regular expressions. func (m MatchHeaderRE) Validate() error { for _, rm := range m { err := rm.Validate() if err != nil { return err } } return nil } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression header_regexp('foo', 'Field', 'fo+') func (MatchHeaderRE) CELLibrary(ctx caddy.Context) (cel.Library, error) { unnamedPattern, err := CELMatcherImpl( "header_regexp", "header_regexp_request_string_string", []*cel.Type{cel.StringType, cel.StringType}, func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) params, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } strParams := params.([]string) matcher := MatchHeaderRE{} matcher[strParams[0]] = &MatchRegexp{Pattern: strParams[1], Name: ""} err = matcher.Provision(ctx) return matcher, err }, ) if err != nil { return nil, err } namedPattern, err := CELMatcherImpl( "header_regexp", "header_regexp_request_string_string_string", []*cel.Type{cel.StringType, cel.StringType, cel.StringType}, func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) params, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } strParams := params.([]string) matcher := MatchHeaderRE{} matcher[strParams[1]] = &MatchRegexp{Pattern: strParams[2], Name: strParams[0]} err = matcher.Provision(ctx) return matcher, err }, ) if err != nil { return nil, err } envOpts := append(unnamedPattern.CompileOptions(), namedPattern.CompileOptions()...) prgOpts := append(unnamedPattern.ProgramOptions(), namedPattern.ProgramOptions()...) return NewMatcherCELLibrary(envOpts, prgOpts), nil } // CaddyModule returns the Caddy module information. func (MatchProtocol) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.protocol", New: func() caddy.Module { return new(MatchProtocol) }, } } // Match returns true if r matches m. func (m MatchProtocol) Match(r *http.Request) bool { switch string(m) { case "grpc": return strings.HasPrefix(r.Header.Get("content-type"), "application/grpc") case "https": return r.TLS != nil case "http": return r.TLS == nil case "http/1.0": return r.ProtoMajor == 1 && r.ProtoMinor == 0 case "http/1.0+": return r.ProtoAtLeast(1, 0) case "http/1.1": return r.ProtoMajor == 1 && r.ProtoMinor == 1 case "http/1.1+": return r.ProtoAtLeast(1, 1) case "http/2": return r.ProtoMajor == 2 case "http/2+": return r.ProtoAtLeast(2, 0) case "http/3": return r.ProtoMajor == 3 case "http/3+": return r.ProtoAtLeast(3, 0) } return false } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchProtocol) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { var proto string if !d.Args(&proto) { return d.Err("expected exactly one protocol") } *m = MatchProtocol(proto) } return nil } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression protocol('https') func (MatchProtocol) CELLibrary(_ caddy.Context) (cel.Library, error) { return CELMatcherImpl( "protocol", "protocol_request_string", []*cel.Type{cel.StringType}, func(data ref.Val) (RequestMatcher, error) { protocolStr, ok := data.(types.String) if !ok { return nil, errors.New("protocol argument was not a string") } return MatchProtocol(strings.ToLower(string(protocolStr))), nil }, ) } // CaddyModule returns the Caddy module information. func (MatchNot) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.not", New: func() caddy.Module { return new(MatchNot) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchNot) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { matcherSet, err := ParseCaddyfileNestedMatcherSet(d) if err != nil { return err } m.MatcherSetsRaw = append(m.MatcherSetsRaw, matcherSet) } return nil } // UnmarshalJSON satisfies json.Unmarshaler. It puts the JSON // bytes directly into m's MatcherSetsRaw field. func (m *MatchNot) UnmarshalJSON(data []byte) error { return json.Unmarshal(data, &m.MatcherSetsRaw) } // MarshalJSON satisfies json.Marshaler by marshaling // m's raw matcher sets. func (m MatchNot) MarshalJSON() ([]byte, error) { return json.Marshal(m.MatcherSetsRaw) } // Provision loads the matcher modules to be negated. func (m *MatchNot) Provision(ctx caddy.Context) error { matcherSets, err := ctx.LoadModule(m, "MatcherSetsRaw") if err != nil { return fmt.Errorf("loading matcher sets: %v", err) } for _, modMap := range matcherSets.([]map[string]any) { var ms MatcherSet for _, modIface := range modMap { ms = append(ms, modIface.(RequestMatcher)) } m.MatcherSets = append(m.MatcherSets, ms) } return nil } // Match returns true if r matches m. Since this matcher negates // the embedded matchers, false is returned if any of its matcher // sets return true. func (m MatchNot) Match(r *http.Request) bool { for _, ms := range m.MatcherSets { if ms.Match(r) { return false } } return true } // CaddyModule returns the Caddy module information. func (MatchRemoteIP) CaddyModule() caddy.ModuleInfo { return caddy.ModuleInfo{ ID: "http.matchers.remote_ip", New: func() caddy.Module { return new(MatchRemoteIP) }, } } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (m *MatchRemoteIP) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { for d.NextArg() { if d.Val() == "forwarded" { if len(m.Ranges) > 0 { return d.Err("if used, 'forwarded' must be first argument") } m.Forwarded = true continue } if d.Val() == "private_ranges" { m.Ranges = append(m.Ranges, []string{ "192.168.0.0/16", "172.16.0.0/12", "10.0.0.0/8", "127.0.0.1/8", "fd00::/8", "::1", }...) continue } m.Ranges = append(m.Ranges, d.Val()) } if d.NextBlock(0) { return d.Err("malformed remote_ip matcher: blocks are not supported") } } return nil } // CELLibrary produces options that expose this matcher for use in CEL // expression matchers. // // Example: // // expression remote_ip('forwarded', '192.168.0.0/16', '172.16.0.0/12', '10.0.0.0/8') func (MatchRemoteIP) CELLibrary(ctx caddy.Context) (cel.Library, error) { return CELMatcherImpl( // name of the macro, this is the function name that users see when writing expressions. "remote_ip", // name of the function that the macro will be rewritten to call. "remote_ip_match_request_list", // internal data type of the MatchPath value. []*cel.Type{cel.ListType(cel.StringType)}, // function to convert a constant list of strings to a MatchPath instance. func(data ref.Val) (RequestMatcher, error) { refStringList := reflect.TypeOf([]string{}) strList, err := data.ConvertToNative(refStringList) if err != nil { return nil, err } m := MatchRemoteIP{} for _, input := range strList.([]string) { if input == "forwarded" { if len(m.Ranges) > 0 { return nil, errors.New("if used, 'forwarded' must be first argument") } m.Forwarded = true continue } m.Ranges = append(m.Ranges, input) } err = m.Provision(ctx) return m, err }, ) } // Provision parses m's IP ranges, either from IP or CIDR expressions. func (m *MatchRemoteIP) Provision(ctx caddy.Context) error { m.logger = ctx.Logger() for _, str := range m.Ranges { // Exclude the zone_id from the IP if strings.Contains(str, "%") { split := strings.Split(str, "%") str = split[0] // write zone identifiers in m.zones for matching later m.zones = append(m.zones, split[1]) } else { m.zones = append(m.zones, "") } if strings.Contains(str, "/") { ipNet, err := netip.ParsePrefix(str) if err != nil { return fmt.Errorf("parsing CIDR expression '%s': %v", str, err) } m.cidrs = append(m.cidrs, &ipNet) } else { ipAddr, err := netip.ParseAddr(str) if err != nil { return fmt.Errorf("invalid IP address: '%s': %v", str, err) } ipNew := netip.PrefixFrom(ipAddr, ipAddr.BitLen()) m.cidrs = append(m.cidrs, &ipNew) } } return nil } func (m MatchRemoteIP) getClientIP(r *http.Request) (netip.Addr, string, error) { remote := r.RemoteAddr zoneID := "" if m.Forwarded { if fwdFor := r.Header.Get("X-Forwarded-For"); fwdFor != "" { remote = strings.TrimSpace(strings.Split(fwdFor, ",")[0]) } } ipStr, _, err := net.SplitHostPort(remote) if err != nil { ipStr = remote // OK; probably didn't have a port } // Some IPv6-Adresses can contain zone identifiers at the end, // which are separated with "%" if strings.Contains(ipStr, "%") { split := strings.Split(ipStr, "%") ipStr = split[0] zoneID = split[1] } ipAddr, err := netip.ParseAddr(ipStr) if err != nil { return netip.IPv4Unspecified(), "", err } return ipAddr, zoneID, nil } // Match returns true if r matches m. func (m MatchRemoteIP) Match(r *http.Request) bool { clientIP, zoneID, err := m.getClientIP(r) if err != nil { m.logger.Error("getting client IP", zap.Error(err)) return false } zoneFilter := true for i, ipRange := range m.cidrs { if ipRange.Contains(clientIP) { // Check if there are zone filters assigned and if they match. if m.zones[i] == "" || zoneID == m.zones[i] { return true } zoneFilter = false } } if !zoneFilter { m.logger.Debug("zone ID from remote did not match", zap.String("zone", zoneID)) } return false } // MatchRegexp is an embedable type for matching // using regular expressions. It adds placeholders // to the request's replacer. type MatchRegexp struct { // A unique name for this regular expression. Optional, // but useful to prevent overwriting captures from other // regexp matchers. Name string `json:"name,omitempty"` // The regular expression to evaluate, in RE2 syntax, // which is the same general syntax used by Go, Perl, // and Python. For details, see // [Go's regexp package](https://golang.org/pkg/regexp/). // Captures are accessible via placeholders. Unnamed // capture groups are exposed as their numeric, 1-based // index, while named capture groups are available by // the capture group name. Pattern string `json:"pattern"` compiled *regexp.Regexp phPrefix string } // Provision compiles the regular expression. func (mre *MatchRegexp) Provision(caddy.Context) error { re, err := regexp.Compile(mre.Pattern) if err != nil { return fmt.Errorf("compiling matcher regexp %s: %v", mre.Pattern, err) } mre.compiled = re mre.phPrefix = regexpPlaceholderPrefix if mre.Name != "" { mre.phPrefix += "." + mre.Name } return nil } // Validate ensures mre is set up correctly. func (mre *MatchRegexp) Validate() error { if mre.Name != "" && !wordRE.MatchString(mre.Name) { return fmt.Errorf("invalid regexp name (must contain only word characters): %s", mre.Name) } return nil } // Match returns true if input matches the compiled regular // expression in mre. It sets values on the replacer repl // associated with capture groups, using the given scope // (namespace). func (mre *MatchRegexp) Match(input string, repl *caddy.Replacer) bool { matches := mre.compiled.FindStringSubmatch(input) if matches == nil { return false } // save all capture groups, first by index for i, match := range matches { key := mre.phPrefix + "." + strconv.Itoa(i) repl.Set(key, match) } // then by name for i, name := range mre.compiled.SubexpNames() { if i != 0 && name != "" { key := mre.phPrefix + "." + name repl.Set(key, matches[i]) } } return true } // UnmarshalCaddyfile implements caddyfile.Unmarshaler. func (mre *MatchRegexp) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { // If this is the second iteration of the loop // then there's more than one path_regexp matcher // and we would end up overwriting the old one if mre.Pattern != "" { return d.Err("regular expression can only be used once per named matcher") } args := d.RemainingArgs() switch len(args) { case 1: mre.Pattern = args[0] case 2: mre.Name = args[0] mre.Pattern = args[1] default: return d.ArgErr() } if d.NextBlock(0) { return d.Err("malformed path_regexp matcher: blocks are not supported") } } return nil } // ParseCaddyfileNestedMatcher parses the Caddyfile tokens for a nested // matcher set, and returns its raw module map value. func ParseCaddyfileNestedMatcherSet(d *caddyfile.Dispenser) (caddy.ModuleMap, error) { matcherMap := make(map[string]RequestMatcher) // in case there are multiple instances of the same matcher, concatenate // their tokens (we expect that UnmarshalCaddyfile should be able to // handle more than one segment); otherwise, we'd overwrite other // instances of the matcher in this set tokensByMatcherName := make(map[string][]caddyfile.Token) for nesting := d.Nesting(); d.NextArg() || d.NextBlock(nesting); { matcherName := d.Val() tokensByMatcherName[matcherName] = append(tokensByMatcherName[matcherName], d.NextSegment()...) } for matcherName, tokens := range tokensByMatcherName { mod, err := caddy.GetModule("http.matchers." + matcherName) if err != nil { return nil, d.Errf("getting matcher module '%s': %v", matcherName, err) } unm, ok := mod.New().(caddyfile.Unmarshaler) if !ok { return nil, d.Errf("matcher module '%s' is not a Caddyfile unmarshaler", matcherName) } err = unm.UnmarshalCaddyfile(caddyfile.NewDispenser(tokens)) if err != nil { return nil, err } rm, ok := unm.(RequestMatcher) if !ok { return nil, fmt.Errorf("matcher module '%s' is not a request matcher", matcherName) } matcherMap[matcherName] = rm } // we should now have a functional matcher, but we also // need to be able to marshal as JSON, otherwise config // adaptation will be missing the matchers! matcherSet := make(caddy.ModuleMap) for name, matcher := range matcherMap { jsonBytes, err := json.Marshal(matcher) if err != nil { return nil, fmt.Errorf("marshaling %T matcher: %v", matcher, err) } matcherSet[name] = jsonBytes } return matcherSet, nil } var ( wordRE = regexp.MustCompile(`\w+`) ) const regexpPlaceholderPrefix = "http.regexp" // MatcherErrorVarKey is the key used for the variable that // holds an optional error emitted from a request matcher, // to short-circuit the handler chain, since matchers cannot // return errors via the RequestMatcher interface. const MatcherErrorVarKey = "matchers.error" // Interface guards var ( _ RequestMatcher = (*MatchHost)(nil) _ caddy.Provisioner = (*MatchHost)(nil) _ RequestMatcher = (*MatchPath)(nil) _ RequestMatcher = (*MatchPathRE)(nil) _ caddy.Provisioner = (*MatchPathRE)(nil) _ RequestMatcher = (*MatchMethod)(nil) _ RequestMatcher = (*MatchQuery)(nil) _ RequestMatcher = (*MatchHeader)(nil) _ RequestMatcher = (*MatchHeaderRE)(nil) _ caddy.Provisioner = (*MatchHeaderRE)(nil) _ RequestMatcher = (*MatchProtocol)(nil) _ RequestMatcher = (*MatchRemoteIP)(nil) _ caddy.Provisioner = (*MatchRemoteIP)(nil) _ RequestMatcher = (*MatchNot)(nil) _ caddy.Provisioner = (*MatchNot)(nil) _ caddy.Provisioner = (*MatchRegexp)(nil) _ caddyfile.Unmarshaler = (*MatchHost)(nil) _ caddyfile.Unmarshaler = (*MatchPath)(nil) _ caddyfile.Unmarshaler = (*MatchPathRE)(nil) _ caddyfile.Unmarshaler = (*MatchMethod)(nil) _ caddyfile.Unmarshaler = (*MatchQuery)(nil) _ caddyfile.Unmarshaler = (*MatchHeader)(nil) _ caddyfile.Unmarshaler = (*MatchHeaderRE)(nil) _ caddyfile.Unmarshaler = (*MatchProtocol)(nil) _ caddyfile.Unmarshaler = (*MatchRemoteIP)(nil) _ caddyfile.Unmarshaler = (*VarsMatcher)(nil) _ caddyfile.Unmarshaler = (*MatchVarsRE)(nil) _ CELLibraryProducer = (*MatchHost)(nil) _ CELLibraryProducer = (*MatchPath)(nil) _ CELLibraryProducer = (*MatchPathRE)(nil) _ CELLibraryProducer = (*MatchMethod)(nil) _ CELLibraryProducer = (*MatchQuery)(nil) _ CELLibraryProducer = (*MatchHeader)(nil) _ CELLibraryProducer = (*MatchHeaderRE)(nil) _ CELLibraryProducer = (*MatchProtocol)(nil) _ CELLibraryProducer = (*MatchRemoteIP)(nil) // _ CELLibraryProducer = (*VarsMatcher)(nil) // _ CELLibraryProducer = (*MatchVarsRE)(nil) _ json.Marshaler = (*MatchNot)(nil) _ json.Unmarshaler = (*MatchNot)(nil) )