From e51e56a4944622c0a2c7d19da4bb6b9bb07c1973 Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Thu, 16 Jan 2020 17:08:52 -0700 Subject: httpcaddyfile: Fix nested blocks; add handle directive; refactor The fix that was initially put forth in #2971 was good, but only for up to one layer of nesting. The real problem was that we forgot to increment nesting when already inside a block if we saw another open curly brace that opens another block (dispenser.go L157-158). The new 'handle' directive allows HTTP Caddyfiles to be designed more like nginx location blocks if the user prefers. Inside a handle block, directives are still ordered just like they are outside of them, but handler blocks at a given level of nesting are mutually exclusive. This work benefitted from some refactoring and cleanup. --- caddyconfig/httpcaddyfile/builtins.go | 34 +++++- caddyconfig/httpcaddyfile/directives.go | 53 +++++--- caddyconfig/httpcaddyfile/httptype.go | 190 +++++++++++++++++++++-------- caddyconfig/httpcaddyfile/httptype_test.go | 33 +++++ 4 files changed, 240 insertions(+), 70 deletions(-) create mode 100644 caddyconfig/httpcaddyfile/httptype_test.go (limited to 'caddyconfig/httpcaddyfile') diff --git a/caddyconfig/httpcaddyfile/builtins.go b/caddyconfig/httpcaddyfile/builtins.go index ebb03cc..daba03b 100644 --- a/caddyconfig/httpcaddyfile/builtins.go +++ b/caddyconfig/httpcaddyfile/builtins.go @@ -34,6 +34,7 @@ func init() { RegisterHandlerDirective("redir", parseRedir) RegisterHandlerDirective("respond", parseRespond) RegisterHandlerDirective("route", parseRoute) + RegisterHandlerDirective("handle", parseHandle) } func parseBind(h Helper) ([]ConfigValue, error) { @@ -76,7 +77,7 @@ func parseRoot(h Helper) ([]ConfigValue, error) { route.MatcherSetsRaw = []caddy.ModuleMap{matcherSet} } - return h.NewVarsRoute(route), nil + return []ConfigValue{{Class: "route", Value: route}}, nil } func parseTLS(h Helper) ([]ConfigValue, error) { @@ -330,3 +331,34 @@ func parseRoute(h Helper) (caddyhttp.MiddlewareHandler, error) { return sr, nil } + +func parseHandle(h Helper) (caddyhttp.MiddlewareHandler, error) { + var allResults []ConfigValue + + for h.Next() { + for nesting := h.Nesting(); h.NextBlock(nesting); { + dir := h.Val() + + dirFunc, ok := registeredDirectives[dir] + if !ok { + return nil, h.Errf("unrecognized directive: %s", dir) + } + + subHelper := h + subHelper.Dispenser = h.NewFromNextTokens() + + results, err := dirFunc(subHelper) + if err != nil { + return nil, h.Errf("parsing caddyfile tokens for '%s': %v", dir, err) + } + for _, result := range results { + result.directive = dir + allResults = append(allResults, result) + } + } + + return buildSubroute(allResults, h.groupCounter) + } + + return nil, nil +} diff --git a/caddyconfig/httpcaddyfile/directives.go b/caddyconfig/httpcaddyfile/directives.go index b866c7d..ab7a040 100644 --- a/caddyconfig/httpcaddyfile/directives.go +++ b/caddyconfig/httpcaddyfile/directives.go @@ -16,7 +16,6 @@ package httpcaddyfile import ( "encoding/json" - "fmt" "sort" "github.com/caddyserver/caddy/v2" @@ -28,7 +27,10 @@ import ( // directiveOrder specifies the order // to apply directives in HTTP routes. var directiveOrder = []string{ + "root", + "rewrite", + "strip_prefix", "strip_suffix", "uri_replace", @@ -38,7 +40,10 @@ var directiveOrder = []string{ "request_header", "encode", "templates", + + "handle", "route", + "redir", "respond", "reverse_proxy", @@ -46,6 +51,17 @@ var directiveOrder = []string{ "file_server", } +// directiveIsOrdered returns true if dir is +// a known, ordered (sorted) directive. +func directiveIsOrdered(dir string) bool { + for _, d := range directiveOrder { + if d == dir { + return true + } + } + return false +} + // RegisterDirective registers a unique directive dir with an // associated unmarshaling (setup) function. When directive dir // is encountered in a Caddyfile, setupFunc will be called to @@ -98,7 +114,7 @@ type Helper struct { warnings *[]caddyconfig.Warning matcherDefs map[string]caddy.ModuleMap parentBlock caddyfile.ServerBlock - groupCounter *int + groupCounter counter } // Option gets the option keyed by name. @@ -130,8 +146,8 @@ func (h Helper) JSON(val interface{}) json.RawMessage { return caddyconfig.JSON(val, h.warnings) } -// MatcherToken assumes the current token is (possibly) a matcher, and -// if so, returns the matcher set along with a true value. If the current +// MatcherToken assumes the next argument token is (possibly) a matcher, +// and if so, returns the matcher set along with a true value. If the next // token is not a matcher, nil and false is returned. Note that a true // value may be returned with a nil matcher set if it is a catch-all. func (h Helper) MatcherToken() (caddy.ModuleMap, bool, error) { @@ -186,14 +202,13 @@ func (h Helper) GroupRoutes(vals []ConfigValue) { } // now that we know the group will have some effect, do it - groupNum := *h.groupCounter + groupName := h.groupCounter.nextGroup() for i := range vals { if route, ok := vals[i].Value.(caddyhttp.Route); ok { - route.Group = fmt.Sprintf("group%d", groupNum) + route.Group = groupName vals[i].Value = route } } - *h.groupCounter++ } // NewBindAddresses returns config values relevant to adding @@ -202,12 +217,6 @@ func (h Helper) NewBindAddresses(addrs []string) []ConfigValue { return []ConfigValue{{Class: "bind", Value: addrs}} } -// NewVarsRoute returns config values relevant to adding a -// "vars" wrapper route to the config. -func (h Helper) NewVarsRoute(route caddyhttp.Route) []ConfigValue { - return []ConfigValue{{Class: "var", Value: route}} -} - // ConfigValue represents a value to be added to the final // configuration, or a value to be consulted when building // the final configuration. @@ -228,7 +237,7 @@ type ConfigValue struct { directive string } -func sortRoutes(handlers []ConfigValue) { +func sortRoutes(routes []ConfigValue) { dirPositions := make(map[string]int) for i, dir := range directiveOrder { dirPositions[dir] = i @@ -237,15 +246,21 @@ func sortRoutes(handlers []ConfigValue) { // while we are sorting, we will need to decode a route's path matcher // in order to sub-sort by path length; we can amortize this operation // for efficiency by storing the decoded matchers in a slice - decodedMatchers := make([]caddyhttp.MatchPath, len(handlers)) + decodedMatchers := make([]caddyhttp.MatchPath, len(routes)) - sort.SliceStable(handlers, func(i, j int) bool { - iDir, jDir := handlers[i].directive, handlers[j].directive + sort.SliceStable(routes, func(i, j int) bool { + iDir, jDir := routes[i].directive, routes[j].directive if iDir == jDir { // directives are the same; sub-sort by path matcher length // if there's only one matcher set and one path (common case) - iRoute := handlers[i].Value.(caddyhttp.Route) - jRoute := handlers[j].Value.(caddyhttp.Route) + iRoute, ok := routes[i].Value.(caddyhttp.Route) + if !ok { + return false + } + jRoute, ok := routes[j].Value.(caddyhttp.Route) + if !ok { + return false + } if len(iRoute.MatcherSetsRaw) == 1 && len(jRoute.MatcherSetsRaw) == 1 { // use already-decoded matcher, or decode if it's the first time seeing it diff --git a/caddyconfig/httpcaddyfile/httptype.go b/caddyconfig/httpcaddyfile/httptype.go index 6ed4e39..a57b6e9 100644 --- a/caddyconfig/httpcaddyfile/httptype.go +++ b/caddyconfig/httpcaddyfile/httptype.go @@ -41,7 +41,7 @@ type ServerType struct { func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock, options map[string]interface{}) (*caddy.Config, []caddyconfig.Warning, error) { var warnings []caddyconfig.Warning - groupCounter := new(int) + gc := counter{new(int)} var serverBlocks []serverBlock for _, sblock := range originalServerBlocks { @@ -146,7 +146,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock, warnings: &warnings, matcherDefs: matcherDefs, parentBlock: sb.block, - groupCounter: groupCounter, + groupCounter: gc, }) if err != nil { return nil, warnings, fmt.Errorf("parsing caddyfile tokens for '%s': %v", dir, err) @@ -169,7 +169,7 @@ func (st ServerType) Setup(originalServerBlocks []caddyfile.ServerBlock, // each pairing of listener addresses to list of server // blocks is basically a server definition - servers, err := st.serversFromPairings(pairings, options, &warnings) + servers, err := st.serversFromPairings(pairings, options, &warnings, gc) if err != nil { return nil, warnings, err } @@ -306,6 +306,7 @@ func (st *ServerType) serversFromPairings( pairings []sbAddrAssociation, options map[string]interface{}, warnings *[]caddyconfig.Warning, + groupCounter counter, ) (map[string]*caddyhttp.Server, error) { servers := make(map[string]*caddyhttp.Server) @@ -320,32 +321,32 @@ func (st *ServerType) serversFromPairings( // case their matchers overlap; we do this somewhat naively by // descending sort by length of host then path sort.SliceStable(p.serverBlocks, func(i, j int) bool { - // TODO: we could pre-process the lengths for efficiency, + // TODO: we could pre-process the specificities for efficiency, // but I don't expect many blocks will have SO many keys... var iLongestPath, jLongestPath string var iLongestHost, jLongestHost string for _, key := range p.serverBlocks[i].block.Keys { addr, _ := ParseAddress(key) - if length(addr.Host) > length(iLongestHost) { + if specificity(addr.Host) > specificity(iLongestHost) { iLongestHost = addr.Host } - if len(addr.Path) > len(iLongestPath) { + if specificity(addr.Path) > specificity(iLongestPath) { iLongestPath = addr.Path } } for _, key := range p.serverBlocks[j].block.Keys { addr, _ := ParseAddress(key) - if length(addr.Host) > length(jLongestHost) { + if specificity(addr.Host) > specificity(jLongestHost) { jLongestHost = addr.Host } - if len(addr.Path) > len(jLongestPath) { + if specificity(addr.Path) > specificity(jLongestPath) { jLongestPath = addr.Path } } - if length(iLongestHost) == length(jLongestHost) { + if specificity(iLongestHost) == specificity(jLongestHost) { return len(iLongestPath) > len(jLongestPath) } - return length(iLongestHost) > length(jLongestHost) + return specificity(iLongestHost) > specificity(jLongestHost) }) // create a subroute for each site in the server block @@ -355,10 +356,7 @@ func (st *ServerType) serversFromPairings( return nil, fmt.Errorf("server block %v: compiling matcher sets: %v", sblock.block.Keys, err) } - siteSubroute := new(caddyhttp.Subroute) - // tls: connection policies and toggle auto HTTPS - autoHTTPSQualifiedHosts, err := st.autoHTTPSHosts(sblock) if err != nil { return nil, err @@ -391,38 +389,13 @@ func (st *ServerType) serversFromPairings( // TODO: consolidate equal conn policies } - // vars: make sure these are linked in first so future - // routes can use the variables they define - for _, cfgVal := range sblock.pile["var"] { - siteSubroute.Routes = append(siteSubroute.Routes, cfgVal.Value.(caddyhttp.Route)) - } - // set up each handler directive, making sure to honor directive order dirRoutes := sblock.pile["route"] - sortRoutes(dirRoutes) - - // add all the routes piled in from directives - for _, r := range dirRoutes { - // as a special case, group rewrite directives so that they are mutually exclusive; - // this means that only the first matching rewrite will be evaluated, and that's - // probably a good thing, since there should never be a need to do more than one - // rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites: - // rewrite /docs/json/* /docs/json/index.html - // rewrite /docs/* /docs/index.html - // (We use this on the Caddy website, or at least we did once.) The first rewrite's - // result is also matched by the second rewrite, making the first rewrite pointless. - // See issue #2959. - if r.directive == "rewrite" { - route := r.Value.(caddyhttp.Route) - route.Group = "rewriting" - r.Value = route - } - - siteSubroute.Routes = append(siteSubroute.Routes, r.Value.(caddyhttp.Route)) + siteSubroute, err := buildSubroute(dirRoutes, groupCounter) + if err != nil { + return nil, err } - siteSubroute.Routes = consolidateRoutes(siteSubroute.Routes) - if len(matcherSetsEnc) == 0 && len(p.serverBlocks) == 1 { // no need to wrap the handlers in a subroute if this is // the only server block and there is no matcher for it @@ -446,6 +419,98 @@ func (st *ServerType) serversFromPairings( return servers, nil } +func buildSubroute(routes []ConfigValue, groupCounter counter) (*caddyhttp.Subroute, error) { + for _, val := range routes { + if !directiveIsOrdered(val.directive) { + return nil, fmt.Errorf("directive '%s' is not ordered, so it cannot be used here", val.directive) + } + } + + sortRoutes(routes) + + subroute := new(caddyhttp.Subroute) + + // get a group name for rewrite directives, if needed + var rewriteGroupName string + var rewriteCount int + for _, r := range routes { + if r.directive == "rewrite" { + rewriteCount++ + if rewriteCount > 1 { + break + } + } + } + if rewriteCount > 1 { + rewriteGroupName = groupCounter.nextGroup() + } + + // get a group name for handle blocks, if needed + var handleGroupName string + var handleCount int + for _, r := range routes { + if r.directive == "handle" { + handleCount++ + if handleCount > 1 { + break + } + } + } + if handleCount > 1 { + handleGroupName = groupCounter.nextGroup() + } + + // add all the routes piled in from directives + for _, r := range routes { + // as a special case, group rewrite directives so that they are mutually exclusive; + // this means that only the first matching rewrite will be evaluated, and that's + // probably a good thing, since there should never be a need to do more than one + // rewrite (I think?), and cascading rewrites smell bad... imagine these rewrites: + // rewrite /docs/json/* /docs/json/index.html + // rewrite /docs/* /docs/index.html + // (We use this on the Caddy website, or at least we did once.) The first rewrite's + // result is also matched by the second rewrite, making the first rewrite pointless. + // See issue #2959. + if r.directive == "rewrite" { + route := r.Value.(caddyhttp.Route) + route.Group = rewriteGroupName + r.Value = route + } + + // handle blocks are also mutually exclusive by definition + if r.directive == "handle" { + route := r.Value.(caddyhttp.Route) + route.Group = handleGroupName + r.Value = route + } + + switch route := r.Value.(type) { + case caddyhttp.Subroute: + // if a route-class config value is actually a Subroute handler + // with nothing but a list of routes, then it is the intention + // of the directive to keep these handlers together and in this + // same order, but not necessarily in a subroute (if it wanted + // to keep them in a subroute, the directive would have returned + // a route with a Subroute as its handler); this is useful to + // keep multiple handlers/routes together and in the same order + // so that the sorting procedure we did above doesn't reorder them + if route.Errors != nil { + // if error handlers are also set, this is confusing; it's + // probably supposed to be wrapped in a Route and encoded + // as a regular handler route... programmer error. + panic("found subroute with more than just routes; perhaps it should have been wrapped in a route?") + } + subroute.Routes = append(subroute.Routes, route.Routes...) + case caddyhttp.Route: + subroute.Routes = append(subroute.Routes, route) + } + } + + subroute.Routes = consolidateRoutes(subroute.Routes) + + return subroute, nil +} + func (st ServerType) autoHTTPSHosts(sb serverBlock) ([]string, error) { // get the hosts for this server block... hosts, err := st.hostsFromServerBlockKeys(sb.block) @@ -521,7 +586,6 @@ func matcherSetFromMatcherToken( } return m, true, nil } - return nil, false, nil } @@ -659,14 +723,40 @@ func tryInt(val interface{}, warnings *[]caddyconfig.Warning) int { return intVal } -// length returns len(s) minus any wildcards (*). Basically, -// it's a length count that penalizes the use of wildcards. -// This is useful for comparing hostnames, but probably not -// paths so much (for example, '*.example.com' is clearly -// less specific than 'a.example.com', but is '/a' more or -// less specific than '/a*'?). -func length(s string) int { - return len(s) - strings.Count(s, "*") +// specifity returns len(s) minus any wildcards (*) and +// placeholders ({...}). Basically, it's a length count +// that penalizes the use of wildcards and placeholders. +// This is useful for comparing hostnames and paths. +// However, wildcards in paths are not a sure answer to +// the question of specificity. For exmaple, +// '*.example.com' is clearly less specific than +// 'a.example.com', but is '/a' more or less specific +// than '/a*'? +func specificity(s string) int { + l := len(s) - strings.Count(s, "*") + for len(s) > 0 { + start := strings.Index(s, "{") + if start < 0 { + return l + } + end := strings.Index(s[start:], "}") + start + 1 + if end <= start { + return l + } + l -= end - start + s = s[end:] + } + return l +} + +type counter struct { + n *int +} + +func (c counter) nextGroup() string { + name := fmt.Sprintf("group%d", *c.n) + *c.n++ + return name } type matcherSetAndTokens struct { diff --git a/caddyconfig/httpcaddyfile/httptype_test.go b/caddyconfig/httpcaddyfile/httptype_test.go new file mode 100644 index 0000000..ae4f042 --- /dev/null +++ b/caddyconfig/httpcaddyfile/httptype_test.go @@ -0,0 +1,33 @@ +package httpcaddyfile + +import "testing" + +func TestSpecificity(t *testing.T) { + for i, tc := range []struct { + input string + expect int + }{ + {"", 0}, + {"*", 0}, + {"*.*", 1}, + {"{placeholder}", 0}, + {"/{placeholder}", 1}, + {"foo", 3}, + {"example.com", 11}, + {"a.example.com", 13}, + {"*.example.com", 12}, + {"/foo", 4}, + {"/foo*", 4}, + {"{placeholder}.example.com", 12}, + {"{placeholder.example.com", 24}, + {"}.", 2}, + {"}{", 2}, + {"{}", 0}, + {"{{{}}", 1}, + } { + actual := specificity(tc.input) + if actual != tc.expect { + t.Errorf("Test %d (%s): Expected %d but got %d", i, tc.input, tc.expect, actual) + } + } +} -- cgit v1.2.3