From 25d2b4bf2927bf69ddce582d33339ef7d13cb6bf Mon Sep 17 00:00:00 2001 From: Matthew Holt Date: Fri, 2 Oct 2020 14:23:40 -0600 Subject: map: Reimplement; multiple outputs; optimize --- modules/caddyhttp/map/caddyfile.go | 86 ++++++++++++------- modules/caddyhttp/map/map.go | 163 +++++++++++++++++++++++++++---------- 2 files changed, 178 insertions(+), 71 deletions(-) (limited to 'modules/caddyhttp/map') diff --git a/modules/caddyhttp/map/caddyfile.go b/modules/caddyhttp/map/caddyfile.go index 5737971..67c148b 100644 --- a/modules/caddyhttp/map/caddyfile.go +++ b/modules/caddyhttp/map/caddyfile.go @@ -15,6 +15,8 @@ package maphandler import ( + "strings" + "github.com/caddyserver/caddy/v2/caddyconfig/httpcaddyfile" "github.com/caddyserver/caddy/v2/modules/caddyhttp" ) @@ -23,49 +25,75 @@ func init() { httpcaddyfile.RegisterHandlerDirective("map", parseCaddyfile) } -// parseCaddyfile sets up the handler for a map from Caddyfile tokens. Syntax: +// parseCaddyfile sets up the map handler from Caddyfile tokens. Syntax: // -// map { -// [default ] - used if not match is found -// [ ] - regular expression to match against the source find and the matching replacement value -// ... +// map [<matcher>] <source> <destinations...> { +// [~]<input> <outputs...> +// default <defaults...> // } // -// The map takes a source variable and maps it into the dest variable. The mapping process -// will check the source variable for the first successful match against a list of regular expressions. -// If a successful match is found the dest variable will contain the replacement value. -// If no successful match is found and the default is specified then the dest will contain the default value. +// If the input value is prefixed with a tilde (~), then the input will be parsed as a +// regular expression. // +// The number of outputs for each mapping must not be more than the number of destinations. +// However, for convenience, there may be fewer outputs than destinations and any missing +// outputs will be filled in implicitly. 
func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { - m := new(Handler) + var handler Handler for h.Next() { - // first see if source and dest are configured - if h.NextArg() { - m.Source = h.Val() - if h.NextArg() { - m.Destination = h.Val() - } + // source + if !h.NextArg() { + return nil, h.ArgErr() + } + handler.Source = h.Val() + + // destinations + handler.Destinations = h.RemainingArgs() + if len(handler.Destinations) == 0 { + return nil, h.Err("missing destination argument(s)") } - // load the rules + // mappings for h.NextBlock(0) { - expression := h.Val() - if expression == "default" { - args := h.RemainingArgs() - if len(args) != 1 { - return m, h.ArgErr() + // defaults are a special case + if h.Val() == "default" { + if len(handler.Defaults) > 0 { + return nil, h.Err("defaults already defined") } - m.Default = args[0] - } else { - args := h.RemainingArgs() - if len(args) != 1 { - return m, h.ArgErr() + handler.Defaults = h.RemainingArgs() + for len(handler.Defaults) < len(handler.Destinations) { + handler.Defaults = append(handler.Defaults, "") } - m.Items = append(m.Items, Item{Expression: expression, Value: args[0]}) + continue + } + + // every other line maps one input to one or more outputs + in := h.Val() + outs := h.RemainingArgs() + + // cannot have more outputs than destinations + if len(outs) > len(handler.Destinations) { + return nil, h.Err("too many outputs") + } + + // for convenience, can have fewer outputs than destinations, but the + // underlying handler won't accept that, so we fill in empty values + for len(outs) < len(handler.Destinations) { + outs = append(outs, "") } + + // create the mapping + mapping := Mapping{Outputs: outs} + if strings.HasPrefix(in, "~") { + mapping.InputRegexp = in[1:] + } else { + mapping.Input = in + } + + handler.Mappings = append(handler.Mappings, mapping) } } - return m, nil + return handler, nil } diff --git a/modules/caddyhttp/map/map.go b/modules/caddyhttp/map/map.go 
index d2a0a0a..29aa19f 100644 --- a/modules/caddyhttp/map/map.go +++ b/modules/caddyhttp/map/map.go @@ -15,8 +15,11 @@ package maphandler import ( + "fmt" + "log" "net/http" "regexp" + "strings" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/modules/caddyhttp" @@ -26,27 +29,26 @@ func init() { caddy.RegisterModule(Handler{}) } -// Handler is a middleware that maps a source placeholder to a destination -// placeholder. -// -// The mapping process happens early in the request handling lifecycle so that -// the Destination placeholder is calculated and available for substitution. -// The Items array contains pairs of regex expressions and values, the -// Source is matched against the expression, if they match then the destination -// placeholder is set to the value. -// -// The Default is optional, if no Item expression is matched then the value of -// the Default will be used. +// Handler implements a middleware that maps inputs to outputs. Specifically, it +// compares a source value against the map inputs, and for one that matches, it +// applies the output values to each destination. Destinations become placeholder +// names. // +// Mapped placeholders are not evaluated until they are used, so even for very +// large mappings, this handler is quite efficient. type Handler struct { - // Source is a placeholder + // Source is the placeholder from which to get the input value. Source string `json:"source,omitempty"` - // Destination is a new placeholder - Destination string `json:"destination,omitempty"` - // Default is an optional value to use if no other was found - Default string `json:"default,omitempty"` - // Items is an array of regex expressions and values - Items []Item `json:"items,omitempty"` + + // Destinations are the placeholders in which to store the outputs. + Destinations []string `json:"destinations,omitempty"` + + // Mappings from source values (inputs) to destination values (outputs). 
+ // The first matching mapping will be applied. + Mappings []Mapping `json:"mappings,omitempty"` + + // If no mappings match, the default value will be applied (optional). + Defaults []string } // CaddyModule returns the Caddy module information. @@ -57,10 +59,52 @@ func (Handler) CaddyModule() caddy.ModuleInfo { } } -// Provision will compile all regular expressions +// Provision sets up h. func (h *Handler) Provision(_ caddy.Context) error { - for i := 0; i < len(h.Items); i++ { - h.Items[i].compiled = regexp.MustCompile(h.Items[i].Expression) + for j, dest := range h.Destinations { + h.Destinations[j] = strings.Trim(dest, "{}") + } + + for i, m := range h.Mappings { + if m.InputRegexp == "" { + continue + } + if m.Input != "" { + return fmt.Errorf("mapping %d has both input and input_regexp fields specified, which is confusing", i) + } + var err error + h.Mappings[i].re, err = regexp.Compile(m.InputRegexp) + if err != nil { + return fmt.Errorf("compiling regexp for mapping %d: %v", i, err) + } + } + + // TODO: improve efficiency even further by using an actual map type + // for the non-regexp mappings, OR sort them and do a binary search + + return nil +} + +// Validate ensures that h is configured properly. 
+func (h *Handler) Validate() error { + nDest, nDef := len(h.Destinations), len(h.Defaults) + if nDef > 0 && nDef != nDest { + return fmt.Errorf("%d destinations != %d defaults", nDest, nDef) + } + + seen := make(map[string]int) + for i, m := range h.Mappings { + // prevent duplicate mappings + if prev, ok := seen[m.Input]; ok { + return fmt.Errorf("mapping %d has a duplicate input '%s' previously used with mapping %d", i, m.Input, prev) + } + seen[m.Input] = i + + // ensure mappings have 1:1 output-to-destination correspondence + nOut := len(m.Outputs) + if nOut != nDest { + return fmt.Errorf("mapping %d has %d outputs but there are %d destinations defined", i, nOut, nDest) + } } return nil } @@ -68,38 +112,73 @@ func (h *Handler) Provision(_ caddy.Context) error { func (h Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error { repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer) - // get the source value, if the source value was not found do no - // replacement. 
- val, ok := repl.GetString(h.Source) - if ok { - found := false - for i := 0; i < len(h.Items); i++ { - if h.Items[i].compiled.MatchString(val) { - found = true - repl.Set(h.Destination, h.Items[i].Value) - break + // defer work until a variable is actually evaluated by using replacer's Map callback + repl.Map(func(key string) (interface{}, bool) { + // return early if the variable is not even a configured destination + destIdx := h.destinationIndex(key) + if destIdx < 0 { + return nil, false + } + + input := repl.ReplaceAll(h.Source, "") + + // find the first mapping matching the input and return + // the requested destination/output value + for _, m := range h.Mappings { + log.Printf("MAPPING: %+v", m) + if m.re != nil { + if m.re.MatchString(input) { + return m.Outputs[destIdx], true + } + continue + } + if input == m.Input { + log.Printf("RETURNING: %s", m.Outputs[destIdx]) + return m.Outputs[destIdx], true } } - if !found && h.Default != "" { - repl.Set(h.Destination, h.Default) + // fall back to default if no match + if len(h.Defaults) > destIdx { + return h.Defaults[destIdx], true } - } + + return nil, true + }) + return next.ServeHTTP(w, r) } -// Item defines each entry in the map -type Item struct { - // Expression is the regular expression searched for - Expression string `json:"expression,omitempty"` - // Value to use once the expression has been found - Value string `json:"value,omitempty"` - // compiled expression, internal use - compiled *regexp.Regexp +// destinationIndex returns the positional index of the destination +// is name is a known destination; otherwise it returns -1. +func (h Handler) destinationIndex(name string) int { + for i, dest := range h.Destinations { + if dest == name { + return i + } + } + return -1 +} + +// Mapping describes a mapping from input to outputs. +type Mapping struct { + // The input value to match. Must be distinct from other mappings. + // Mutually exclusive to input_regexp. 
+ Input string `json:"input,omitempty"` + + // The input regular expression to match. Mutually exclusive to input. + InputRegexp string `json:"input_regexp,omitempty"` + + // Upon a match with the input, each output is positionally correlated + // with each destination of the parent handler. + Outputs []string `json:"outputs,omitempty"` + + re *regexp.Regexp } // Interface guards var ( _ caddy.Provisioner = (*Handler)(nil) + _ caddy.Validator = (*Handler)(nil) _ caddyhttp.MiddlewareHandler = (*Handler)(nil) ) -- cgit v1.2.3