author    Francis Lavoie <lavofr@gmail.com>    2022-07-13 16:15:00 -0400
committer GitHub <noreply@github.com>          2022-07-13 14:15:00 -0600
commit    7d1f7771c9961dc6607a6ac5894b9c3195c25fcf (patch)
tree      6e2afea66b04fd5ba5231ef475497e17d50d5c3e /modules/caddyhttp/reverseproxy
parent    04a14ee37ac6192d734518fa9082d6eb93971bc6 (diff)
reverseproxy: Implement retry count, alternative to try_duration (#4756)
* reverseproxy: Implement retry count, alternative to try_duration
* Add Caddyfile support for `retry_match`
* Refactor to deduplicate matcher parsing logic
* Fix lint
Diffstat (limited to 'modules/caddyhttp/reverseproxy')
-rw-r--r--  modules/caddyhttp/reverseproxy/caddyfile.go     | 25
-rw-r--r--  modules/caddyhttp/reverseproxy/reverseproxy.go  | 66
2 files changed, 71 insertions(+), 20 deletions(-)
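For orientation before the diff itself: a minimal sketch of how the new knobs could be used in a Caddyfile (the site address and upstream hosts are placeholders; the inline matcher follows the `<request-matcher>` form documented in the patch below):

    example.com {
        reverse_proxy node1:8080 node2:8080 {
            lb_policy first
            lb_retries 3
            lb_try_interval 250ms
            lb_retry_match method GET
        }
    }

With a config like this, a failed request would be re-proxied up to 3 more times, waiting 250ms between attempts, but only for requests matching the matcher (here, GETs, which are safe to repeat). Unlike lb_try_duration, the limit is a count rather than a wall-clock budget; per the doc comments added below, the two can be combined, and whichever limit is reached first stops the retries.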
diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go
index 4fa4be0..a5321d1 100644
--- a/modules/caddyhttp/reverseproxy/caddyfile.go
+++ b/modules/caddyhttp/reverseproxy/caddyfile.go
@@ -59,8 +59,10 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error)
//
// # load balancing
// lb_policy <name> [<options...>]
+// lb_retries <retries>
// lb_try_duration <duration>
// lb_try_interval <interval>
+// lb_retry_match <request-matcher>
//
// # active health checking
// health_uri <uri>
@@ -247,6 +249,19 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
}
h.LoadBalancing.SelectionPolicyRaw = caddyconfig.JSONModuleObject(sel, "policy", name, nil)
+ case "lb_retries":
+ if !d.NextArg() {
+ return d.ArgErr()
+ }
+ tries, err := strconv.Atoi(d.Val())
+ if err != nil {
+ return d.Errf("bad lb_retries number '%s': %v", d.Val(), err)
+ }
+ if h.LoadBalancing == nil {
+ h.LoadBalancing = new(LoadBalancing)
+ }
+ h.LoadBalancing.Retries = tries
+
case "lb_try_duration":
if !d.NextArg() {
return d.ArgErr()
@@ -273,6 +288,16 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
}
h.LoadBalancing.TryInterval = caddy.Duration(dur)
+ case "lb_retry_match":
+ matcherSet, err := caddyhttp.ParseCaddyfileNestedMatcherSet(d)
+ if err != nil {
+ return d.Errf("failed to parse lb_retry_match: %v", err)
+ }
+ if h.LoadBalancing == nil {
+ h.LoadBalancing = new(LoadBalancing)
+ }
+ h.LoadBalancing.RetryMatchRaw = append(h.LoadBalancing.RetryMatchRaw, matcherSet)
+
case "health_uri":
if !d.NextArg() {
return d.ArgErr()
diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go
index 64adce2..7061275 100644
--- a/modules/caddyhttp/reverseproxy/reverseproxy.go
+++ b/modules/caddyhttp/reverseproxy/reverseproxy.go
@@ -430,12 +430,14 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
// and because we may retry some number of times, carry over the error
// from previous tries because of the nuances of load balancing & retries
var proxyErr error
+ var retries int
for {
var done bool
- done, proxyErr = h.proxyLoopIteration(clonedReq, r, w, proxyErr, start, repl, reqHeader, reqHost, next)
+ done, proxyErr = h.proxyLoopIteration(clonedReq, r, w, proxyErr, start, retries, repl, reqHeader, reqHost, next)
if done {
break
}
+ retries++
}
if proxyErr != nil {
@@ -449,7 +451,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
// that has to be passed in, we brought this into its own method so that we could run defer more easily.
// It returns true when the loop is done and should break; false otherwise. The error value returned should
// be assigned to the proxyErr value for the next iteration of the loop (or the error handled after break).
-func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w http.ResponseWriter, proxyErr error, start time.Time,
+func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w http.ResponseWriter, proxyErr error, start time.Time, retries int,
repl *caddy.Replacer, reqHeader http.Header, reqHost string, next caddyhttp.Handler) (bool, error) {
// get the updated list of upstreams
upstreams := h.Upstreams
@@ -479,7 +481,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
if proxyErr == nil {
proxyErr = caddyhttp.Error(http.StatusServiceUnavailable, fmt.Errorf("no upstreams available"))
}
- if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) {
+ if !h.LoadBalancing.tryAgain(h.ctx, start, retries, proxyErr, r) {
return true, proxyErr
}
return false, proxyErr
@@ -542,7 +544,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h
h.countFailure(upstream)
// if we've tried long enough, break
- if !h.LoadBalancing.tryAgain(h.ctx, start, proxyErr, r) {
+ if !h.LoadBalancing.tryAgain(h.ctx, start, retries, proxyErr, r) {
return true, proxyErr
}
@@ -944,16 +946,26 @@ func (h Handler) finalizeResponse(
return nil
}
-// tryAgain takes the time that the handler was initially invoked
-// as well as any error currently obtained, and the request being
-// tried, and returns true if another attempt should be made at
-// proxying the request. If true is returned, it has already blocked
-// long enough before the next retry (i.e. no more sleeping is
-// needed). If false is returned, the handler should stop trying to
-// proxy the request.
-func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr error, req *http.Request) bool {
+// tryAgain takes the time that the handler was initially invoked,
+// the amount of retries already performed, as well as any error
+// currently obtained, and the request being tried, and returns
+// true if another attempt should be made at proxying the request.
+// If true is returned, it has already blocked long enough before
+// the next retry (i.e. no more sleeping is needed). If false is
+// returned, the handler should stop trying to proxy the request.
+func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, retries int, proxyErr error, req *http.Request) bool {
+ // no retries are configured
+ if lb.TryDuration == 0 && lb.Retries == 0 {
+ return false
+ }
+
// if we've tried long enough, break
- if time.Since(start) >= time.Duration(lb.TryDuration) {
+ if lb.TryDuration > 0 && time.Since(start) >= time.Duration(lb.TryDuration) {
+ return false
+ }
+
+ // if we've reached the retry limit, break
+ if lb.Retries > 0 && retries >= lb.Retries {
return false
}
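To make the new stop conditions easy to see in isolation, here is a self-contained distillation of the gating added to tryAgain above (local stand-in types, not Caddy's; it mirrors only the duration and count checks, not the matcher or interval logic):

    package main

    import (
    	"fmt"
    	"time"
    )

    // lbLimits stands in for the relevant LoadBalancing fields.
    type lbLimits struct {
    	retries     int           // mirrors LoadBalancing.Retries
    	tryDuration time.Duration // mirrors LoadBalancing.TryDuration
    }

    // shouldRetry mirrors the checks added in this patch: retrying is
    // disabled when both knobs are zero, and otherwise stops at whichever
    // limit (elapsed duration or retry count) is reached first.
    func (lb lbLimits) shouldRetry(start time.Time, retries int) bool {
    	// no retries are configured
    	if lb.tryDuration == 0 && lb.retries == 0 {
    		return false
    	}
    	// if we've tried long enough, stop
    	if lb.tryDuration > 0 && time.Since(start) >= lb.tryDuration {
    		return false
    	}
    	// if we've reached the retry limit, stop
    	if lb.retries > 0 && retries >= lb.retries {
    		return false
    	}
    	return true
    }

    func main() {
    	lb := lbLimits{retries: 3, tryDuration: 2 * time.Second}
    	start := time.Now()
    	retries := 0
    	for lb.shouldRetry(start, retries) {
    		retries++ // a real handler would re-proxy the request here
    	}
    	fmt.Println("gave up after", retries, "retries") // 3, unless 2s elapsed first
    }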
@@ -976,6 +988,11 @@ func (lb LoadBalancing) tryAgain(ctx caddy.Context, start time.Time, proxyErr er
}
}
+ // fast path; if the interval is zero, we don't need to wait
+ if lb.TryInterval == 0 {
+ return true
+ }
+
// otherwise, wait and try the next available host
timer := time.NewTimer(time.Duration(lb.TryInterval))
select {
@@ -1190,16 +1207,25 @@ type LoadBalancing struct {
// The default policy is random selection.
SelectionPolicyRaw json.RawMessage `json:"selection_policy,omitempty" caddy:"namespace=http.reverse_proxy.selection_policies inline_key=policy"`
+ // How many times to retry selecting available backends for each
+ // request if the next available host is down. If try_duration is
+ // also configured, then retries may stop early if the duration
+ // is reached. By default, retries are disabled (zero).
+ Retries int `json:"retries,omitempty"`
+
// How long to try selecting available backends for each request
- // if the next available host is down. By default, this retry is
- // disabled. Clients will wait for up to this long while the load
- // balancer tries to find an available upstream host.
+ // if the next available host is down. Clients will wait for up
+ // to this long while the load balancer tries to find an available
+ // upstream host. If retries is also configured, tries may stop
+ // early if the maximum retries is reached. By default, retries
+ // are disabled (zero duration).
TryDuration caddy.Duration `json:"try_duration,omitempty"`
- // How long to wait between selecting the next host from the pool. Default
- // is 250ms. Only relevant when a request to an upstream host fails. Be
- // aware that setting this to 0 with a non-zero try_duration can cause the
- // CPU to spin if all backends are down and latency is very low.
+ // How long to wait between selecting the next host from the pool.
+ // Default is 250ms if try_duration is enabled, otherwise zero. Only
+ // relevant when a request to an upstream host fails. Be aware that
+ // setting this to 0 with a non-zero try_duration can cause the CPU
+ // to spin if all backends are down and latency is very low.
TryInterval caddy.Duration `json:"try_interval,omitempty"`
// A list of matcher sets that restricts with which requests retries are
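For completeness, the JSON shape of these fields, going by the struct tags above, would look roughly like this (the handler wrapper and dial addresses are placeholders, and the "retry_match" key is an assumption inferred from the RetryMatchRaw field used in caddyfile.go, since this excerpt cuts off before its tag):

    {
        "handler": "reverse_proxy",
        "upstreams": [{"dial": "node1:8080"}, {"dial": "node2:8080"}],
        "load_balancing": {
            "retries": 3,
            "try_duration": "2s",
            "try_interval": "250ms",
            "retry_match": [{"method": ["GET"]}]
        }
    }

Here retries bounds the number of attempts while try_duration bounds elapsed time; setting both means retrying stops at whichever limit is reached first, as the tryAgain changes above implement.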