From e3c369d4526e44f23efb10aaad8a60ce519720a0 Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Wed, 19 May 2021 15:14:03 -0600 Subject: logging: Implement dial timeout for net writer (fix #4083) (#4172) * logging: Implement dial timeout for net writer (fix #4083) * Limit how often redials are attempted This should cause dial blocking to occur only once every 10 seconds at most, but it also means the logger connection might be down for up to 10 seconds after it comes back online; oh well. We shouldn't block for DialTimeout at every single log emission. * Clarify offline behavior --- modules/logging/netwriter.go | 75 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 14 deletions(-) (limited to 'modules/logging') diff --git a/modules/logging/netwriter.go b/modules/logging/netwriter.go index a25ede7..2bf23bf 100644 --- a/modules/logging/netwriter.go +++ b/modules/logging/netwriter.go @@ -18,7 +18,9 @@ import ( "fmt" "io" "net" + "os" "sync" + "time" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" @@ -28,10 +30,16 @@ func init() { caddy.RegisterModule(NetWriter{}) } -// NetWriter implements a log writer that outputs to a network socket. +// NetWriter implements a log writer that outputs to a network socket. If +// the socket goes down, it will dump logs to stderr while it attempts to +// reconnect. type NetWriter struct { + // The address of the network socket to which to connect. Address string `json:"address,omitempty"` + // The timeout to wait while connecting to the socket. + DialTimeout caddy.Duration `json:"dial_timeout,omitempty"` + addr caddy.NetworkAddress } @@ -60,6 +68,10 @@ func (nw *NetWriter) Provision(ctx caddy.Context) error { return fmt.Errorf("multiple ports not supported") } + if nw.DialTimeout < 0 { + return fmt.Errorf("timeout cannot be less than 0") + } + return nil } @@ -74,7 +86,10 @@ func (nw NetWriter) WriterKey() string { // OpenWriter opens a new network connection. func (nw NetWriter) OpenWriter() (io.WriteCloser, error) { - reconn := &redialerConn{nw: nw} + reconn := &redialerConn{ + nw: nw, + timeout: time.Duration(nw.DialTimeout), + } conn, err := reconn.dial() if err != nil { return nil, err @@ -87,7 +102,9 @@ func (nw NetWriter) OpenWriter() (io.WriteCloser, error) { // UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax: // -// net
+// net
{ +// dial_timeout +// } // func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { @@ -98,6 +115,22 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if d.NextArg() { return d.ArgErr() } + for nesting := d.Nesting(); d.NextBlock(nesting); { + switch d.Val() { + case "dial_timeout": + if !d.NextArg() { + return d.ArgErr() + } + timeout, err := caddy.ParseDuration(d.Val()) + if err != nil { + return d.Errf("invalid duration: %s", d.Val()) + } + if d.NextArg() { + return d.ArgErr() + } + nw.DialTimeout = caddy.Duration(timeout) + } + } } return nil } @@ -107,8 +140,10 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { // is retried. type redialerConn struct { net.Conn - connMu sync.RWMutex - nw NetWriter + connMu sync.RWMutex + nw NetWriter + timeout time.Duration + lastRedial time.Time } // Write wraps the underlying Conn.Write method, but if that fails, @@ -131,20 +166,32 @@ func (reconn *redialerConn) Write(b []byte) (n int, err error) { return } - // we're the lucky first goroutine to re-dial the connection - conn2, err2 := reconn.dial() - if err2 != nil { - return - } - if n, err = conn2.Write(b); err == nil { - reconn.Conn.Close() - reconn.Conn = conn2 + // there's still a problem, so try to re-attempt dialing the socket + // if some time has passed in which the issue could have potentially + // been resolved - we don't want to block at every single log + // emission (!) - see discussion in #4111 + if time.Since(reconn.lastRedial) > 10*time.Second { + reconn.lastRedial = time.Now() + conn2, err2 := reconn.dial() + if err2 != nil { + // logger socket still offline; instead of discarding the log, dump it to stderr + os.Stderr.Write(b) + return + } + if n, err = conn2.Write(b); err == nil { + reconn.Conn.Close() + reconn.Conn = conn2 + } + } else { + // last redial attempt was too recent; just dump to stderr for now + os.Stderr.Write(b) } + return } func (reconn *redialerConn) dial() (net.Conn, error) { - return net.Dial(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0)) + return net.DialTimeout(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0), reconn.timeout) } // Interface guards -- cgit v1.2.3