agent: prevent very old servers re-joining a cluster with stale data (#17171)
* agent: configure server lastseen timestamp Signed-off-by: Dan Bond <danbond@protonmail.com> * use correct config Signed-off-by: Dan Bond <danbond@protonmail.com> * add comments Signed-off-by: Dan Bond <danbond@protonmail.com> * use default age in test golden data Signed-off-by: Dan Bond <danbond@protonmail.com> * add changelog Signed-off-by: Dan Bond <danbond@protonmail.com> * fix runtime test Signed-off-by: Dan Bond <danbond@protonmail.com> * agent: add server_metadata Signed-off-by: Dan Bond <danbond@protonmail.com> * update comments Signed-off-by: Dan Bond <danbond@protonmail.com> * correctly check if metadata file does not exist Signed-off-by: Dan Bond <danbond@protonmail.com> * follow instructions for adding new config Signed-off-by: Dan Bond <danbond@protonmail.com> * add comments Signed-off-by: Dan Bond <danbond@protonmail.com> * update comments Signed-off-by: Dan Bond <danbond@protonmail.com> * Update agent/agent.go Co-authored-by: Dan Upton <daniel@floppy.co> * agent/config: add validation for duration with min Signed-off-by: Dan Bond <danbond@protonmail.com> * docs: add new server_rejoin_age_max config definition Signed-off-by: Dan Bond <danbond@protonmail.com> * agent: add unit test for checking server last seen Signed-off-by: Dan Bond <danbond@protonmail.com> * agent: log continually for 60s before erroring Signed-off-by: Dan Bond <danbond@protonmail.com> * pr comments Signed-off-by: Dan Bond <danbond@protonmail.com> * remove unneeded todo * agent: fix error message Signed-off-by: Dan Bond <danbond@protonmail.com> --------- Signed-off-by: Dan Bond <danbond@protonmail.com> Co-authored-by: Dan Upton <daniel@floppy.co>
This commit is contained in:
parent
91ed8de9f5
commit
6bb7782745
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
agent: add a configurable maximum age (default: 7 days) to prevent servers re-joining a cluster with stale data
|
||||
```
|
|
@ -7,6 +7,7 @@ import (
|
|||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
|
@ -22,8 +23,6 @@ import (
|
|||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/rpcclient"
|
||||
"github.com/hashicorp/consul/agent/rpcclient/configentry"
|
||||
"github.com/hashicorp/go-connlimit"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"github.com/hashicorp/go-memdb"
|
||||
|
@ -50,12 +49,13 @@ import (
|
|||
grpcDNS "github.com/hashicorp/consul/agent/grpc-external/services/dns"
|
||||
middleware "github.com/hashicorp/consul/agent/grpc-middleware"
|
||||
"github.com/hashicorp/consul/agent/hcp/scada"
|
||||
libscada "github.com/hashicorp/consul/agent/hcp/scada"
|
||||
"github.com/hashicorp/consul/agent/local"
|
||||
"github.com/hashicorp/consul/agent/proxycfg"
|
||||
proxycfgglue "github.com/hashicorp/consul/agent/proxycfg-glue"
|
||||
catalogproxycfg "github.com/hashicorp/consul/agent/proxycfg-sources/catalog"
|
||||
localproxycfg "github.com/hashicorp/consul/agent/proxycfg-sources/local"
|
||||
"github.com/hashicorp/consul/agent/rpcclient"
|
||||
"github.com/hashicorp/consul/agent/rpcclient/configentry"
|
||||
"github.com/hashicorp/consul/agent/rpcclient/health"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/systemd"
|
||||
|
@ -575,11 +575,11 @@ func (a *Agent) Start(ctx context.Context) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// copy over the existing node id, this cannot be
|
||||
// changed while running anyways but this prevents
|
||||
// breaking some existing behavior. then overwrite
|
||||
// the configuration
|
||||
// Copy over the existing node id. This cannot be
|
||||
// changed while running, but this prevents
|
||||
// breaking some existing behavior.
|
||||
c.NodeID = a.config.NodeID
|
||||
// Overwrite the configuration.
|
||||
a.config = c
|
||||
|
||||
if err := a.tlsConfigurator.Update(a.config.TLS); err != nil {
|
||||
|
@ -625,6 +625,20 @@ func (a *Agent) Start(ctx context.Context) error {
|
|||
if c.ServerMode {
|
||||
serverLogger := a.baseDeps.Logger.NamedIntercept(logging.ConsulServer)
|
||||
|
||||
// Check for a last seen timestamp and exit if deemed stale before attempting to join
|
||||
// Serf/Raft or listen for requests.
|
||||
if err := a.checkServerLastSeen(consul.ReadServerMetadata); err != nil {
|
||||
deadline := time.Now().Add(time.Minute)
|
||||
for time.Now().Before(deadline) {
|
||||
a.logger.Error("startup error", "error", err)
|
||||
time.Sleep(10 * time.Second)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// periodically write server metadata to disk.
|
||||
go a.persistServerMetadata()
|
||||
|
||||
incomingRPCLimiter := consul.ConfiguredIncomingRPCLimiter(
|
||||
&lib.StopChannelContext{StopCh: a.shutdownCh},
|
||||
serverLogger,
|
||||
|
@ -661,7 +675,6 @@ func (a *Agent) Start(ctx context.Context) error {
|
|||
return fmt.Errorf("failed to start server cert manager: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
a.externalGRPCServer = external.NewServer(
|
||||
a.logger.Named("grpc.external"),
|
||||
|
@ -1094,7 +1107,7 @@ func (a *Agent) listenHTTP() ([]apiServer, error) {
|
|||
MaxHeaderBytes: a.config.HTTPMaxHeaderBytes,
|
||||
}
|
||||
|
||||
if libscada.IsCapability(l.Addr()) {
|
||||
if scada.IsCapability(l.Addr()) {
|
||||
// wrap in http2 server handler
|
||||
httpServer.Handler = h2c.NewHandler(srv.handler(a.config.EnableDebug), &http2.Server{})
|
||||
}
|
||||
|
@ -1521,6 +1534,8 @@ func newConsulConfig(runtimeCfg *config.RuntimeConfig, logger hclog.Logger) (*co
|
|||
|
||||
cfg.Reporting.License.Enabled = runtimeCfg.Reporting.License.Enabled
|
||||
|
||||
cfg.ServerRejoinAgeMax = runtimeCfg.ServerRejoinAgeMax
|
||||
|
||||
enterpriseConsulConfig(cfg, runtimeCfg)
|
||||
|
||||
return cfg, nil
|
||||
|
@ -4529,7 +4544,70 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources {
|
|||
|
||||
a.fillEnterpriseProxyDataSources(&sources)
|
||||
return sources
|
||||
}
|
||||
|
||||
// persistServerMetadata periodically writes a server's metadata to a file
|
||||
// in the configured data directory.
|
||||
func (a *Agent) persistServerMetadata() {
|
||||
file := filepath.Join(a.config.DataDir, consul.ServerMetadataFile)
|
||||
|
||||
// Create a timer with no initial tick to allow metadata to be written immediately.
|
||||
t := time.NewTimer(0)
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
// Reset the timer to the larger periodic interval.
|
||||
t.Reset(1 * time.Hour)
|
||||
|
||||
f, err := consul.OpenServerMetadata(file)
|
||||
if err != nil {
|
||||
a.logger.Error("failed to open existing server metadata: %w", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := consul.WriteServerMetadata(f); err != nil {
|
||||
f.Close()
|
||||
a.logger.Error("failed to write server metadata: %w", err)
|
||||
continue
|
||||
}
|
||||
|
||||
f.Close()
|
||||
case <-a.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// checkServerLastSeen is a safety check that only occurs once of startup to prevent old servers
|
||||
// with stale data from rejoining an existing cluster.
|
||||
//
|
||||
// It attempts to read a server's metadata file and check the last seen Unix timestamp against a
|
||||
// configurable max age. If the metadata file does not exist, we treat this as an initial startup
|
||||
// and return no error.
|
||||
//
|
||||
// Example: if the server recorded a last seen timestamp of now-7d, and we configure a max age
|
||||
// of 3d, then we should prevent the server from rejoining.
|
||||
func (a *Agent) checkServerLastSeen(readFn consul.ServerMetadataReadFunc) error {
|
||||
filename := filepath.Join(a.config.DataDir, consul.ServerMetadataFile)
|
||||
|
||||
// Read server metadata file.
|
||||
md, err := readFn(filename)
|
||||
if err != nil {
|
||||
// Return early if it doesn't exist as this likely indicates the server is starting for the first time.
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("error reading server metadata: %w", err)
|
||||
}
|
||||
|
||||
maxAge := a.config.ServerRejoinAgeMax
|
||||
if md.IsLastSeenStale(maxAge) {
|
||||
return fmt.Errorf("refusing to rejoin cluster because server has been offline for more than the configured server_rejoin_age_max (%s) - consider wiping your data dir", maxAge)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func listenerPortKey(svcID structs.ServiceID, checkID structs.CheckID) string {
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
mathrand "math/rand"
|
||||
"net"
|
||||
|
@ -6204,6 +6205,70 @@ cloud {
|
|||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestAgent_checkServerLastSeen(t *testing.T) {
|
||||
bd := BaseDeps{
|
||||
Deps: consul.Deps{
|
||||
Logger: hclog.NewInterceptLogger(nil),
|
||||
Tokens: new(token.Store),
|
||||
GRPCConnPool: &fakeGRPCConnPool{},
|
||||
},
|
||||
RuntimeConfig: &config.RuntimeConfig{},
|
||||
Cache: cache.New(cache.Options{}),
|
||||
}
|
||||
agent, err := New(bd)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test that an ErrNotExist OS error is treated as ok.
|
||||
t.Run("TestReadErrNotExist", func(t *testing.T) {
|
||||
readFn := func(filename string) (*consul.ServerMetadata, error) {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
|
||||
err := agent.checkServerLastSeen(readFn)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
// Test that an error reading server metadata is treated as an error.
|
||||
t.Run("TestReadErr", func(t *testing.T) {
|
||||
expected := errors.New("read error")
|
||||
readFn := func(filename string) (*consul.ServerMetadata, error) {
|
||||
return nil, expected
|
||||
}
|
||||
|
||||
err := agent.checkServerLastSeen(readFn)
|
||||
require.ErrorIs(t, err, expected)
|
||||
})
|
||||
|
||||
// Test that a server with a 7d old last seen timestamp is treated as an error.
|
||||
t.Run("TestIsLastSeenStaleErr", func(t *testing.T) {
|
||||
agent.config.ServerRejoinAgeMax = time.Hour
|
||||
|
||||
readFn := func(filename string) (*consul.ServerMetadata, error) {
|
||||
return &consul.ServerMetadata{
|
||||
LastSeenUnix: time.Now().Add(-24 * 7 * time.Hour).Unix(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
err := agent.checkServerLastSeen(readFn)
|
||||
require.Error(t, err)
|
||||
require.ErrorContains(t, err, "refusing to rejoin cluster because server has been offline for more than the configured server_rejoin_age_max")
|
||||
})
|
||||
|
||||
// Test that a server with a 6h old last seen timestamp is not treated as an error.
|
||||
t.Run("TestNoErr", func(t *testing.T) {
|
||||
agent.config.ServerRejoinAgeMax = 24 * 7 * time.Hour
|
||||
|
||||
readFn := func(filename string) (*consul.ServerMetadata, error) {
|
||||
return &consul.ServerMetadata{
|
||||
LastSeenUnix: time.Now().Add(-6 * time.Hour).Unix(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
err := agent.checkServerLastSeen(readFn)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func getExpectedCaPoolByFile(t *testing.T) *x509.CertPool {
|
||||
pool := x509.NewCertPool()
|
||||
data, err := os.ReadFile("../test/ca/root.cer")
|
||||
|
|
|
@ -28,8 +28,6 @@ import (
|
|||
"github.com/hashicorp/memberlist"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
hcpconfig "github.com/hashicorp/consul/agent/hcp/config"
|
||||
|
||||
"github.com/hashicorp/consul/agent/cache"
|
||||
"github.com/hashicorp/consul/agent/checks"
|
||||
"github.com/hashicorp/consul/agent/connect/ca"
|
||||
|
@ -37,6 +35,7 @@ import (
|
|||
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
|
||||
consulrate "github.com/hashicorp/consul/agent/consul/rate"
|
||||
"github.com/hashicorp/consul/agent/dns"
|
||||
hcpconfig "github.com/hashicorp/consul/agent/hcp/config"
|
||||
"github.com/hashicorp/consul/agent/rpc/middleware"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/token"
|
||||
|
@ -1090,6 +1089,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
|
|||
ServerMode: serverMode,
|
||||
ServerName: stringVal(c.ServerName),
|
||||
ServerPort: serverPort,
|
||||
ServerRejoinAgeMax: b.durationValWithDefaultMin("server_rejoin_age_max", c.ServerRejoinAgeMax, 24*7*time.Hour, 6*time.Hour),
|
||||
Services: services,
|
||||
SessionTTLMin: b.durationVal("session_ttl_min", c.SessionTTLMin),
|
||||
SkipLeaveOnInt: skipLeaveOnInt,
|
||||
|
@ -1952,6 +1952,16 @@ func (b *builder) durationValWithDefault(name string, v *string, defaultVal time
|
|||
return d
|
||||
}
|
||||
|
||||
// durationValWithDefaultMin is equivalent to durationValWithDefault, but enforces a minimum duration.
|
||||
func (b *builder) durationValWithDefaultMin(name string, v *string, defaultVal, minVal time.Duration) (d time.Duration) {
|
||||
d = b.durationValWithDefault(name, v, defaultVal)
|
||||
if d < minVal {
|
||||
b.err = multierror.Append(b.err, fmt.Errorf("%s: duration '%s' cannot be less than: %s", name, *v, minVal))
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
|
||||
func (b *builder) durationVal(name string, v *string) (d time.Duration) {
|
||||
return b.durationValWithDefault(name, v, 0)
|
||||
}
|
||||
|
|
|
@ -311,6 +311,21 @@ func TestBuilder_DurationVal_InvalidDuration(t *testing.T) {
|
|||
require.Contains(t, b.err.Error(), badDuration2)
|
||||
}
|
||||
|
||||
func TestBuilder_DurationValWithDefaultMin(t *testing.T) {
|
||||
b := builder{}
|
||||
|
||||
// Attempt to validate that a duration of 10 hours will not error when the min val is 1 hour.
|
||||
dur := "10h0m0s"
|
||||
b.durationValWithDefaultMin("field2", &dur, 24*7*time.Hour, time.Hour)
|
||||
require.NoError(t, b.err)
|
||||
|
||||
// Attempt to validate that a duration of 1 min will error when the min val is 1 hour.
|
||||
dur = "0h1m0s"
|
||||
b.durationValWithDefaultMin("field1", &dur, 24*7*time.Hour, time.Hour)
|
||||
require.Error(t, b.err)
|
||||
require.Contains(t, b.err.Error(), "1 error")
|
||||
}
|
||||
|
||||
func TestBuilder_ServiceVal_MultiError(t *testing.T) {
|
||||
b := builder{}
|
||||
b.serviceVal(&ServiceDefinition{
|
||||
|
|
|
@ -228,6 +228,7 @@ type Config struct {
|
|||
SerfBindAddrWAN *string `mapstructure:"serf_wan" json:"serf_wan,omitempty"`
|
||||
ServerMode *bool `mapstructure:"server" json:"server,omitempty"`
|
||||
ServerName *string `mapstructure:"server_name" json:"server_name,omitempty"`
|
||||
ServerRejoinAgeMax *string `mapstructure:"server_rejoin_age_max" json:"server_rejoin_age_max,omitempty"`
|
||||
Service *ServiceDefinition `mapstructure:"service" json:"-"`
|
||||
Services []ServiceDefinition `mapstructure:"services" json:"-"`
|
||||
SessionTTLMin *string `mapstructure:"session_ttl_min" json:"session_ttl_min,omitempty"`
|
||||
|
|
|
@ -58,6 +58,7 @@ func DefaultSource() Source {
|
|||
segment_limit = 64
|
||||
|
||||
server = false
|
||||
server_rejoin_age_max = "168h"
|
||||
syslog_facility = "LOCAL0"
|
||||
|
||||
tls = {
|
||||
|
|
|
@ -1358,6 +1358,18 @@ type RuntimeConfig struct {
|
|||
// hcl: ports { server = int }
|
||||
ServerPort int
|
||||
|
||||
// ServerRejoinAgeMax is used to specify the duration of time a server
|
||||
// is allowed to be down/offline before a startup operation is refused.
|
||||
//
|
||||
// For example: if a server has been offline for 5 days, and this option
|
||||
// is configured to 3 days, then any subsequent startup operation will fail
|
||||
// and require an operator to manually intervene.
|
||||
//
|
||||
// The default is: 7 days
|
||||
//
|
||||
// hcl: server_rejoin_age_max = "duration"
|
||||
ServerRejoinAgeMax time.Duration
|
||||
|
||||
// Services contains the provided service definitions:
|
||||
//
|
||||
// hcl: services = [
|
||||
|
|
|
@ -25,13 +25,12 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
hcpconfig "github.com/hashicorp/consul/agent/hcp/config"
|
||||
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/cache"
|
||||
"github.com/hashicorp/consul/agent/checks"
|
||||
"github.com/hashicorp/consul/agent/consul"
|
||||
consulrate "github.com/hashicorp/consul/agent/consul/rate"
|
||||
hcpconfig "github.com/hashicorp/consul/agent/hcp/config"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/token"
|
||||
"github.com/hashicorp/consul/lib"
|
||||
|
@ -6419,6 +6418,7 @@ func TestLoad_FullConfig(t *testing.T) {
|
|||
SerfPortWAN: 8302,
|
||||
ServerMode: true,
|
||||
ServerName: "Oerr9n1G",
|
||||
ServerRejoinAgeMax: 604800 * time.Second,
|
||||
ServerPort: 3757,
|
||||
Services: []*structs.ServiceDefinition{
|
||||
{
|
||||
|
@ -7163,7 +7163,8 @@ func TestRuntimeConfig_Sanitize(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
Locality: &Locality{Region: strPtr("us-west-1"), Zone: strPtr("us-west-1a")},
|
||||
Locality: &Locality{Region: strPtr("us-west-1"), Zone: strPtr("us-west-1a")},
|
||||
ServerRejoinAgeMax: 24 * 7 * time.Hour,
|
||||
}
|
||||
|
||||
b, err := json.MarshalIndent(rt.Sanitized(), "", " ")
|
||||
|
|
|
@ -332,6 +332,7 @@
|
|||
"ServerMode": false,
|
||||
"ServerName": "",
|
||||
"ServerPort": 0,
|
||||
"ServerRejoinAgeMax": "168h0m0s",
|
||||
"Services": [
|
||||
{
|
||||
"Address": "",
|
||||
|
|
|
@ -394,6 +394,7 @@ serf_lan = "99.43.63.15"
|
|||
serf_wan = "67.88.33.19"
|
||||
server = true
|
||||
server_name = "Oerr9n1G"
|
||||
server_rejoin_age_max = "604800s"
|
||||
service = {
|
||||
id = "dLOXpSCI"
|
||||
name = "o1ynPkp0"
|
||||
|
|
|
@ -453,6 +453,7 @@
|
|||
"serf_wan": "67.88.33.19",
|
||||
"server": true,
|
||||
"server_name": "Oerr9n1G",
|
||||
"server_rejoin_age_max": "604800s",
|
||||
"service": {
|
||||
"id": "dLOXpSCI",
|
||||
"name": "o1ynPkp0",
|
||||
|
|
|
@ -447,6 +447,10 @@ type Config struct {
|
|||
|
||||
// Embedded Consul Enterprise specific configuration
|
||||
*EnterpriseConfig
|
||||
|
||||
// ServerRejoinAgeMax is used to specify the duration of time a server
|
||||
// is allowed to be down/offline before a startup operation is refused.
|
||||
ServerRejoinAgeMax time.Duration
|
||||
}
|
||||
|
||||
func (c *Config) InPrimaryDatacenter() bool {
|
||||
|
@ -574,6 +578,8 @@ func DefaultConfig() *Config {
|
|||
PeeringTestAllowPeerRegistrations: false,
|
||||
|
||||
EnterpriseConfig: DefaultEnterpriseConfig(),
|
||||
|
||||
ServerRejoinAgeMax: 24 * 7 * time.Hour,
|
||||
}
|
||||
|
||||
// Increase our reap interval to 3 days instead of 24h.
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package consul
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ServerMetadataFile is the name of the file on disk that server metadata
|
||||
// should be written to.
|
||||
const ServerMetadataFile = "server_metadata.json"
|
||||
|
||||
// ServerMetadata holds metadata about a running server that is persisted to
// disk as JSON.
type ServerMetadata struct {
	// LastSeenUnix is when the server was last seen, as a Unix timestamp in seconds.
	LastSeenUnix int64 `json:"last_seen_unix"`
}

// IsLastSeenStale reports whether the last seen timestamp lies more than d
// before the current time.
func (md *ServerMetadata) IsLastSeenStale(d time.Duration) bool {
	// Anything last seen before this instant is considered stale.
	cutoff := time.Now().Add(-d)

	return cutoff.After(time.Unix(md.LastSeenUnix, 0))
}
|
||||
|
||||
// OpenServerMetadata is a helper function for opening the server metadata file
// with the correct permissions.
//
// The file is opened write-only, created with mode 0600 if it does not exist,
// and truncated if it does. The caller is responsible for closing the returned
// writer. On failure the returned writer is nil.
func OpenServerMetadata(filename string) (io.WriteCloser, error) {
	f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		// Return a literal nil: passing the nil *os.File through would yield
		// a non-nil interface value wrapping a nil pointer.
		return nil, err
	}

	return f, nil
}
|
||||
|
||||
// ServerMetadataReadFunc is the signature of a function that loads server
// metadata from the file with the given name. ReadServerMetadata has this
// signature.
type ServerMetadataReadFunc func(filename string) (*ServerMetadata, error)
|
||||
|
||||
// ReadServerMetadata is a helper function for reading the contents of a server
|
||||
// metadata file and unmarshaling the data from JSON.
|
||||
func ReadServerMetadata(filename string) (*ServerMetadata, error) {
|
||||
b, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var md ServerMetadata
|
||||
if err := json.Unmarshal(b, &md); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &md, nil
|
||||
}
|
||||
|
||||
// WriteServerMetadata writes server metadata to a file in JSON format.
|
||||
func WriteServerMetadata(w io.Writer) error {
|
||||
md := &ServerMetadata{
|
||||
LastSeenUnix: time.Now().Unix(),
|
||||
}
|
||||
|
||||
b, err := json.Marshal(md)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := w.Write(b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
package consul
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// mockServerMetadataWriter is a test writer that discards its input and can be
// configured to fail.
type mockServerMetadataWriter struct {
	// writeErr, when non-nil, is returned by every call to Write.
	writeErr error
}

// Write returns writeErr with zero bytes written when writeErr is set.
// Otherwise it reports all of p as written, since a writer must not report a
// short write without also returning an error.
func (m *mockServerMetadataWriter) Write(p []byte) (n int, err error) {
	if m.writeErr != nil {
		return 0, m.writeErr
	}

	return len(p), nil
}
|
||||
|
||||
func TestServerMetadata(t *testing.T) {
|
||||
now := time.Now()
|
||||
|
||||
t.Run("TestIsLastSeenStaleTrue", func(t *testing.T) {
|
||||
// Create a server that is 48 hours old.
|
||||
md := &ServerMetadata{
|
||||
LastSeenUnix: now.Add(-48 * time.Hour).Unix(),
|
||||
}
|
||||
|
||||
stale := md.IsLastSeenStale(24 * time.Hour)
|
||||
assert.True(t, stale)
|
||||
})
|
||||
|
||||
t.Run("TestIsLastSeenStaleFalse", func(t *testing.T) {
|
||||
// Create a server that is 1 hour old.
|
||||
md := &ServerMetadata{
|
||||
LastSeenUnix: now.Add(-1 * time.Hour).Unix(),
|
||||
}
|
||||
|
||||
stale := md.IsLastSeenStale(24 * time.Hour)
|
||||
assert.False(t, stale)
|
||||
})
|
||||
}
|
||||
|
||||
func TestWriteServerMetadata(t *testing.T) {
|
||||
t.Run("TestWriteError", func(t *testing.T) {
|
||||
m := &mockServerMetadataWriter{
|
||||
writeErr: errors.New("write error"),
|
||||
}
|
||||
|
||||
err := WriteServerMetadata(m)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
t.Run("TestOK", func(t *testing.T) {
|
||||
b := new(bytes.Buffer)
|
||||
|
||||
err := WriteServerMetadata(b)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, b.Len() > 0)
|
||||
})
|
||||
}
|
|
@ -736,6 +736,11 @@ Refer to the [formatting specification](https://golang.org/pkg/time/#ParseDurati
|
|||
|
||||
- `server` Equivalent to the [`-server` command-line flag](/consul/docs/agent/config/cli-flags#_server).
|
||||
|
||||
- `server_rejoin_age_max` - controls the allowed maximum age of a stale server attempting to rejoin a cluster.
|
||||
If a server has been offline for longer than this period, it will refuse to start up again until an operator intervenes. This is to protect
|
||||
clusters from instability caused by decommissioned servers accidentally being started again.
|
||||
Note: the default value is 7d and the minimum value is 6h.
|
||||
|
||||
- `non_voting_server` - **This field is deprecated in Consul 1.9.1. See the [`read_replica`](#read_replica) field instead.**
|
||||
|
||||
- `read_replica` - Equivalent to the [`-read-replica` command-line flag](/consul/docs/agent/config/cli-flags#_read_replica).
|
||||
|
|
Loading…
Reference in New Issue