open-consul/agent/consul/config.go

612 lines
22 KiB
Go
Raw Normal View History

2013-12-06 23:43:07 +00:00
package consul
import (
"fmt"
2013-12-31 22:00:25 +00:00
"net"
2013-12-06 23:43:07 +00:00
"os"
2014-01-09 23:44:25 +00:00
"time"
"github.com/hashicorp/memberlist"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"golang.org/x/time/rate"
"github.com/hashicorp/consul/agent/checks"
2018-04-09 04:56:46 +00:00
"github.com/hashicorp/consul/agent/structs"
libserf "github.com/hashicorp/consul/lib/serf"
"github.com/hashicorp/consul/tlsutil"
2017-01-18 06:20:11 +00:00
"github.com/hashicorp/consul/types"
build: make tests independent of build tags When the metadata server is scanning the agents for potential servers it is parsing the version number which the agent provided when it joined. This version number has to conform to a certain format, i.e. 'n.n.n'. Without this version number properly set some tests fail with error messages that disguise the root cause. The default version number is currently set to 'unknown' in version/version.go which does not parse and triggers the tests to fail. The work around is to use a build tag 'consul' which will use the version number set in version_base.go instead which has the correct format and is set to the current release version. In addition, some parts of the code also require the version number to be of a certain value. Setting it to '0.0.0' for example makes some tests pass and others fail since they don't pass the semantic check. When using go build/install/test one has to remember to use '-tags consul' or tests will fail with non-obvious error messages. Using build tags makes the build process more complex and error prone since it prevents the use of the plain go toolchain and - at least in its current form - introduces subtle build and test issues. We should try to eliminate build tags for anything else but platform specific code. This patch removes all references to specific version numbers in the code and tests and sets the default version to '9.9.9' which is syntactically correct and passes the semantic check. This solves the issue of running go build/install/test without tags for the OSS build.
2017-08-28 10:20:21 +00:00
"github.com/hashicorp/consul/version"
2013-12-06 23:43:07 +00:00
)
const (
2013-12-20 23:33:13 +00:00
DefaultDC = "dc1"
2017-05-03 19:02:01 +00:00
DefaultRPCPort = 8300
2013-12-09 22:22:23 +00:00
DefaultLANSerfPort = 8301
DefaultWANSerfPort = 8302
// DefaultRaftMultiplier is used as a baseline Raft configuration that
// will be reliable on a very basic server. See docs/install/performance.html
// for information on how this value was obtained.
DefaultRaftMultiplier uint = 5
// MaxRaftMultiplier is a fairly arbitrary upper bound that limits the
// amount of performance detuning that's possible.
MaxRaftMultiplier uint = 10
2013-12-06 23:43:07 +00:00
)
2014-01-01 00:45:13 +00:00
var (
2017-05-03 19:02:01 +00:00
DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("0.0.0.0"), Port: DefaultRPCPort}
2014-01-01 00:45:13 +00:00
2017-05-03 19:02:01 +00:00
// ProtocolVersionMap is the mapping of Consul protocol versions
// to Serf protocol versions. We mask the Serf protocols using
// our own protocol version.
protocolVersionMap map[uint8]uint8
)
func init() {
protocolVersionMap = map[uint8]uint8{
1: 4,
2: 4,
3: 4,
}
}
// (Enterprise-only) NetworkSegment is the address and port configuration
// for a network segment.
type NetworkSegment struct {
Name string
Bind string
Port int
Advertise string
RPCAddr *net.TCPAddr
SerfConfig *serf.Config
}
2013-12-06 23:43:07 +00:00
// Config is used to configure the server
type Config struct {
// Bootstrap mode is used to bring up the first Consul server.
// It is required so that it can elect a leader without any
// other nodes being present
Bootstrap bool
// BootstrapExpect mode is used to automatically bring up a collection of
// Consul servers. This can be used to automatically bring up a collection
// of nodes.
BootstrapExpect int
// Datacenter is the datacenter this Consul server represents.
2013-12-06 23:43:07 +00:00
Datacenter string
// PrimaryDatacenter is the authoritative datacenter for features like ACLs
// and Connect.
PrimaryDatacenter string
// DataDir is the directory to store our state in.
2013-12-06 23:43:07 +00:00
DataDir string
// DefaultQueryTime is the amount of time a blocking query will wait before
// Consul will force a response. This value can be overridden by the 'wait'
// query parameter.
DefaultQueryTime time.Duration
// MaxQueryTime is the maximum amount of time a blocking query can wait
// before Consul will force a response. Consul applies jitter to the wait
// time. The jittered time will be capped to MaxQueryTime.
MaxQueryTime time.Duration
2015-11-29 04:40:05 +00:00
// DevMode is used to enable a development server mode.
DevMode bool
2017-01-18 06:20:11 +00:00
// NodeID is a unique identifier for this node across space and time.
NodeID types.NodeID
2013-12-06 23:43:07 +00:00
// Node name is the name we use to advertise. Defaults to hostname.
NodeName string
// RaftConfig is the configuration used for Raft in the local DC
RaftConfig *raft.Config
// (Enterprise-only) ReadReplica is used to prevent this server from being added
2017-03-21 23:36:44 +00:00
// as a voting member of the Raft cluster.
ReadReplica bool
2017-03-21 23:36:44 +00:00
2017-06-25 19:36:03 +00:00
// NotifyListen is called after the RPC listener has been configured.
// RPCAdvertise will be set to the listener address if it hasn't been
// configured at this point.
NotifyListen func()
2013-12-07 00:35:13 +00:00
// RPCAddr is the RPC address used by Consul. This should be reachable
// by the WAN and LAN
2014-01-01 00:45:13 +00:00
RPCAddr *net.TCPAddr
2013-12-07 00:35:13 +00:00
2013-12-31 22:00:25 +00:00
// RPCAdvertise is the address that is advertised to other nodes for
// the RPC endpoint. This can differ from the RPC address, if for example
// the RPCAddr is unspecified "0.0.0.0:8300", but this address must be
2017-06-25 19:36:03 +00:00
// reachable. If RPCAdvertise is nil then it will be set to the Listener
// address after the listening socket is configured.
2013-12-31 23:44:27 +00:00
RPCAdvertise *net.TCPAddr
2013-12-31 22:00:25 +00:00
// RPCSrcAddr is the source address for outgoing RPC connections.
RPCSrcAddr *net.TCPAddr
// (Enterprise-only) The network segment this agent is part of.
Segment string
// (Enterprise-only) Segments is a list of network segments for a server to
// bind on.
Segments []NetworkSegment
2013-12-07 01:18:09 +00:00
// SerfLANConfig is the configuration for the intra-dc serf
SerfLANConfig *serf.Config
2013-12-06 23:43:07 +00:00
2013-12-07 01:18:09 +00:00
// SerfWANConfig is the configuration for the cross-dc serf
SerfWANConfig *serf.Config
2013-12-06 23:43:07 +00:00
2017-03-15 19:26:54 +00:00
// SerfFloodInterval controls how often we attempt to flood local Serf
// Consul servers into the global areas (WAN and user-defined areas in
// Consul Enterprise).
SerfFloodInterval time.Duration
2014-01-09 23:44:25 +00:00
// ReconcileInterval controls how often we reconcile the strongly
// consistent store with the Serf info. This is used to handle nodes
// that are force removed, as well as intermittent unavailability during
// leader election.
ReconcileInterval time.Duration
// ProtocolVersion is the protocol version to speak. This must be between
// ProtocolVersionMin and ProtocolVersionMax.
ProtocolVersion uint8
TLSConfig tlsutil.Config
2014-04-04 23:22:02 +00:00
// RejoinAfterLeave controls our interaction with Serf.
// When set to false (default), a leave causes a Consul to not rejoin
// the cluster until an explicit join is received. If this is set to
// true, we ignore the leave, and rejoin the cluster on start.
RejoinAfterLeave bool
// AdvertiseReconnectTimeout is the duration after which this node should be
// assumed to not be returning and thus should be reaped within Serf. This
// can only be set for Client agents
AdvertiseReconnectTimeout time.Duration
2014-06-06 22:36:40 +00:00
// Build is a string that is gossiped around, and can be used to help
// operators track which versions are actively deployed
Build string
ACLResolverSettings ACLResolverSettings
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
// ACLEnabled is used to enable ACLs
ACLsEnabled bool
2014-08-05 22:36:08 +00:00
// ACLMasterToken is used to bootstrap the ACL system. It should be specified
// on the servers in the PrimaryDatacenter. When the leader comes online, it ensures
2014-08-05 22:36:08 +00:00
// that the Master token is available. This provides the initial token.
ACLMasterToken string
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
// ACLTokenReplication is used to enabled token replication.
//
// By default policy-only replication is enabled. When token
// replication is off and the primary datacenter is not
// yet upgraded to the new ACLs no replication will be performed
ACLTokenReplication bool
2014-08-05 22:20:35 +00:00
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
// ACLReplicationRate is the max number of replication rounds that can
// be run per second. Note that either 1 or 2 RPCs are used during each replication
// round
ACLReplicationRate int
2016-08-03 05:04:11 +00:00
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
// ACLReplicationBurst is how many replication RPCs can be bursted after a
// period of idleness
ACLReplicationBurst int
2016-08-03 05:04:11 +00:00
// ACLReplicationApplyLimit is the max number of replication-related
// apply operations that we allow during a one second period. This is
// used to limit the amount of Raft bandwidth used for replication.
ACLReplicationApplyLimit int
// ACLEnableKeyListPolicy is used to gate enforcement of the new "list" policy that
// protects listing keys by prefix. This behavior is opt-in
// by default in Consul 1.0 and later.
ACLEnableKeyListPolicy bool
AutoConfigEnabled bool
AutoConfigIntroToken string
AutoConfigIntroTokenFile string
AutoConfigServerAddresses []string
AutoConfigDNSSANs []string
AutoConfigIPSANs []net.IP
AutoConfigAuthzEnabled bool
AutoConfigAuthzAuthMethod structs.ACLAuthMethod
AutoConfigAuthzClaimAssertions []string
AutoConfigAuthzAllowReuse bool
// TombstoneTTL is used to control how long KV tombstones are retained.
2014-12-01 05:05:18 +00:00
// This provides a window of time where the X-Consul-Index is monotonic.
// Outside this window, the index may not be monotonic. This is a result
// of a few trade offs:
// 1) The index is defined by the data view and not globally. This is a
// performance optimization that prevents any write from incrementing the
// index for all data views.
// 2) Tombstones are not kept indefinitely, since otherwise storage required
// is also monotonic. This prevents deletes from reducing the disk space
// used.
// In theory, neither of these are intrinsic limitations, however for the
2015-09-15 12:22:08 +00:00
// purposes of building a practical system, they are reasonable trade offs.
2014-12-01 05:05:18 +00:00
//
// It is also possible to set this to an incredibly long time, thereby
// simulating infinite retention. This is not recommended however.
//
TombstoneTTL time.Duration
// TombstoneTTLGranularity is used to control how granular the timers are
// for the Tombstone GC. This is used to batch the GC of many keys together
// to reduce overhead. It is unlikely a user would ever need to tune this.
TombstoneTTLGranularity time.Duration
2014-12-01 05:05:18 +00:00
// Minimum Session TTL
SessionTTLMin time.Duration
// maxTokenExpirationDuration is the maximum difference allowed between
// ACLToken CreateTime and ExpirationTime values if ExpirationTime is set
// on a token.
ACLTokenMaxExpirationTTL time.Duration
// ACLTokenMinExpirationTTL is the minimum difference allowed between
// ACLToken CreateTime and ExpirationTime values if ExpirationTime is set
// on a token.
ACLTokenMinExpirationTTL time.Duration
// ServerUp callback can be used to trigger a notification that
// a Consul server is now up and known about.
ServerUp func()
// UserEventHandler callback can be used to handle incoming
// user events. This function should not block.
UserEventHandler func(serf.UserEvent)
2015-05-08 08:31:34 +00:00
// ConfigReplicationRate is the max number of replication rounds that can
// be run per second. Note that either 1 or 2 RPCs are used during each replication
// round
ConfigReplicationRate int
// ConfigReplicationBurst is how many replication rounds can be bursted after a
// period of idleness
ConfigReplicationBurst int
// ConfigReplicationApply limit is the max number of replication-related
// apply operations that we allow during a one second period. This is
// used to limit the amount of Raft bandwidth used for replication.
ConfigReplicationApplyLimit int
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// FederationStateReplicationRate is the max number of replication rounds that can
// be run per second. Note that either 1 or 2 RPCs are used during each replication
// round
FederationStateReplicationRate int
// FederationStateReplicationBurst is how many replication rounds can be bursted after a
// period of idleness
FederationStateReplicationBurst int
// FederationStateReplicationApply limit is the max number of replication-related
// apply operations that we allow during a one second period. This is
// used to limit the amount of Raft bandwidth used for replication.
FederationStateReplicationApplyLimit int
// CoordinateUpdatePeriod controls how long a server batches coordinate
// updates before applying them in a Raft transaction. A larger period
// leads to fewer Raft transactions, but also the stored coordinates
// being more stale.
2015-05-14 01:22:34 +00:00
CoordinateUpdatePeriod time.Duration
// CoordinateUpdateBatchSize controls the maximum number of updates a
// server batches before applying them in a Raft transaction.
CoordinateUpdateBatchSize int
// CoordinateUpdateMaxBatches controls the maximum number of batches we
// are willing to apply in one period. After this limit we will issue a
// warning and discard the remaining updates.
CoordinateUpdateMaxBatches int
// CheckOutputMaxSize control the max size of output of checks
CheckOutputMaxSize int
// RPCHandshakeTimeout limits how long we will wait for the initial magic byte
// on an RPC client connection. It also governs how long we will wait for a
// TLS handshake when TLS is configured however the timout applies separately
// for the initial magic byte and the TLS handshake and inner magic byte.
RPCHandshakeTimeout time.Duration
// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
// This is used to paper over a loss of leadership by instead holding RPCs,
// so that the caller experiences a slow response rather than an error.
// This period is meant to be long enough for a leader election to take
// place, and a small jitter is applied to avoid a thundering herd.
RPCHoldTimeout time.Duration
// RPCRateLimit and RPCMaxBurst control how frequently RPC calls are allowed
// to happen. In any large enough time interval, rate limiter limits the
// rate to RPCRateLimit tokens per second, with a maximum burst size of
// RPCMaxBurst events. As a special case, if RPCRateLimit == Inf (the infinite
// rate), RPCMaxBurst is ignored.
//
// See https://en.wikipedia.org/wiki/Token_bucket for more about token
// buckets.
RPCRateLimit rate.Limit
RPCMaxBurst int
// RPCMaxConnsPerClient is the limit of how many concurrent connections are
// allowed from a single source IP.
RPCMaxConnsPerClient int
// LeaveDrainTime is used to wait after a server has left the LAN Serf
// pool for RPCs to drain and new requests to be sent to other servers.
LeaveDrainTime time.Duration
// AutopilotConfig is used to apply the initial autopilot config when
// bootstrapping.
AutopilotConfig *structs.AutopilotConfig
// ServerHealthInterval is the frequency with which the health of the
// servers in the cluster will be updated.
ServerHealthInterval time.Duration
// AutopilotInterval is the frequency with which the leader will perform
// autopilot tasks, such as promoting eligible non-voters and removing
// dead servers.
AutopilotInterval time.Duration
2018-04-09 04:56:46 +00:00
// MetricsReportingInterval is the frequency with which the server will
// report usage metrics to the configured go-metrics Sinks.
MetricsReportingInterval time.Duration
// ConnectEnabled is whether to enable Connect features such as the CA.
ConnectEnabled bool
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// ConnectMeshGatewayWANFederationEnabled determines if wan federation of
// datacenters should exclusively traverse mesh gateways.
ConnectMeshGatewayWANFederationEnabled bool
// DisableFederationStateAntiEntropy solely exists for use in unit tests to
// disable a background routine.
DisableFederationStateAntiEntropy bool
connect: intentions are now managed as a new config entry kind "service-intentions" (#8834) - Upgrade the ConfigEntry.ListAll RPC to be kind-aware so that older copies of consul will not see new config entries it doesn't understand replicate down. - Add shim conversion code so that the old API/CLI method of interacting with intentions will continue to work so long as none of these are edited via config entry endpoints. Almost all of the read-only APIs will continue to function indefinitely. - Add new APIs that operate on individual intentions without IDs so that the UI doesn't need to implement CAS operations. - Add a new serf feature flag indicating support for intentions-as-config-entries. - The old line-item intentions way of interacting with the state store will transparently flip between the legacy memdb table and the config entry representations so that readers will never see a hiccup during migration where the results are incomplete. It uses a piece of system metadata to control the flip. - The primary datacenter will begin migrating intentions into config entries on startup once all servers in the datacenter are on a version of Consul with the intentions-as-config-entries feature flag. When it is complete the old state store representations will be cleared. We also record a piece of system metadata indicating this has occurred. We use this metadata to skip ALL of this code the next time the leader starts up. - The secondary datacenters continue to run the old intentions replicator until all servers in the secondary DC and primary DC support intentions-as-config-entries (via serf flag). Once this condition it met the old intentions replicator ceases. - The secondary datacenters replicate the new config entries as they are migrated in the primary. When they detect that the primary has zeroed it's old state store table it waits until all config entries up to that point are replicated and then zeroes its own copy of the old state store table. We also record a piece of system metadata indicating this has occurred. We use this metadata to skip ALL of this code the next time the leader starts up.
2020-10-06 18:24:05 +00:00
// OverrideInitialSerfTags solely exists for use in unit tests to ensure
// that a serf tag is initially set to a known value, rather than the
// default to test some consul upgrade scenarios with fewer races.
OverrideInitialSerfTags func(tags map[string]string)
2018-04-09 04:56:46 +00:00
// CAConfig is used to apply the initial Connect CA configuration when
// bootstrapping.
CAConfig *structs.CAConfiguration
// ConfigEntryBootstrap contains a list of ConfigEntries to ensure are created
// If entries of the same Kind/Name exist already these will not update them.
ConfigEntryBootstrap []structs.ConfigEntry
// AutoEncryptAllowTLS is whether to enable the server responding to
// AutoEncrypt.Sign requests.
AutoEncryptAllowTLS bool
2019-10-01 18:34:55 +00:00
RPCConfig RPCConfig
RaftBoltDBConfig RaftBoltDBConfig
2019-10-01 18:34:55 +00:00
// Embedded Consul Enterprise specific configuration
*EnterpriseConfig
2013-12-06 23:43:07 +00:00
}
2017-05-03 19:02:01 +00:00
// CheckProtocolVersion validates the protocol version.
func (c *Config) CheckProtocolVersion() error {
if c.ProtocolVersion < ProtocolVersionMin {
2017-05-03 19:02:01 +00:00
return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]", c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
}
if c.ProtocolVersion > ProtocolVersionMax {
return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]", c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax)
}
return nil
}
2017-05-03 19:02:01 +00:00
// CheckACL validates the ACL configuration.
// TODO: move this to ACLResolverSettings
2014-08-05 22:20:35 +00:00
func (c *Config) CheckACL() error {
switch c.ACLResolverSettings.ACLDefaultPolicy {
2014-08-05 22:20:35 +00:00
case "allow":
case "deny":
default:
return fmt.Errorf("Unsupported default ACL policy: %s", c.ACLResolverSettings.ACLDefaultPolicy)
2014-08-05 22:20:35 +00:00
}
switch c.ACLResolverSettings.ACLDownPolicy {
2014-08-05 22:20:35 +00:00
case "allow":
case "deny":
case "async-cache", "extend-cache":
2014-08-05 22:20:35 +00:00
default:
return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLResolverSettings.ACLDownPolicy)
2014-08-05 22:20:35 +00:00
}
return nil
}
// DefaultConfig returns a default configuration.
2013-12-06 23:43:07 +00:00
func DefaultConfig() *Config {
hostname, err := os.Hostname()
if err != nil {
panic(err)
}
conf := &Config{
Build: version.Version,
Datacenter: DefaultDC,
NodeName: hostname,
RPCAddr: DefaultRPCAddr,
RaftConfig: raft.DefaultConfig(),
SerfLANConfig: libserf.DefaultConfig(),
SerfWANConfig: libserf.DefaultConfig(),
SerfFloodInterval: 60 * time.Second,
ReconcileInterval: 60 * time.Second,
ProtocolVersion: ProtocolVersion2Compatible,
ACLResolverSettings: ACLResolverSettings{
ACLsEnabled: false,
Datacenter: DefaultDC,
NodeName: hostname,
ACLPolicyTTL: 30 * time.Second,
ACLTokenTTL: 30 * time.Second,
ACLRoleTTL: 30 * time.Second,
ACLDownPolicy: "extend-cache",
ACLDefaultPolicy: "allow",
},
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
ACLReplicationRate: 1,
ACLReplicationBurst: 5,
ACLReplicationApplyLimit: 100, // ops / sec
ConfigReplicationRate: 1,
ConfigReplicationBurst: 5,
ConfigReplicationApplyLimit: 100, // ops / sec
FederationStateReplicationRate: 1,
FederationStateReplicationBurst: 5,
FederationStateReplicationApplyLimit: 100, // ops / sec
TombstoneTTL: 15 * time.Minute,
TombstoneTTLGranularity: 30 * time.Second,
SessionTTLMin: 10 * time.Second,
ACLTokenMinExpirationTTL: 1 * time.Minute,
ACLTokenMaxExpirationTTL: 24 * time.Hour,
// These are tuned to provide a total throughput of 128 updates
// per second. If you update these, you should update the client-
// side SyncCoordinateRateTarget parameter accordingly.
CoordinateUpdatePeriod: 5 * time.Second,
CoordinateUpdateBatchSize: 128,
CoordinateUpdateMaxBatches: 5,
CheckOutputMaxSize: checks.DefaultBufSize,
RPCRateLimit: rate.Inf,
RPCMaxBurst: 1000,
// TODO (slackpad) - Until #3744 is done, we need to keep these
2017-12-13 18:52:06 +00:00
// in sync with agent/config/default.go.
AutopilotConfig: &structs.AutopilotConfig{
CleanupDeadServers: true,
LastContactThreshold: 200 * time.Millisecond,
MaxTrailingLogs: 250,
ServerStabilizationTime: 10 * time.Second,
},
2017-12-13 18:52:06 +00:00
CAConfig: &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"LeafCertTTL": structs.DefaultLeafCertTTL,
"IntermediateCertTTL": structs.DefaultIntermediateCertTTL,
"RootCertTTL": structs.DefaultRootCertTTL,
},
},
// Stay under the 10 second aggregation interval of
// go-metrics. This ensures we always report the
// usage metrics in each cycle.
MetricsReportingInterval: 9 * time.Second,
ServerHealthInterval: 2 * time.Second,
AutopilotInterval: 10 * time.Second,
DefaultQueryTime: 300 * time.Second,
MaxQueryTime: 600 * time.Second,
EnterpriseConfig: DefaultEnterpriseConfig(),
2013-12-06 23:43:07 +00:00
}
// Increase our reap interval to 3 days instead of 24h.
conf.SerfLANConfig.ReconnectTimeout = 3 * 24 * time.Hour
conf.SerfWANConfig.ReconnectTimeout = 3 * 24 * time.Hour
2013-12-07 01:18:09 +00:00
// WAN Serf should use the WAN timing, since we are using it
2013-12-06 23:43:07 +00:00
// to communicate between DC's
2013-12-07 01:18:09 +00:00
conf.SerfWANConfig.MemberlistConfig = memberlist.DefaultWANConfig()
2013-12-06 23:43:07 +00:00
// Ensure we don't have port conflicts
2013-12-27 20:51:15 +00:00
conf.SerfLANConfig.MemberlistConfig.BindPort = DefaultLANSerfPort
conf.SerfWANConfig.MemberlistConfig.BindPort = DefaultWANSerfPort
2013-12-06 23:43:07 +00:00
2019-05-15 18:59:33 +00:00
// Allow dead nodes to be replaced after 30 seconds.
conf.SerfLANConfig.MemberlistConfig.DeadNodeReclaimTime = 30 * time.Second
conf.SerfWANConfig.MemberlistConfig.DeadNodeReclaimTime = 30 * time.Second
// Raft protocol version 3 only works with other Consul servers running
// 0.8.0 or later.
conf.RaftConfig.ProtocolVersion = 3
// Disable shutdown on removal
conf.RaftConfig.ShutdownOnRemove = false
// Check every 5 seconds to see if there are enough new entries for a snapshot, can be overridden
conf.RaftConfig.SnapshotInterval = 30 * time.Second
// Snapshots are created every 16384 entries by default, can be overridden
conf.RaftConfig.SnapshotThreshold = 16384
2013-12-06 23:43:07 +00:00
return conf
}
// CloneSerfLANConfig clones an existing serf.Config used on the LAN by
// reconstructing it from defaults and re-applying changes made in the agent
// configs.
//
// This function is tricky to keep from rotting so we enforce that it MUST work
// by cloning our own serf LAN configuration on startup and only using the
// cloned one so any configs we need to change have to be changed here for them
// to work at all.
func CloneSerfLANConfig(base *serf.Config) *serf.Config {
cfg := DefaultConfig().SerfLANConfig
// from consul.DefaultConfig()
cfg.ReconnectTimeout = base.ReconnectTimeout
cfg.MemberlistConfig.BindPort = base.MemberlistConfig.BindPort
cfg.MemberlistConfig.DeadNodeReclaimTime = base.MemberlistConfig.DeadNodeReclaimTime
// from agent.newConsulConfig()
cfg.MemberlistConfig.BindAddr = base.MemberlistConfig.BindAddr
cfg.MemberlistConfig.BindPort = base.MemberlistConfig.BindPort
cfg.MemberlistConfig.CIDRsAllowed = base.MemberlistConfig.CIDRsAllowed
cfg.MemberlistConfig.AdvertiseAddr = base.MemberlistConfig.AdvertiseAddr
cfg.MemberlistConfig.AdvertisePort = base.MemberlistConfig.AdvertisePort
cfg.MemberlistConfig.GossipVerifyIncoming = base.MemberlistConfig.GossipVerifyIncoming
cfg.MemberlistConfig.GossipVerifyOutgoing = base.MemberlistConfig.GossipVerifyOutgoing
cfg.MemberlistConfig.GossipInterval = base.MemberlistConfig.GossipInterval
cfg.MemberlistConfig.GossipNodes = base.MemberlistConfig.GossipNodes
cfg.MemberlistConfig.ProbeInterval = base.MemberlistConfig.ProbeInterval
cfg.MemberlistConfig.ProbeTimeout = base.MemberlistConfig.ProbeTimeout
cfg.MemberlistConfig.SuspicionMult = base.MemberlistConfig.SuspicionMult
cfg.MemberlistConfig.RetransmitMult = base.MemberlistConfig.RetransmitMult
// agent/keyring.go
cfg.MemberlistConfig.Keyring = base.MemberlistConfig.Keyring
// tests
cfg.KeyringFile = base.KeyringFile
cfg.ReapInterval = base.ReapInterval
cfg.TombstoneTimeout = base.TombstoneTimeout
cfg.MemberlistConfig.SecretKey = base.MemberlistConfig.SecretKey
return cfg
}
// RPCConfig settings for the RPC server
//
// TODO: move many settings to this struct.
type RPCConfig struct {
EnableStreaming bool
}
// ReloadableConfig is the configuration that is passed to ReloadConfig when
// application config is reloaded.
type ReloadableConfig struct {
RPCRateLimit rate.Limit
RPCMaxBurst int
RPCMaxConnsPerClient int
ConfigEntryBootstrap []structs.ConfigEntry
RaftSnapshotThreshold int
RaftSnapshotInterval time.Duration
RaftTrailingLogs int
}
type RaftBoltDBConfig struct {
NoFreelistSync bool
}