Adds new config to make script checks opt-in, updates documentation. (#3284)

This commit is contained in:
James Phillips 2017-07-17 11:20:35 -07:00 committed by GitHub
parent f7629a4f66
commit 788dd255a1
12 changed files with 125 additions and 26 deletions

View File

@ -1595,8 +1595,15 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType,
if check.CheckID == "" { if check.CheckID == "" {
return fmt.Errorf("CheckID missing") return fmt.Errorf("CheckID missing")
} }
if chkType != nil && !chkType.Valid() {
return fmt.Errorf("Check type is not valid") if chkType != nil {
if !chkType.Valid() {
return fmt.Errorf("Check type is not valid")
}
if chkType.IsScript() && !a.config.EnableScriptChecks {
return fmt.Errorf("Check types that exec scripts are disabled on this agent")
}
} }
if check.ServiceID != "" { if check.ServiceID != "" {

View File

@ -628,7 +628,9 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) {
func TestAgent_AddCheck(t *testing.T) { func TestAgent_AddCheck(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), nil) cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown() defer a.Shutdown()
health := &structs.HealthCheck{ health := &structs.HealthCheck{
@ -665,7 +667,9 @@ func TestAgent_AddCheck(t *testing.T) {
func TestAgent_AddCheck_StartPassing(t *testing.T) { func TestAgent_AddCheck_StartPassing(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), nil) cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown() defer a.Shutdown()
health := &structs.HealthCheck{ health := &structs.HealthCheck{
@ -702,7 +706,9 @@ func TestAgent_AddCheck_StartPassing(t *testing.T) {
func TestAgent_AddCheck_MinInterval(t *testing.T) { func TestAgent_AddCheck_MinInterval(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), nil) cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown() defer a.Shutdown()
health := &structs.HealthCheck{ health := &structs.HealthCheck{
@ -735,7 +741,9 @@ func TestAgent_AddCheck_MinInterval(t *testing.T) {
func TestAgent_AddCheck_MissingService(t *testing.T) { func TestAgent_AddCheck_MissingService(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), nil) cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown() defer a.Shutdown()
health := &structs.HealthCheck{ health := &structs.HealthCheck{
@ -797,9 +805,38 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) {
} }
} }
// TestAgent_AddCheck_ExecDisable registers a script check against an agent
// created with a nil config and expects AddCheck to refuse it with the
// "exec scripts are disabled" error, leaving no "mem" entry in the agent's
// check state.
func TestAgent_AddCheck_ExecDisable(t *testing.T) {
	t.Parallel()
	a := NewTestAgent(t.Name(), nil)
	defer a.Shutdown()

	// A check type that carries a script to execute.
	scriptType := &structs.CheckType{
		Script:   "exit 0",
		Interval: 15 * time.Second,
	}
	memCheck := &structs.HealthCheck{
		Node:    "foo",
		CheckID: "mem",
		Name:    "memory util",
		Status:  api.HealthCritical,
	}

	// Registration must fail, and with the specific "disabled" message.
	err := a.AddCheck(memCheck, scriptType, false, "")
	switch {
	case err == nil:
		t.Fatalf("err: %v", err)
	case !strings.Contains(err.Error(), "exec scripts are disabled on this agent"):
		t.Fatalf("err: %v", err)
	}

	// Ensure the rejected check was not recorded in the agent's state.
	registered := a.state.Checks()["mem"]
	if registered != nil {
		t.Fatalf("should be missing mem check")
	}
}
func TestAgent_RemoveCheck(t *testing.T) { func TestAgent_RemoveCheck(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t.Name(), nil) cfg := TestConfig()
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg)
defer a.Shutdown() defer a.Shutdown()
// Remove check that doesn't exist // Remove check that doesn't exist
@ -1097,6 +1134,7 @@ func TestAgent_PersistCheck(t *testing.T) {
cfg := TestConfig() cfg := TestConfig()
cfg.Server = false cfg.Server = false
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg) a := NewTestAgent(t.Name(), cfg)
defer os.RemoveAll(cfg.DataDir) defer os.RemoveAll(cfg.DataDir)
defer a.Shutdown() defer a.Shutdown()
@ -1230,6 +1268,7 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) {
cfg := TestConfig() cfg := TestConfig()
cfg.Server = false cfg.Server = false
cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir
cfg.EnableScriptChecks = true
a := NewTestAgent(t.Name(), cfg) a := NewTestAgent(t.Name(), cfg)
defer os.RemoveAll(cfg.DataDir) defer os.RemoveAll(cfg.DataDir)
defer a.Shutdown() defer a.Shutdown()

View File

@ -625,6 +625,11 @@ type Config struct {
// true, we ignore the leave, and rejoin the cluster on start. // true, we ignore the leave, and rejoin the cluster on start.
RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"` RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"`
// EnableScriptChecks controls whether health checks which execute
// scripts are enabled. This includes regular script checks and Docker
// checks.
EnableScriptChecks bool `mapstructure:"enable_script_checks"`
// CheckUpdateInterval controls the interval on which the output of a health check // CheckUpdateInterval controls the interval on which the output of a health check
// is updated if there is no change to the state. For example, a check in a steady // is updated if there is no change to the state. For example, a check in a steady
// state may run every 5 second generating a unique output (timestamp, etc), forcing // state may run every 5 second generating a unique output (timestamp, etc), forcing
@ -1932,6 +1937,9 @@ func MergeConfig(a, b *Config) *Config {
if b.DNSConfig.RecursorTimeout != 0 { if b.DNSConfig.RecursorTimeout != 0 {
result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout
} }
if b.EnableScriptChecks {
result.EnableScriptChecks = true
}
if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 { if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 {
result.CheckUpdateInterval = b.CheckUpdateInterval result.CheckUpdateInterval = b.CheckUpdateInterval
} }

View File

@ -322,6 +322,10 @@ func TestDecodeConfig(t *testing.T) {
in: `{"disable_keyring_file":true}`, in: `{"disable_keyring_file":true}`,
c: &Config{DisableKeyringFile: true}, c: &Config{DisableKeyringFile: true},
}, },
{
in: `{"enable_script_checks":true}`,
c: &Config{EnableScriptChecks: true},
},
{ {
in: `{"encrypt_verify_incoming":true}`, in: `{"encrypt_verify_incoming":true}`,
c: &Config{EncryptVerifyIncoming: Bool(true)}, c: &Config{EncryptVerifyIncoming: Bool(true)},
@ -1363,6 +1367,7 @@ func TestMergeConfig(t *testing.T) {
ReconnectTimeoutLan: 24 * time.Hour, ReconnectTimeoutLan: 24 * time.Hour,
ReconnectTimeoutWanRaw: "36h", ReconnectTimeoutWanRaw: "36h",
ReconnectTimeoutWan: 36 * time.Hour, ReconnectTimeoutWan: 36 * time.Hour,
EnableScriptChecks: true,
CheckUpdateInterval: 8 * time.Minute, CheckUpdateInterval: 8 * time.Minute,
CheckUpdateIntervalRaw: "8m", CheckUpdateIntervalRaw: "8m",
ACLToken: "1111", ACLToken: "1111",

View File

@ -47,6 +47,11 @@ func (c *CheckType) Valid() bool {
return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker() return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker()
} }
// IsScript reports whether this check type carries a script to exec, which
// is the case exactly when its Script field is non-empty.
func (c *CheckType) IsScript() bool {
	return len(c.Script) > 0
}
// IsTTL checks if this is a TTL type // IsTTL checks if this is a TTL type
func (c *CheckType) IsTTL() bool { func (c *CheckType) IsTTL() bool {
return c.TTL != 0 return c.TTL != 0

View File

@ -529,7 +529,9 @@ func TestAPI_AgentChecks_serviceBound(t *testing.T) {
func TestAPI_AgentChecks_Docker(t *testing.T) { func TestAPI_AgentChecks_Docker(t *testing.T) {
t.Parallel() t.Parallel()
c, s := makeClient(t) c, s := makeClientWithConfig(t, nil, func(c *testutil.TestServerConfig) {
c.EnableScriptChecks = true
})
defer s.Stop() defer s.Stop()
agent := c.Agent() agent := c.Agent()

View File

@ -80,6 +80,7 @@ func (cmd *AgentCommand) readConfig() *agent.Config {
"A unique ID for this node across space and time. Defaults to a randomly-generated ID"+ "A unique ID for this node across space and time. Defaults to a randomly-generated ID"+
" that persists in the data-dir.") " that persists in the data-dir.")
f.BoolVar(&cmdCfg.EnableScriptChecks, "enable-script-checks", false, "Enables health check scripts.")
var disableHostNodeID configutil.BoolValue var disableHostNodeID configutil.BoolValue
f.Var(&disableHostNodeID, "disable-host-node-id", f.Var(&disableHostNodeID, "disable-host-node-id",
"Setting this to true will prevent Consul from using information from the"+ "Setting this to true will prevent Consul from using information from the"+

View File

@ -86,6 +86,7 @@ type TestServerConfig struct {
VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"` VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"`
VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"` VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"`
VerifyOutgoing bool `json:"verify_outgoing,omitempty"` VerifyOutgoing bool `json:"verify_outgoing,omitempty"`
EnableScriptChecks bool `json:"enable_script_checks,omitempty"`
ReadyTimeout time.Duration `json:"-"` ReadyTimeout time.Duration `json:"-"`
Stdout, Stderr io.Writer `json:"-"` Stdout, Stderr io.Writer `json:"-"`
Args []string `json:"-"` Args []string `json:"-"`

View File

@ -21,10 +21,12 @@ There are five different kinds of checks:
that performs the health check, exits with an appropriate exit code, and potentially that performs the health check, exits with an appropriate exit code, and potentially
generates some output. A script is paired with an invocation interval (e.g. generates some output. A script is paired with an invocation interval (e.g.
every 30 seconds). This is similar to the Nagios plugin system. The output of every 30 seconds). This is similar to the Nagios plugin system. The output of
a script check is limited to 4K. Output larger than this will be truncated. a script check is limited to 4KB. Output larger than this will be truncated.
By default, Script checks will be configured with a timeout equal to 30 seconds. By default, Script checks will be configured with a timeout equal to 30 seconds.
It is possible to configure a custom Script check timeout value by specifying the It is possible to configure a custom Script check timeout value by specifying the
`timeout` field in the check definition. `timeout` field in the check definition. In Consul 0.9.0 and later, the agent
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
set to `true` in order to enable script checks.
* HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g. * HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g.
every 30 seconds) to the specified URL. The status of the service depends on every 30 seconds) to the specified URL. The status of the service depends on
@ -38,7 +40,7 @@ There are five different kinds of checks:
configured with a request timeout equal to the check interval, with a max of configured with a request timeout equal to the check interval, with a max of
10 seconds. It is possible to configure a custom HTTP check timeout value by 10 seconds. It is possible to configure a custom HTTP check timeout value by
specifying the `timeout` field in the check definition. The output of the specifying the `timeout` field in the check definition. The output of the
check is limited to roughly 4K. Responses larger than this will be truncated. check is limited to roughly 4KB. Responses larger than this will be truncated.
HTTP checks also support SSL. By default, a valid SSL certificate is expected. HTTP checks also support SSL. By default, a valid SSL certificate is expected.
Certificate verification can be turned off by setting the `tls_skip_verify` Certificate verification can be turned off by setting the `tls_skip_verify`
field to `true` in the check definition. field to `true` in the check definition.
@ -74,15 +76,17 @@ There are five different kinds of checks:
valid through the end of the TTL from the time of the last check. valid through the end of the TTL from the time of the last check.
* Docker + Interval - These checks depend on invoking an external application which * Docker + Interval - These checks depend on invoking an external application which
is packaged within a Docker Container. The application is triggered within the running is packaged within a Docker Container. The application is triggered within the running
container via the Docker Exec API. We expect that the Consul agent user has access container via the Docker Exec API. We expect that the Consul agent user has access
to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to
determine the Docker API endpoint. The application is expected to run, perform a health determine the Docker API endpoint. The application is expected to run, perform a health
check of the service running inside the container, and exit with an appropriate exit code. check of the service running inside the container, and exit with an appropriate exit code.
The check should be paired with an invocation interval. The shell on which the check The check should be paired with an invocation interval. The shell on which the check
has to be performed is configurable which makes it possible to run containers which has to be performed is configurable which makes it possible to run containers which
have different shells on the same host. Check output for Docker is limited to have different shells on the same host. Check output for Docker is limited to
4K. Any output larger than this will be truncated. 4KB. Any output larger than this will be truncated. In Consul 0.9.0 and later, the agent
must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks)
set to `true` in order to enable Docker health checks.
## Check Definition ## Check Definition
@ -210,6 +214,10 @@ This is the only convention that Consul depends on. Any output of the script
will be captured and stored in the `notes` field so that it can be viewed will be captured and stored in the `notes` field so that it can be viewed
by human operators. by human operators.
In Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true`
in order to enable script checks.
## Initial Health Check Status ## Initial Health Check Status
By default, when checks are registered against a Consul agent, the state is set By default, when checks are registered against a Consul agent, the state is set

View File

@ -147,6 +147,10 @@ will exit with an error at startup.
[Nomad](https://www.nomadproject.io/), so if you opt-in to host-based IDs then Consul and Nomad will use [Nomad](https://www.nomadproject.io/), so if you opt-in to host-based IDs then Consul and Nomad will use
information on the host to automatically assign the same ID in both systems. information on the host to automatically assign the same ID in both systems.
* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
`-encrypt` key will be available on startup. This defaults to false.
* <a name="_dns_port"></a><a href="#_dns_port">`-dns-port`</a> - the DNS port to listen on. * <a name="_dns_port"></a><a href="#_dns_port">`-dns-port`</a> - the DNS port to listen on.
This overrides the default port 8600. This is available in Consul 0.7 and later. This overrides the default port 8600. This is available in Consul 0.7 and later.
@ -154,6 +158,12 @@ will exit with an error at startup.
in the "consul." domain. This flag can be used to change that domain. All queries in this domain in the "consul." domain. This flag can be used to change that domain. All queries in this domain
are assumed to be handled by Consul and will not be recursively resolved. are assumed to be handled by Consul and will not be recursively resolved.
* <a name="_enable_script_checks"></a><a href="#_enable_script_checks">`-enable-script-checks`</a> - This
controls whether [health checks that execute scripts](/docs/agent/checks.html) are enabled on
this agent, and defaults to `false` so operators must opt in to allowing these. If enabled,
it is recommended to [enable ACLs](/docs/guides/acl.html) as well to control which users are
allowed to register new checks to execute scripts. This was added in Consul 0.9.0.
* <a name="_encrypt"></a><a href="#_encrypt">`-encrypt`</a> - Specifies the secret key to * <a name="_encrypt"></a><a href="#_encrypt">`-encrypt`</a> - Specifies the secret key to
use for encryption of Consul use for encryption of Consul
network traffic. This key must be 16-bytes that are Base64-encoded. The network traffic. This key must be 16-bytes that are Base64-encoded. The
@ -167,10 +177,6 @@ will exit with an error at startup.
initialized with an encryption key, then the provided key is ignored and initialized with an encryption key, then the provided key is ignored and
a warning will be displayed. a warning will be displayed.
* <a name="_disable_keyring_file"></a><a href="#_disable_keyring_file">`-disable-keyring-file`</a> - If set,
the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given
`-encrypt` key will be available on startup. This defaults to false.
* <a name="_http_port"></a><a href="#_http_port">`-http-port`</a> - the HTTP API port to listen on. * <a name="_http_port"></a><a href="#_http_port">`-http-port`</a> - the HTTP API port to listen on.
This overrides the default port 8500. This option is very useful when deploying Consul This overrides the default port 8500. This option is very useful when deploying Consul
to an environment which communicates the HTTP port through the environment e.g. PaaS like CloudFoundry, allowing to an environment which communicates the HTTP port through the environment e.g. PaaS like CloudFoundry, allowing
@ -712,6 +718,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* <a name="enable_debug"></a><a href="#enable_debug">`enable_debug`</a> When set, enables some * <a name="enable_debug"></a><a href="#enable_debug">`enable_debug`</a> When set, enables some
additional debugging features. Currently, this is only used to set the runtime profiling HTTP endpoints. additional debugging features. Currently, this is only used to set the runtime profiling HTTP endpoints.
* <a name="enable_script_checks"></a><a href="#enable_script_checks">`enable_script_checks`</a> Equivalent to the
[`-enable-script-checks` command-line flag](#_enable_script_checks).
* <a name="enable_syslog"></a><a href="#enable_syslog">`enable_syslog`</a> Equivalent to * <a name="enable_syslog"></a><a href="#enable_syslog">`enable_syslog`</a> Equivalent to
the [`-syslog` command-line flag](#_syslog). the [`-syslog` command-line flag](#_syslog).

View File

@ -684,6 +684,10 @@ to use for registration events:
[checks](/docs/agent/checks.html). Tokens may also be passed to the [checks](/docs/agent/checks.html). Tokens may also be passed to the
[HTTP API](/api/index.html) for operations that require them. [HTTP API](/api/index.html) for operations that require them.
In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
script checks.
#### Operator Rules #### Operator Rules
The `operator` policy controls access to cluster-level operations in the The `operator` policy controls access to cluster-level operations in the
@ -866,6 +870,10 @@ to use for registration events:
[checks](/docs/agent/checks.html). Tokens may also be passed to the [checks](/docs/agent/checks.html). Tokens may also be passed to the
[HTTP API](/api/index.html) for operations that require them. [HTTP API](/api/index.html) for operations that require them.
In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with
[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable
script checks.
#### Session Rules #### Session Rules
The `session` policy controls access to [Session API](/api/session.html) operations. The `session` policy controls access to [Session API](/api/session.html) operations.

View File

@ -72,6 +72,12 @@ the replicated log until the expected number of servers has successfully joined.
You can read more about this in the [bootstrapping You can read more about this in the [bootstrapping
guide](/docs/guides/bootstrapping.html). guide](/docs/guides/bootstrapping.html).
We've included the [`-enable-script-checks`](/docs/agent/options.html#_enable_script_checks)
flag set to `true` in order to enable health checks that can execute external scripts.
This will be used in examples later. For production use, you'd want to configure
[ACLs](/docs/guides/acl.html) in conjunction with this to control the ability to
register arbitrary scripts.
Finally, we add the [`config-dir` flag](/docs/agent/options.html#_config_dir), Finally, we add the [`config-dir` flag](/docs/agent/options.html#_config_dir),
marking where service and check definitions can be found. marking where service and check definitions can be found.
@ -81,7 +87,7 @@ All together, these settings yield a
```text ```text
vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \ vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \
-data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \ -data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \
-config-dir=/etc/consul.d -enable-script-checks=true -config-dir=/etc/consul.d
... ...
``` ```
@ -102,7 +108,7 @@ All together, these settings yield a
```text ```text
vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \ vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \
-bind=172.20.20.11 -config-dir=/etc/consul.d -bind=172.20.20.11 -enable-script-checks=true -config-dir=/etc/consul.d
... ...
``` ```