diff --git a/agent/agent.go b/agent/agent.go index e81b91fc3..4b3dc8f1f 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1595,8 +1595,15 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *structs.CheckType, if check.CheckID == "" { return fmt.Errorf("CheckID missing") } - if chkType != nil && !chkType.Valid() { - return fmt.Errorf("Check type is not valid") + + if chkType != nil { + if !chkType.Valid() { + return fmt.Errorf("Check type is not valid") + } + + if chkType.IsScript() && !a.config.EnableScriptChecks { + return fmt.Errorf("Check types that exec scripts are disabled on this agent") + } } if check.ServiceID != "" { diff --git a/agent/agent_test.go b/agent/agent_test.go index 97f772cf4..c19ab9832 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -628,7 +628,9 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) { func TestAgent_AddCheck(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), nil) + cfg := TestConfig() + cfg.EnableScriptChecks = true + a := NewTestAgent(t.Name(), cfg) defer a.Shutdown() health := &structs.HealthCheck{ @@ -665,7 +667,9 @@ func TestAgent_AddCheck(t *testing.T) { func TestAgent_AddCheck_StartPassing(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), nil) + cfg := TestConfig() + cfg.EnableScriptChecks = true + a := NewTestAgent(t.Name(), cfg) defer a.Shutdown() health := &structs.HealthCheck{ @@ -702,7 +706,9 @@ func TestAgent_AddCheck_StartPassing(t *testing.T) { func TestAgent_AddCheck_MinInterval(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), nil) + cfg := TestConfig() + cfg.EnableScriptChecks = true + a := NewTestAgent(t.Name(), cfg) defer a.Shutdown() health := &structs.HealthCheck{ @@ -735,7 +741,9 @@ func TestAgent_AddCheck_MinInterval(t *testing.T) { func TestAgent_AddCheck_MissingService(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), nil) + cfg := TestConfig() + cfg.EnableScriptChecks = true + a := NewTestAgent(t.Name(), cfg) defer a.Shutdown() 
health := &structs.HealthCheck{ @@ -797,9 +805,38 @@ func TestAgent_AddCheck_RestoreState(t *testing.T) { } } +func TestAgent_AddCheck_ExecDisable(t *testing.T) { + t.Parallel() + + a := NewTestAgent(t.Name(), nil) + defer a.Shutdown() + + health := &structs.HealthCheck{ + Node: "foo", + CheckID: "mem", + Name: "memory util", + Status: api.HealthCritical, + } + chk := &structs.CheckType{ + Script: "exit 0", + Interval: 15 * time.Second, + } + err := a.AddCheck(health, chk, false, "") + if err == nil || !strings.Contains(err.Error(), "exec scripts are disabled on this agent") { + t.Fatalf("err: %v", err) + } + + // Ensure we don't have a check mapping + if memChk := a.state.Checks()["mem"]; memChk != nil { + t.Fatalf("should be missing mem check") + } +} + func TestAgent_RemoveCheck(t *testing.T) { t.Parallel() - a := NewTestAgent(t.Name(), nil) + cfg := TestConfig() + cfg.EnableScriptChecks = true + a := NewTestAgent(t.Name(), cfg) defer a.Shutdown() // Remove check that doesn't exist @@ -1097,6 +1134,7 @@ func TestAgent_PersistCheck(t *testing.T) { cfg := TestConfig() cfg.Server = false cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir + cfg.EnableScriptChecks = true a := NewTestAgent(t.Name(), cfg) defer os.RemoveAll(cfg.DataDir) defer a.Shutdown() @@ -1230,6 +1268,7 @@ func TestAgent_PurgeCheckOnDuplicate(t *testing.T) { cfg := TestConfig() cfg.Server = false cfg.DataDir = testutil.TempDir(t, "agent") // we manage the data dir + cfg.EnableScriptChecks = true a := NewTestAgent(t.Name(), cfg) defer os.RemoveAll(cfg.DataDir) defer a.Shutdown() diff --git a/agent/config.go b/agent/config.go index 34506c671..33b537abc 100644 --- a/agent/config.go +++ b/agent/config.go @@ -625,6 +625,11 @@ type Config struct { // true, we ignore the leave, and rejoin the cluster on start. RejoinAfterLeave bool `mapstructure:"rejoin_after_leave"` + // EnableScriptChecks controls whether health checks which execute + // scripts are enabled. 
This includes regular script checks and Docker + // checks. + EnableScriptChecks bool `mapstructure:"enable_script_checks"` + // CheckUpdateInterval controls the interval on which the output of a health check // is updated if there is no change to the state. For example, a check in a steady // state may run every 5 second generating a unique output (timestamp, etc), forcing @@ -1932,6 +1937,9 @@ func MergeConfig(a, b *Config) *Config { if b.DNSConfig.RecursorTimeout != 0 { result.DNSConfig.RecursorTimeout = b.DNSConfig.RecursorTimeout } + if b.EnableScriptChecks { + result.EnableScriptChecks = true + } if b.CheckUpdateIntervalRaw != "" || b.CheckUpdateInterval != 0 { result.CheckUpdateInterval = b.CheckUpdateInterval } diff --git a/agent/config_test.go b/agent/config_test.go index e4f0b16aa..368111c1b 100644 --- a/agent/config_test.go +++ b/agent/config_test.go @@ -322,6 +322,10 @@ func TestDecodeConfig(t *testing.T) { in: `{"disable_keyring_file":true}`, c: &Config{DisableKeyringFile: true}, }, + { + in: `{"enable_script_checks":true}`, + c: &Config{EnableScriptChecks: true}, + }, { in: `{"encrypt_verify_incoming":true}`, c: &Config{EncryptVerifyIncoming: Bool(true)}, @@ -1363,6 +1367,7 @@ func TestMergeConfig(t *testing.T) { ReconnectTimeoutLan: 24 * time.Hour, ReconnectTimeoutWanRaw: "36h", ReconnectTimeoutWan: 36 * time.Hour, + EnableScriptChecks: true, CheckUpdateInterval: 8 * time.Minute, CheckUpdateIntervalRaw: "8m", ACLToken: "1111", diff --git a/agent/consul/structs/check_type.go b/agent/consul/structs/check_type.go index 07e7007b6..5fbfb3da3 100644 --- a/agent/consul/structs/check_type.go +++ b/agent/consul/structs/check_type.go @@ -47,6 +47,11 @@ func (c *CheckType) Valid() bool { return c.IsTTL() || c.IsMonitor() || c.IsHTTP() || c.IsTCP() || c.IsDocker() } +// IsScript checks if this is a check that execs some kind of script. 
+func (c *CheckType) IsScript() bool { + return c.Script != "" +} + // IsTTL checks if this is a TTL type func (c *CheckType) IsTTL() bool { return c.TTL != 0 diff --git a/api/agent_test.go b/api/agent_test.go index 01ba9f88d..d49630d66 100644 --- a/api/agent_test.go +++ b/api/agent_test.go @@ -529,7 +529,9 @@ func TestAPI_AgentChecks_serviceBound(t *testing.T) { func TestAPI_AgentChecks_Docker(t *testing.T) { t.Parallel() - c, s := makeClient(t) + c, s := makeClientWithConfig(t, nil, func(c *testutil.TestServerConfig) { + c.EnableScriptChecks = true + }) defer s.Stop() agent := c.Agent() diff --git a/command/agent.go b/command/agent.go index 3a0aeb4d2..0df8848b1 100644 --- a/command/agent.go +++ b/command/agent.go @@ -80,6 +80,7 @@ func (cmd *AgentCommand) readConfig() *agent.Config { "A unique ID for this node across space and time. Defaults to a randomly-generated ID"+ " that persists in the data-dir.") + f.BoolVar(&cmdCfg.EnableScriptChecks, "enable-script-checks", false, "Enables health check scripts.") var disableHostNodeID configutil.BoolValue f.Var(&disableHostNodeID, "disable-host-node-id", "Setting this to true will prevent Consul from using information from the"+ diff --git a/testutil/server.go b/testutil/server.go index 3cb959a47..969d06a58 100644 --- a/testutil/server.go +++ b/testutil/server.go @@ -86,6 +86,7 @@ type TestServerConfig struct { VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"` VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"` VerifyOutgoing bool `json:"verify_outgoing,omitempty"` + EnableScriptChecks bool `json:"enable_script_checks,omitempty"` ReadyTimeout time.Duration `json:"-"` Stdout, Stderr io.Writer `json:"-"` Args []string `json:"-"` diff --git a/website/source/docs/agent/checks.html.md b/website/source/docs/agent/checks.html.md index b4676fed3..7685ce2c4 100644 --- a/website/source/docs/agent/checks.html.md +++ b/website/source/docs/agent/checks.html.md @@ -21,10 +21,12 @@ There are five different 
kinds of checks: that performs the health check, exits with an appropriate exit code, and potentially generates some output. A script is paired with an invocation interval (e.g. every 30 seconds). This is similar to the Nagios plugin system. The output of - a script check is limited to 4K. Output larger than this will be truncated. + a script check is limited to 4KB. Output larger than this will be truncated. By default, Script checks will be configured with a timeout equal to 30 seconds. It is possible to configure a custom Script check timeout value by specifying the - `timeout` field in the check definition. + `timeout` field in the check definition. In Consul 0.9.0 and later, the agent + must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) + set to `true` in order to enable script checks. * HTTP + Interval - These checks make an HTTP `GET` request every Interval (e.g. every 30 seconds) to the specified URL. The status of the service depends on @@ -38,7 +40,7 @@ There are five different kinds of checks: configured with a request timeout equal to the check interval, with a max of 10 seconds. It is possible to configure a custom HTTP check timeout value by specifying the `timeout` field in the check definition. The output of the - check is limited to roughly 4K. Responses larger than this will be truncated. + check is limited to roughly 4KB. Responses larger than this will be truncated. HTTP checks also support SSL. By default, a valid SSL certificate is expected. Certificate verification can be turned off by setting the `tls_skip_verify` field to `true` in the check definition. @@ -74,15 +76,17 @@ There are five different kinds of checks: valid through the end of the TTL from the time of the last check. * Docker + Interval - These checks depend on invoking an external application which -is packaged within a Docker Container. The application is triggered within the running -container via the Docker Exec API. 
We expect that the Consul agent user has access -to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to -determine the Docker API endpoint. The application is expected to run, perform a health -check of the service running inside the container, and exit with an appropriate exit code. -The check should be paired with an invocation interval. The shell on which the check -has to be performed is configurable which makes it possible to run containers which -have different shells on the same host. Check output for Docker is limited to -4K. Any output larger than this will be truncated. + is packaged within a Docker Container. The application is triggered within the running + container via the Docker Exec API. We expect that the Consul agent user has access + to either the Docker HTTP API or the unix socket. Consul uses ```$DOCKER_HOST``` to + determine the Docker API endpoint. The application is expected to run, perform a health + check of the service running inside the container, and exit with an appropriate exit code. + The check should be paired with an invocation interval. The shell on which the check + has to be performed is configurable which makes it possible to run containers which + have different shells on the same host. Check output for Docker is limited to + 4KB. Any output larger than this will be truncated. In Consul 0.9.0 and later, the agent + must be configured with [`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) + set to `true` in order to enable Docker health checks. ## Check Definition @@ -210,6 +214,10 @@ This is the only convention that Consul depends on. Any output of the script will be captured and stored in the `notes` field so that it can be viewed by human operators. +In Consul 0.9.0 and later, the agent must be configured with +[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` +in order to enable script checks. 
+ ## Initial Health Check Status By default, when checks are registered against a Consul agent, the state is set diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 88e8df4b3..a43bd39a4 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -147,6 +147,10 @@ will exit with an error at startup. [Nomad](https://www.nomadproject.io/), so if you opt-in to host-based IDs then Consul and Nomad will use information on the host to automatically assign the same ID in both systems. +* `-disable-keyring-file` - If set, + the keyring will not be persisted to a file. Any installed keys will be lost on shutdown, and only the given + `-encrypt` key will be available on startup. This defaults to false. + * `-dns-port` - the DNS port to listen on. This overrides the default port 8600. This is available in Consul 0.7 and later. @@ -154,6 +158,12 @@ will exit with an error at startup. in the "consul." domain. This flag can be used to change that domain. All queries in this domain are assumed to be handled by Consul and will not be recursively resolved. +* `-enable-script-checks` - This + controls whether [health checks that execute scripts](/docs/agent/checks.html) are enabled on + this agent, and defaults to `false` so operators must opt-in to allowing these. If enabled, + it is recommended to [enable ACLs](/docs/guides/acl.html) as well to control which users are + allowed to register new checks to execute scripts. This was added in Consul 0.9.0. + * `-encrypt` - Specifies the secret key to use for encryption of Consul network traffic. This key must be 16-bytes that are Base64-encoded. The @@ -167,10 +177,6 @@ will exit with an error at startup. initialized with an encryption key, then the provided key is ignored and a warning will be displayed. -* `-disable-keyring-file` - If set, - the keyring will not be persisted to a file. 
Any installed keys will be lost on shutdown, and only the given - `-encrypt` key will be available on startup. This defaults to false. - * `-http-port` - the HTTP API port to listen on. This overrides the default port 8500. This option is very useful when deploying Consul to an environment which communicates the HTTP port through the environment e.g. PaaS like CloudFoundry, allowing @@ -712,6 +718,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass * `enable_debug` When set, enables some additional debugging features. Currently, this is only used to set the runtime profiling HTTP endpoints. +* `enable_script_checks` Equivalent to the + [`-enable-script-checks` command-line flag](#_enable_script_checks). + * `enable_syslog` Equivalent to the [`-syslog` command-line flag](#_syslog). diff --git a/website/source/docs/guides/acl.html.md b/website/source/docs/guides/acl.html.md index a2794fcd3..2ea46a51e 100644 --- a/website/source/docs/guides/acl.html.md +++ b/website/source/docs/guides/acl.html.md @@ -684,6 +684,10 @@ to use for registration events: [checks](/docs/agent/checks.html). Tokens may also be passed to the [HTTP API](/api/index.html) for operations that require them. +In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with +[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable +script checks. + #### Operator Rules The `operator` policy controls access to cluster-level operations in the @@ -866,6 +870,10 @@ to use for registration events: [checks](/docs/agent/checks.html). Tokens may also be passed to the [HTTP API](/api/index.html) for operations that require them. +In addition to ACLs, in Consul 0.9.0 and later, the agent must be configured with +[`enable_script_checks`](/docs/agent/options.html#_enable_script_checks) set to `true` in order to enable +script checks. 
+ #### Session Rules The `session` policy controls access to [Session API](/api/session.html) operations. diff --git a/website/source/intro/getting-started/join.html.md b/website/source/intro/getting-started/join.html.md index 9169e87a4..59123a429 100644 --- a/website/source/intro/getting-started/join.html.md +++ b/website/source/intro/getting-started/join.html.md @@ -72,6 +72,12 @@ the replicated log until the expected number of servers has successfully joined. You can read more about this in the [bootstrapping guide](/docs/guides/bootstrapping.html). +We've included the [`-enable-script-checks`](/docs/agent/options.html#_enable_script_checks) +flag set to `true` in order to enable health checks that can execute external scripts. +This will be used in examples later. For production use, you'd want to configure +[ACLs](/docs/guides/acl.html) in conjunction with this to control the ability to +register arbitrary scripts. + Finally, we add the [`config-dir` flag](/docs/agent/options.html#_config_dir), marking where service and check definitions can be found. @@ -81,7 +87,7 @@ All together, these settings yield a ```text vagrant@n1:~$ consul agent -server -bootstrap-expect=1 \ -data-dir=/tmp/consul -node=agent-one -bind=172.20.20.10 \ - -config-dir=/etc/consul.d + -enable-script-checks=true -config-dir=/etc/consul.d ... ``` @@ -102,7 +108,7 @@ All together, these settings yield a ```text vagrant@n2:~$ consul agent -data-dir=/tmp/consul -node=agent-two \ - -bind=172.20.20.11 -config-dir=/etc/consul.d + -bind=172.20.20.11 -enable-script-checks=true -config-dir=/etc/consul.d ... ```