diff --git a/website/content/docs/discovery/checks.mdx b/website/content/docs/discovery/checks.mdx index d9a6986d3..02ef1c284 100644 --- a/website/content/docs/discovery/checks.mdx +++ b/website/content/docs/discovery/checks.mdx @@ -142,6 +142,19 @@ There are several different kinds of checks: A script check: + + +```hcl +check = { + id = "mem-util" + name = "Memory utilization" + args = ["/usr/local/bin/check_mem.py", "-limit", "256MB"] + interval = "10s" + timeout = "1s" +} + +``` + ```json { "check": { @@ -154,8 +167,29 @@ A script check: } ``` + + A HTTP check: + + +```hcl +check = { + id = "api" + name = "HTTP API on port 5000" + http = "https://localhost:5000/health" + tls_server_name = "" + tls_skip_verify = false + method = "POST" + header = { + Content-Type = ["application/json"] + } + body = "{\"method\":\"health\"}" + interval = "10s" + timeout = "1s" +} +``` + ```json { "check": { @@ -173,8 +207,23 @@ A HTTP check: } ``` + + A TCP check: + + +```hcl +check = { + id = "ssh" + name = "SSH TCP on port 22" + tcp = "localhost:22" + interval = "10s" + timeout = "1s" +} + +``` + ```json { "check": { @@ -187,8 +236,21 @@ A TCP check: } ``` + + A TTL check: + + +```hcl +check = { + id = "web-app" + name = "Web App Status" + notes = "Web app does a curl internally every 10 seconds" + ttl = "30s" +} +``` + ```json { "check": { @@ -200,8 +262,23 @@ A TTL check: } ``` + + A Docker check: + + +```hcl +check = { + id = "mem-util" + name = "Memory utilization" + docker_container_id = "f972c95ebf0e" + shell = "/bin/bash" + args = ["/usr/local/bin/check_mem.py"] + interval = "10s" +} +``` + ```json { "check": { @@ -215,8 +292,22 @@ A Docker check: } ``` + + A gRPC check for the whole application: + + +```hcl +check = { + id = "mem-util" + name = "Service health status" + grpc = "127.0.0.1:12345" + grpc_use_tls = true + interval = "10s" +} +``` + ```json { "check": { @@ -229,8 +320,22 @@ A gRPC check for the whole application: } ``` + + A gRPC check for the specific 
`my_service` service: + + +```hcl +check = { + id = "mem-util" + name = "Service health status" + grpc = "127.0.0.1:12345/my_service" + grpc_use_tls = true + interval = "10s" +} +``` + ```json { "check": { @@ -243,8 +348,22 @@ A gRPC check for the specific `my_service` service: } ``` + + A h2ping check: + + +```hcl +check = { + id = "h2ping-check" + name = "h2ping" + h2ping = "localhost:22222" + interval = "10s" + h2ping_use_tls = false +} +``` + ```json { "check": { @@ -257,8 +376,19 @@ A h2ping check: } ``` + + An alias check for a local service: + + +```hcl +check = { + id = "web-alias" + alias_service = "web" +} +``` + ```json { "check": { @@ -268,6 +398,8 @@ An alias check for a local service: } ``` + + ~> Configuration info: The alias check configuration expects the alias to be registered on the same agent as the one you are aliasing. If the service is not registered with the same agent, `"alias_node": ""` must also be @@ -342,6 +474,17 @@ to be healthy. In certain cases, it may be desirable to specify the initial state of a health check. This can be done by specifying the `status` field in a health check definition, like so: + + +```hcl +check = { + id = "mem" + args = ["/bin/check_mem", "-limit", "256MB"] + interval = "10s" + status = "passing" +} +``` + ```json { "check": { @@ -353,6 +496,8 @@ health check definition, like so: } ``` + + The above service definition would cause the new "mem" check to be registered with its initial state set to "passing". @@ -363,6 +508,17 @@ that the status of the health check will only affect the health status of the given service instead of the entire node. 
Service-bound health checks may be provided by adding a `service_id` field to a check configuration: + + +```hcl +check = { + id = "web-app" + name = "Web App Status" + service_id = "web-app" + ttl = "30s" +} +``` + ```json { "check": { @@ -374,6 +530,8 @@ provided by adding a `service_id` field to a check configuration: } ``` + + In the above configuration, if the web-app health check begins failing, it will only affect the availability of the web-app service. All other services provided by the node will remain unchanged. @@ -389,6 +547,32 @@ to use the agent's credentials when configured for TLS. Multiple check definitions can be defined using the `checks` (plural) key in your configuration file. + + +```hcl +checks = [ + { + id = "chk1" + name = "mem" + args = ["/bin/check_mem", "-limit", "256MB"] + interval = "5s" + }, + { + id = "chk2" + name = "/health" + http = "http://localhost:5000/health" + interval = "15s" + }, + { + id = "chk3" + name = "cpu" + args = ["/bin/check_cpu"] + interval = "10s" + }, + ... +] +``` + ```json { "checks": [ @@ -415,6 +599,8 @@ key in your configuration file. } ``` + + ## Success/Failures before passing/warning/critical To prevent flapping health checks, and limit the load they cause on the cluster, @@ -436,6 +622,22 @@ This feature is available for HTTP, TCP, gRPC, Docker & Monitor checks. By default, both passing and critical thresholds will be set to 0 so the check status will always reflect the last check result. + + +```hcl +checks = [ + { + name = "HTTP TCP on port 80" + tcp = "localhost:80" + interval = "10s" + timeout = "1s" + success_before_passing = 3 + failures_before_warning = 1 + failures_before_critical = 3 + } +] +``` + ```json { "checks": [ @@ -451,3 +653,5 @@ status will always reflect the last check result. ] } ``` + +