From 9cc5540926aba3d1a48a3d6bada0930df9d27ad3 Mon Sep 17 00:00:00 2001 From: James Rasell Date: Wed, 1 Jul 2020 13:03:49 +0200 Subject: [PATCH] docs: migration of Nomad Autoscaler docs with cluster updates. Co-authored-by: Chris Baker <1675087+cgbaker@users.noreply.github.com> --- website/data/docs-navigation.js | 23 ++ website/pages/docs/autoscaling/agent.mdx | 240 ++++++++++++++++++ website/pages/docs/autoscaling/api.mdx | 29 +++ website/pages/docs/autoscaling/cli.mdx | 108 ++++++++ website/pages/docs/autoscaling/index.mdx | 43 ++++ .../docs/autoscaling/internals/checks.mdx | 27 ++ .../docs/autoscaling/internals/index.mdx | 13 + .../pages/docs/autoscaling/plugins/apm.mdx | 126 +++++++++ .../pages/docs/autoscaling/plugins/index.mdx | 65 +++++ .../docs/autoscaling/plugins/strategy.mdx | 46 ++++ .../pages/docs/autoscaling/plugins/target.mdx | 145 +++++++++++ website/pages/docs/autoscaling/policy.mdx | 96 +++++++ 12 files changed, 961 insertions(+) create mode 100644 website/pages/docs/autoscaling/agent.mdx create mode 100644 website/pages/docs/autoscaling/api.mdx create mode 100644 website/pages/docs/autoscaling/cli.mdx create mode 100644 website/pages/docs/autoscaling/index.mdx create mode 100644 website/pages/docs/autoscaling/internals/checks.mdx create mode 100644 website/pages/docs/autoscaling/internals/index.mdx create mode 100644 website/pages/docs/autoscaling/plugins/apm.mdx create mode 100644 website/pages/docs/autoscaling/plugins/index.mdx create mode 100644 website/pages/docs/autoscaling/plugins/strategy.mdx create mode 100644 website/pages/docs/autoscaling/plugins/target.mdx create mode 100644 website/pages/docs/autoscaling/policy.mdx diff --git a/website/data/docs-navigation.js b/website/data/docs-navigation.js index 88a0fdd6b..74bd07704 100644 --- a/website/data/docs-navigation.js +++ b/website/data/docs-navigation.js @@ -223,6 +223,29 @@ export default [ }, 'schedulers', { category: 'runtime', content: ['environment', 'interpolation'] }, + { + 
category: 'autoscaling', + content: [ + 'agent', + 'api', + 'cli', + 'policy', + { + category: 'plugins', + content: [ + 'apm', + 'strategy', + 'target' + ] + }, + { + category: 'internals', + content: [ + 'checks' + ] + } + ] + }, { category: 'telemetry', content: ['metrics'] }, { category: 'vault-integration' }, '------------', diff --git a/website/pages/docs/autoscaling/agent.mdx b/website/pages/docs/autoscaling/agent.mdx new file mode 100644 index 000000000..23615e922 --- /dev/null +++ b/website/pages/docs/autoscaling/agent.mdx @@ -0,0 +1,240 @@ +--- +layout: docs +page_title: Agent +sidebar_title: Agent +description: The Nomad Autoscaler is a long lived process which coordinates scaling activities. +--- + +# Nomad Autoscaler Agent + +The Nomad Autoscaler agent has a variety of parameters that can be specified +via configuration files or command-line flags. Configuration files are written +in [HCL][hcl_v2]. The Nomad Autoscaler can read and combine parameters from +multiple configuration files or directories to configure the agent. + +## Nomad Namespaces + +The Nomad Autoscaler currently has limited support for +[Nomad Namespaces][nomad_namespaces]. The `nomad` configuration below supports +specifying a namespace; if configured with a namespace, the Autoscaler will +retrieve scaling policies and perform autoscaling only for jobs in that +namespace. A future version will include support for multiple namespaces. + +## Nomad ACLs + +The Nomad Autoscaler can be configured to interact with an ACL-enabled Nomad +cluster. Nomad 0.11 includes the `scale` ACL policy disposition specifically for +supporting the operations of the Nomad Autoscaler. Therefore, the +following policy is sufficient for creating an ACL token that can be used by +the autoscaler for fetching scaling policies and scaling jobs: + +```hcl +namespace "default" { + policy = "scale" +} +``` + +Other APM and target plugins may require additional ACLs; see the plugin documentation for more information. 
+ +## Load Order and Merging + +The Nomad Autoscaler agent supports multiple configuration files, which can be +provided using the [-config][autoscaler_cli_config] CLI flag. The flag can +accept either a file or folder. In the case of a folder, any `.hcl` and `.json` +files in the folder will be loaded and merged in lexicographical order. Directories +are not loaded recursively. + +For example: + +```shell-session +$ nomad-autoscaler agent -config=autoscaler.conf -config=/etc/nomad-autoscaler -config=extra.json +``` + +This will load configuration from autoscaler.conf, from `.hcl` and `.json` files +under `/etc/nomad-autoscaler`, and finally from `extra.json`. As each file is +processed, its contents are merged into the existing configuration. When merging, +any non-empty values from the latest config file will append or replace +parameters in the current configuration. An empty value means `""` for strings, +`0` for integer or float values, and `false` for booleans. + +## General Parameters + +- `log_level` `(string: "INFO")` - Specify the verbosity level of Nomad + Autoscaler's logs. Valid values include DEBUG, INFO, and WARN, in decreasing + order of verbosity. + +- `log_json` `(bool: false)` - Output logs in a JSON format. + +- `plugin_dir` `(string: "./plugins")` - The plugin directory is used to + discover Nomad Autoscaler plugins. + +## `http` Block + +The `http` block configures the Nomad Autoscaler's HTTP endpoint. + +```hcl +http { + bind_address = "10.0.0.10" + bind_port = 9999 +} +``` + +### `http` Parameters + +- `bind_address` `(string "127.0.0.1")` - The HTTP address that the server will + bind to. + +- `bind_port` `(int 8080)` - The port that the server will bind to. + +## `nomad` Block + +The `nomad` block configures the Nomad Autoscaler's Nomad client. 
+ +```hcl +nomad { + address = "http://my-nomad.systems:4646" + region = "esp-vlc-1" +} +``` + +### `nomad` Parameters + +- `address` `(string "http://127.0.0.1:4646")` - The address of the Nomad server + in the form of protocol://addr:port. + +- `region` `(string "global")` - The region of the Nomad servers to connect with. + +- `namespace` `(string "")` - The target namespace for queries and actions bound + to a namespace. + +- `token` `(string "")` - The SecretID of an ACL token to use to authenticate + API requests with. + +- `http_auth` `(string "")` - The authentication information to use when connecting + to a Nomad API which is using HTTP authentication. + +- `ca_cert` `(string "")` - Path to a PEM encoded CA cert file to use to verify + the Nomad server SSL certificate. + +- `ca_path` `(string "")` - Path to a directory of PEM encoded CA cert files to + verify the Nomad server SSL certificate. + +- `client_cert` `(string "")` - Path to a PEM encoded client certificate for TLS + authentication to the Nomad server. + +- `client_key` `(string "")` - Path to an unencrypted PEM encoded private key + matching the client certificate. + +- `tls_server_name` `(string "")` - The server name to use as the SNI host when + connecting via TLS. + +- `skip_verify` `(bool false)` - Do not verify TLS certificates. This is strongly + discouraged. + +## `policy` Block + +The `policy` block configures the Nomad Autoscaler's policy handling. + +```hcl +policy { + dir = "/opt/nomad-autoscaler/plugins" + default_cooldown = "2m" +} +``` + +### `policy` Parameters + +- `dir` `(string "./plugins")` - The path to a directory used to load scaling + policies. + +- `default_cooldown` `(string "5m")` - The default cooldown that will be applied + to all scaling policies which do not specify a cooldown period. + +- `default_evaluation_interval` `(string "10s")` - The default evaluation interval + that will be applied to all scaling policies which do not specify an evaluation + interval. 
+ +## `apm` Block + +The `apm` block is used to configure application performance metric (APM) plugins. + +```hcl +apm "example-apm-plugin" { + driver = "example-apm-plugin" + args = ["-my-flag"] + + config = { + address = "http://127.0.0.1:9090" + } +} +``` + +### `apm` Parameters + +- `args` `(array: [])` - Specifies a set of arguments to pass to the + plugin binary when it is executed. + +- `driver` `(string: "")` - The plugin's executable name relative to the + plugin_dir. If the plugin has a suffix, such as .exe, this should be omitted. + +- `config` `(map: nil)` - Specifies configuration values for + the plugin either as HCL or JSON. The accepted values are plugin specific. + Please refer to the individual plugin's documentation. + +## `target` Block + +The `target` block is used to configure scaling target plugins. + +```hcl +target "example-target-plugin" { + driver = "example-target-plugin" + args = ["-my-flag"] + + config = { + region = "esp-vlc-1" + } +} +``` + +### `target` Parameters + +- `args` `(array: [])` - Specifies a set of arguments to pass to the + plugin binary when it is executed. + +- `driver` `(string: "")` - The plugin's executable name relative to the + plugin_dir. If the plugin has a suffix, such as .exe, this should be omitted. + +- `config` `(map: nil)` - Specifies configuration values for + the plugin either as HCL or JSON. The accepted values are plugin specific. + Please refer to the individual plugin's documentation. + +## `strategy` Block + +The `strategy` block is used to configure scaling strategy plugins. + +```hcl +strategy "example-strategy-plugin" { + driver = "example-strategy-plugin" + args = ["-my-flag"] + + config = { + algorithm = "complex" + } +} +``` + +### `strategy` Parameters + +- `args` `(array: [])` - Specifies a set of arguments to pass to the + plugin binary when it is executed. + +- `driver` `(string: "")` - The plugin's executable name relative to the + plugin_dir. 
If the plugin has a suffix, such as .exe, this should be omitted. + +- `config` `(map: nil)` - Specifies configuration values for + the plugin either as HCL or JSON. The accepted values are plugin specific. + Please refer to the individual plugin's documentation. + +[hcl_v2]: https://github.com/hashicorp/hcl/tree/hcl2 +[nomad_namespaces]: https://learn.hashicorp.com/nomad/governance-and-policy/namespaces +[nomad_acls]: https://learn.hashicorp.com/nomad?track=acls#acls +[autoscaler_cli_config]: /docs/autoscaling/cli#config diff --git a/website/pages/docs/autoscaling/api.mdx b/website/pages/docs/autoscaling/api.mdx new file mode 100644 index 000000000..1b9591314 --- /dev/null +++ b/website/pages/docs/autoscaling/api.mdx @@ -0,0 +1,29 @@ +--- +layout: docs +page_title: HTTP API +sidebar_title: API +description: Learn about the Nomad Autoscaler HTTP API. +--- + +# Nomad Autoscaler HTTP API + +The Nomad Autoscaler exposes a small, simple API to be used for health checking +the agent. + +## Health API + +This endpoint can be used to query the Nomad Autoscaler agent aliveness. If the +agent is alive, the request will return a 200 OK, otherwise it will return a +503 ServiceUnavailable. + +| Method | Path | Produces | +| ------ | ------------ | ------------------ | +| `GET` | `/v1/health` | `application/json` | + +### Sample Request + +```shell-session +$ curl \ + --request GET \ + https://localhost:8080/v1/health +``` diff --git a/website/pages/docs/autoscaling/cli.mdx b/website/pages/docs/autoscaling/cli.mdx new file mode 100644 index 000000000..e42ade3e1 --- /dev/null +++ b/website/pages/docs/autoscaling/cli.mdx @@ -0,0 +1,108 @@ +--- +layout: docs +page_title: CLI +sidebar_title: CLI +description: > + The Nomad Autoscaler can be controlled via a command-line interface. This + page documents all the commands the Nomad Autoscaler accepts. 
+--- + +# Nomad Autoscaler Command: agent + +The agent command is used to start the Nomad Autoscaler which runs until an +interrupt signal is received. The Nomad Autoscaler agent's configuration +primarily comes from the config files used, but a subset of the options may +also be passed directly as CLI arguments. See the +[Nomad Autoscaler Agent guide][nomad_autoscaler_agent_guide] for more information +on how to use this command and the options it has. + +## Command-line Options + +A subset of the available Nomad Autoscaler agent configuration can optionally be +passed in via CLI arguments. The `agent` command accepts the following arguments: + +- `-config=`: The path to either a single config file or a directory of + config files to use for configuring the Nomad Autoscaler agent. + +- `-log-level=`: Specify the verbosity level of Nomad Autoscaler's logs. + Valid values include DEBUG, INFO, and WARN, in decreasing order of verbosity. + The default is `INFO`. + +- `-log-json`: Output logs in a JSON format. The default is false. + +- `-plugin-dir=`: The plugin directory is used to discover Nomad Autoscaler + plugins. If not specified, the plugin directory defaults to be that of + `/plugins/`. + +- `-http-bind-address=`: The HTTP address that the health server will bind + to. The default is `127.0.0.1`. + +- `-http-bind-port=`: The port that the health server will bind to. The + default is `8080`. + +- `-nomad-address=`: The address of the Nomad server in the form of + protocol://addr:port. The default is `http://127.0.0.1:4646`. + +- `-nomad-region=`: The region of the Nomad servers to connect with. + +- `-nomad-namespace=`: The target namespace for queries and actions + bound to a namespace. + +- `-nomad-token=`: The SecretID of an ACL token to use to authenticate + API requests with. + +- `-nomad-http-auth=`: The authentication information to use + when connecting to a Nomad API which is using HTTP authentication. 
+ +- `-nomad-ca-cert=`: Path to a PEM encoded CA cert file to use to verify + the Nomad server SSL certificate. + +- `-nomad-ca-path=`: Path to a directory of PEM encoded CA cert files to + verify the Nomad server SSL certificate. If both `-nomad-ca-cert` and + `-nomad-ca-path` are specified, `-nomad-ca-cert` is used. + +- `-nomad-client-cert=`: Path to a PEM encoded client certificate for TLS + authentication to the Nomad server. Must also specify `-nomad-client-key`. + +- `-nomad-client-key=`: Path to an unencrypted PEM encoded private key + matching the client certificate from `-nomad-client-cert`. + +- `-nomad-tls-server-name=`: The server name to use as the SNI host when + connecting via TLS. + +- `-nomad-skip-verify`: Do not verify TLS certificates. This is strongly discouraged. + +- `-policy-dir=`: The path to a directory used to load scaling policies. + +- `-policy-default-cooldown=`: The default cooldown that will be applied to + all scaling policies which do not specify a cooldown period. The default is `5m`. + +- `-policy-default-evaluation-interval=`: The default evaluation interval + that will be applied to all scaling policies which do not specify an evaluation + interval. The default is `10s`. + +# Nomad Autoscaler Command: version + +The `version` command displays build information about the running binary, +including the release version and the exact revision. + +## Usage + +```plaintext +nomad-autoscaler version +``` + +## Output + +This command prints both the version number as well as the exact commit SHA used +during the build. The SHA may also have the string `+CHANGES` appended to the +end, indicating that local, uncommitted changes were detected at build time. 
+ +## Examples + +```shell-session +$ nomad-autoscaler version +Nomad Autoscaler v0.0.3-dev (da91fa9) +``` + +[nomad_autoscaler_agent_guide]: /docs/autoscaling/agent diff --git a/website/pages/docs/autoscaling/index.mdx b/website/pages/docs/autoscaling/index.mdx new file mode 100644 index 000000000..dffcbf81e --- /dev/null +++ b/website/pages/docs/autoscaling/index.mdx @@ -0,0 +1,43 @@ +--- +layout: docs +page_title: Autoscaling +sidebar_title: Autoscaling +description: |- + Overview of the Nomad Autoscaler that provides horizontal application and + cluster scaling. +--- + +# Nomad Autoscaler Overview + +This section details the Nomad Autoscaler, a horizontal application and cluster +autoscaler for Nomad. The Nomad Autoscaler is built and released separately to +Nomad. The source code can be viewed on [GitHub][autoscaler_github] and releases +are available on the [HashiCorp releases page][autoscaler_releases] or via +[Docker Hub][autoscaler_dockerhub]. + +The Nomad Autoscaler repository includes a number of [demos][autoscaler_demo] +which provide guided learning on running the autoscaler. + +## Horizontal Application Autoscaling + +Horizontal application autoscaling is the process of automatically controlling the number of instances of an application +to have sufficient work throughput to meet service-level agreements (SLA). In +Nomad, horizontal application autoscaling can be achieved by modifying the number +of allocations in a task group based on the value of a relevant metric, such as +CPU and memory utilization or number of open connections. This is enabled by configuring +[autoscaling policies][autoscaling_policy] on individual Nomad jobs using the [scaling block][scaling_block]. +## Horizontal Cluster Autoscaling + +Horizontal cluster autoscaling is the process of adding or removing Nomad clients from a cluster to ensure there +is an appropriate amount of cluster resource for the scheduled applications. 
+This is achieved by interacting with remote providers to start or terminate new +Nomad clients based on metrics such as the remaining free schedulable CPU or memory. +Cluster scaling is enabled by configuring the [autoscaler agent](/docs/autoscaling/agent#dir) +with policies targeting the Nomad cluster. + +[scaling_block]: /docs/job-specification/scaling +[autoscaling_policy]: /docs/autoscaling/policy +[autoscaler_github]: https://github.com/hashicorp/nomad-autoscaler +[autoscaler_releases]: https://releases.hashicorp.com/nomad-autoscaler/ +[autoscaler_dockerhub]: https://hub.docker.com/repository/docker/hashicorp/nomad-autoscaler +[autoscaler_demo]: https://github.com/hashicorp/nomad-autoscaler/tree/master/demo diff --git a/website/pages/docs/autoscaling/internals/checks.mdx b/website/pages/docs/autoscaling/internals/checks.mdx new file mode 100644 index 000000000..b317d075a --- /dev/null +++ b/website/pages/docs/autoscaling/internals/checks.mdx @@ -0,0 +1,27 @@ +--- +layout: docs +page_title: Checks +sidebar_title: Checks +description: Learn about how the Autoscaler deals with policy checks. +--- + +# Nomad Autoscaler Check Calculations + +A scaling policy can include several checks all of which produce a scaling +suggestion. The checks are executed at the same time during a policy evaluation +and the results can conflict with each other. In a scenario like this, the +autoscaler iterates the results and chooses the safest result which results in +retaining the most capacity of the resource. + +In a scenario where two checks return different desired directions, the following +logic is applied. + +- `ScaleOut and ScaleIn => ScaleOut` +- `ScaleOut and ScaleNone => ScaleOut` +- `ScaleIn and ScaleNone => ScaleNone` + +In situations where the two same actions are suggested, but with different counts the +following logic is applied, where the count is the absolute desired value. 
+ +- `ScaleOut(10) and ScaleOut(9) => ScaleOut(10)` +- `ScaleIn(3) and ScaleIn(4) => ScaleIn(4)` diff --git a/website/pages/docs/autoscaling/internals/index.mdx b/website/pages/docs/autoscaling/internals/index.mdx new file mode 100644 index 000000000..4def6a4be --- /dev/null +++ b/website/pages/docs/autoscaling/internals/index.mdx @@ -0,0 +1,13 @@ +--- +layout: docs +page_title: Internals +sidebar_title: Internals +description: > + This section covers the internals of the Nomad Autoscaler and explains + technical details of its operation. +--- + +# Nomad Autoscaler Internals + +This section covers the internals of the Nomad Autoscaler and explains the +technical details of how it functions, its architecture, and sub-systems. diff --git a/website/pages/docs/autoscaling/plugins/apm.mdx b/website/pages/docs/autoscaling/plugins/apm.mdx new file mode 100644 index 000000000..9a326a25f --- /dev/null +++ b/website/pages/docs/autoscaling/plugins/apm.mdx @@ -0,0 +1,126 @@ +--- +layout: docs +page_title: APM +sidebar_title: APM +description: APM plugins provide metric data points describing the resource's current state. +--- + +# APM Plugins + +APMs are used to store metrics about an application's performance and current +state. The APM (Application Performance Management) plugin is responsible for +querying the APM and returning a value which will be used to determine if +scaling should occur. + +## Prometheus APM Plugin + +Use [Prometheus][prometheus_io] metrics to scale your Nomad job task groups or +cluster. The query performed on Prometheus should return a single value. You can +use the [scalar][prometheus_scaler_function] function in your query to achieve +this. + +### Agent Configuration Options + +```hcl +apm "prometheus" { + driver = "prometheus" + + config = { + address = "http://prometheus.my.endpoint.io:9090" + } +} +``` + +- `address` `(string: "http://127.0.0.1:9090")` - The address of the Prometheus + endpoint used to perform queries. 
+ +### Policy Configuration Options + +```hcl +check { + source = "prometheus" + query = "scalar(avg((haproxy_server_current_sessions{backend=\"http_back\"}) and (haproxy_server_up{backend=\"http_back\"} == 1)))" + ... +} +``` + +## Nomad APM Plugin + +The Nomad APM plugin allows querying the Nomad API for metric data. This provides +an immediate starting point without additional applications but comes at the price +of efficiency. When using this APM, it is advised to monitor Nomad carefully +ensuring it is not put under excessive load pressure. + +### Agent Configuration Options + +```hcl +apm "nomad-apm" { + driver = "nomad-apm" +} +``` + +### Policy Configuration Options - Task Groups + +The Nomad APM allows querying Nomad to understand the current resource usage of +a task group. + +```hcl +check { + source = "nomad-apm" + query = "avg_cpu" + ... +} +``` + +Querying Nomad task group metrics is done using the `operation_metric` syntax, +where valid operations are: + +- `avg` - returns the average of the metric value across allocations in the task + group. + +- `min` - returns the lowest metric value among the allocations in the task group. + +- `max` - returns the highest metric value among the allocations in the task + group. + +- `sum` - returns the sum of all the metric values for the allocations in the + task group. + +The metric value can be: + +- `cpu` - CPU usage as reported by the `nomad.client.allocs.cpu.total_percent` + metric. + +- `memory` - Memory usage as reported by the `nomad.client.allocs.memory.usage` + metric. + +### Policy Configuration Options - Client Nodes + +The Nomad APM allows querying Nomad to understand the current allocated resource +as a percentage of the total available. + +```hcl +check { + source = "nomad-apm" + query = "percentage-allocated_cpu" + ... 
+} +``` + +Querying Nomad client node metrics is done using the `operation_metric` syntax, +where valid operations are: + +- `percentage-allocated` - returns the allocated percentage of the desired + resource. + +The metric value can be: + +- `cpu` - allocated CPU as reported by calculating total allocatable against the + total allocated by the scheduler. + +- `memory` - allocated memory as reported by calculating total allocatable against + the total allocated by the scheduler. + +[prometheus_io]: https://prometheus.io/ +[prometheus_scaler_function]: https://prometheus.io/docs/prometheus/latest/querying/functions/#scalar +[nomad_telemetry_stanza]: /docs/configuration/telemetry#inlinecode-publish_allocation_metrics diff --git a/website/pages/docs/autoscaling/plugins/index.mdx b/website/pages/docs/autoscaling/plugins/index.mdx new file mode 100644 index 000000000..136eac7ec --- /dev/null +++ b/website/pages/docs/autoscaling/plugins/index.mdx @@ -0,0 +1,65 @@ +--- +layout: docs +page_title: Plugins +sidebar_title: Plugins +description: Plugins are used to architect the Nomad Autoscaler into distinct areas. +--- + +# Nomad Autoscaler Plugins + +Plugins are an essential part of the Nomad Autoscaler architecture. The Autoscaler +uses the [go-plugin][go_plugin_github] library to implement an ecosystem of +different types of plugins. Each plugin type is responsible for a specific task; +APM plugins retrieve metrics about the workloads being monitored and Strategy +plugins decide which actions Nomad should execute to keep the policy valid. The +flexibility of plugins allows the Nomad Autoscaler to be extended to meet specific +business requirements or technology use cases. + +The Nomad Autoscaler currently ships with a number of built-in plugins to ease +the learning curve. Details of these can be found below, under the specific +plugin type sections. + +# General Options + +All plugins which require Nomad API connectivity support the parameters detailed +below. 
These plugins include Nomad APM, Nomad Target and all cluster scaling +targets. + +- `nomad_config_inherit` `(bool: true)` - A boolean flag which indicates whether + the plugin should inherit the agent's Nomad configuration parameters. Plugins + can override individual parameters and have their Nomad configuration merged + with that of the agent. + +- `nomad_address` `(string: "")` - The address of the Nomad server in the form + of protocol://addr:port. + +- `nomad_region` `(string: "")` - The region of the Nomad servers to connect with. + +- `nomad_namespace` `(string: "")` - The target namespace for queries and actions + bound to a namespace. + +- `nomad_token` `(string: "")` - The SecretID of an ACL token to use to authenticate + API requests with. + +- `nomad_http-auth` `(string: "")` - The authentication information to use when + connecting to a Nomad API which is using HTTP authentication. + +- `nomad_ca-cert` `(string: "")` - Path to a PEM encoded CA cert file to use to + verify the Nomad server SSL certificate. + +- `nomad_ca-path` `(string: "")` - Path to a directory of PEM encoded CA cert + files to verify the Nomad server SSL certificate. + +- `nomad_client-cert` `(string: "")` - Path to a PEM encoded client certificate + for TLS authentication to the Nomad server. + +- `nomad_client-key` `(string: "")` - Path to an unencrypted PEM encoded private + key matching the client certificate. + +- `nomad_tls-server-name` `(string: "")` - The server name to use as the SNI + host when connecting via TLS. + +- `nomad_skip-verify` `(string: "")` - Do not verify TLS certificates. This is + strongly discouraged. 
+ +[go_plugin_github]: https://github.com/hashicorp/go-plugin diff --git a/website/pages/docs/autoscaling/plugins/strategy.mdx b/website/pages/docs/autoscaling/plugins/strategy.mdx new file mode 100644 index 000000000..6898eee91 --- /dev/null +++ b/website/pages/docs/autoscaling/plugins/strategy.mdx @@ -0,0 +1,46 @@ +--- +layout: docs +page_title: Strategy +sidebar_title: Strategy +description: Strategy plugins compare the current state of the system against the desired state. +--- + +# Strategy Plugins + +Strategy plugins compare the current state of the system against the desired state +defined by the operator in the scaling policy and generate an action that will +bring the system closer to the desired state. In practical terms, strategies +receive the current count and a metric value for a task group and output what +the new task group count should be. + +## Target Value Strategy Plugin + +The target value strategy plugin will perform count calculations in order to keep +the value resulting from the APM query at or around a specified target. + +### Agent Configuration Options + +```hcl +strategy "target-value" { + driver = "target-value" +} +``` + +### Policy Configuration Options + +```hcl +check { + ... + strategy "target-value" { + target = 20 + threshold = 0.0001 + } + ... +``` + +- `target` `(float: )` - Specifies the metric value the Autoscaler + should try to meet. + +- `threshold` `(float: 0.01)` - Specifies how significant a change in the input + metric should be considered. Small threshold values can lead to output + fluctuation. diff --git a/website/pages/docs/autoscaling/plugins/target.mdx b/website/pages/docs/autoscaling/plugins/target.mdx new file mode 100644 index 000000000..a13d6c3a5 --- /dev/null +++ b/website/pages/docs/autoscaling/plugins/target.mdx @@ -0,0 +1,145 @@ +--- +layout: docs +page_title: Target +sidebar_title: Target +description: Target plugins determine where the resource to be autoscaled is located. 
+--- + +# Target Plugins + +Target Plugins determine where the resource to be autoscaled is located. All +target plugins support the `dry-run` policy config parameter which allows a policy +to be evaluated, but will noop any suggested changes. + +## Nomad Task Group Target + +The Nomad task group target indicates the scalable resource is a Nomad job +running on a Nomad cluster. + +### Agent Configuration Options + +The Nomad target is automatically launched by the Nomad Autoscaler and so the +following setup is optional. + +```hcl +target "nomad" { + driver = "nomad" +} +``` + +### Policy Configuration Options + +If using the [Nomad job specification scaling stanza][nomad_scaling_stanza] to +configure the scaling policy, the following section can be omitted as Nomad will +populate them on job submission. + +```hcl +check { + ... + target "nomad" { + Job = "example" + Group = "cache" + } + ... +``` + +- `job` `(string: "") ` - The job identifier which contains the task group to + scale as defined within the job specification [job stanza][nomad_job_stanza]. + +- `group` `(string: "")` - The name of the task group to scale as defined in the + job specification [group stanza][nomad_group_stanza]. + +## AWS AutoScaling Group Target + +The AWS ASG target plugin allows for the scaling of the Nomad cluster clients +via manipulating [AWS AutoScaling Groups][aws_autoscaling]. + +### Agent Configuration Options + +To use the AWS ASG target plugin, the agent configuration needs to be populated +with the appropriate target block. Authentication to the AWS API can be supplied +in a number of ways including EC2 instance roles. It is recommended, if possible +to use the [Vault AWS Secrets engine][vault_aws_backend] for supplying access +credentials to the plugin. Credentials should be injected into the configuration +via a template rather than as environment variables. 
This ensures the credentials +are passed only to the plugin, rather than being available for all plugins and +the agent process. + +The IAM policy required for the AWS ASG plugin to function properly is detailed +below. + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Action": [ + "ec2:TerminateInstances", + "ec2:DescribeInstanceStatus", + "autoscaling:UpdateAutoScalingGroup", + "autoscaling:DetachInstances", + "autoscaling:DescribeScalingActivities", + "autoscaling:DescribeAutoScalingGroups", + "autoscaling:CreateOrUpdateTags" + ], + "Resource": "*" + } + ] +} +``` + +```hcl +target "aws-asg" { + driver = "aws-asg" + config = { + aws_region = "eu-west-3" + aws_access_key_id = "AKIAIOSFODNN7EXAMPLE" + aws_secret_key_id = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + } +} +``` + +- `aws_region` `(string: "us-east-1")` - The [AWS region][aws_region] identifier + to connect to and where resources should be managed. + +- `aws_access_key_id` `(string: "")` - The AWS access key ID used to authenticate + with the AWS API. + +- `aws_secret_key_id` `(string: "")` - The AWS secret key ID used to authenticate + with the AWS API. + +- `aws_session_token` `(string: "")` - The AWS session token used to authenticate + with the AWS API. + +### Policy Configuration Options + +```hcl +check { + ... + target "aws-asg" { + asg_name = "hashistack-client-asg" + class = "hashistack" + drain_deadline = "5m" + } + ... +``` + +- `asg_name` `(string: )` - The name of the AWS AutoScaling Group to + interact with when performing scaling actions. + +- `class` `(string: )` - The Nomad [client node class][nomad_node_class] + identifier used to group nodes into a pool of resource. + +- `drain_deadline` `(duration: "15m")` The Nomad [drain deadline][nomad_node_drain_deadline] + to use when performing node draining actions. 
+ +[nomad_node_class]: https://www.nomadproject.io/docs/configuration/client#node_class +[nomad_node_drain_deadline]: https://www.nomadproject.io/api-docs/nodes#deadline +[nomad_scaling_stanza]: /docs/job-specification/scaling +[nomad_group_stanza]: /docs/job-specification/group#group-stanza +[nomad_job_stanza]: /docs/job-specification/job#job-stanza +[aws_region]: https://aws.amazon.com/about-aws/global-infrastructure/regions_az/ +[aws_autoscaling]: https://aws.amazon.com/autoscaling/ +[vault_aws_backend]: https://www.vaultproject.io/docs/secrets/aws diff --git a/website/pages/docs/autoscaling/policy.mdx b/website/pages/docs/autoscaling/policy.mdx new file mode 100644 index 000000000..21f852b10 --- /dev/null +++ b/website/pages/docs/autoscaling/policy.mdx @@ -0,0 +1,96 @@ +--- +layout: docs +page_title: Scaling Policies +sidebar_title: Policy +description: > + Scaling policies describe the target resource desired state and how to + perform calculations to ensure the current state reaches the desired state. +--- + +# Nomad Autoscaler Scaling Policies + +Nomad Autoscaler scaling policies can be configured via the +[task group scaling stanza][jobspec_scaling_stanza] or by configuration +files stored on disk. + +## Top Level Options + +- `enabled` - A boolean flag that allows operators to administratively disable a + policy from active evaluation. + +- `min` - The minimum running count of the targeted resource. This can be 0 or any + positive integer. + +- `max` - The maximum running count of the targeted resource. This can be 0 or any + positive integer. + +## `policy` Options + +- `cooldown` - A time interval after a scaling action during which no additional + scaling will be performed on the resource. It should be provided as a duration + (e.g.: "5s", "1m"). If omitted, the configuration value + [policy_default_cooldown][policy_default_cooldown_agent] from the agent will + be used. + +- `evaluation_interval` - Defines how often the policy is evaluated by the + Autoscaler.
It should be provided as a duration (e.g.: "5s", "1m"). If + omitted, the configuration value [default_evaluation_interval][eval_interval_agent] + from the agent will be used. + +- `target` - Defines where the autoscaling target is running. Detailed information + on the configuration options can be found on the [target plugin][target_plugin_docs] + page. + +- `check` - Specifies one or more checks to be executed when determining if a + scaling action is required. + +## `check` Options + +- `source` - The APM plugin that should handle the metric query. If omitted, + this defaults to using the Nomad APM. + +- `query` - The query to run against the specified APM. Currently, this query + should return a single value. Detailed information on the configuration options + can be found on the [APM plugin][apm_plugin_docs] page. + +- `strategy` - The strategy to use, and its configuration, when calculating the + desired state based on the current count and the metric returned by the APM. + Detailed information on the configuration options can be found on the + [strategy plugin][strategy_plugin_docs] page. + +### Example + +A full example of a policy document that can be written into the Nomad task group +scaling stanza or provided via a file within the policy directory can be seen below. + +```hcl +min = 2 +max = 10 +enabled = true + +policy { + evaluation_interval = "5s" + cooldown = "1m" + + target "target" { + Job = "example" + Group = "example" + } + + check "active_connections" { + source = "prometheus" + query = "scalar(open_connections_example_cache)" + + strategy "target_value" { + target = 10 + } + } +} +``` + +[policy_default_cooldown_agent]: /docs/autoscaling/agent#default_cooldown +[eval_interval_agent]: /docs/autoscaling/agent#default_evaluation_interval +[target_plugin_docs]: /docs/autoscaling/plugins/target +[strategy_plugin_docs]: /docs/autoscaling/plugins/strategy +[apm_plugin_docs]: /docs/autoscaling/plugins/apm +[jobspec_scaling_stanza]: /docs/job-specification/scaling