open-vault/enos/enos-scenario-agent.hcl

458 lines
14 KiB
HCL
Raw Normal View History

# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: MPL-2.0
scenario "agent" {
matrix {
arch = ["amd64", "arm64"]
artifact_source = ["local", "crt", "artifactory"]
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios by updating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into its own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
artifact_type = ["bundle", "package"]
backend = ["consul", "raft"]
consul_version = ["1.12.9", "1.13.9", "1.14.9", "1.15.5", "1.16.1"]
distro = ["ubuntu", "rhel"]
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
edition = ["ce", "ent", "ent.fips1402", "ent.hsm", "ent.hsm.fips1402"]
seal = ["awskms", "shamir"]
seal_ha_beta = ["true", "false"]
[QT-506] Use enos scenario samples for testing (#22641) (#22933) Replace our prior implementation of Enos test groups with the new Enos sampling feature. With this feature we're able to describe which scenarios and variant combinations are valid for a given artifact and allow enos to create a valid sample field (a matrix of all compatible scenarios) and take an observation (select some to run) for us. This ensures that every valid scenario and variant combination will now be a candidate for testing in the pipeline. See QT-504[0] for further details on the Enos sampling capabilities. Our prior implementation only tested the amd64 and arm64 zip artifacts, as well as the Docker container. We now include the following new artifacts in the test matrix: * CE Amd64 Debian package * CE Amd64 RPM package * CE Arm64 Debian package * CE Arm64 RPM package Each artifact includes a sample definition for both pre-merge/post-merge (build) and release testing. Changes: * Remove the hand crafted `enos-run-matrices` ci matrix targets and replace them with per-artifact samples. * Use enos sampling to generate different sample groups on all pull requests. * Update the enos scenario matrices to handle HSM and FIPS packages. * Simplify enos scenarios by using shared globals instead of cargo-culted locals. Note: This will require coordination with vault-enterprise to ensure a smooth migration to the new system. Integrating new scenarios or modifying existing scenarios/variants should be much smoother after this initial migration. [0] https://github.com/hashicorp/enos/pull/102 Signed-off-by: Ryan Cragun <me@ryan.ec> Co-authored-by: Ryan Cragun <me@ryan.ec>
2023-09-08 19:31:09 +00:00
# Our local builder always creates bundles
exclude {
artifact_source = ["local"]
artifact_type = ["package"]
}
# HSM and FIPS 140-2 are only supported on amd64
exclude {
arch = ["arm64"]
edition = ["ent.fips1402", "ent.hsm", "ent.hsm.fips1402"]
}
}
terraform_cli = terraform_cli.default
terraform = terraform.default
providers = [
provider.aws.default,
provider.enos.ubuntu,
provider.enos.rhel
]
locals {
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
artifact_path = matrix.artifact_source != "artifactory" ? abspath(var.vault_artifact_path) : null
enos_provider = {
rhel = provider.enos.rhel
ubuntu = provider.enos.ubuntu
}
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
manage_service = matrix.artifact_type == "bundle"
vault_install_dir = matrix.artifact_type == "bundle" ? var.vault_install_dir : global.vault_install_dir_packages[matrix.distro]
}
// Runs the get_local_metadata module. The step is skipped for every
// artifact source other than "local".
step "get_local_metadata" {
  module    = module.get_local_metadata
  skip_step = !(matrix.artifact_source == "local")
}
step "build_vault" {
module = "build_${matrix.artifact_source}"
variables {
[QT-506] Use enos scenario samples for testing (#22641) (#22933) Replace our prior implementation of Enos test groups with the new Enos sampling feature. With this feature we're able to describe which scenarios and variant combinations are valid for a given artifact and allow enos to create a valid sample field (a matrix of all compatible scenarios) and take an observation (select some to run) for us. This ensures that every valid scenario and variant combination will now be a candidate for testing in the pipeline. See QT-504[0] for further details on the Enos sampling capabilities. Our prior implementation only tested the amd64 and arm64 zip artifacts, as well as the Docker container. We now include the following new artifacts in the test matrix: * CE Amd64 Debian package * CE Amd64 RPM package * CE Arm64 Debian package * CE Arm64 RPM package Each artifact includes a sample definition for both pre-merge/post-merge (build) and release testing. Changes: * Remove the hand crafted `enos-run-matrices` ci matrix targets and replace them with per-artifact samples. * Use enos sampling to generate different sample groups on all pull requests. * Update the enos scenario matrices to handle HSM and FIPS packages. * Simplify enos scenarios by using shared globals instead of cargo-culted locals. Note: This will require coordination with vault-enterprise to ensure a smooth migration to the new system. Integrating new scenarios or modifying existing scenarios/variants should be much smoother after this initial migration. [0] https://github.com/hashicorp/enos/pull/102 Signed-off-by: Ryan Cragun <me@ryan.ec> Co-authored-by: Ryan Cragun <me@ryan.ec>
2023-09-08 19:31:09 +00:00
build_tags = var.vault_local_build_tags != null ? var.vault_local_build_tags : global.build_tags[matrix.edition]
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
artifact_path = local.artifact_path
goarch = matrix.arch
goos = "linux"
artifactory_host = matrix.artifact_source == "artifactory" ? var.artifactory_host : null
artifactory_repo = matrix.artifact_source == "artifactory" ? var.artifactory_repo : null
artifactory_username = matrix.artifact_source == "artifactory" ? var.artifactory_username : null
artifactory_token = matrix.artifact_source == "artifactory" ? var.artifactory_token : null
arch = matrix.artifact_source == "artifactory" ? matrix.arch : null
product_version = var.vault_product_version
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
artifact_type = matrix.artifact_type
distro = matrix.artifact_source == "artifactory" ? matrix.distro : null
edition = matrix.artifact_source == "artifactory" ? matrix.edition : null
revision = var.vault_revision
}
}
// Runs the ec2_info module. Its `ami_ids` output is indexed by arch, distro and
// distro version when the create_vault_cluster_targets step selects an AMI.
step "ec2_info" {
module = module.ec2_info
}
step "create_vpc" {
module = module.create_vpc
variables {
[QT-506] Use enos scenario samples for testing (#22641) (#22933) Replace our prior implementation of Enos test groups with the new Enos sampling feature. With this feature we're able to describe which scenarios and variant combinations are valid for a given artifact and allow enos to create a valid sample field (a matrix of all compatible scenarios) and take an observation (select some to run) for us. This ensures that every valid scenario and variant combination will now be a candidate for testing in the pipeline. See QT-504[0] for further details on the Enos sampling capabilities. Our prior implementation only tested the amd64 and arm64 zip artifacts, as well as the Docker container. We now include the following new artifacts in the test matrix: * CE Amd64 Debian package * CE Amd64 RPM package * CE Arm64 Debian package * CE Arm64 RPM package Each artifact includes a sample definition for both pre-merge/post-merge (build) and release testing. Changes: * Remove the hand crafted `enos-run-matrices` ci matrix targets and replace them with per-artifact samples. * Use enos sampling to generate different sample groups on all pull requests. * Update the enos scenario matrices to handle HSM and FIPS packages. * Simplify enos scenarios by using shared globals instead of cargo-culted locals. Note: This will require coordination with vault-enterprise to ensure a smooth migration to the new system. Integrating new scenarios or modifying existing scenarios/variants should be much smoother after this initial migration. [0] https://github.com/hashicorp/enos/pull/102 Signed-off-by: Ryan Cragun <me@ryan.ec> Co-authored-by: Ryan Cragun <me@ryan.ec>
2023-09-08 19:31:09 +00:00
common_tags = global.tags
}
}
// Runs the seal key module that matches the `seal` matrix variant
// (module name is "seal_key_" followed by the variant value).
step "create_seal_key" {
  module = "seal_key_${matrix.seal}"

  variables {
    // Same tag set that is passed to the other steps in this scenario.
    common_tags = global.tags
    // Cluster ID comes from the create_vpc step's outputs.
    cluster_id = step.create_vpc.cluster_id
  }
}
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios byupdating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into it's own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
// Reads the backend license file via the read_license module. The step only
// runs when the backend variant is not "raft" AND var.backend_edition is not
// "ce"; in every other case it is skipped.
step "read_backend_license" {
  module = module.read_license

  // Equivalent to: backend is "raft" OR backend edition is "ce".
  skip_step = !(matrix.backend != "raft" && var.backend_edition != "ce")

  variables {
    file_name = global.backend_license_path
  }
}
step "read_vault_license" {
skip_step = matrix.edition == "ce"
module = module.read_license
variables {
[QT-506] Use enos scenario samples for testing (#22641) (#22933) Replace our prior implementation of Enos test groups with the new Enos sampling feature. With this feature we're able to describe which scenarios and variant combinations are valid for a given artifact and allow enos to create a valid sample field (a matrix of all compatible scenarios) and take an observation (select some to run) for us. This ensures that every valid scenario and variant combination will now be a candidate for testing in the pipeline. See QT-504[0] for further details on the Enos sampling capabilities. Our prior implementation only tested the amd64 and arm64 zip artifacts, as well as the Docker container. We now include the following new artifacts in the test matrix: * CE Amd64 Debian package * CE Amd64 RPM package * CE Arm64 Debian package * CE Arm64 RPM package Each artifact includes a sample definition for both pre-merge/post-merge (build) and release testing. Changes: * Remove the hand crafted `enos-run-matrices` ci matrix targets and replace them with per-artifact samples. * Use enos sampling to generate different sample groups on all pull requests. * Update the enos scenario matrices to handle HSM and FIPS packages. * Simplify enos scenarios by using shared globals instead of cargo-culted locals. Note: This will require coordination with vault-enterprise to ensure a smooth migration to the new system. Integrating new scenarios or modifying existing scenarios/variants should be much smoother after this initial migration. [0] https://github.com/hashicorp/enos/pull/102 Signed-off-by: Ryan Cragun <me@ryan.ec> Co-authored-by: Ryan Cragun <me@ryan.ec>
2023-09-08 19:31:09 +00:00
file_name = global.vault_license_path
}
}
step "create_vault_cluster_targets" {
enos: use on-demand targets (#21459) (#21464) Add an updated `target_ec2_instances` module that is capable of dynamically splitting target instances over subnet/az's that are compatible with the AMI architecture and the associated instance type for the architecture. Use the `target_ec2_instances` module where necessary. Ensure that `raft` storage scenarios don't provision unnecessary infrastructure with a new `target_ec2_shim` module. After a lot of trial, the state of Ec2 spot instance capacity, their associated APIs, and current support for different fleet types in AWS Terraform provider, have proven to make using spot instances for scenario targets too unreliable. The current state of each method: * `target_ec2_fleet`: unusable due to the fact that the `instant` type does not guarantee fulfillment of either `spot` or `on-demand` instance request types. The module does support both `on-demand` and `spot` request types and is capable of bidding across a maximum of four availability zones, which makes it an attractive choice if the `instant` type would always fulfill requests. Perhaps a `request` type with `wait_for_fulfillment` option like `aws_spot_fleet_request` would make it more viable for future consideration. * `target_ec2_spot_fleet`: more reliable if bidding for target instances that have capacity in the chosen zone. Issues in the AWS provider prevent us from bidding across multiple zones successfully. Over the last 2-3 months target capacity for the instance types we'd prefer to use has dropped dramatically and the price is near-or-at on-demand. The volatility for nearly no cost savings means we should put this option on the shelf for now. * `target_ec2_instances`: the most reliable method we've got. It is now capable of automatically determining which subnets and availability zones to provision targets in and has been updated to be usable for both Vault and Consul targets. 
By default we use the cheapest medium instance types that we've found are reliable to test vault. * Update .gitignore * enos/modules/create_vpc: create a subnet for every availability zone * enos/modules/target_ec2_fleet: bid across the maximum of four availability zones for targets * enos/modules/target_ec2_spot_fleet: attempt to make the spot fleet bid across more availability zones for targets * enos/modules/target_ec2_instances: create module to use ec2:RunInstances for scenario targets * enos/modules/target_ec2_shim: create shim module to satisfy the target module interface * enos/scenarios: use target_ec2_shim for backend targets on raft storage scenarios * enos/modules/az_finder: remove unused module Signed-off-by: Ryan Cragun <me@ryan.ec> Co-authored-by: Ryan Cragun <me@ryan.ec>
2023-06-26 22:54:39 +00:00
module = module.target_ec2_instances
depends_on = [step.create_vpc]
providers = {
enos = local.enos_provider[matrix.distro]
}
variables {
ami_id = step.ec2_info.ami_ids[matrix.arch][matrix.distro][global.distro_version[matrix.distro]]
cluster_tag_key = global.vault_tag_key
common_tags = global.tags
seal_key_names = step.create_seal_key.resource_names
vpc_id = step.create_vpc.id
}
}
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios by updating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into its own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
// Provision the target hosts for the storage backend cluster.
//
// The "consul" backend gets real instances from target_ec2_instances; every
// other backend value selects target_ec2_shim instead.
// NOTE(review): presumably the shim only satisfies the target module interface
// without creating infrastructure; confirm against the module.
step "create_vault_cluster_backend_targets" {
  module     = matrix.backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
  depends_on = [step.create_vpc]

  providers = {
    enos = provider.enos.ubuntu
  }

  variables {
    // Backend targets use a fixed arm64 / ubuntu / 22.04 AMI lookup and do not
    // follow matrix.arch or matrix.distro.
    ami_id          = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
    cluster_tag_key = global.backend_tag_key
    common_tags     = global.tags
    seal_key_names  = step.create_seal_key.resource_names
    vpc_id          = step.create_vpc.id
  }
}
// Create the storage backend cluster on the backend target hosts. The module
// is selected by name from the matrix backend value ("backend_<backend>").
step "create_backend_cluster" {
  module     = "backend_${matrix.backend}"
  depends_on = [step.create_vault_cluster_backend_targets]

  providers = {
    enos = provider.enos.ubuntu
  }

  variables {
    target_hosts    = step.create_vault_cluster_backend_targets.hosts
    cluster_name    = step.create_vault_cluster_backend_targets.cluster_name
    cluster_tag_key = global.backend_tag_key

    release = {
      version = matrix.consul_version
      edition = var.backend_edition
    }

    // A license is only passed for a consul backend running the "ent" edition;
    // every other combination passes null.
    license = (matrix.backend == "consul" && var.backend_edition == "ent") ? step.read_backend_license.license : null
  }
}
// Create the Vault cluster on the Vault target hosts.
//
// Ordered after the storage backend cluster, the Vault build step and the
// Vault target provisioning step.
step "create_vault_cluster" {
  module = module.vault_cluster
  depends_on = [
    step.create_backend_cluster,
    step.build_vault,
    step.create_vault_cluster_targets
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    // Only the "artifactory" artifact source supplies an artifactory release.
    artifactory_release     = matrix.artifact_source == "artifactory" ? step.build_vault.vault_artifactory_release : null
    backend_cluster_name    = step.create_vault_cluster_backend_targets.cluster_name
    backend_cluster_tag_key = global.backend_tag_key
    cluster_name            = step.create_vault_cluster_targets.cluster_name
    // Consul license and release are only set when the backend is consul
    // (and, for the license, when the backend edition is "ent").
    consul_license = (matrix.backend == "consul" && var.backend_edition == "ent") ? step.read_backend_license.license : null
    consul_release = matrix.backend == "consul" ? {
      edition = var.backend_edition
      version = matrix.consul_version
    } : null
    enable_audit_devices = var.vault_enable_audit_devices
    install_dir          = local.vault_install_dir
    // The "ce" edition is passed a null license.
    license             = matrix.edition != "ce" ? step.read_vault_license.license : null
    local_artifact_path = local.artifact_path
    manage_service      = local.manage_service
    packages            = concat(global.packages, global.distro_packages[matrix.distro])
    seal_ha_beta        = matrix.seal_ha_beta
    seal_key_name       = step.create_seal_key.resource_name
    seal_type           = matrix.seal
    storage_backend     = matrix.backend
    target_hosts        = step.create_vault_cluster_targets.hosts
  }
}
// Hold further steps until the Vault cluster has elected a leader.
step "wait_for_leader" {
  module = module.vault_wait_for_leader
  depends_on = [
    step.create_vault_cluster
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_root_token  = step.create_vault_cluster.root_token
    vault_install_dir = local.vault_install_dir
    vault_hosts       = step.create_vault_cluster_targets.hosts
    timeout           = 120 // seconds
  }
}
// Start Vault Agent against the cluster with a template that renders the
// token's `orphan` and `display_name` fields from auth/token/lookup-self into
// /tmp/agent_output.txt. The verify_vault_agent_output step reads the same
// destination path.
step "start_vault_agent" {
  module = "vault_agent"
  depends_on = [
    step.build_vault,
    step.create_vault_cluster,
    step.wait_for_leader,
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_install_dir                = local.vault_install_dir
    vault_instances                  = step.create_vault_cluster_targets.hosts
    vault_root_token                 = step.create_vault_cluster.root_token
    vault_agent_template_destination = "/tmp/agent_output.txt"
    // The escaped quotes are deliberate: the rendered value contains \" around
    // the secret path.
    // NOTE(review): presumably needed for embedding downstream; confirm in the
    // vault_agent module.
    vault_agent_template_contents = "{{ with secret \\\"auth/token/lookup-self\\\" }}orphan={{ .Data.orphan }} display_name={{ .Data.display_name }}{{ end }}"
  }
}
// Check the file rendered by Vault Agent. The destination path must match the
// one given to the start_vault_agent step, and the expected text mirrors that
// step's template (orphan=... display_name=...).
step "verify_vault_agent_output" {
  module = module.vault_verify_agent_output
  depends_on = [
    step.create_vault_cluster,
    step.start_vault_agent,
    step.wait_for_leader,
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances                  = step.create_vault_cluster_targets.hosts
    vault_agent_template_destination = "/tmp/agent_output.txt"
    vault_agent_expected_output      = "orphan=true display_name=approle"
  }
}
Backport [QT-602] Run `proxy` and `agent` test scenarios (#23176) into release/1.14.x (#23302) * [QT-602] Run `proxy` and `agent` test scenarios (#23176) Update our `proxy` and `agent` scenarios to support new variants and perform baseline verification and their scenario specific verification. We integrate these updated scenarios into the pipeline by adding them to artifact samples. We've also improved the reliability of the `autopilot` and `replication` scenarios by refactoring our IP address gathering. Previously, we'd ask vault for the primary IP address and use some Terraform logic to determine followers. The leader IP address gathering script was also implicitly responsible for ensuring that a found leader was within a given group of hosts, and thus waiting for a given cluster to have a leader, and also for doing some arithmetic and outputting `replication` specific output data. We've broken these responsibilities into individual modules, improved their error messages, and fixed various races and bugs, including: * Fix a race between creating the file audit device and installing and starting vault in the `replication` scenario. * Fix how we determine our leader and follower IP addresses. We now query vault instead of a prior implementation that inferred the followers and sometimes did not allow all nodes to be an expected leader. * Fix a bug where we'd always fail on the first wrong condition in the `vault_verify_performance_replication` module. We also performed some maintenance tasks on Enos scenarios by updating our references from `oss` to `ce` to handle the naming and license changes. We also enabled `shellcheck` linting for enos module scripts. * Rename `oss` to `ce` for license and naming changes. * Convert template enos scripts to scripts that take environment variables. * Add `shellcheck` linting for enos module scripts. * Add additional `backend` and `seal` support to `proxy` and `agent` scenarios. 
* Update scenarios to include all baseline verification. * Add `proxy` and `agent` scenarios to artifact samples. * Remove IP address verification from the `vault_get_cluster_ips` modules and implement a new `vault_wait_for_leader` module. * Determine follower IP addresses by querying vault in the `vault_get_cluster_ips` module. * Move replication specific behavior out of the `vault_get_cluster_ips` module and into its own `replication_data` module. * Extend initial version support for the `upgrade` and `autopilot` scenarios. We also discovered an issue with undo_logs that has been described in the VAULT-20259. As such, we've disabled the undo_logs check until it has been fixed. * actions: fix actionlint error and linting logic (#23305) Signed-off-by: Ryan Cragun <me@ryan.ec>
2023-09-27 16:53:12 +00:00
// Collect the cluster's IP addresses once a leader exists. Later steps read
// leader_public_ip, leader_private_ip and follower_public_ips from this step.
step "get_vault_cluster_ips" {
  module = module.vault_get_cluster_ips
  depends_on = [
    step.wait_for_leader
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_root_token  = step.create_vault_cluster.root_token
    vault_install_dir = local.vault_install_dir
    vault_hosts       = step.create_vault_cluster_targets.hosts
  }
}
// Run the version verification module against the Vault target hosts.
// For the "local" artifact source the expected version, revision and build
// date come from step.get_local_metadata; otherwise they come from the
// corresponding input variables.
step "verify_vault_version" {
  module = module.vault_verify_version
  depends_on = [
    step.wait_for_leader
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances   = step.create_vault_cluster_targets.hosts
    vault_install_dir = local.vault_install_dir
    vault_root_token  = step.create_vault_cluster.root_token
    vault_edition     = matrix.edition

    vault_product_version = matrix.artifact_source == "local" ? step.get_local_metadata.version : var.vault_product_version
    vault_revision        = matrix.artifact_source == "local" ? step.get_local_metadata.revision : var.vault_revision
    vault_build_date      = matrix.artifact_source == "local" ? step.get_local_metadata.build_date : var.vault_build_date
  }
}
// Run the unsealed verification module against the Vault target hosts after a
// leader has been elected.
step "verify_vault_unsealed" {
  module = module.vault_verify_unsealed
  depends_on = [
    step.wait_for_leader
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances   = step.create_vault_cluster_targets.hosts
    vault_install_dir = local.vault_install_dir
  }
}
// Run the write-data verification module, passing the leader addresses
// reported by the get_vault_cluster_ips step.
step "verify_write_test_data" {
  module     = module.vault_verify_write_data
  depends_on = [step.create_vault_cluster, step.get_vault_cluster_ips]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances   = step.create_vault_cluster_targets.hosts
    vault_install_dir = local.vault_install_dir
    vault_root_token  = step.create_vault_cluster.root_token
    leader_private_ip = step.get_vault_cluster_ips.leader_private_ip
    leader_public_ip  = step.get_vault_cluster_ips.leader_public_ip
  }
}
// Run the raft auto-join voter verification module. The step is skipped for
// every backend other than "raft".
step "verify_raft_auto_join_voter" {
  skip_step  = matrix.backend != "raft"
  module     = module.vault_verify_raft_auto_join_voter
  depends_on = [step.create_vault_cluster, step.get_vault_cluster_ips]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances   = step.create_vault_cluster_targets.hosts
    vault_install_dir = local.vault_install_dir
    vault_root_token  = step.create_vault_cluster.root_token
  }
}
// Run the replication verification module against the Vault target hosts for
// the edition under test.
step "verify_replication" {
  module     = module.vault_verify_replication
  depends_on = [step.create_vault_cluster, step.get_vault_cluster_ips]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances   = step.create_vault_cluster_targets.hosts
    vault_install_dir = local.vault_install_dir
    vault_edition     = matrix.edition
  }
}
// Run the read-data verification module against the follower public IPs
// reported by get_vault_cluster_ips. Ordered after the write-data and
// replication verification steps.
step "verify_read_test_data" {
  module     = module.vault_verify_read_data
  depends_on = [step.verify_write_test_data, step.verify_replication]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_install_dir = local.vault_install_dir
    node_public_ips   = step.get_vault_cluster_ips.follower_public_ips
  }
}
// Run the UI verification module against the Vault target hosts once the
// cluster has been created.
step "verify_ui" {
  module = module.vault_verify_ui
  depends_on = [
    step.create_vault_cluster
  ]

  providers = {
    enos = local.enos_provider[matrix.distro]
  }

  variables {
    vault_instances = step.create_vault_cluster_targets.hosts
  }
}
// Scenario outputs. Everything except seal_key_name is read from the
// create_vault_cluster step; seal_key_name comes from create_seal_key.

output "audit_device_file_path" {
  value       = step.create_vault_cluster.audit_device_file_path
  description = "The file path for the file audit device, if enabled"
}

output "cluster_name" {
  value       = step.create_vault_cluster.cluster_name
  description = "The Vault cluster name"
}

output "hosts" {
  value       = step.create_vault_cluster.target_hosts
  description = "The Vault cluster target hosts"
}

output "private_ips" {
  value       = step.create_vault_cluster.private_ips
  description = "The Vault cluster private IPs"
}

output "public_ips" {
  value       = step.create_vault_cluster.public_ips
  description = "The Vault cluster public IPs"
}

output "root_token" {
  value       = step.create_vault_cluster.root_token
  description = "The Vault cluster root token"
}

output "recovery_key_shares" {
  value       = step.create_vault_cluster.recovery_key_shares
  description = "The Vault cluster recovery key shares"
}

output "recovery_keys_b64" {
  value       = step.create_vault_cluster.recovery_keys_b64
  description = "The Vault cluster recovery keys b64"
}

output "recovery_keys_hex" {
  value       = step.create_vault_cluster.recovery_keys_hex
  description = "The Vault cluster recovery keys hex"
}

output "seal_key_name" {
  value       = step.create_seal_key.resource_name
  description = "The name of the cluster seal key"
}

output "unseal_keys_b64" {
  value       = step.create_vault_cluster.unseal_keys_b64
  description = "The Vault cluster unseal keys"
}

output "unseal_keys_hex" {
  value       = step.create_vault_cluster.unseal_keys_hex
  description = "The Vault cluster unseal keys hex"
}
}