enos: default undo-logs to cluster behavior (#18771)
* enos: default undo-logs to cluster behavior * change a step dependency * rearrange steps, wait a bit longer for undo logs
This commit is contained in:
parent
74b591c2c3
commit
4b4e0437e1
|
@ -61,6 +61,10 @@ module "shutdown_node" {
|
|||
source = "./modules/shutdown_node"
|
||||
}
|
||||
|
||||
module "shutdown_multiple_nodes" {
|
||||
source = "./modules/shutdown_multiple_nodes"
|
||||
}
|
||||
|
||||
module "vault_agent" {
|
||||
source = "./modules/vault_agent"
|
||||
|
||||
|
@ -188,3 +192,8 @@ module "vault_verify_write_data" {
|
|||
vault_install_dir = var.vault_install_dir
|
||||
vault_instance_count = var.vault_instance_count
|
||||
}
|
||||
|
||||
module "vault_raft_remove_peer" {
|
||||
source = "./modules/vault_raft_remove_peer"
|
||||
vault_install_dir = var.vault_install_dir
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ scenario "autopilot" {
|
|||
distro = ["ubuntu", "rhel"]
|
||||
edition = ["ent", "ent.fips1402", "ent.hsm", "ent.hsm.fips1402"]
|
||||
seal = ["awskms", "shamir"]
|
||||
undo_logs_status = ["0", "1"]
|
||||
|
||||
# Packages are not offered for the oss, ent.fips1402, and ent.hsm.fips1402 editions
|
||||
exclude {
|
||||
|
@ -46,8 +45,6 @@ scenario "autopilot" {
|
|||
arm64 = "t4g.small"
|
||||
}
|
||||
|
||||
enable_undo_logs = matrix.undo_logs_status == "1" && semverconstraint(var.vault_product_version, ">=1.13.0-0") ? true : false
|
||||
|
||||
vault_instance_type = coalesce(var.vault_instance_type, local.vault_instance_types[matrix.arch])
|
||||
vault_license_path = abspath(var.vault_license_path != null ? var.vault_license_path : joinpath(path.root, "./support/vault.hclic"))
|
||||
vault_install_dir_packages = {
|
||||
|
@ -221,29 +218,10 @@ scenario "autopilot" {
|
|||
vault_unseal_when_no_init = matrix.seal == "shamir"
|
||||
vault_unseal_keys = matrix.seal == "shamir" ? step.create_vault_cluster.vault_unseal_keys_hex : null
|
||||
vpc_id = step.create_vpc.vpc_id
|
||||
vault_environment = { "VAULT_REPLICATION_USE_UNDO_LOGS" : local.enable_undo_logs }
|
||||
vault_environment = {
|
||||
"VAULT_LOG_LEVEL" : "debug"
|
||||
}
|
||||
}
|
||||
|
||||
step "get_updated_vault_cluster_ips" {
|
||||
module = module.vault_get_cluster_ips
|
||||
depends_on = [
|
||||
step.create_vault_cluster,
|
||||
step.get_vault_cluster_ips,
|
||||
step.upgrade_vault_cluster_with_autopilot
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
variables {
|
||||
vault_instances = step.create_vault_cluster.vault_instances
|
||||
vault_install_dir = local.vault_install_dir
|
||||
added_vault_instances = step.upgrade_vault_cluster_with_autopilot.vault_instances
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
node_public_ip = step.get_vault_cluster_ips.leader_public_ip
|
||||
}
|
||||
}
|
||||
|
||||
step "verify_vault_unsealed" {
|
||||
|
@ -281,6 +259,47 @@ scenario "autopilot" {
|
|||
}
|
||||
}
|
||||
|
||||
step "verify_autopilot_await_server_removal_state" {
|
||||
module = module.vault_verify_autopilot
|
||||
depends_on = [
|
||||
step.upgrade_vault_cluster_with_autopilot,
|
||||
step.verify_raft_auto_join_voter
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
variables {
|
||||
vault_autopilot_upgrade_version = matrix.artifact_source == "local" ? step.get_local_metadata.version : var.vault_product_version
|
||||
vault_autopilot_upgrade_status = "await-server-removal"
|
||||
vault_install_dir = local.vault_install_dir
|
||||
vault_instances = step.upgrade_vault_cluster_with_autopilot.vault_instances
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
}
|
||||
}
|
||||
|
||||
step "get_updated_vault_cluster_ips" {
|
||||
module = module.vault_get_cluster_ips
|
||||
depends_on = [
|
||||
step.create_vault_cluster,
|
||||
step.get_vault_cluster_ips,
|
||||
step.upgrade_vault_cluster_with_autopilot
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
variables {
|
||||
vault_instances = step.create_vault_cluster.vault_instances
|
||||
vault_install_dir = local.vault_install_dir
|
||||
added_vault_instances = step.upgrade_vault_cluster_with_autopilot.vault_instances
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
node_public_ip = step.get_vault_cluster_ips.leader_public_ip
|
||||
}
|
||||
}
|
||||
|
||||
step "verify_read_test_data" {
|
||||
module = module.vault_verify_read_data
|
||||
depends_on = [
|
||||
|
@ -301,11 +320,50 @@ scenario "autopilot" {
|
|||
}
|
||||
}
|
||||
|
||||
step "verify_autopilot_upgraded_vault_cluster" {
|
||||
step "raft_remove_peers" {
|
||||
module = module.vault_raft_remove_peer
|
||||
depends_on = [
|
||||
step.get_updated_vault_cluster_ips,
|
||||
step.upgrade_vault_cluster_with_autopilot,
|
||||
step.verify_autopilot_await_server_removal_state
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
variables {
|
||||
vault_install_dir = local.vault_install_dir
|
||||
operator_instance = step.get_updated_vault_cluster_ips.leader_public_ip
|
||||
remove_vault_instances = step.create_vault_cluster.vault_instances
|
||||
vault_instance_count = 3
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
}
|
||||
}
|
||||
|
||||
step "remove_old_nodes" {
|
||||
module = module.shutdown_multiple_nodes
|
||||
depends_on = [
|
||||
step.create_vault_cluster,
|
||||
step.raft_remove_peers
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
variables {
|
||||
old_vault_instances = step.create_vault_cluster.vault_instances
|
||||
vault_instance_count = 3
|
||||
}
|
||||
}
|
||||
|
||||
step "verify_autopilot_idle_state" {
|
||||
module = module.vault_verify_autopilot
|
||||
depends_on = [
|
||||
step.upgrade_vault_cluster_with_autopilot,
|
||||
step.verify_raft_auto_join_voter
|
||||
step.verify_raft_auto_join_voter,
|
||||
step.remove_old_nodes
|
||||
]
|
||||
|
||||
providers = {
|
||||
|
@ -314,9 +372,9 @@ scenario "autopilot" {
|
|||
|
||||
variables {
|
||||
vault_autopilot_upgrade_version = matrix.artifact_source == "local" ? step.get_local_metadata.version : var.vault_product_version
|
||||
vault_autopilot_upgrade_status = "await-server-removal"
|
||||
vault_autopilot_upgrade_status = "idle"
|
||||
vault_install_dir = local.vault_install_dir
|
||||
vault_instances = step.create_vault_cluster.vault_instances
|
||||
vault_instances = step.upgrade_vault_cluster_with_autopilot.vault_instances
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
}
|
||||
}
|
||||
|
@ -325,8 +383,9 @@ scenario "autopilot" {
|
|||
skip_step = semverconstraint(var.vault_product_version, "<1.13.0-0")
|
||||
module = module.vault_verify_undo_logs
|
||||
depends_on = [
|
||||
step.remove_old_nodes,
|
||||
step.upgrade_vault_cluster_with_autopilot,
|
||||
step.verify_autopilot_upgraded_vault_cluster
|
||||
step.verify_autopilot_idle_state
|
||||
]
|
||||
|
||||
providers = {
|
||||
|
@ -335,7 +394,6 @@ scenario "autopilot" {
|
|||
|
||||
variables {
|
||||
vault_install_dir = local.vault_install_dir
|
||||
vault_undo_logs_status = matrix.undo_logs_status
|
||||
vault_instances = step.upgrade_vault_cluster_with_autopilot.vault_instances
|
||||
vault_root_token = step.create_vault_cluster.vault_root_token
|
||||
}
|
||||
|
|
|
@ -165,3 +165,17 @@ variable "vault_upgrade_initial_release" {
|
|||
version = "1.10.4"
|
||||
}
|
||||
}
|
||||
|
||||
variable "operator_instance" {
|
||||
type = string
|
||||
description = "The ip address of the operator (Voter) node"
|
||||
}
|
||||
|
||||
variable "remove_vault_instances" {
|
||||
type = map(object({
|
||||
private_ip = string
|
||||
public_ip = string
|
||||
}))
|
||||
description = "The old vault nodes to be removed"
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# Provider requirements for this module: the only provider declared here is
# "enos", resolved from the app.terraform.io/hashicorp-qti/enos source address.
terraform {
|
||||
required_providers {
|
||||
enos = {
|
||||
source = "app.terraform.io/hashicorp-qti/enos"
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
# Required input (no default). The locals block of this module passes it to
# range() to decide how many entries of old_vault_instances are indexed.
variable "vault_instance_count" {
|
||||
type = number
|
||||
description = "How many vault instances are in the cluster"
|
||||
}
|
||||
|
||||
# Required input (no default): a map whose values each carry a private_ip and
# a public_ip. The locals block of this module reads the values by position,
# so the map keys themselves are not used here.
variable "old_vault_instances" {
|
||||
type = map(object({
|
||||
private_ip = string
|
||||
public_ip = string
|
||||
}))
|
||||
description = "The vault cluster instances to be shutdown"
|
||||
}
|
||||
|
||||
# Re-keys the first vault_instance_count values of old_vault_instances by
# their numeric position (0, 1, ...), keeping both addresses of each entry.
# NOTE(review): presumably the keys come from range() so that the for_each
# below has keys derived only from the count - confirm the intent.
locals {
|
||||
public_ips = {
|
||||
# values(...)[idx] raises an error when idx is beyond the number of entries in
# the map, so vault_instance_count must not exceed the size of old_vault_instances.
for idx in range(var.vault_instance_count) : idx => {
|
||||
public_ip = values(var.old_vault_instances)[idx].public_ip
|
||||
private_ip = values(var.old_vault_instances)[idx].private_ip
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs one remote command per entry of local.public_ips, connecting over SSH
# to that entry's public_ip.
resource "enos_remote_exec" "shutdown_multiple_nodes" {
|
||||
for_each = local.public_ips
|
||||
# Requests a halt (-H) without a wall message; the trailing "exit 0" makes the
# inline command report success whatever status shutdown itself returns.
# NOTE(review): presumably this guards against the session being torn down
# while the host goes away - confirm.
inline = ["sudo shutdown -H --no-wall; exit 0"]
|
||||
|
||||
transport = {
|
||||
ssh = {
|
||||
host = each.value.public_ip
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
# Provider requirements for this module: the only provider declared here is
# "enos", resolved from the app.terraform.io/hashicorp-qti/enos source address.
terraform {
|
||||
required_providers {
|
||||
enos = {
|
||||
source = "app.terraform.io/hashicorp-qti/enos"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Optional input, defaulting to "8201". The resource in this module appends it
# to each instance's private_ip ("<private_ip>:<port>") to build the address
# handed to the removal script.
variable "vault_cluster_addr_port" {
|
||||
description = "The Raft cluster address port"
|
||||
type = string
|
||||
default = "8201"
|
||||
}
|
||||
|
||||
# Required input (no default). Passed to the script template as
# vault_install_dir and also used to build "<dir>/vault" for
# vault_local_binary_path.
variable "vault_install_dir" {
|
||||
type = string
|
||||
description = "The directory where the Vault binary will be installed"
|
||||
}
|
||||
|
||||
# Required input (no default). The locals block of this module passes it to
# range() to decide how many entries of remove_vault_instances are indexed.
variable "vault_instance_count" {
|
||||
type = number
|
||||
description = "How many vault instances are in the cluster"
|
||||
}
|
||||
|
||||
# Required input (no default). Used as the SSH host of the remote exec in this
# module, i.e. every removal command is executed on this one machine.
variable "operator_instance" {
|
||||
type = string
|
||||
description = "The ip address of the operator (Voter) node"
|
||||
}
|
||||
|
||||
# Required input (no default): a map whose values each carry a private_ip and
# a public_ip. The locals block of this module reads the values by position,
# so the map keys themselves are not used here.
variable "remove_vault_instances" {
|
||||
type = map(object({
|
||||
private_ip = string
|
||||
public_ip = string
|
||||
}))
|
||||
description = "The old vault nodes to be removed"
|
||||
}
|
||||
|
||||
# Required input (no default). Exported to the remote script as VAULT_TOKEN by
# the resource in this module.
variable "vault_root_token" {
|
||||
type = string
|
||||
description = "The vault root token"
|
||||
}
|
||||
|
||||
# Re-keys the first vault_instance_count values of remove_vault_instances by
# their numeric position (0, 1, ...), keeping both addresses of each entry.
# NOTE(review): presumably the keys come from range() so that the for_each
# below has keys derived only from the count - confirm the intent.
locals {
|
||||
instances = {
|
||||
# values(...)[idx] raises an error when idx is beyond the number of entries in
# the map, so vault_instance_count must not exceed the size of remove_vault_instances.
for idx in range(var.vault_instance_count) : idx => {
|
||||
public_ip = values(var.remove_vault_instances)[idx].public_ip
|
||||
private_ip = values(var.remove_vault_instances)[idx].private_ip
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Renders templates/raft-remove-peer.sh once per entry of local.instances and
# executes each rendering over SSH on var.operator_instance (not on the
# instance being removed).
resource "enos_remote_exec" "vault_raft_remove_peer" {
|
||||
for_each = local.instances
|
||||
|
||||
# Environment for the remote script: the root token plus a Vault address that
# points at localhost on the host the script runs on.
environment = {
|
||||
VAULT_TOKEN = var.vault_root_token
|
||||
VAULT_ADDR = "http://localhost:8200"
|
||||
}
|
||||
|
||||
content = templatefile("${path.module}/templates/raft-remove-peer.sh", {
|
||||
# Address of the peer to remove, in "<private_ip>:<cluster port>" form.
remove_vault_cluster_addr = "${each.value.private_ip}:${var.vault_cluster_addr_port}"
|
||||
vault_install_dir = var.vault_install_dir
|
||||
# NOTE(review): the removal script shown with this change builds its binary
# path from vault_install_dir and does not appear to reference this key -
# confirm whether it is still needed.
vault_local_binary_path = "${var.vault_install_dir}/vault"
|
||||
})
|
||||
|
||||
transport = {
|
||||
ssh = {
|
||||
host = var.operator_instance
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env bash

# NOTE(review): this script appears to be rendered by Terraform's templatefile()
# (see the vault_raft_remove_peer resource), so the two brace-delimited
# placeholders below are replaced with literal text before bash parses the
# file. Do not add other dollar-brace expansions to this script; the template
# engine would try to interpolate them.

# Stop at the first failing top-level command.
set -e

# Both values are quoted so that substituted text containing whitespace or
# glob characters is still read as one assignment rather than an assignment
# followed by a command.
binpath="${vault_install_dir}/vault"

node_addr="${remove_vault_cluster_addr}"
|
||||
|
||||
# Report an error: write the message given as the first argument to stderr and
# return status 1. The caller (or errexit) decides whether the script stops.
fail() {
  # 1>&2 sends the message to stderr; the previous 2>&1 left it on stdout.
  echo "$1" 1>&2
  return 1
}
|
||||
|
||||
# Run a command until it succeeds, with exponential back-off between attempts.
#
# Usage: retry <max_attempts> <command> [args...]
#
# Waits 1, 2, 4, ... seconds between attempts and prints "retry <n>" before
# each new attempt. Returns 0 as soon as the command succeeds, otherwise the
# exit status of the last failed attempt once max_attempts has been reached.
retry() {
  local retries=$1
  shift
  local attempt=0
  # Kept local so the loop state neither leaks into the caller's scope nor
  # reuses the names of the exit/wait builtins.
  local status delay

  until "$@"; do
    # Must be read first: $? still holds the status of the failed command.
    status=$?
    delay=$((2 ** attempt))
    attempt=$((attempt + 1))

    # Out of attempts: hand the command's own failure status to the caller.
    if [ "$attempt" -ge "$retries" ]; then
      return "$status"
    fi

    sleep "$delay"
    echo "retry $attempt"
  done

  return 0
}
|
||||
|
||||
# Remove one peer from the raft configuration.
#
# Reads the script-level variables $binpath (vault binary) and $node_addr
# (cluster address of the peer to remove). Looks up the node id of the
# non-voter peer registered under that address and removes it.
#
# Returns non-zero, without attempting the removal, when the peer list cannot
# be fetched or parsed or when no matching non-voter peer exists; otherwise
# returns the status of the remove-peer command.
remove_peer() {
  local peers node_id

  # Fetch the peer list on its own so that its exit status is checked directly
  # instead of being masked by the jq stage of a pipeline.
  if ! peers=$("$binpath" operator raft list-peers -format json); then
    fail "failed to list raft peers" || return 1
  fi

  # The address is handed to jq as a variable rather than spliced into the filter text.
  if ! node_id=$(jq -Mr --arg addr "$node_addr" '.data.config.servers[] | select(.address == $addr) | select(.voter == false) | .node_id' <<< "$peers"); then
    fail "failed to get node id of a non-voter node" || return 1
  fi

  # An empty result means the peer is unknown or still a voter; do not call
  # remove-peer with an empty id.
  if [ -z "$node_id" ]; then
    fail "no non-voter raft peer found with address $node_addr" || return 1
  fi

  "$binpath" operator raft remove-peer "$node_id"
}
|
||||
|
||||
# Refuse to continue when the vault binary is missing or not executable.
if ! test -x "$binpath"; then
  fail "unable to locate vault binary at $binpath"
fi

# The cluster can need a moment to settle after an autopilot upgrade, so the
# removal is attempted up to five times.
retry 5 remove_peer
|
|
@ -29,12 +29,6 @@ variable "vault_root_token" {
|
|||
description = "The vault root token"
|
||||
}
|
||||
|
||||
variable "vault_undo_logs_status" {
|
||||
type = string
|
||||
description = "An integer either 0 or 1 which indicates whether undo_logs are disabled or enabled"
|
||||
default = null
|
||||
}
|
||||
|
||||
locals {
|
||||
public_ips = {
|
||||
for idx in range(var.vault_instance_count) : idx => {
|
||||
|
@ -50,7 +44,6 @@ resource "enos_remote_exec" "smoke-verify-undo-logs" {
|
|||
environment = {
|
||||
VAULT_TOKEN = var.vault_root_token
|
||||
VAULT_ADDR = "http://localhost:8200"
|
||||
VAULT_UNDO_LOGS_STATUS = var.vault_undo_logs_status
|
||||
}
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/smoke-verify-undo-logs.sh")]
|
||||
|
|
|
@ -1,19 +1,18 @@
|
|||
#!/bin/bash
|
||||
|
||||
undo_logs_status="${VAULT_UNDO_LOGS_STATUS}"
|
||||
|
||||
function fail() {
|
||||
echo "$1" 1>&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
count=0
|
||||
retries=7
|
||||
retries=20
|
||||
while :; do
|
||||
state=$(curl --header "X-Vault-Token: $VAULT_TOKEN" "$VAULT_ADDR/v1/sys/metrics" | jq -r '.Gauges[] | select(.Name == "vault.core.replication.write_undo_logs")')
|
||||
leader_address=$(curl -H "X-Vault-Request: true" -H "X-Vault-Token: $VAULT_TOKEN" "$VAULT_ADDR/v1/sys/leader" | jq '.leader_address' | sed 's/\"//g')
|
||||
state=$(curl --header "X-Vault-Token: $VAULT_TOKEN" "$leader_address/v1/sys/metrics" | jq -r '.Gauges[] | select(.Name == "vault.core.replication.write_undo_logs")')
|
||||
target_undo_logs_status="$(jq -r '.Value' <<< "$state")"
|
||||
|
||||
if [ "$undo_logs_status" = "$target_undo_logs_status" ]; then
|
||||
if [ "$target_undo_logs_status" == "1" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
|
Loading…
Reference in New Issue