Split the Enos workflow's single "run" step into separate launch and destroy
steps with one retry each, send a Slack notification per step, add the
secondary cluster IPs dependency and a performance_secondary_token output to
the replication scenario, and capture `operator members` output before
filtering followers.

Review amendment: the destroy retry condition is wrapped in always() so that it
still runs when launch has already failed the job.

diff --git a/.github/workflows/test-run-enos-scenario-matrix.yml b/.github/workflows/test-run-enos-scenario-matrix.yml
index 00ebff9e3..c05b42930 100644
--- a/.github/workflows/test-run-enos-scenario-matrix.yml
+++ b/.github/workflows/test-run-enos-scenario-matrix.yml
@@ -128,16 +128,15 @@ jobs:
       - if: contains(inputs.sample-name, 'ent')
         name: Configure Vault license
         run: echo "${{ secrets.VAULT_LICENSE }}" > ./enos/support/vault.hclic || true
-      - name: Run Enos scenario
-        id: run
-        # Continue once and retry to handle occasional blips when creating
-        # infrastructure.
+      - id: launch
+        name: enos scenario launch ${{ matrix.scenario.id.filter }}
+        # Continue once and retry to handle occasional blips when creating infrastructure.
         continue-on-error: true
-        run: enos scenario run --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
-      - name: Retry Enos scenario if necessary
-        id: run_retry
-        if: steps.run.outcome == 'failure'
-        run: enos scenario run --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
+        run: enos scenario launch --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
+      - if: steps.launch.outcome == 'failure'
+        id: launch_retry
+        name: Retry enos scenario launch ${{ matrix.scenario.id.filter }}
+        run: enos scenario launch --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
       - name: Upload Debug Data
         if: failure()
         uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
@@ -147,11 +146,16 @@ jobs:
           path: ${{ env.ENOS_DEBUG_DATA_ROOT_DIR }}
           retention-days: 30
         continue-on-error: true
-      - name: Ensure scenario has been destroyed
+      - if: ${{ always() }}
         id: destroy
-        if: ${{ always() }}
-        # With Enos version 0.0.11 the destroy step returns an error if the infrastructure
-        # is already destroyed by enos run. So temporarily setting it to continue on error in GHA
+        name: enos scenario destroy ${{ matrix.scenario.id.filter }}
+        continue-on-error: true
+        run: enos scenario destroy --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
+      # always() is needed because a bare outcome check carries an implicit success(),
+      # which would skip this retry when launch has already failed the job.
+      - if: ${{ always() && steps.destroy.outcome == 'failure' }}
+        id: destroy_retry
+        name: Retry enos scenario destroy ${{ matrix.scenario.id.filter }}
         continue-on-error: true
         run: enos scenario destroy --timeout 60m0s --chdir ./enos ${{ matrix.scenario.id.filter }}
       - name: Clean up Enos runtime directories
@@ -162,28 +166,34 @@ jobs:
           rm -rf /tmp/enos*
           rm -rf ./enos/support
           rm -rf ./enos/.enos
-      # Send a Slack notification to #feed-vault-enos-failures if the 'run' step fails.
-      # There is an incoming webhook set up on the "Enos Vault Failure Bot" Slackbot https://api.slack.com/apps/A05E31CH1LG/incoming-webhooks
-      - name: Send Slack notification on Enos run failure
+      # Send slack notifications to #feed-vault-enos-failures if any of our enos scenario commands fail.
+      # There is an incoming webhook set up on the "Enos Vault Failure Bot" Slackbot:
+      # https://api.slack.com/apps/A05E31CH1LG/incoming-webhooks
+      - if: ${{ always() && ! cancelled() }}
+        name: Notify launch failed
         uses: hashicorp/actions-slack-status@v1
-        if: ${{ always() && ! cancelled() }}
         with:
-          failure-message: "An Enos scenario `run` failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
-          status: ${{ steps.run.outcome }}
+          failure-message: "enos scenario launch ${{ matrix.scenario.id.filter }} failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
+          status: ${{ steps.launch.outcome }}
           slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }}
-      # Send a Slack notification to #feed-vault-enos-failures if the 'run_retry' step fails.
-      - name: Send Slack notification on Enos run_retry failure
+      - if: ${{ always() && ! cancelled() }}
+        name: Notify retry launch failed
         uses: hashicorp/actions-slack-status@v1
-        if: ${{ always() && ! cancelled() }}
         with:
-          failure-message: "An Enos scenario `run_retry` failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
-          status: ${{ steps.run_retry.outcome }}
+          failure-message: "retry enos scenario launch ${{ matrix.scenario.id.filter }} failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
+          status: ${{ steps.launch_retry.outcome }}
           slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }}
-      # Send a Slack notification to #feed-vault-enos-failures if the 'destroy' step fails.
-      - name: Send Slack notification on Enos destroy failure
+      - if: ${{ always() && ! cancelled() }}
+        name: Notify destroy failed
         uses: hashicorp/actions-slack-status@v1
-        if: ${{ always() && ! cancelled() }}
         with:
-          failure-message: "An Enos scenario `destroy` failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
+          failure-message: "enos scenario destroy ${{ matrix.scenario.id.filter }} failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
           status: ${{ steps.destroy.outcome }}
           slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }}
+      - if: ${{ always() && ! cancelled() }}
+        name: Notify retry destroy failed
+        uses: hashicorp/actions-slack-status@v1
+        with:
+          failure-message: "retry enos scenario destroy ${{ matrix.scenario.id.filter }} failed. \nTriggering event: `${{ github.event_name }}` \nActor: `${{ github.actor }}`"
+          status: ${{ steps.destroy_retry.outcome }}
+          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL }}
diff --git a/enos/enos-scenario-replication.hcl b/enos/enos-scenario-replication.hcl
index 1fd753fa0..7834078c7 100644
--- a/enos/enos-scenario-replication.hcl
+++ b/enos/enos-scenario-replication.hcl
@@ -446,6 +446,7 @@ scenario "replication" {
     module = module.vault_setup_perf_primary
     depends_on = [
       step.get_primary_cluster_ips,
+      step.get_secondary_cluster_ips,
       step.write_test_data_on_primary
     ]
 
@@ -811,6 +812,11 @@ scenario "replication" {
     value       = step.create_secondary_cluster.root_token
   }
 
+  output "performance_secondary_token" {
+    description = "The performance secondary replication token"
+    value       = step.generate_secondary_token.secondary_token
+  }
+
   output "remaining_hosts" {
     description = "The Vault cluster primary hosts after removing the leader and follower"
     value       = step.get_remaining_hosts_replication_data.remaining_hosts
diff --git a/enos/modules/vault_get_cluster_ips/scripts/get-follower-private-ips.sh b/enos/modules/vault_get_cluster_ips/scripts/get-follower-private-ips.sh
index 369d896dd..084a11a35 100644
--- a/enos/modules/vault_get_cluster_ips/scripts/get-follower-private-ips.sh
+++ b/enos/modules/vault_get_cluster_ips/scripts/get-follower-private-ips.sh
@@ -25,11 +25,13 @@ while :; do
   # Vault >= 1.10.x has the operator members. If we have that then we'll use it.
   if $binpath operator -h 2>&1 | grep members &> /dev/null; then
     # Get the folllowers that are part of our private ips.
-    if followers=$($binpath operator members -format json | jq --argjson expected "$VAULT_INSTANCE_PRIVATE_IPS" -c '.Nodes | map(select(any(.; .active_node==false)) | .api_address | scan("[0-9]+.[0-9]+.[0-9]+.[0-9]+")) as $followers | $expected - ($expected - $followers)'); then
-      # Make sure that we got all the followers
-      if jq --argjson expected "$VAULT_INSTANCE_PRIVATE_IPS" --argjson followers "$followers" -ne '$expected | length as $el | $followers | length as $fl | $fl == $el-1' > /dev/null; then
-        echo "$followers"
-        exit 0
+    if members=$($binpath operator members -format json); then
+      if followers=$(echo "$members" | jq --argjson expected "$VAULT_INSTANCE_PRIVATE_IPS" -c '.Nodes | map(select(any(.; .active_node==false)) | .api_address | scan("[0-9]+.[0-9]+.[0-9]+.[0-9]+")) as $followers | $expected - ($expected - $followers)'); then
+        # Make sure that we got all the followers
+        if jq --argjson expected "$VAULT_INSTANCE_PRIVATE_IPS" --argjson followers "$followers" -ne '$expected | length as $el | $followers | length as $fl | $fl == $el-1' > /dev/null; then
+          echo "$followers"
+          exit 0
+        fi
       fi
     fi
   else