From 1bc410783d8e5e4f366d9f269c0d4fc279f60b75 Mon Sep 17 00:00:00 2001
From: Brian Kassouf
Date: Mon, 1 Mar 2021 10:51:04 -0800
Subject: [PATCH] OSS/ENT Drift

---
 helper/testhelpers/testhelpers.go              |   5 +
 helper/testhelpers/testhelpers_oss.go          |   2 +
 vault/external_tests/raft/raft_test.go         | 158 ++++++++++++++----
 vault/logical_system.go                        |   2 +-
 .../system/storage/raftautosnapshots.mdx       |   2 +-
 5 files changed, 137 insertions(+), 32 deletions(-)

diff --git a/helper/testhelpers/testhelpers.go b/helper/testhelpers/testhelpers.go
index cefaa8af9..065512f8e 100644
--- a/helper/testhelpers/testhelpers.go
+++ b/helper/testhelpers/testhelpers.go
@@ -194,14 +194,17 @@ func AttemptUnsealCore(c *vault.TestCluster, core *vault.TestClusterCore) error
 }
 
 func EnsureStableActiveNode(t testing.T, cluster *vault.TestCluster) {
+	t.Helper()
 	deriveStableActiveCore(t, cluster)
 }
 
 func DeriveStableActiveCore(t testing.T, cluster *vault.TestCluster) *vault.TestClusterCore {
+	t.Helper()
 	return deriveStableActiveCore(t, cluster)
 }
 
 func deriveStableActiveCore(t testing.T, cluster *vault.TestCluster) *vault.TestClusterCore {
+	t.Helper()
 	activeCore := DeriveActiveCore(t, cluster)
 	minDuration := time.NewTimer(3 * time.Second)
 
@@ -230,6 +233,7 @@ func deriveStableActiveCore(t testing.T, cluster *vault.TestCluster) *vault.Test
 }
 
 func DeriveActiveCore(t testing.T, cluster *vault.TestCluster) *vault.TestClusterCore {
+	t.Helper()
 	for i := 0; i < 20; i++ {
 		for _, core := range cluster.Cores {
 			leaderResp, err := core.Client.Sys().Leader()
@@ -247,6 +251,7 @@ func DeriveActiveCore(t testing.T, cluster *vault.TestCluste
 }
 
 func DeriveStandbyCores(t testing.T, cluster *vault.TestCluster) []*vault.TestClusterCore {
+	t.Helper()
 	cores := make([]*vault.TestClusterCore, 0, 2)
 	for _, core := range cluster.Cores {
 		leaderResp, err := core.Client.Sys().Leader()
diff --git a/helper/testhelpers/testhelpers_oss.go b/helper/testhelpers/testhelpers_oss.go
index 1d42a30f3..30e2e15b3 100644
--- a/helper/testhelpers/testhelpers_oss.go
+++ b/helper/testhelpers/testhelpers_oss.go
@@ -7,6 +7,8 @@ import (
 	"github.com/mitchellh/go-testing-interface"
 )
 
+var IsEnterprise = false
+
 // WaitForActiveNodeAndStandbys does nothing more than wait for the active node
 // on OSS. On enterprise it waits for perf standbys to be healthy too.
 func WaitForActiveNodeAndStandbys(t testing.T, cluster *vault.TestCluster) {
diff --git a/vault/external_tests/raft/raft_test.go b/vault/external_tests/raft/raft_test.go
index ef1d5dfc5..433165f0a 100644
--- a/vault/external_tests/raft/raft_test.go
+++ b/vault/external_tests/raft/raft_test.go
@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/hashicorp/go-cleanhttp"
+	"github.com/hashicorp/go-hclog"
 	uuid "github.com/hashicorp/go-uuid"
 	"github.com/hashicorp/vault/api"
 	credUserpass "github.com/hashicorp/vault/builtin/credential/userpass"
@@ -21,6 +22,7 @@ import (
 	"github.com/hashicorp/vault/helper/testhelpers/teststorage"
 	vaulthttp "github.com/hashicorp/vault/http"
 	"github.com/hashicorp/vault/physical/raft"
+	"github.com/hashicorp/vault/sdk/helper/logging"
 	"github.com/hashicorp/vault/sdk/logical"
 	"github.com/hashicorp/vault/vault"
 	"github.com/stretchr/testify/require"
@@ -28,12 +30,20 @@ import (
 )
 
 func raftCluster(t testing.TB) *vault.TestCluster {
+	return raftClusterWithPerfStandby(t, false)
+}
+
+func raftClusterWithPerfStandby(t testing.TB, disablePerfStandby bool) *vault.TestCluster {
 	conf := &vault.CoreConfig{
 		CredentialBackends: map[string]logical.Factory{
 			"userpass": credUserpass.Factory,
 		},
 	}
+	conf.DisablePerformanceStandby = disablePerfStandby
+
 	var opts = vault.TestClusterOptions{HandlerFunc: vaulthttp.Handler}
+	opts.Logger = logging.NewVaultLogger(hclog.Trace).Named(t.Name())
+
 	teststorage.RaftBackendSetup(conf, &opts)
 	cluster := vault.NewTestCluster(t, conf, &opts)
 	cluster.Start()
@@ -400,35 +410,64 @@ func TestRaft_SnapshotAPI(t *testing.T) {
 }
 
 func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) {
-	tCases := []struct {
-		Name   string
-		Rekey  bool
-		Rotate bool
-	}{
+	type testCase struct {
+		Name               string
+		Rekey              bool
+		Rotate             bool
+		DisablePerfStandby bool
+	}
+
+	tCases := []testCase{
 		{
-			Name:   "rekey",
-			Rekey:  true,
-			Rotate: false,
+			Name:               "rekey",
+			Rekey:              true,
+			Rotate:             false,
+			DisablePerfStandby: true,
 		},
 		{
-			Name:   "rotate",
-			Rekey:  false,
-			Rotate: true,
+			Name:               "rotate",
+			Rekey:              false,
+			Rotate:             true,
+			DisablePerfStandby: true,
 		},
 		{
-			Name:   "both",
-			Rekey:  true,
-			Rotate: true,
+			Name:               "both",
+			Rekey:              true,
+			Rotate:             true,
+			DisablePerfStandby: true,
 		},
 	}
 
+	if testhelpers.IsEnterprise {
+		tCases = append(tCases, []testCase{
+			{
+				Name:               "rekey-with-perf-standby",
+				Rekey:              true,
+				Rotate:             false,
+				DisablePerfStandby: false,
+			},
+			{
+				Name:               "rotate-with-perf-standby",
+				Rekey:              false,
+				Rotate:             true,
+				DisablePerfStandby: false,
+			},
+			{
+				Name:               "both-with-perf-standby",
+				Rekey:              true,
+				Rotate:             true,
+				DisablePerfStandby: false,
+			},
+		}...)
+	}
+
 	for _, tCase := range tCases {
 		t.Run(tCase.Name, func(t *testing.T) {
 			// bind locally
 			tCaseLocal := tCase
 			t.Parallel()
 
-			cluster := raftCluster(t)
+			cluster := raftClusterWithPerfStandby(t, tCaseLocal.DisablePerfStandby)
 			defer cluster.Cleanup()
 
 			leaderClient := cluster.Cores[0].Client
@@ -481,11 +520,17 @@ func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) {
 				if err != nil {
 					t.Fatal(err)
 				}
+
+				testhelpers.EnsureStableActiveNode(t, cluster)
+				testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 			}
 
 			if tCaseLocal.Rekey {
 				// Rekey
 				cluster.BarrierKeys = testhelpers.RekeyCluster(t, cluster, false)
+
+				testhelpers.EnsureStableActiveNode(t, cluster)
+				testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 			}
 
 			if tCaseLocal.Rekey {
@@ -520,6 +565,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) {
 			}
 
 			testhelpers.EnsureStableActiveNode(t, cluster)
+			testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 
 			// Write some data so we can make sure we can read it later. This is testing
 			// that we correctly reload the keyring
@@ -550,17 +596,21 @@ func TestRaft_SnapshotAPI_RekeyRotate_Backward(t *testing.T) {
 }
 
 func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
-	tCases := []struct {
-		Name       string
-		Rekey      bool
-		Rotate     bool
-		ShouldSeal bool
-	}{
+	type testCase struct {
+		Name               string
+		Rekey              bool
+		Rotate             bool
+		ShouldSeal         bool
+		DisablePerfStandby bool
+	}
+
+	tCases := []testCase{
 		{
-			Name:       "rekey",
-			Rekey:      true,
-			Rotate:     false,
-			ShouldSeal: false,
+			Name:               "rekey",
+			Rekey:              true,
+			Rotate:             false,
+			ShouldSeal:         false,
+			DisablePerfStandby: true,
 		},
 		{
 			Name:   "rotate",
@@ -568,7 +618,8 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 			Rotate: true,
 			// Rotate writes a new master key upgrade using the new term, which
 			// we can no longer decrypt. We must seal here.
-			ShouldSeal: true,
+			ShouldSeal:         true,
+			DisablePerfStandby: true,
 		},
 		{
 			Name:   "both",
@@ -576,17 +627,48 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 			Rotate: true,
 			// If we are moving forward and we have rekeyed and rotated there
 			// isn't any way to restore the latest keys so expect to seal.
-			ShouldSeal: true,
+			ShouldSeal:         true,
+			DisablePerfStandby: true,
 		},
 	}
 
+	if testhelpers.IsEnterprise {
+		tCases = append(tCases, []testCase{
+			{
+				Name:               "rekey-with-perf-standby",
+				Rekey:              true,
+				Rotate:             false,
+				ShouldSeal:         false,
+				DisablePerfStandby: false,
+			},
+			{
+				Name:               "rotate-with-perf-standby",
+				Rekey:              false,
+				Rotate:             true,
+				// Rotate writes a new master key upgrade using the new term, which
+				// we can no longer decrypt. We must seal here.
+				ShouldSeal:         true,
+				DisablePerfStandby: false,
+			},
+			{
+				Name:               "both-with-perf-standby",
+				Rekey:              true,
+				Rotate:             true,
+				// If we are moving forward and we have rekeyed and rotated there
+				// isn't any way to restore the latest keys so expect to seal.
+				ShouldSeal:         true,
+				DisablePerfStandby: false,
+			},
+		}...)
+	}
+
 	for _, tCase := range tCases {
 		t.Run(tCase.Name, func(t *testing.T) {
 			// bind locally
 			tCaseLocal := tCase
 			t.Parallel()
 
-			cluster := raftCluster(t)
+			cluster := raftClusterWithPerfStandby(t, tCaseLocal.DisablePerfStandby)
 			defer cluster.Cleanup()
 
 			leaderClient := cluster.Cores[0].Client
@@ -633,6 +715,9 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 			if tCaseLocal.Rekey {
 				// Rekey
 				cluster.BarrierKeys = testhelpers.RekeyCluster(t, cluster, false)
+
+				testhelpers.EnsureStableActiveNode(t, cluster)
+				testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 			}
 			if tCaseLocal.Rotate {
 				// Set the key clean up to 0 so it's cleaned immediately. This
@@ -647,8 +732,18 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 				if err != nil {
 					t.Fatal(err)
 				}
-				// Let the key upgrade get deleted
-				time.Sleep(1 * time.Second)
+
+				if !tCaseLocal.DisablePerfStandby {
+					// Without the key upgrade the perf standby nodes will seal and
+					// raft will get into a failure state. Make sure we get the
+					// cluster back into a healthy state before moving forward.
+					testhelpers.WaitForNCoresSealed(t, cluster, 2)
+					testhelpers.EnsureCoresUnsealed(t, cluster)
+					testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
+
+					active := testhelpers.DeriveActiveCore(t, cluster)
+					leaderClient = active.Client
+				}
 			}
 
 			// cache the new barrier keys
@@ -688,6 +783,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 			}
 
 			testhelpers.EnsureStableActiveNode(t, cluster)
+			testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 			if tCaseLocal.Rekey {
 				// Restore snapshot, should fail.
 				req = leaderClient.NewRequest("POST", "/v1/sys/storage/raft/snapshot")
@@ -705,6 +801,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 				if apiResp.Error() == nil || !strings.Contains(apiResp.Error().Error(), "could not verify hash file, possibly the snapshot is using a different set of unseal keys") {
 					t.Fatalf("expected error verifying hash file, got %v", apiResp.Error())
 				}
+
 			}
 
 			// Restore snapshot force
@@ -725,6 +822,7 @@ func TestRaft_SnapshotAPI_RekeyRotate_Forward(t *testing.T) {
 
 			case false:
 				testhelpers.EnsureStableActiveNode(t, cluster)
+				testhelpers.WaitForActiveNodeAndPerfStandbys(t, cluster)
 
 				// Write some data so we can make sure we can read it later. This is testing
 				// that we correctly reload the keyring
diff --git a/vault/logical_system.go b/vault/logical_system.go
index 16b5fe489..383ca3b20 100644
--- a/vault/logical_system.go
+++ b/vault/logical_system.go
@@ -3881,7 +3881,7 @@ func (b *SystemBackend) rotateBarrierKey(ctx context.Context) error {
 	b.Backend.Logger().Info("installed new encryption key")
 
 	// In HA mode, we need to an upgrade path for the standby instances
-	if b.Core.ha != nil {
+	if b.Core.ha != nil && b.Core.KeyRotateGracePeriod() > 0 {
 		// Create the upgrade path to the new term
 		if err := b.Core.barrier.CreateUpgrade(ctx, newTerm); err != nil {
 			b.Backend.Logger().Error("failed to create new upgrade", "term", newTerm, "error", err)
diff --git a/website/content/api-docs/system/storage/raftautosnapshots.mdx b/website/content/api-docs/system/storage/raftautosnapshots.mdx
index d8969ca8a..1dd23f71d 100644
--- a/website/content/api-docs/system/storage/raftautosnapshots.mdx
+++ b/website/content/api-docs/system/storage/raftautosnapshots.mdx
@@ -83,7 +83,7 @@ other mechanisms to provide access to cloud resources.
 
 - `aws_s3_server_side_encryption` `(boolean)` - Use AES256 to encrypt bucket contents.
 
-- `aws_s3_server_kms_key` `(string)` - Use named KMS key, when `aws_s3_enable_kms=true`
+- `aws_s3_kms_key` `(string)` - Use named KMS key, when `aws_s3_enable_kms=true`
 
 #### storage_type=google-gcs
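---

Two notes for reviewers on the patterns this diff relies on.

First, the IsEnterprise gate: the OSS helper file pins `IsEnterprise` to
false, and the enterprise tree is expected to carry a counterpart pinning it
to true (that file is not part of this diff), so shared tests such as the two
snapshot tests above can widen their case tables at runtime instead of being
forked wholesale. A minimal sketch of what such a counterpart could look
like, with a hypothetical file name and selection mechanism (build tag or
file substitution in the enterprise repository):

    // testhelpers_ent.go (hypothetical name; would exist only in the
    // enterprise tree, selected in place of testhelpers_oss.go).
    package testhelpers

    // Enterprise builds also exercise the *-with-perf-standby test cases.
    var IsEnterprise = true

Second, the rotateBarrierKey change: standby nodes follow a barrier key
rotation through an upgrade path to the new term. When
`KeyRotateGracePeriod()` is zero the upgrade would be destroyed immediately
anyway, so the new guard skips creating it at all. The forward snapshot test
above leans on exactly this behavior: with the grace period set to zero and
perf standbys enabled, the two standbys are expected to seal
(`WaitForNCoresSealed(t, cluster, 2)`) and are then unsealed by hand before
the test proceeds, replacing the old fixed one-second sleep.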