Ignore errors from rollback manager invocations (#22235) (#22238)

* Ignore errors from rollback manager invocations

During reload and mount move operations, we want to ensure that errors
from the final Rollback invocation are not fatal; returning them risks
failing replication in Enterprise when the core/mounts table gets
invalidated. This mirrors the behavior of the periodic rollback manager,
which only logs such errors.
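
For reference, both code paths reduce to the same log-and-continue
pattern (excerpted from the unmount hunk in the diff below; the remount
path is analogous):

```go
// Invoke the rollback manager a final time; treat a failure as
// non-fatal, mirroring the periodic rollback manager.
if err := c.rollback.Rollback(rCtx, path); err != nil {
	c.logger.Error("ignoring rollback error during unmount", "error", err, "path", path)
	err = nil
}
```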

This updates the noop backend to allow failing just rollback operations,
which we can use in tests to verify this behavior and to ensure the core
operations (plugin reload, mount move, and seal/unseal) are not broken
by it. Note that most of these operations were asynchronous from the
client's PoV and thus did not fail anyway prior to this change.
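
Concretely, a test opts into the failure mode via the new factory, and
the backend then errors only on rollback operations (both excerpted from
the diff below):

```go
// Wire the rollback-failing noop backend into a test core.
coreConfig := &vault.CoreConfig{
	LogicalBackends: map[string]logical.Factory{
		"noop": vault.NoopBackendRollbackErrFactory,
	},
}

// Inside NoopBackend.HandleRequest: fail just the rollback operation.
if n.RollbackErrs && req.Operation == "rollback" {
	return nil, fmt.Errorf("no-op backend rollback has erred out")
}
```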



* Add changelog entry



* Update vault/external_tests/router/router_ext_test.go



---------

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
Co-authored-by: Alexander Scheel <alex.scheel@hashicorp.com>
Co-authored-by: Nick Cabatoff <ncabatoff@hashicorp.com>
hc-github-team-secure-vault-core authored 2023-08-16 13:34:37 -04:00; committed by GitHub
parent f8cc240ab5
commit b30f78f66a
4 changed files with 63 additions and 4 deletions

changelog/22235.txt

@@ -0,0 +1,3 @@
```release-note:improvement
core: Log rollback manager failures during unmount and remount to prevent replication failures on secondary clusters.
```

vault/external_tests/router/router_ext_test.go

@@ -68,3 +68,41 @@ func testRouter_MountSubpath(t *testing.T, mountPoints []string) {
	cluster.UnsealCores(t)
	t.Logf("Done: %#v", mountPoints)
}

func TestRouter_UnmountRollbackIsntFatal(t *testing.T) {
	coreConfig := &vault.CoreConfig{
		LogicalBackends: map[string]logical.Factory{
			"noop": vault.NoopBackendRollbackErrFactory,
		},
	}
	cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{
		HandlerFunc: vaulthttp.Handler,
	})
	cluster.Start()
	defer cluster.Cleanup()

	vault.TestWaitActive(t, cluster.Cores[0].Core)
	client := cluster.Cores[0].Client

	if err := client.Sys().Mount("noop", &api.MountInput{
		Type: "noop",
	}); err != nil {
		t.Fatalf("failed to mount noop backend: %v", err)
	}

	if _, err := client.Logical().Write("sys/plugins/reload/backend", map[string]interface{}{
		"mounts": "noop",
	}); err != nil {
		t.Fatalf("expected reload of noop with broken rollback to succeed; got err=%v", err)
	}

	if _, err := client.Logical().Write("sys/remount", map[string]interface{}{
		"from": "noop",
		"to":   "noop-to",
	}); err != nil {
		t.Fatalf("expected remount of noop with broken rollback to succeed; got err=%v", err)
	}

	cluster.EnsureCoresSealed(t)
	cluster.UnsealCores(t)
}


@@ -874,9 +874,13 @@ func (c *Core) unmountInternal(ctx context.Context, path string, updateStorage b
	rCtx := namespace.ContextWithNamespace(c.activeContext, ns)
	if backend != nil && c.rollback != nil {
-		// Invoke the rollback manager a final time
+		// Invoke the rollback manager a final time. This is not fatal as
+		// various periodic funcs (e.g., PKI) can legitimately error; the
+		// periodic rollback manager logs these errors rather than failing
+		// replication like returning this error would do.
		if err := c.rollback.Rollback(rCtx, path); err != nil {
-			return err
+			c.logger.Error("ignoring rollback error during unmount", "error", err, "path", path)
+			err = nil
		}
	}
	if backend != nil && c.expiration != nil && updateStorage {
@@ -1142,11 +1146,15 @@ func (c *Core) remountSecretsEngine(ctx context.Context, src, dst namespace.Moun
	}
	if !c.IsDRSecondary() {
-		// Invoke the rollback manager a final time
+		// Invoke the rollback manager a final time. This is not fatal as
+		// various periodic funcs (e.g., PKI) can legitimately error; the
+		// periodic rollback manager logs these errors rather than failing
+		// replication like returning this error would do.
		rCtx := namespace.ContextWithNamespace(c.activeContext, ns)
		if c.rollback != nil && c.router.MatchingBackend(ctx, srcRelativePath) != nil {
			if err := c.rollback.Rollback(rCtx, srcRelativePath); err != nil {
-				return err
+				c.logger.Error("ignoring rollback error during remount", "error", err, "path", src.Namespace.Path+src.MountPath)
+				err = nil
			}
		}


@@ -29,17 +29,27 @@ type NoopBackend struct {
	DefaultLeaseTTL time.Duration
	MaxLeaseTTL     time.Duration
	BackendType     logical.BackendType
	RollbackErrs    bool
}

func NoopBackendFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) {
	return &NoopBackend{}, nil
}

func NoopBackendRollbackErrFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) {
	return &NoopBackend{RollbackErrs: true}, nil
}

func (n *NoopBackend) HandleRequest(ctx context.Context, req *logical.Request) (*logical.Response, error) {
	if req.TokenEntry() != nil {
		panic("got a non-nil TokenEntry")
	}

	if n.RollbackErrs && req.Operation == "rollback" {
		return nil, fmt.Errorf("no-op backend rollback has erred out")
	}

	var err error
	resp := n.Response
	if n.RequestHandler != nil {