From 65e3489c45fc0302797c240c5f7b7b55756d50ff Mon Sep 17 00:00:00 2001 From: Hridoy Roy Date: Thu, 10 Jun 2021 12:29:32 -0700 Subject: [PATCH] Diagnose resource creation checks (#11627) * initial refactoring of unseal step in run * remove waitgroup * remove waitgroup * backup work * backup * backup * completely modularize run and move into diagnose * add diagnose errors for incorrect number of unseal keys * comment tests back in * backup * first subspan * finished subspanning but running into error with timeouts * remove runtime checks * merge main branch * meeting updates * remove telemetry block * roy comment * subspans for seal finalization and wrapping diagnose latency checks * backup while I fix something else * fix storage latency test errors * runtime checks * diagnose with timeout on seal --- command/operator_diagnose.go | 62 +++++++++++++++++++++- command/operator_diagnose_test.go | 88 +++++++++++++++++++++++++++++++ command/server.go | 1 + vault/core.go | 19 +++++-- 4 files changed, 164 insertions(+), 6 deletions(-) diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go index 008032d8e..227765c85 100644 --- a/command/operator_diagnose.go +++ b/command/operator_diagnose.go @@ -4,13 +4,14 @@ import ( "context" "encoding/json" "fmt" - "golang.org/x/term" "io" "os" "strings" "sync" "time" + "golang.org/x/term" + "github.com/docker/docker/pkg/ioutils" "github.com/hashicorp/consul/api" log "github.com/hashicorp/go-hclog" @@ -409,6 +410,7 @@ SEALFAIL: } return nil }) + diagnose.Test(ctx, "test-consul-direct-access-storage", func(ctx context.Context) error { if config.HAStorage == nil { diagnose.Skipped(ctx, "no HA storage configured") @@ -445,6 +447,26 @@ SEALFAIL: } diagnose.SpotOk(ctx, "find-cluster-addr", "") + // Run all the checks that are utilized when initializing a core object + // without actually calling core.Init. These are in the init-core section + // as they are runtime checks. + diagnose.Test(ctx, "init-core", func(ctx context.Context) error { + var newCoreError error + if coreConfig.RawConfig == nil { + return fmt.Errorf(CoreConfigUninitializedErr) + } + _, newCoreError = vault.CreateCore(&coreConfig) + if newCoreError != nil { + if vault.IsFatalError(newCoreError) { + return fmt.Errorf("Error initializing core: %s", newCoreError) + } + diagnose.Warn(ctx, wrapAtLength( + "WARNING! A non-fatal error occurred during initialization. Please "+ + "check the logs for more information.")) + } + return nil + }) + var lns []listenerutil.Listener diagnose.Test(ctx, "init-listeners", func(ctx context.Context) error { disableClustering := config.HAStorage != nil && config.HAStorage.DisableClustering @@ -505,5 +527,43 @@ SEALFAIL: }) // TODO: Diagnose logging configuration + + // The unseal diagnose check will simply attempt to use the barrier to encrypt and + // decrypt a mock value. It will not call runUnseal. + diagnose.Test(ctx, "unseal", diagnose.WithTimeout(30*time.Second, func(ctx context.Context) error { + if barrierWrapper == nil { + return fmt.Errorf("Diagnose could not create a barrier seal object") + } + barrierUUID, err := uuid.GenerateUUID() + if err != nil { + return fmt.Errorf("Diagnose could not create unique UUID for unsealing") + } + barrierEncValue := "diagnose-" + barrierUUID + ciphertext, err := barrierWrapper.Encrypt(ctx, []byte(barrierEncValue), nil) + if err != nil { + return fmt.Errorf("Error encrypting with seal barrier: %w", err) + } + plaintext, err := barrierWrapper.Decrypt(ctx, ciphertext, nil) + if err != nil { + return fmt.Errorf("Error decrypting with seal barrier: %w", err) + + } + if string(plaintext) != barrierEncValue { + return fmt.Errorf("barrier returned incorrect decrypted value for mock data") + } + return nil + })) + + // The following block contains static checks that are run during the + // startHttpServers portion of server run. In other words, they are static + // checks during resource creation. + diagnose.Test(ctx, "start-servers", func(ctx context.Context) error { + for _, ln := range lns { + if ln.Config == nil { + return fmt.Errorf("Found nil listener config after parsing") + } + } + return nil + }) return nil } diff --git a/command/operator_diagnose_test.go b/command/operator_diagnose_test.go index 4cd9183a7..565699e31 100644 --- a/command/operator_diagnose_test.go +++ b/command/operator_diagnose_test.go @@ -78,6 +78,94 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) { }, }, }, + { + Name: "service-discovery", + Status: diagnose.OkStatus, + Children: []*diagnose.Result{ + { + Name: "test-serviceregistration-tls-consul", + Status: diagnose.OkStatus, + }, + { + Name: "test-consul-direct-access-service-discovery", + Status: diagnose.OkStatus, + }, + }, + }, + { + Name: "create-seal", + Status: diagnose.OkStatus, + }, + { + Name: "setup-core", + Status: diagnose.OkStatus, + Children: []*diagnose.Result{ + { + Name: "init-randreader", + Status: diagnose.OkStatus, + }, + }, + }, + { + Name: "setup-ha-storage", + Status: diagnose.OkStatus, + Children: []*diagnose.Result{ + { + Name: "create-ha-storage-backend", + Status: diagnose.OkStatus, + }, + { + Name: "test-consul-direct-access-storage", + Status: diagnose.OkStatus, + }, + { + Name: "test-ha-storage-tls-consul", + Status: diagnose.OkStatus, + }, + }, + }, + { + Name: "determine-redirect", + Status: diagnose.OkStatus, + }, + { + Name: "find-cluster-addr", + Status: diagnose.OkStatus, + }, + { + Name: "init-core", + Status: diagnose.OkStatus, + }, + { + Name: "init-listeners", + Status: diagnose.WarningStatus, + Children: []*diagnose.Result{ + { + Name: "create-listeners", + Status: diagnose.OkStatus, + }, + { + Name: "check-listener-tls", + Status: diagnose.WarningStatus, + Warnings: []string{ + "TLS is disabled in a Listener config stanza.", + }, + }, + }, + }, + { + Name: "unseal", + Status: diagnose.ErrorStatus, + Message: "Diagnose could not create a barrier seal object", + }, + { + Name: "start-servers", + Status: diagnose.OkStatus, + }, + { + Name: "finalize-seal-shamir", + Status: diagnose.OkStatus, + }, }, }, { diff --git a/command/server.go b/command/server.go index 8916d051d..cac839dc9 100644 --- a/command/server.go +++ b/command/server.go @@ -1209,6 +1209,7 @@ func (c *ServerCommand) Run(args []string) int { info["log level"] = logLevelString infoKeys = append(infoKeys, "log level") barrierSeal, barrierWrapper, unwrapSeal, seals, sealConfigError, err := setSeal(c, config, infoKeys, info) + // Check error here if err != nil { c.UI.Error(err.Error()) diff --git a/vault/core.go b/vault/core.go index 473e3f0c8..3f60d2366 100644 --- a/vault/core.go +++ b/vault/core.go @@ -708,8 +708,9 @@ func (c *CoreConfig) GetServiceRegistration() sr.ServiceRegistration { return nil } -// NewCore is used to construct a new core -func NewCore(conf *CoreConfig) (*Core, error) { +// CreateCore conducts static validations on the Core Config +// and returns an uninitialized core. +func CreateCore(conf *CoreConfig) (*Core, error) { if conf.HAPhysical != nil && conf.HAPhysical.HAEnabled() { if conf.RedirectAddr == "" { return nil, fmt.Errorf("missing API address, please set in configuration or via environment") @@ -897,8 +898,18 @@ func NewCore(conf *CoreConfig) (*Core, error) { }) } c.seal.SetCore(c) + return c, nil +} - if err := coreInit(c, conf); err != nil { +// NewCore is used to construct a new core +func NewCore(conf *CoreConfig) (*Core, error) { + var err error + c, err := CreateCore(conf) + if err != nil { + return nil, err + } + + if err = coreInit(c, conf); err != nil { return nil, err } @@ -918,8 +929,6 @@ func NewCore(conf *CoreConfig) (*Core, error) { } } - var err error - // Construct a new AES-GCM barrier c.barrier, err = NewAESGCMBarrier(c.physical) if err != nil {