5b63066c2c
* Create helpers which integrate with OpenTelemetry for diagnose collection * Go mod vendor * consul tls checks * draft for storage end to end check * Comments * Update vault/diagnose/helpers.go Co-authored-by: swayne275 <swayne275@gmail.com> * Add unit test/example * tweak output * More comments * add spot check concept * Get unit tests working on Result structs * Fix unit test * Get unit tests working, and make diagnose sessions local rather than global * Comments * Last comments * No need for init * :| * Fix helpers_test * cleaned up chan logic. Tests next. * fix tests * remove a comment * tests * remove a comment * cosmetic changes Co-authored-by: Scott G. Miller <smiller@hashicorp.com> Co-authored-by: swayne275 <swayne275@gmail.com>
274 lines
7.6 KiB
Go
274 lines
7.6 KiB
Go
package command
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/hashicorp/consul/api"
|
|
log "github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/vault/internalshared/listenerutil"
|
|
"github.com/hashicorp/vault/internalshared/reloadutil"
|
|
physconsul "github.com/hashicorp/vault/physical/consul"
|
|
"github.com/hashicorp/vault/sdk/version"
|
|
srconsul "github.com/hashicorp/vault/serviceregistration/consul"
|
|
"github.com/hashicorp/vault/vault/diagnose"
|
|
"github.com/mitchellh/cli"
|
|
"github.com/posener/complete"
|
|
)
|
|
|
|
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
|
|
|
|
var (
|
|
_ cli.Command = (*OperatorDiagnoseCommand)(nil)
|
|
_ cli.CommandAutocomplete = (*OperatorDiagnoseCommand)(nil)
|
|
)
|
|
|
|
type OperatorDiagnoseCommand struct {
|
|
*BaseCommand
|
|
diagnose *diagnose.Session
|
|
|
|
flagDebug bool
|
|
flagSkips []string
|
|
flagConfigs []string
|
|
cleanupGuard sync.Once
|
|
|
|
reloadFuncsLock *sync.RWMutex
|
|
reloadFuncs *map[string][]reloadutil.ReloadFunc
|
|
startedCh chan struct{} // for tests
|
|
reloadedCh chan struct{} // for tests
|
|
skipEndEnd bool // for tests
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) Synopsis() string {
|
|
return "Troubleshoot problems starting Vault"
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) Help() string {
|
|
helpText := `
|
|
Usage: vault operator diagnose
|
|
|
|
This command troubleshoots Vault startup issues, such as TLS configuration or
|
|
auto-unseal. It should be run using the same environment variables and configuration
|
|
files as the "vault server" command, so that startup problems can be accurately
|
|
reproduced.
|
|
|
|
Start diagnose with a configuration file:
|
|
|
|
$ vault operator diagnose -config=/etc/vault/config.hcl
|
|
|
|
Perform a diagnostic check while Vault is still running:
|
|
|
|
$ vault operator diagnose -config=/etc/vault/config.hcl -skip=listener
|
|
|
|
` + c.Flags().Help()
|
|
return strings.TrimSpace(helpText)
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) Flags() *FlagSets {
|
|
set := NewFlagSets(c.UI)
|
|
f := set.NewFlagSet("Command Options")
|
|
|
|
f.StringSliceVar(&StringSliceVar{
|
|
Name: "config",
|
|
Target: &c.flagConfigs,
|
|
Completion: complete.PredictOr(
|
|
complete.PredictFiles("*.hcl"),
|
|
complete.PredictFiles("*.json"),
|
|
complete.PredictDirs("*"),
|
|
),
|
|
Usage: "Path to a Vault configuration file or directory of configuration " +
|
|
"files. This flag can be specified multiple times to load multiple " +
|
|
"configurations. If the path is a directory, all files which end in " +
|
|
".hcl or .json are loaded.",
|
|
})
|
|
|
|
f.StringSliceVar(&StringSliceVar{
|
|
Name: "skip",
|
|
Target: &c.flagSkips,
|
|
Usage: "Skip the health checks named as arguments. May be 'listener', 'storage', or 'autounseal'.",
|
|
})
|
|
|
|
f.BoolVar(&BoolVar{
|
|
Name: "debug",
|
|
Target: &c.flagDebug,
|
|
Default: false,
|
|
Usage: "Dump all information collected by Diagnose.",
|
|
})
|
|
return set
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) AutocompleteArgs() complete.Predictor {
|
|
return complete.PredictNothing
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) AutocompleteFlags() complete.Flags {
|
|
return c.Flags().Completions()
|
|
}
|
|
|
|
const (
|
|
status_unknown = "[ ] "
|
|
status_ok = "\u001b[32m[ ok ]\u001b[0m "
|
|
status_failed = "\u001b[31m[failed]\u001b[0m "
|
|
status_warn = "\u001b[33m[ warn ]\u001b[0m "
|
|
same_line = "\u001b[F"
|
|
)
|
|
|
|
func (c *OperatorDiagnoseCommand) Run(args []string) int {
|
|
f := c.Flags()
|
|
if err := f.Parse(args); err != nil {
|
|
c.UI.Error(err.Error())
|
|
return 1
|
|
}
|
|
return c.RunWithParsedFlags()
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int {
|
|
|
|
if len(c.flagConfigs) == 0 {
|
|
c.UI.Error("Must specify a configuration file using -config.")
|
|
return 1
|
|
}
|
|
|
|
c.UI.Output(version.GetVersion().FullVersionNumber(true))
|
|
ctx := diagnose.Context(context.Background(), c.diagnose)
|
|
err := c.offlineDiagnostics(ctx)
|
|
|
|
if err != nil {
|
|
return 1
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error {
|
|
rloadFuncs := make(map[string][]reloadutil.ReloadFunc)
|
|
server := &ServerCommand{
|
|
// TODO: set up a different one?
|
|
// In particular, a UI instance that won't output?
|
|
BaseCommand: c.BaseCommand,
|
|
|
|
// TODO: refactor to a common place?
|
|
AuditBackends: auditBackends,
|
|
CredentialBackends: credentialBackends,
|
|
LogicalBackends: logicalBackends,
|
|
PhysicalBackends: physicalBackends,
|
|
ServiceRegistrations: serviceRegistrations,
|
|
|
|
// TODO: other ServerCommand options?
|
|
|
|
logger: log.NewInterceptLogger(nil),
|
|
allLoggers: []log.Logger{},
|
|
reloadFuncs: &rloadFuncs,
|
|
reloadFuncsLock: new(sync.RWMutex),
|
|
}
|
|
|
|
ctx, span := diagnose.StartSpan(ctx, "initialization")
|
|
defer span.End()
|
|
|
|
server.flagConfigs = c.flagConfigs
|
|
config, err := server.parseConfig()
|
|
if err != nil {
|
|
return diagnose.SpotError(ctx, "parse-config", err)
|
|
} else {
|
|
diagnose.SpotOk(ctx, "parse-config", "")
|
|
}
|
|
// Check Listener Information
|
|
// TODO: Run Diagnose checks on the actual net.Listeners
|
|
|
|
if err := diagnose.Test(ctx, "init-listeners", func(ctx context.Context) error {
|
|
disableClustering := config.HAStorage.DisableClustering
|
|
infoKeys := make([]string, 0, 10)
|
|
info := make(map[string]string)
|
|
status, lns, _, errMsg := server.InitListeners(config, disableClustering, &infoKeys, &info)
|
|
|
|
if status != 0 {
|
|
return errMsg
|
|
}
|
|
|
|
// Make sure we close all listeners from this point on
|
|
listenerCloseFunc := func() {
|
|
for _, ln := range lns {
|
|
ln.Listener.Close()
|
|
}
|
|
}
|
|
|
|
defer c.cleanupGuard.Do(listenerCloseFunc)
|
|
|
|
sanitizedListeners := make([]listenerutil.Listener, 0, len(config.Listeners))
|
|
for _, ln := range lns {
|
|
if ln.Config.TLSDisable {
|
|
diagnose.Warn(ctx, "TLS is disabled in a Listener config stanza.")
|
|
continue
|
|
}
|
|
if ln.Config.TLSDisableClientCerts {
|
|
diagnose.Warn(ctx, "TLS for a listener is turned on without requiring client certs.")
|
|
}
|
|
|
|
// Check ciphersuite and load ca/cert/key files
|
|
// TODO: TLSConfig returns a reloadFunc and a TLSConfig. We can use this to
|
|
// perform an active probe.
|
|
_, _, err := listenerutil.TLSConfig(ln.Config, make(map[string]string), c.UI)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
sanitizedListeners = append(sanitizedListeners, listenerutil.Listener{
|
|
Listener: ln.Listener,
|
|
Config: ln.Config,
|
|
})
|
|
}
|
|
return diagnose.ListenerChecks(sanitizedListeners)
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Errors in these items could stop Vault from starting but are not yet covered:
|
|
// TODO: logging configuration
|
|
// TODO: SetupTelemetry
|
|
if err := diagnose.Test(ctx, "storage", func(ctx context.Context) error {
|
|
b, err := server.setupStorage(config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if config.Storage != nil && config.Storage.Type == storageTypeConsul {
|
|
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.Storage.Config, server.logger, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul {
|
|
err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.HAStorage.Config, server.logger, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// Attempt to use storage backend
|
|
if !c.skipEndEnd {
|
|
err = diagnose.StorageEndToEndLatencyCheck(ctx, b)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return diagnose.Test(ctx, "service-discovery", func(ctx context.Context) error {
|
|
// Initialize the Service Discovery, if there is one
|
|
if config.ServiceRegistration != nil && config.ServiceRegistration.Type == "consul" {
|
|
// SetupSecureTLS for service discovery uses the same cert and key to set up physical
|
|
// storage. See the consul package in physical for details.
|
|
err = srconsul.SetupSecureTLS(api.DefaultConfig(), config.ServiceRegistration.Config, server.logger, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
}
|