2021-02-02 20:15:10 +00:00
package command
import (
2021-04-29 18:32:41 +00:00
"context"
2021-05-22 02:21:11 +00:00
"encoding/json"
2021-10-20 21:47:59 +00:00
"errors"
2021-05-22 02:21:11 +00:00
"fmt"
2021-05-25 22:23:20 +00:00
"io"
2021-05-22 02:21:11 +00:00
"os"
2021-02-02 20:15:10 +00:00
"strings"
2021-04-06 23:40:43 +00:00
"sync"
2021-05-25 22:23:20 +00:00
"time"
2021-02-02 20:15:10 +00:00
2021-06-10 19:29:32 +00:00
"golang.org/x/term"
2022-08-23 19:37:16 +00:00
wrapping "github.com/hashicorp/go-kms-wrapping/v2"
2021-06-25 21:18:34 +00:00
2021-05-22 02:21:11 +00:00
"github.com/docker/docker/pkg/ioutils"
2021-04-28 15:55:18 +00:00
"github.com/hashicorp/consul/api"
2021-02-02 20:15:10 +00:00
log "github.com/hashicorp/go-hclog"
2021-07-16 00:17:31 +00:00
"github.com/hashicorp/go-secure-stdlib/reloadutil"
2021-05-25 22:23:20 +00:00
uuid "github.com/hashicorp/go-uuid"
2021-06-17 18:09:37 +00:00
cserver "github.com/hashicorp/vault/command/server"
2022-06-15 00:53:19 +00:00
"github.com/hashicorp/vault/helper/constants"
2021-05-25 22:23:20 +00:00
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/internalshared/configutil"
2021-04-06 23:40:43 +00:00
"github.com/hashicorp/vault/internalshared/listenerutil"
2021-04-28 15:55:18 +00:00
physconsul "github.com/hashicorp/vault/physical/consul"
2021-06-17 17:04:21 +00:00
"github.com/hashicorp/vault/physical/raft"
2021-05-25 22:23:20 +00:00
"github.com/hashicorp/vault/sdk/physical"
2021-02-02 20:15:10 +00:00
"github.com/hashicorp/vault/sdk/version"
2021-05-25 22:23:20 +00:00
sr "github.com/hashicorp/vault/serviceregistration"
2021-04-28 15:55:18 +00:00
srconsul "github.com/hashicorp/vault/serviceregistration/consul"
2021-05-25 22:23:20 +00:00
"github.com/hashicorp/vault/vault"
2021-04-06 23:40:43 +00:00
"github.com/hashicorp/vault/vault/diagnose"
2022-09-06 18:11:04 +00:00
"github.com/hashicorp/vault/vault/hcp_link"
2021-02-02 20:15:10 +00:00
"github.com/mitchellh/cli"
"github.com/posener/complete"
)
// OperatorDiagnoseEnableEnv is the name of an environment variable
// ("VAULT_DIAGNOSE").
// NOTE(review): it is not referenced in this part of the file; presumably it
// is consulted elsewhere to enable diagnose behavior — confirm against callers.
const OperatorDiagnoseEnableEnv = "VAULT_DIAGNOSE"
2021-07-11 22:44:19 +00:00
// CoreConfigUninitializedErr is the error message reported by the
// "Check Core Creation" step when the core configuration has no RawConfig,
// i.e. when the earlier core-configuration step did not populate it.
const CoreConfigUninitializedErr = "Diagnose cannot attempt this step because core config could not be set."
2021-05-25 22:23:20 +00:00
2021-04-08 16:43:39 +00:00
// Compile-time assertions that *OperatorDiagnoseCommand satisfies the
// cli.Command and cli.CommandAutocomplete interfaces.
var (
_ cli . Command = ( * OperatorDiagnoseCommand ) ( nil )
_ cli . CommandAutocomplete = ( * OperatorDiagnoseCommand ) ( nil )
)
2021-02-02 20:15:10 +00:00
type OperatorDiagnoseCommand struct {
* BaseCommand
2021-04-29 18:32:41 +00:00
diagnose * diagnose . Session
2021-02-02 20:15:10 +00:00
2021-04-06 23:40:43 +00:00
flagDebug bool
flagSkips [ ] string
flagConfigs [ ] string
cleanupGuard sync . Once
2021-05-25 22:23:20 +00:00
reloadFuncsLock * sync . RWMutex
reloadFuncs * map [ string ] [ ] reloadutil . ReloadFunc
ServiceRegistrations map [ string ] sr . Factory
startedCh chan struct { } // for tests
reloadedCh chan struct { } // for tests
skipEndEnd bool // for tests
2021-02-02 20:15:10 +00:00
}
// Synopsis returns the one-line description of the command.
func (c *OperatorDiagnoseCommand) Synopsis() string {
	const synopsis = "Troubleshoot problems starting Vault"
	return synopsis
}
// Help returns the long-form usage text for the command: a description, two
// example invocations, and the generated help for every flag registered by
// Flags. Leading and trailing whitespace is trimmed from the result.
func (c *OperatorDiagnoseCommand) Help() string {
	// The example commands must be valid as typed, so flags are written
	// without any spaces around '-', '=' or path separators.
	helpText := `
Usage: vault operator diagnose

  This command troubleshoots Vault startup issues, such as TLS configuration or
  auto-unseal. It should be run using the same environment variables and configuration
  files as the "vault server" command, so that startup problems can be accurately
  reproduced.

  Start diagnose with a configuration file:

      $ vault operator diagnose -config=/etc/vault/config.hcl

  Perform a diagnostic check while Vault is still running:

      $ vault operator diagnose -config=/etc/vault/config.hcl -skip=listener

` + c.Flags().Help()
	return strings.TrimSpace(helpText)
}
func ( c * OperatorDiagnoseCommand ) Flags ( ) * FlagSets {
set := NewFlagSets ( c . UI )
f := set . NewFlagSet ( "Command Options" )
f . StringSliceVar ( & StringSliceVar {
Name : "config" ,
Target : & c . flagConfigs ,
Completion : complete . PredictOr (
complete . PredictFiles ( "*.hcl" ) ,
complete . PredictFiles ( "*.json" ) ,
complete . PredictDirs ( "*" ) ,
) ,
Usage : "Path to a Vault configuration file or directory of configuration " +
"files. This flag can be specified multiple times to load multiple " +
"configurations. If the path is a directory, all files which end in " +
".hcl or .json are loaded." ,
} )
f . StringSliceVar ( & StringSliceVar {
Name : "skip" ,
Target : & c . flagSkips ,
Usage : "Skip the health checks named as arguments. May be 'listener', 'storage', or 'autounseal'." ,
} )
f . BoolVar ( & BoolVar {
Name : "debug" ,
Target : & c . flagDebug ,
Default : false ,
Usage : "Dump all information collected by Diagnose." ,
} )
2021-05-22 02:21:11 +00:00
f . StringVar ( & StringVar {
Name : "format" ,
Target : & c . flagFormat ,
Usage : "The output format" ,
} )
2021-02-02 20:15:10 +00:00
return set
}
// AutocompleteArgs reports that the command takes no positional arguments
// worth predicting.
func (c *OperatorDiagnoseCommand) AutocompleteArgs() complete.Predictor {
	var predictor complete.Predictor = complete.PredictNothing
	return predictor
}
// AutocompleteFlags returns the completions derived from the command's
// registered flags.
func (c *OperatorDiagnoseCommand) AutocompleteFlags() complete.Flags {
	flagSets := c.Flags()
	return flagSets.Completions()
}
2021-04-08 16:43:39 +00:00
// Status prefixes and a cursor-control sequence for terminal output.
// The ok/failed/warn tags are wrapped in ANSI SGR color escapes
// (32 = green, 31 = red, 33 = yellow) followed by a reset (0).
// NOTE(review): none of these constants are referenced in the visible part
// of this file — confirm they are still used elsewhere in the package.
const (
status_unknown = "[ ] "
status_ok = "\u001b[32m[ ok ]\u001b[0m "
status_failed = "\u001b[31m[failed]\u001b[0m "
status_warn = "\u001b[33m[ warn ]\u001b[0m "
// same_line is the ANSI "cursor to beginning of previous line" sequence.
same_line = "\u001b[F"
)
2021-02-02 20:15:10 +00:00
func ( c * OperatorDiagnoseCommand ) Run ( args [ ] string ) int {
f := c . Flags ( )
if err := f . Parse ( args ) ; err != nil {
c . UI . Error ( err . Error ( ) )
2021-06-04 16:52:49 +00:00
return 3
2021-02-02 20:15:10 +00:00
}
return c . RunWithParsedFlags ( )
}
func ( c * OperatorDiagnoseCommand ) RunWithParsedFlags ( ) int {
if len ( c . flagConfigs ) == 0 {
c . UI . Error ( "Must specify a configuration file using -config." )
2021-06-04 16:52:49 +00:00
return 3
2021-02-02 20:15:10 +00:00
}
2021-05-22 02:21:11 +00:00
if c . diagnose == nil {
if c . flagFormat == "json" {
c . diagnose = diagnose . New ( & ioutils . NopWriter { } )
} else {
c . UI . Output ( version . GetVersion ( ) . FullVersionNumber ( true ) )
c . diagnose = diagnose . New ( os . Stdout )
}
}
2021-04-29 18:32:41 +00:00
ctx := diagnose . Context ( context . Background ( ) , c . diagnose )
2021-07-13 23:25:04 +00:00
c . diagnose . SkipFilters = c . flagSkips
2021-05-22 02:21:11 +00:00
err := c . offlineDiagnostics ( ctx )
results := c . diagnose . Finalize ( ctx )
if c . flagFormat == "json" {
resultsJS , err := json . MarshalIndent ( results , "" , " " )
if err != nil {
2021-07-11 22:44:19 +00:00
fmt . Fprintf ( os . Stderr , "Error marshalling results: %v." , err )
2021-06-04 16:52:49 +00:00
return 4
2021-05-22 02:21:11 +00:00
}
c . UI . Output ( string ( resultsJS ) )
} else {
c . UI . Output ( "\nResults:" )
2021-06-07 16:29:36 +00:00
w , _ , err := term . GetSize ( 0 )
if err == nil {
results . Write ( os . Stdout , w )
} else {
results . Write ( os . Stdout , 0 )
}
2021-05-22 02:21:11 +00:00
}
2021-04-29 18:32:41 +00:00
if err != nil {
2021-06-04 16:52:49 +00:00
return 4
}
// Use a different return code
switch results . Status {
case diagnose . WarningStatus :
return 2
case diagnose . ErrorStatus :
2021-04-29 18:32:41 +00:00
return 1
}
return 0
}
func ( c * OperatorDiagnoseCommand ) offlineDiagnostics ( ctx context . Context ) error {
2021-04-06 23:40:43 +00:00
rloadFuncs := make ( map [ string ] [ ] reloadutil . ReloadFunc )
2021-02-02 20:15:10 +00:00
server := & ServerCommand {
// TODO: set up a different one?
// In particular, a UI instance that won't output?
BaseCommand : c . BaseCommand ,
// TODO: refactor to a common place?
AuditBackends : auditBackends ,
CredentialBackends : credentialBackends ,
LogicalBackends : logicalBackends ,
PhysicalBackends : physicalBackends ,
ServiceRegistrations : serviceRegistrations ,
// TODO: other ServerCommand options?
2021-07-15 21:54:59 +00:00
logger : log . NewInterceptLogger ( & log . LoggerOptions {
Level : log . Off ,
} ) ,
2021-04-06 23:40:43 +00:00
allLoggers : [ ] log . Logger { } ,
reloadFuncs : & rloadFuncs ,
reloadFuncsLock : new ( sync . RWMutex ) ,
2021-02-02 20:15:10 +00:00
}
2021-07-11 22:44:19 +00:00
ctx , span := diagnose . StartSpan ( ctx , "Vault Diagnose" )
2021-04-29 18:32:41 +00:00
defer span . End ( )
2021-05-27 17:17:52 +00:00
2021-06-01 17:43:51 +00:00
// OS Specific checks
diagnose . OSChecks ( ctx )
2021-05-27 17:17:52 +00:00
2021-06-17 18:09:37 +00:00
var config * cserver . Config
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Parse Configuration" , func ( ctx context . Context ) ( err error ) {
2021-06-17 18:09:37 +00:00
server . flagConfigs = c . flagConfigs
var configErrors [ ] configutil . ConfigError
config , configErrors , err = server . parseConfig ( )
if err != nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Could not parse configuration: %w." , err )
2021-06-17 18:09:37 +00:00
}
for _ , ce := range configErrors {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , diagnose . CapitalizeFirstLetter ( ce . String ( ) ) + "." )
2021-06-17 18:09:37 +00:00
}
2021-07-11 22:44:19 +00:00
diagnose . Success ( ctx , "Vault configuration syntax is ok." )
2021-06-17 18:09:37 +00:00
return nil
} )
2021-06-23 22:35:52 +00:00
if config == nil {
return fmt . Errorf ( "No vault server configuration found." )
}
2021-04-06 23:40:43 +00:00
2021-10-20 21:47:59 +00:00
diagnose . Test ( ctx , "Check Telemetry" , func ( ctx context . Context ) ( err error ) {
if config . Telemetry == nil {
diagnose . Warn ( ctx , "Telemetry is using default configuration" )
diagnose . Advise ( ctx , "By default only Prometheus and JSON metrics are available. Ignore this warning if you are using telemetry or are using these metrics and are satisfied with the default retention time and gauge period." )
} else {
t := config . Telemetry
// If any Circonus setting is present but we're missing the basic fields...
if coalesce ( t . CirconusAPIURL , t . CirconusAPIToken , t . CirconusCheckID , t . CirconusCheckTags , t . CirconusCheckSearchTag ,
t . CirconusBrokerID , t . CirconusBrokerSelectTag , t . CirconusCheckForceMetricActivation , t . CirconusCheckInstanceID ,
t . CirconusCheckSubmissionURL , t . CirconusCheckDisplayName ) != nil {
if t . CirconusAPIURL == "" {
return errors . New ( "incomplete Circonus telemetry configuration, missing circonus_api_url" )
} else if t . CirconusAPIToken != "" {
return errors . New ( "incomplete Circonus telemetry configuration, missing circonus_api_token" )
}
}
if len ( t . DogStatsDTags ) > 0 && t . DogStatsDAddr == "" {
return errors . New ( "incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified" )
}
// If any Stackdriver setting is present but we're missing the basic fields...
if coalesce ( t . StackdriverNamespace , t . StackdriverLocation , t . StackdriverDebugLogs , t . StackdriverNamespace ) != nil {
if t . StackdriverProjectID == "" {
return errors . New ( "incomplete Stackdriver telemetry configuration, missing stackdriver_project_id" )
}
if t . StackdriverLocation == "" {
return errors . New ( "incomplete Stackdriver telemetry configuration, missing stackdriver_location" )
}
if t . StackdriverNamespace == "" {
return errors . New ( "incomplete Stackdriver telemetry configuration, missing stackdriver_namespace" )
}
}
}
return nil
} )
2021-05-25 22:23:20 +00:00
var metricSink * metricsutil . ClusterMetricSink
var metricsHelper * metricsutil . MetricsHelper
2021-04-06 23:40:43 +00:00
2021-05-25 22:23:20 +00:00
var backend * physical . Backend
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Storage" , func ( ctx context . Context ) error {
2021-06-17 17:04:21 +00:00
// Ensure that there is a storage stanza
if config . Storage == nil {
2021-07-11 22:44:19 +00:00
diagnose . Advise ( ctx , "To learn how to specify a storage backend, see the Vault server configuration documentation." )
return fmt . Errorf ( "No storage stanza in Vault server configuration." )
2021-06-17 17:04:21 +00:00
}
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Create Storage Backend" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
b , err := server . setupStorage ( config )
if err != nil {
return err
2021-04-29 18:32:41 +00:00
}
2021-06-17 17:04:21 +00:00
if b == nil {
2021-07-11 22:44:19 +00:00
diagnose . Advise ( ctx , "To learn how to specify a storage backend, see the Vault server configuration documentation." )
return fmt . Errorf ( "Storage backend could not be initialized." )
2021-06-17 17:04:21 +00:00
}
2021-05-25 22:23:20 +00:00
backend = & b
return nil
} )
2021-06-24 16:56:38 +00:00
if backend == nil {
diagnose . Fail ( ctx , "Diagnose could not initialize storage backend." )
span . End ( )
return fmt . Errorf ( "Diagnose could not initialize storage backend." )
}
2021-06-17 17:04:21 +00:00
// Check for raft quorum status
if config . Storage . Type == storageTypeRaft {
path := os . Getenv ( raft . EnvVaultRaftPath )
if path == "" {
path , ok := config . Storage . Config [ "path" ]
if ! ok {
2021-07-11 22:44:19 +00:00
diagnose . SpotError ( ctx , "Check Raft Folder Permissions" , fmt . Errorf ( "Storage folder path is required." ) )
2021-06-17 17:04:21 +00:00
}
diagnose . RaftFileChecks ( ctx , path )
}
2021-07-11 22:44:19 +00:00
diagnose . RaftStorageQuorum ( ctx , ( * backend ) . ( * raft . RaftBackend ) )
2021-04-06 23:40:43 +00:00
}
2021-06-17 17:04:21 +00:00
// Consul storage checks
2021-05-25 22:23:20 +00:00
if config . Storage != nil && config . Storage . Type == storageTypeConsul {
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Consul TLS" , func ( ctx context . Context ) error {
2021-06-17 18:09:37 +00:00
err := physconsul . SetupSecureTLS ( ctx , api . DefaultConfig ( ) , config . Storage . Config , server . logger , true )
2021-05-25 22:23:20 +00:00
if err != nil {
return err
}
return nil
} )
2021-04-06 23:40:43 +00:00
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Consul Direct Storage Access" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
dirAccess := diagnose . ConsulDirectAccess ( config . Storage . Config )
if dirAccess != "" {
diagnose . Warn ( ctx , dirAccess )
}
2021-07-11 22:44:19 +00:00
if dirAccess == diagnose . DirAccessErr {
diagnose . Advise ( ctx , diagnose . DirAccessAdvice )
}
2021-05-25 22:23:20 +00:00
return nil
} )
}
// Attempt to use storage backend
2021-06-17 19:15:19 +00:00
if ! c . skipEndEnd && config . Storage . Type != storageTypeRaft {
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Storage Access" , diagnose . WithTimeout ( 30 * time . Second , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
maxDurationCrudOperation := "write"
maxDuration := time . Duration ( 0 )
uuidSuffix , err := uuid . GenerateUUID ( )
if err != nil {
return err
}
uuid := "diagnose/latency/" + uuidSuffix
dur , err := diagnose . EndToEndLatencyCheckWrite ( ctx , uuid , * backend )
if err != nil {
return err
}
maxDuration = dur
dur , err = diagnose . EndToEndLatencyCheckRead ( ctx , uuid , * backend )
if err != nil {
return err
}
if dur > maxDuration {
maxDuration = dur
maxDurationCrudOperation = "read"
}
dur , err = diagnose . EndToEndLatencyCheckDelete ( ctx , uuid , * backend )
if err != nil {
return err
}
if dur > maxDuration {
maxDuration = dur
maxDurationCrudOperation = "delete"
}
if maxDuration > time . Duration ( 0 ) {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , diagnose . LatencyWarning + fmt . Sprintf ( "duration: %s, operation: %s" , maxDuration , maxDurationCrudOperation ) )
2021-05-25 22:23:20 +00:00
}
return nil
} ) )
}
return nil
} )
2021-06-24 16:56:38 +00:00
// Return from top-level span when backend is nil
if backend == nil {
return fmt . Errorf ( "Diagnose could not initialize storage backend." )
}
2021-05-25 22:23:20 +00:00
var configSR sr . ServiceRegistration
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Service Discovery" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
if config . ServiceRegistration == nil || config . ServiceRegistration . Config == nil {
2021-07-11 22:44:19 +00:00
diagnose . Skipped ( ctx , "No service registration configured." )
2021-06-03 16:01:14 +00:00
return nil
2021-05-25 22:23:20 +00:00
}
srConfig := config . ServiceRegistration . Config
2021-04-29 18:32:41 +00:00
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Consul Service Discovery TLS" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
// SetupSecureTLS for service discovery uses the same cert and key to set up physical
// storage. See the consul package in physical for details.
2021-06-17 18:09:37 +00:00
err := srconsul . SetupSecureTLS ( ctx , api . DefaultConfig ( ) , srConfig , server . logger , true )
2021-04-29 18:32:41 +00:00
if err != nil {
return err
}
2021-05-25 22:23:20 +00:00
return nil
} )
2021-04-29 18:32:41 +00:00
2021-05-25 22:23:20 +00:00
if config . ServiceRegistration != nil && config . ServiceRegistration . Type == "consul" {
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Consul Direct Service Discovery" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
dirAccess := diagnose . ConsulDirectAccess ( config . ServiceRegistration . Config )
if dirAccess != "" {
diagnose . Warn ( ctx , dirAccess )
}
2021-07-11 22:44:19 +00:00
if dirAccess == diagnose . DirAccessErr {
diagnose . Advise ( ctx , diagnose . DirAccessAdvice )
}
2021-05-25 22:23:20 +00:00
return nil
2021-04-29 18:32:41 +00:00
} )
}
2021-05-25 22:23:20 +00:00
return nil
} )
2021-07-11 22:44:19 +00:00
sealcontext , sealspan := diagnose . StartSpan ( ctx , "Create Vault Server Configuration Seals" )
var seals [ ] vault . Seal
var sealConfigError error
barrierSeal , barrierWrapper , unwrapSeal , seals , sealConfigError , err := setSeal ( server , config , make ( [ ] string , 0 ) , make ( map [ string ] string ) )
// Check error here
if err != nil {
diagnose . Advise ( ctx , "For assistance with the seal stanza, see the Vault configuration documentation." )
diagnose . Fail ( sealcontext , fmt . Sprintf ( "Seal creation resulted in the following error: %s." , err . Error ( ) ) )
goto SEALFAIL
}
if sealConfigError != nil {
diagnose . Fail ( sealcontext , "Seal could not be configured: seals may already be initialized." )
goto SEALFAIL
}
2022-10-04 13:23:37 +00:00
for _ , seal := range seals {
// There is always one nil seal. We need to skip it so we don't start an empty Finalize-Seal-Shamir
// section.
if seal == nil {
continue
2021-07-11 22:44:19 +00:00
}
2022-10-04 13:23:37 +00:00
seal := seal // capture range variable
// Ensure that the seal finalizer is called, even if using verify-only
defer func ( seal * vault . Seal ) {
sealType := diagnose . CapitalizeFirstLetter ( ( * seal ) . BarrierType ( ) . String ( ) )
finalizeSealContext , finalizeSealSpan := diagnose . StartSpan ( ctx , "Finalize " + sealType + " Seal" )
err = ( * seal ) . Finalize ( finalizeSealContext )
if err != nil {
diagnose . Fail ( finalizeSealContext , "Error finalizing seal." )
diagnose . Advise ( finalizeSealContext , "This likely means that the barrier is still in use; therefore, finalizing the seal timed out." )
finalizeSealSpan . End ( )
}
finalizeSealSpan . End ( )
} ( & seal )
2021-07-11 22:44:19 +00:00
}
if barrierSeal == nil {
diagnose . Fail ( sealcontext , "Could not create barrier seal. No error was generated, but it is likely that the seal stanza is misconfigured. For guidance, see Vault's configuration documentation on the seal stanza." )
}
SEALFAIL :
sealspan . End ( )
diagnose . Test ( ctx , "Check Transit Seal TLS" , func ( ctx context . Context ) error {
2021-06-24 22:30:42 +00:00
var checkSealTransit bool
for _ , seal := range config . Seals {
if seal . Type == "transit" {
checkSealTransit = true
tlsSkipVerify , _ := seal . Config [ "tls_skip_verify" ]
if tlsSkipVerify == "true" {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , "TLS verification is skipped. This is highly discouraged and decreases the security of data transmissions to and from the Vault server." )
2021-06-24 22:30:42 +00:00
return nil
}
// Checking tls_client_cert and tls_client_key
tlsClientCert , ok := seal . Config [ "tls_client_cert" ]
if ! ok {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , "Missing tls_client_cert in the seal configuration." )
2021-06-24 22:30:42 +00:00
return nil
}
tlsClientKey , ok := seal . Config [ "tls_client_key" ]
if ! ok {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , "Missing tls_client_key in the seal configuration." )
2021-06-24 22:30:42 +00:00
return nil
}
_ , err := diagnose . TLSFileChecks ( tlsClientCert , tlsClientKey )
if err != nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "The TLS certificate and key configured through the tls_client_cert and tls_client_key fields of the transit seal configuration are invalid: %w." , err )
2021-06-24 22:30:42 +00:00
}
// checking tls_ca_cert
tlsCACert , ok := seal . Config [ "tls_ca_cert" ]
if ! ok {
2021-07-11 22:44:19 +00:00
diagnose . Warn ( ctx , "Missing tls_ca_cert in the seal configuration." )
2021-06-24 22:30:42 +00:00
return nil
}
2021-07-11 22:44:19 +00:00
warnings , err := diagnose . TLSCAFileCheck ( tlsCACert )
if len ( warnings ) != 0 {
for _ , warning := range warnings {
diagnose . Warn ( ctx , warning )
}
}
2021-06-24 22:30:42 +00:00
if err != nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "The TLS CA certificate configured through the tls_ca_cert field of the transit seal configuration is invalid: %w." , err )
2021-06-24 22:30:42 +00:00
}
}
}
if ! checkSealTransit {
2021-07-11 22:44:19 +00:00
diagnose . Skipped ( ctx , "No transit seal found in seal configuration." )
2021-06-24 22:30:42 +00:00
}
return nil
} )
2021-05-25 22:23:20 +00:00
var coreConfig vault . CoreConfig
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Create Core Configuration" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
var secureRandomReader io . Reader
// prepare a secure random reader for core
2021-07-11 22:44:19 +00:00
randReaderTestName := "Initialize Randomness for Core"
2021-05-25 22:23:20 +00:00
secureRandomReader , err = configutil . CreateSecureRandomReaderFunc ( config . SharedConfig , barrierWrapper )
2021-04-28 15:55:18 +00:00
if err != nil {
2021-07-11 22:44:19 +00:00
return diagnose . SpotError ( ctx , randReaderTestName , fmt . Errorf ( "Could not initialize randomness for core: %w." , err ) )
2021-04-28 15:55:18 +00:00
}
2021-07-11 22:44:19 +00:00
diagnose . SpotOk ( ctx , randReaderTestName , "" )
2021-05-25 22:23:20 +00:00
coreConfig = createCoreConfig ( server , config , * backend , configSR , barrierSeal , unwrapSeal , metricsHelper , metricSink , secureRandomReader )
return nil
2021-06-24 17:43:49 +00:00
} )
2021-05-02 23:21:06 +00:00
2021-05-25 22:23:20 +00:00
var disableClustering bool
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "HA Storage" , func ( ctx context . Context ) error {
diagnose . Test ( ctx , "Create HA Storage Backend" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
// Initialize the separate HA storage backend, if it exists
disableClustering , err = initHaBackend ( server , config , & coreConfig , * backend )
2021-04-29 18:32:41 +00:00
if err != nil {
return err
}
2021-05-25 22:23:20 +00:00
return nil
} )
2021-06-10 19:29:32 +00:00
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check HA Consul Direct Storage Access" , func ( ctx context . Context ) error {
2021-06-03 16:01:14 +00:00
if config . HAStorage == nil {
2021-07-11 22:44:19 +00:00
diagnose . Skipped ( ctx , "No HA storage stanza is configured." )
2021-06-03 16:01:14 +00:00
} else {
dirAccess := diagnose . ConsulDirectAccess ( config . HAStorage . Config )
if dirAccess != "" {
diagnose . Warn ( ctx , dirAccess )
}
2021-07-11 22:44:19 +00:00
if dirAccess == diagnose . DirAccessErr {
diagnose . Advise ( ctx , diagnose . DirAccessAdvice )
}
2021-05-02 23:21:06 +00:00
}
2021-05-25 22:23:20 +00:00
return nil
} )
if config . HAStorage != nil && config . HAStorage . Type == storageTypeConsul {
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Consul TLS" , func ( ctx context . Context ) error {
2021-06-15 16:53:29 +00:00
err = physconsul . SetupSecureTLS ( ctx , api . DefaultConfig ( ) , config . HAStorage . Config , server . logger , true )
2021-05-25 22:23:20 +00:00
if err != nil {
return err
}
return nil
} )
2021-04-28 15:55:18 +00:00
}
2021-05-25 22:23:20 +00:00
return nil
} )
2021-04-28 15:55:18 +00:00
2021-05-25 22:23:20 +00:00
// Determine the redirect address from environment variables
err = determineRedirectAddr ( server , & coreConfig , config )
if err != nil {
2021-07-11 22:44:19 +00:00
return diagnose . SpotError ( ctx , "Determine Redirect Address" , fmt . Errorf ( "Redirect Address could not be determined: %w." , err ) )
2021-05-25 22:23:20 +00:00
}
2021-07-11 22:44:19 +00:00
diagnose . SpotOk ( ctx , "Determine Redirect Address" , "" )
2021-05-25 22:23:20 +00:00
err = findClusterAddress ( server , & coreConfig , config , disableClustering )
if err != nil {
2021-07-15 23:01:51 +00:00
return diagnose . SpotError ( ctx , "Check Cluster Address" , fmt . Errorf ( "Cluster Address could not be determined or was invalid: %w." , err ) ,
diagnose . Advice ( "Please check that the API and Cluster addresses are different, and that the API, Cluster and Redirect addresses have both a host and port." ) )
2021-05-25 22:23:20 +00:00
}
2021-07-11 22:44:19 +00:00
diagnose . SpotOk ( ctx , "Check Cluster Address" , "Cluster address is logically valid and can be found." )
2021-05-25 22:23:20 +00:00
2021-06-25 21:18:34 +00:00
var vaultCore * vault . Core
2021-06-10 19:29:32 +00:00
// Run all the checks that are utilized when initializing a core object
// without actually calling core.Init. These are in the init-core section
// as they are runtime checks.
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Check Core Creation" , func ( ctx context . Context ) error {
2021-06-10 19:29:32 +00:00
var newCoreError error
if coreConfig . RawConfig == nil {
return fmt . Errorf ( CoreConfigUninitializedErr )
}
2021-06-25 21:18:34 +00:00
core , newCoreError := vault . CreateCore ( & coreConfig )
2021-06-10 19:29:32 +00:00
if newCoreError != nil {
if vault . IsFatalError ( newCoreError ) {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Error initializing core: %s." , newCoreError )
2021-06-10 19:29:32 +00:00
}
diagnose . Warn ( ctx , wrapAtLength (
2021-07-11 22:44:19 +00:00
"A non-fatal error occurred during initialization. Please check the logs for more information." ) )
2021-06-25 21:18:34 +00:00
} else {
vaultCore = core
2021-06-10 19:29:32 +00:00
}
return nil
} )
2021-06-25 21:18:34 +00:00
if vaultCore == nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Diagnose could not initialize the Vault core from the Vault server configuration." )
2021-06-25 21:18:34 +00:00
}
2021-07-11 22:44:19 +00:00
licenseCtx , licenseSpan := diagnose . StartSpan ( ctx , "Check For Autoloaded License" )
2021-06-25 21:18:34 +00:00
// If we are not in enterprise, return from the check
if ! constants . IsEnterprise {
diagnose . Skipped ( licenseCtx , "License check will not run on OSS Vault." )
} else {
// Load License from environment variables. These take precedence over the
// configured license.
if envLicensePath := os . Getenv ( EnvVaultLicensePath ) ; envLicensePath != "" {
coreConfig . LicensePath = envLicensePath
}
if envLicense := os . Getenv ( EnvVaultLicense ) ; envLicense != "" {
coreConfig . License = envLicense
}
2021-08-02 17:50:49 +00:00
vault . DiagnoseCheckLicense ( licenseCtx , vaultCore , coreConfig , false )
2021-06-25 21:18:34 +00:00
}
licenseSpan . End ( )
2021-05-25 22:23:20 +00:00
var lns [ ] listenerutil . Listener
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Start Listeners" , func ( ctx context . Context ) error {
2021-06-03 16:01:14 +00:00
disableClustering := config . HAStorage != nil && config . HAStorage . DisableClustering
2021-05-25 22:23:20 +00:00
infoKeys := make ( [ ] string , 0 , 10 )
info := make ( map [ string ] string )
var listeners [ ] listenerutil . Listener
var status int
2021-06-24 17:43:49 +00:00
diagnose . ListenerChecks ( ctx , config . Listeners )
2021-07-11 22:44:19 +00:00
diagnose . Test ( ctx , "Create Listeners" , func ( ctx context . Context ) error {
2021-05-25 22:23:20 +00:00
status , listeners , _ , err = server . InitListeners ( config , disableClustering , & infoKeys , & info )
if status != 0 {
2021-04-29 18:32:41 +00:00
return err
}
2021-05-25 22:23:20 +00:00
return nil
} )
2021-05-02 20:33:13 +00:00
2021-05-25 22:23:20 +00:00
lns = listeners
// Make sure we close all listeners from this point on
listenerCloseFunc := func ( ) {
for _ , ln := range lns {
ln . Listener . Close ( )
2021-05-02 20:33:13 +00:00
}
}
2021-06-24 17:43:49 +00:00
c . cleanupGuard . Do ( listenerCloseFunc )
2021-06-15 16:53:29 +00:00
2021-04-29 18:32:41 +00:00
return nil
} )
2021-05-22 02:21:11 +00:00
2021-05-25 22:23:20 +00:00
// TODO: Diagnose logging configuration
2021-06-10 19:29:32 +00:00
// The unseal diagnose check will simply attempt to use the barrier to encrypt and
// decrypt a mock value. It will not call runUnseal.
2021-07-12 20:22:56 +00:00
diagnose . Test ( ctx , "Check Autounseal Encryption" , diagnose . WithTimeout ( 30 * time . Second , func ( ctx context . Context ) error {
if barrierSeal == nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Diagnose could not create a barrier seal object." )
2021-06-10 19:29:32 +00:00
}
2022-08-23 19:37:16 +00:00
if barrierSeal . BarrierType ( ) == wrapping . WrapperTypeShamir {
2021-07-12 20:22:56 +00:00
diagnose . Skipped ( ctx , "Skipping barrier encryption test. Only supported for auto-unseal." )
return nil
}
2021-06-10 19:29:32 +00:00
barrierUUID , err := uuid . GenerateUUID ( )
if err != nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Diagnose could not create unique UUID for unsealing." )
2021-06-10 19:29:32 +00:00
}
barrierEncValue := "diagnose-" + barrierUUID
ciphertext , err := barrierWrapper . Encrypt ( ctx , [ ] byte ( barrierEncValue ) , nil )
if err != nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Error encrypting with seal barrier: %w." , err )
2021-06-10 19:29:32 +00:00
}
plaintext , err := barrierWrapper . Decrypt ( ctx , ciphertext , nil )
if err != nil {
return fmt . Errorf ( "Error decrypting with seal barrier: %w" , err )
}
if string ( plaintext ) != barrierEncValue {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Barrier returned incorrect decrypted value for mock data." )
2021-06-10 19:29:32 +00:00
}
return nil
} ) )
// The following block contains static checks that are run during the
// startHttpServers portion of server run. In other words, they are static
2021-07-11 22:44:19 +00:00
// checks during resource creation. Currently there is nothing important in this
// diagnose check. For now it is a placeholder for any checks that will be done
// before server run.
diagnose . Test ( ctx , "Check Server Before Runtime" , func ( ctx context . Context ) error {
2021-06-10 19:29:32 +00:00
for _ , ln := range lns {
if ln . Config == nil {
2021-07-11 22:44:19 +00:00
return fmt . Errorf ( "Found no listener config after parsing the Vault configuration." )
2021-06-10 19:29:32 +00:00
}
}
return nil
} )
2022-09-06 18:11:04 +00:00
// Checking HCP link to make sure Vault could connect to SCADA.
// If it could not connect to SCADA in 5 seconds, diagnose reports an issue
if ! constants . IsEnterprise {
diagnose . Skipped ( ctx , "HCP link check will not run on OSS Vault." )
} else {
if config . HCPLinkConf != nil {
2022-09-12 15:10:01 +00:00
// we need to override API and Passthrough capabilities
// as they could not be initialized when Vault http handler
// is not fully initialized
config . HCPLinkConf . EnablePassThroughCapability = false
config . HCPLinkConf . EnableAPICapability = false
2022-09-06 18:11:04 +00:00
diagnose . Test ( ctx , "Check HCP Connection" , func ( ctx context . Context ) error {
hcpLink , err := hcp_link . NewHCPLink ( config . HCPLinkConf , vaultCore , server . logger )
if err != nil || hcpLink == nil {
return fmt . Errorf ( "failed to start HCP link, %w" , err )
}
// check if a SCADA session is established successfully
deadline := time . Now ( ) . Add ( 5 * time . Second )
linkSessionStatus := "disconnected"
for time . Now ( ) . Before ( deadline ) {
2022-09-12 15:10:01 +00:00
linkSessionStatus = hcpLink . GetConnectionStatusMessage ( hcpLink . GetScadaSessionStatus ( ) )
2022-09-06 18:11:04 +00:00
if linkSessionStatus == "connected" {
break
}
time . Sleep ( 500 * time . Millisecond )
}
if linkSessionStatus != "connected" {
return fmt . Errorf ( "failed to connect to HCP in 5 seconds. HCP session status is: %s" , linkSessionStatus )
}
err = hcpLink . Shutdown ( )
if err != nil {
return fmt . Errorf ( "failed to shutdown HCP link: %w" , err )
}
return nil
} )
}
}
2021-05-22 02:21:11 +00:00
return nil
2021-04-28 15:55:18 +00:00
}
2021-10-20 21:47:59 +00:00
// coalesce returns the first of values that is considered "set", or nil when
// none is. A value is unset when it is nil, the empty string, or the boolean
// false; every other value (including zero numbers) counts as set.
//
// Treating false as unset matters for callers that ask "is any of these
// settings present?" over a mix of string and bool fields: an unconfigured
// bool would otherwise always be reported as present.
func coalesce(values ...interface{}) interface{} {
	for _, val := range values {
		// Interface comparison against a constant of a different dynamic
		// type is simply false, so this is safe for any value type.
		if val == nil || val == "" || val == false {
			continue
		}
		return val
	}
	return nil
}