From 36ae22c07af3d80bebcc5281d1e53352f449ed9d Mon Sep 17 00:00:00 2001 From: Hridoy Roy Date: Wed, 28 Apr 2021 08:55:18 -0700 Subject: [PATCH] Consul TLS Checks for Diagnose [draft] (#11467) * consul tls checks * fix some tests * complete physical and service registration tls checks --- command/operator_diagnose.go | 78 +++++++++++++---- command/operator_diagnose_test.go | 30 ++++++- command/server.go | 2 +- .../config_bad_https_storage.hcl | 50 +++++++++++ .../config_diagnose_hastorage_bad_https.hcl | 50 +++++++++++ .../diagnose_bad_https_consul_sr.hcl | 50 +++++++++++ .../server/test-fixtures/nostore_config.hcl | 2 +- physical/consul/consul.go | 61 +++++++++----- .../consul/consul_service_registration.go | 83 ++++++++++++------- 9 files changed, 332 insertions(+), 74 deletions(-) create mode 100644 command/server/test-fixtures/config_bad_https_storage.hcl create mode 100644 command/server/test-fixtures/config_diagnose_hastorage_bad_https.hcl create mode 100644 command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go index 96d47e41c..485fd7799 100644 --- a/command/operator_diagnose.go +++ b/command/operator_diagnose.go @@ -1,13 +1,17 @@ package command import ( + "fmt" "strings" "sync" + "github.com/hashicorp/consul/api" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/internalshared/listenerutil" "github.com/hashicorp/vault/internalshared/reloadutil" + physconsul "github.com/hashicorp/vault/physical/consul" "github.com/hashicorp/vault/sdk/version" + srconsul "github.com/hashicorp/vault/serviceregistration/consul" "github.com/hashicorp/vault/vault/diagnose" "github.com/mitchellh/cli" "github.com/posener/complete" @@ -118,6 +122,7 @@ func (c *OperatorDiagnoseCommand) Run(args []string) int { } func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { + if len(c.flagConfigs) == 0 { c.UI.Error("Must specify a configuration file using -config.") return 1 @@ -146,27 +151,25 @@ func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { } phase := "Parse configuration" - c.UI.Output(status_unknown + phase) server.flagConfigs = c.flagConfigs config, err := server.parseConfig() if err != nil { - c.UI.Output(same_line + status_failed + phase) - c.UI.Output("Error while reading configuration files:") - c.UI.Output(err.Error()) + c.outputErrorsAndWarnings(fmt.Sprintf("Error while reading configuration files: %s", err.Error()), phase, nil) return 1 } + c.UI.Output(same_line + status_ok + phase) // Check Listener Information + phase = "Check Listeners" // TODO: Run Diagnose checks on the actual net.Listeners - + var warnings []string disableClustering := config.HAStorage.DisableClustering infoKeys := make([]string, 0, 10) info := make(map[string]string) status, lns, _, errMsg := server.InitListeners(config, disableClustering, &infoKeys, &info) if status != 0 { - c.UI.Output("Error parsing listener configuration.") - c.UI.Error(errMsg.Error()) + c.outputErrorsAndWarnings(fmt.Sprintf("Error parsing listener configuration. %s", errMsg.Error()), phase, nil) return 1 } @@ -182,11 +185,11 @@ func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { sanitizedListeners := make([]listenerutil.Listener, 0, len(config.Listeners)) for _, ln := range lns { if ln.Config.TLSDisable { - c.UI.Warn("WARNING! TLS is disabled in a Listener config stanza.") + warnings = append(warnings, "WARNING! TLS is disabled in a Listener config stanza.") continue } if ln.Config.TLSDisableClientCerts { - c.UI.Warn("WARNING! TLS for a listener is turned on without requiring client certs.") + warnings = append(warnings, "WARNING! TLS for a listener is turned on without requiring client certs.") } // Check ciphersuite and load ca/cert/key files @@ -194,8 +197,7 @@ func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { // perform an active probe. _, _, err := listenerutil.TLSConfig(ln.Config, make(map[string]string), c.UI) if err != nil { - c.UI.Output("Error creating TLS Configuration out of config file: ") - c.UI.Output(err.Error()) + c.outputErrorsAndWarnings(fmt.Sprintf("Error creating TLS Configuration out of config file: %s", err.Error()), phase, warnings) return 1 } @@ -206,26 +208,68 @@ func (c *OperatorDiagnoseCommand) RunWithParsedFlags() int { } err = diagnose.ListenerChecks(sanitizedListeners) if err != nil { - c.UI.Output("Diagnose caught configuration errors: ") - c.UI.Output(err.Error()) + c.outputErrorsAndWarnings(fmt.Sprintf("Diagnose caught configuration errors: %s", err.Error()), phase, warnings) return 1 } + c.UI.Output(same_line + status_ok + phase) + // Errors in these items could stop Vault from starting but are not yet covered: // TODO: logging configuration // TODO: SetupTelemetry // TODO: check for storage backend - c.UI.Output(same_line + status_ok + phase) phase = "Access storage" - c.UI.Output(status_unknown + phase) + warnings = nil + _, err = server.setupStorage(config) if err != nil { - c.UI.Output(same_line + status_failed + phase) - c.UI.Output(err.Error()) + c.outputErrorsAndWarnings(fmt.Sprintf(err.Error()), phase, warnings) return 1 } + + if config.Storage != nil && config.Storage.Type == storageTypeConsul { + err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.Storage.Config, server.logger, true) + if err != nil { + c.outputErrorsAndWarnings(fmt.Sprintf(err.Error()), phase, warnings) + return 1 + } + } + + if config.HAStorage != nil && config.HAStorage.Type == storageTypeConsul { + err = physconsul.SetupSecureTLS(api.DefaultConfig(), config.HAStorage.Config, server.logger, true) + if err != nil { + c.outputErrorsAndWarnings(fmt.Sprintf(err.Error()), phase, warnings) + return 1 + } + } + + c.UI.Output(same_line + status_ok + phase) + + // Initialize the Service Discovery, if there is one + phase = "Service discovery" + if config.ServiceRegistration != nil && config.ServiceRegistration.Type == "consul" { + // SetupSecureTLS for service discovery uses the same cert and key to set up physical + // storage. See the consul package in physical for details. + err = srconsul.SetupSecureTLS(api.DefaultConfig(), config.ServiceRegistration.Config, server.logger, true) + if err != nil { + c.outputErrorsAndWarnings(fmt.Sprintf(err.Error()), phase, warnings) + return 1 + } + } c.UI.Output(same_line + status_ok + phase) return 0 } + +func (c *OperatorDiagnoseCommand) outputErrorsAndWarnings(err, phase string, warnings []string) { + + c.UI.Output(same_line + status_failed + phase) + + if warnings != nil && len(warnings) > 0 { + for _, warning := range warnings { + c.UI.Warn(warning) + } + } + c.UI.Error(err) +} diff --git a/command/operator_diagnose_test.go b/command/operator_diagnose_test.go index b0b78deba..2ab5299e9 100644 --- a/command/operator_diagnose_test.go +++ b/command/operator_diagnose_test.go @@ -33,7 +33,7 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) { []string{ "-config", "./server/test-fixtures/config_diagnose_ok.hcl", }, - []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Parse configuration\n[ ] Access storage\n\x1b[F\x1b[32m[ ok ]\x1b[0m Access storage\n"}, + []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Check Listeners\n\x1b[F\x1b[32m[ ok ]\x1b[0m Access storage\n\x1b[F\x1b[32m[ ok ]\x1b[0m Service discovery\n"}, 0, }, { @@ -41,7 +41,7 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) { []string{ "-config", "./server/test-fixtures/nostore_config.hcl", }, - []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Parse configuration\n[ ] Access storage\n\x1b[F\x1b[31m[failed]\x1b[0m Access storage\nA storage backend must be specified\n"}, + []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Check Listeners\n\x1b[F\x1b[31m[failed]\x1b[0m Access storage\nA storage backend must be specified"}, 1, }, { @@ -49,9 +49,33 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) { []string{ "-config", "./server/test-fixtures/tls_config_ok.hcl", }, - []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Parse configuration\n[ ] Access storage\n\x1b[F\x1b[32m[ ok ]\x1b[0m Access storage\n"}, + []string{"Parse configuration\n\x1b[F\x1b[32m[ ok ]\x1b[0m Check Listeners\n\x1b[F\x1b[32m[ ok ]\x1b[0m Access storage\n\x1b[F\x1b[32m[ ok ]\x1b[0m Service discovery"}, 0, }, + { + "diagnose_invalid_https_storage", + []string{ + "-config", "./server/test-fixtures/config_bad_https_storage.hcl", + }, + []string{"Access storage\nfailed to verify certificate: x509: certificate has expired or is not yet valid:"}, + 1, + }, + { + "diagnose_invalid_https_hastorage", + []string{ + "-config", "./server/test-fixtures/config_diagnose_hastorage_bad_https.hcl", + }, + []string{"Access storage\nfailed to verify certificate: x509: certificate has expired or is not yet valid:"}, + 1, + }, + { + "diagnose_invalid_https_sr", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_https_consul_sr.hcl", + }, + []string{"Service discovery\nfailed to verify certificate: x509: certificate has expired or is not yet valid:"}, + 1, + }, } t.Run("validations", func(t *testing.T) { diff --git a/command/server.go b/command/server.go index 8740ee684..0f78a95e8 100644 --- a/command/server.go +++ b/command/server.go @@ -1154,7 +1154,7 @@ func (c *ServerCommand) Run(args []string) int { } // Prevent server startup if migration is active - // TODO: how to incorporate this check into Diagnose? + // TODO: Use OpenTelemetry to integrate this into Diagnose if c.storageMigrationActive(backend) { return 1 } diff --git a/command/server/test-fixtures/config_bad_https_storage.hcl b/command/server/test-fixtures/config_bad_https_storage.hcl new file mode 100644 index 000000000..20cc4e2c7 --- /dev/null +++ b/command/server/test-fixtures/config_bad_https_storage.hcl @@ -0,0 +1,50 @@ +disable_cache = true +disable_mlock = true + +ui = true + +listener "tcp" { + address = "127.0.0.1:1027" + tls_disable = true +} + +backend "consul" { + foo = "bar" + advertise_addr = "foo" + scheme = "https" + tls_cert_file = "./../vault/diagnose/test-fixtures/expiredcert.pem" + tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem" +} + +ha_backend "consul" { + bar = "baz" + advertise_addr = "snafu" + disable_clustering = "true" +} + +service_registration "consul" { + foo = "bar" +} + +telemetry { + statsd_address = "bar" + usage_gauge_period = "5m" + maximum_gauge_cardinality = 100 + + statsite_address = "foo" + dogstatsd_addr = "127.0.0.1:7254" + dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"] + metrics_prefix = "myprefix" +} + +sentinel { + additional_enabled_modules = [] +} + +max_lease_ttl = "10h" +default_lease_ttl = "10h" +cluster_name = "testcluster" +pid_file = "./pidfile" +raw_storage_endpoint = true +disable_sealwrap = true +disable_printable_check = true diff --git a/command/server/test-fixtures/config_diagnose_hastorage_bad_https.hcl b/command/server/test-fixtures/config_diagnose_hastorage_bad_https.hcl new file mode 100644 index 000000000..1ffdba05e --- /dev/null +++ b/command/server/test-fixtures/config_diagnose_hastorage_bad_https.hcl @@ -0,0 +1,50 @@ +disable_cache = true +disable_mlock = true + +ui = true + +listener "tcp" { + address = "127.0.0.1:1028" + tls_disable = true +} + +backend "consul" { + foo = "bar" + advertise_addr = "foo" +} + +ha_backend "consul" { + bar = "baz" + advertise_addr = "snafu" + disable_clustering = "true" + scheme = "https" + tls_cert_file = "./../vault/diagnose/test-fixtures/expiredcert.pem" + tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem" +} + +service_registration "consul" { + foo = "bar" +} + +telemetry { + statsd_address = "bar" + usage_gauge_period = "5m" + maximum_gauge_cardinality = 100 + + statsite_address = "foo" + dogstatsd_addr = "127.0.0.1:7254" + dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"] + metrics_prefix = "myprefix" +} + +sentinel { + additional_enabled_modules = [] +} + +max_lease_ttl = "10h" +default_lease_ttl = "10h" +cluster_name = "testcluster" +pid_file = "./pidfile" +raw_storage_endpoint = true +disable_sealwrap = true +disable_printable_check = true diff --git a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl new file mode 100644 index 000000000..29ddc6fab --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl @@ -0,0 +1,50 @@ +disable_cache = true +disable_mlock = true + +ui = true + +listener "tcp" { + address = "127.0.0.1:1029" + tls_disable = true +} + +backend "consul" { + foo = "bar" + advertise_addr = "foo" +} + +ha_backend "consul" { + bar = "baz" + advertise_addr = "snafu" + disable_clustering = "true" +} + +service_registration "consul" { + foo = "bar" + address = "https://127.0.0.1:8200" + tls_cert_file = "./../vault/diagnose/test-fixtures/expiredcert.pem" + tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem" +} + +telemetry { + statsd_address = "bar" + usage_gauge_period = "5m" + maximum_gauge_cardinality = 100 + + statsite_address = "foo" + dogstatsd_addr = "127.0.0.1:7254" + dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"] + metrics_prefix = "myprefix" +} + +sentinel { + additional_enabled_modules = [] +} + +max_lease_ttl = "10h" +default_lease_ttl = "10h" +cluster_name = "testcluster" +pid_file = "./pidfile" +raw_storage_endpoint = true +disable_sealwrap = true +disable_printable_check = true diff --git a/command/server/test-fixtures/nostore_config.hcl b/command/server/test-fixtures/nostore_config.hcl index bdfa051ab..667570cb0 100644 --- a/command/server/test-fixtures/nostore_config.hcl +++ b/command/server/test-fixtures/nostore_config.hcl @@ -5,7 +5,7 @@ ui = true listener "tcp" { address = "127.0.0.1:1024" - tls_disable = true + tls_disable = true } ha_backend "consul" { diff --git a/physical/consul/consul.go b/physical/consul/consul.go index 91ffdf2ff..ae0703e4a 100644 --- a/physical/consul/consul.go +++ b/physical/consul/consul.go @@ -18,6 +18,7 @@ import ( "github.com/hashicorp/vault/sdk/helper/parseutil" "github.com/hashicorp/vault/sdk/helper/tlsutil" "github.com/hashicorp/vault/sdk/physical" + "github.com/hashicorp/vault/vault/diagnose" "golang.org/x/net/http2" ) @@ -129,6 +130,29 @@ func NewConsulBackend(conf map[string]string, logger log.Logger) (physical.Backe // Set MaxIdleConnsPerHost to the number of processes used in expiration.Restore consulConf.Transport.MaxIdleConnsPerHost = consts.ExpirationRestoreWorkerCount + SetupSecureTLS(consulConf, conf, logger, false) + + consulConf.HttpClient = &http.Client{Transport: consulConf.Transport} + client, err := api.NewClient(consulConf) + if err != nil { + return nil, errwrap.Wrapf("client setup failed: {{err}}", err) + } + + // Setup the backend + c := &ConsulBackend{ + path: path, + client: client, + kv: client.KV(), + permitPool: physical.NewPermitPool(maxParInt), + consistencyMode: consistencyMode, + + sessionTTL: sessionTTL, + lockWaitTime: lockWaitTime, + } + return c, nil +} + +func SetupSecureTLS(consulConf *api.Config, conf map[string]string, logger log.Logger, isDiagnose bool) error { if addr, ok := conf["address"]; ok { consulConf.Address = addr if logger.IsDebug() { @@ -162,37 +186,32 @@ func NewConsulBackend(conf map[string]string, logger log.Logger) (physical.Backe } if consulConf.Scheme == "https" { + if isDiagnose { + certPath, okCert := conf["tls_cert_file"] + keyPath, okKey := conf["tls_key_file"] + if okCert && okKey { + err := diagnose.TLSFileChecks(certPath, keyPath) + if err != nil { + return err + } + } else { + return fmt.Errorf("key or cert path: %s, %s, cannot be loaded from consul config file", certPath, keyPath) + } + } + // Use the parsed Address instead of the raw conf['address'] tlsClientConfig, err := tlsutil.SetupTLSConfig(conf, consulConf.Address) if err != nil { - return nil, err + return err } consulConf.Transport.TLSClientConfig = tlsClientConfig if err := http2.ConfigureTransport(consulConf.Transport); err != nil { - return nil, err + return err } logger.Debug("configured TLS") } - - consulConf.HttpClient = &http.Client{Transport: consulConf.Transport} - client, err := api.NewClient(consulConf) - if err != nil { - return nil, errwrap.Wrapf("client setup failed: {{err}}", err) - } - - // Setup the backend - c := &ConsulBackend{ - path: path, - client: client, - kv: client.KV(), - permitPool: physical.NewPermitPool(maxParInt), - consistencyMode: consistencyMode, - - sessionTTL: sessionTTL, - lockWaitTime: lockWaitTime, - } - return c, nil + return nil } // Used to run multiple entries via a transaction diff --git a/serviceregistration/consul/consul_service_registration.go b/serviceregistration/consul/consul_service_registration.go index ff3146195..ba41995a7 100644 --- a/serviceregistration/consul/consul_service_registration.go +++ b/serviceregistration/consul/consul_service_registration.go @@ -15,12 +15,14 @@ import ( "time" "github.com/hashicorp/consul/api" + "github.com/hashicorp/errwrap" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/helper/parseutil" "github.com/hashicorp/vault/sdk/helper/strutil" "github.com/hashicorp/vault/sdk/helper/tlsutil" sr "github.com/hashicorp/vault/serviceregistration" + "github.com/hashicorp/vault/vault/diagnose" atomicB "go.uber.org/atomic" "golang.org/x/net/http2" ) @@ -145,6 +147,39 @@ func NewServiceRegistration(conf map[string]string, logger log.Logger, state sr. // Set MaxIdleConnsPerHost to the number of processes used in expiration.Restore consulConf.Transport.MaxIdleConnsPerHost = consts.ExpirationRestoreWorkerCount + SetupSecureTLS(consulConf, conf, logger, false) + + consulConf.HttpClient = &http.Client{Transport: consulConf.Transport} + client, err := api.NewClient(consulConf) + if err != nil { + return nil, errwrap.Wrapf("client setup failed: {{err}}", err) + } + + // Setup the backend + c := &serviceRegistration{ + Client: client, + + logger: logger, + serviceName: service, + serviceTags: strutil.ParseDedupLowercaseAndSortStrings(tags, ","), + serviceAddress: serviceAddr, + checkTimeout: checkTimeout, + disableRegistration: disableRegistration, + + notifyActiveCh: make(chan struct{}), + notifySealedCh: make(chan struct{}), + notifyPerfStandbyCh: make(chan struct{}), + notifyInitializedCh: make(chan struct{}), + + isActive: atomicB.NewBool(state.IsActive), + isSealed: atomicB.NewBool(state.IsSealed), + isPerfStandby: atomicB.NewBool(state.IsPerformanceStandby), + isInitialized: atomicB.NewBool(state.IsInitialized), + } + return c, nil +} + +func SetupSecureTLS(consulConf *api.Config, conf map[string]string, logger log.Logger, isDiagnose bool) error { if addr, ok := conf["address"]; ok { consulConf.Address = addr if logger.IsDebug() { @@ -178,47 +213,33 @@ func NewServiceRegistration(conf map[string]string, logger log.Logger, state sr. } if consulConf.Scheme == "https" { + + if isDiagnose { + certPath, okCert := conf["tls_cert_file"] + keyPath, okKey := conf["tls_key_file"] + if okCert && okKey { + err := diagnose.TLSFileChecks(certPath, keyPath) + if err != nil { + return err + } + } else { + return fmt.Errorf("key or cert path: %s, %s, cannot be loaded from consul config file", certPath, keyPath) + } + } + // Use the parsed Address instead of the raw conf['address'] tlsClientConfig, err := tlsutil.SetupTLSConfig(conf, consulConf.Address) if err != nil { - return nil, err + return err } consulConf.Transport.TLSClientConfig = tlsClientConfig if err := http2.ConfigureTransport(consulConf.Transport); err != nil { - return nil, err + return err } logger.Debug("configured TLS") } - - consulConf.HttpClient = &http.Client{Transport: consulConf.Transport} - client, err := api.NewClient(consulConf) - if err != nil { - return nil, fmt.Errorf("client setup failed: %w", err) - } - - // Setup the backend - c := &serviceRegistration{ - Client: client, - - logger: logger, - serviceName: service, - serviceTags: strutil.ParseDedupLowercaseAndSortStrings(tags, ","), - serviceAddress: serviceAddr, - checkTimeout: checkTimeout, - disableRegistration: disableRegistration, - - notifyActiveCh: make(chan struct{}), - notifySealedCh: make(chan struct{}), - notifyPerfStandbyCh: make(chan struct{}), - notifyInitializedCh: make(chan struct{}), - - isActive: atomicB.NewBool(state.IsActive), - isSealed: atomicB.NewBool(state.IsSealed), - isPerfStandby: atomicB.NewBool(state.IsPerformanceStandby), - isInitialized: atomicB.NewBool(state.IsInitialized), - } - return c, nil + return nil } func (c *serviceRegistration) Run(shutdownCh <-chan struct{}, wait *sync.WaitGroup, redirectAddr string) error {