From 9f62768cc7019ba92acead2a64c04ea5f78c3bc0 Mon Sep 17 00:00:00 2001
From: Scott Miller
Date: Wed, 20 Oct 2021 16:47:59 -0500
Subject: [PATCH] Diagnose partial/missing telemetry configuration (#12802)

* Diagnose partial/missing telemetry configuration

* changelog

* fixup

* not sure which component?
---
 changelog/12802.txt                           |  3 +
 command/operator_diagnose.go                  | 72 +++++++++++++++++++
 command/operator_diagnose_test.go             | 52 ++++++++++++++
 .../diagnose_bad_https_consul_sr.hcl          | 11 ---
 .../test-fixtures/diagnose_bad_telemetry1.hcl | 18 +++++
 .../test-fixtures/diagnose_bad_telemetry2.hcl | 18 +++++
 .../test-fixtures/diagnose_bad_telemetry3.hcl | 18 +++++
 7 files changed, 181 insertions(+), 11 deletions(-)
 create mode 100644 changelog/12802.txt
 create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry1.hcl
 create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry2.hcl
 create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry3.hcl

diff --git a/changelog/12802.txt b/changelog/12802.txt
new file mode 100644
index 000000000..9c49bf146
--- /dev/null
+++ b/changelog/12802.txt
@@ -0,0 +1,3 @@
+```release-note:improvement
+cli: Operator diagnose now tests for missing or partial telemetry configurations.
+```
\ No newline at end of file
diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go
index 7e95134dc..d79328015 100644
--- a/command/operator_diagnose.go
+++ b/command/operator_diagnose.go
@@ -3,6 +3,7 @@ package command
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"os"
@@ -249,6 +250,50 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error
 		return fmt.Errorf("No vault server configuration found.")
 	}
 
+	// Validate the telemetry stanza: warn when it is absent, and fail when a
+	// metrics sink is only partially configured.
+	diagnose.Test(ctx, "Check Telemetry", func(ctx context.Context) error {
+		t := config.Telemetry
+		if t == nil {
+			diagnose.Warn(ctx, "Telemetry is using default configuration")
+			diagnose.Advise(ctx, "By default only Prometheus and JSON metrics are available. "+
+				"Ignore this warning if you are not using telemetry or are using these metrics and are satisfied with the default retention time and gauge period.")
+			return nil
+		}
+
+		// If any Circonus setting is present, both the API URL and the API token must be set.
+		if coalesce(t.CirconusAPIURL, t.CirconusAPIToken, t.CirconusCheckID, t.CirconusCheckTags, t.CirconusCheckSearchTag,
+			t.CirconusBrokerID, t.CirconusBrokerSelectTag, t.CirconusCheckForceMetricActivation, t.CirconusCheckInstanceID,
+			t.CirconusCheckSubmissionURL, t.CirconusCheckDisplayName) != nil {
+			if t.CirconusAPIURL == "" {
+				return errors.New("incomplete Circonus telemetry configuration, missing circonus_api_url")
+			}
+			if t.CirconusAPIToken == "" {
+				return errors.New("incomplete Circonus telemetry configuration, missing circonus_api_token")
+			}
+		}
+
+		// DogStatsD tags were given without an address to send them to.
+		if len(t.DogStatsDTags) > 0 && t.DogStatsDAddr == "" {
+			return errors.New("incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified")
+		}
+
+		// If any Stackdriver setting is present, the project ID, location and
+		// namespace must all be set.
+		if coalesce(t.StackdriverNamespace, t.StackdriverLocation, t.StackdriverDebugLogs) != nil {
+			if t.StackdriverProjectID == "" {
+				return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_project_id")
+			}
+			if t.StackdriverLocation == "" {
+				return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_location")
+			}
+			if t.StackdriverNamespace == "" {
+				return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_namespace")
+			}
+		}
+		return nil
+	})
+
 	var metricSink *metricsutil.ClusterMetricSink
 	var metricsHelper *metricsutil.MetricsHelper
 
@@ -676,3 +721,30 @@ SEALFAIL:
 	})
 	return nil
 }
+
+// coalesce returns the first argument that holds a set (non-zero) value, or
+// nil when every argument is unset. A nil interface, an empty string, a false
+// bool and an empty []string all count as unset; a value of any other type is
+// always treated as set and returned as-is.
+func coalesce(values ...interface{}) interface{} {
+	for _, val := range values {
+		switch v := val.(type) {
+		case nil:
+			continue
+		case string:
+			if v == "" {
+				continue
+			}
+		case bool:
+			if !v {
+				continue
+			}
+		case []string:
+			if len(v) == 0 {
+				continue
+			}
+		}
+		return val
+	}
+	return nil
+}
diff --git a/command/operator_diagnose_test.go b/command/operator_diagnose_test.go
index 5768c95e1..d99740ff6 100644
--- a/command/operator_diagnose_test.go
+++ b/command/operator_diagnose_test.go
@@ -415,6 +415,58 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) {
 				},
 			},
 		},
+		{
+			"diagnose_telemetry_partial_circonus",
+			[]string{
+				"-config", "./server/test-fixtures/diagnose_bad_telemetry1.hcl",
+			},
+			[]*diagnose.Result{
+				{
+					Name:    "Check Telemetry",
+					Status:  diagnose.ErrorStatus,
+					Message: "incomplete Circonus telemetry configuration, missing circonus_api_url",
+				},
+			},
+		},
+		{
+			"diagnose_telemetry_partial_dogstats",
+			[]string{
+				"-config", "./server/test-fixtures/diagnose_bad_telemetry2.hcl",
+			},
+			[]*diagnose.Result{
+				{
+					Name:    "Check Telemetry",
+					Status:  diagnose.ErrorStatus,
+					Message: "incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified",
+				},
+			},
+		},
+		{
+			"diagnose_telemetry_partial_stackdriver",
+			[]string{
+				"-config", "./server/test-fixtures/diagnose_bad_telemetry3.hcl",
+			},
+			[]*diagnose.Result{
+				{
+					Name:    "Check Telemetry",
+					Status:  diagnose.ErrorStatus,
+					Message: "incomplete Stackdriver telemetry configuration, missing stackdriver_project_id",
+				},
+			},
+		},
+		{
+			"diagnose_telemetry_default",
+			[]string{
+				"-config", "./server/test-fixtures/config4.hcl",
+			},
+			[]*diagnose.Result{
+				{
+					Name:     "Check Telemetry",
+					Status:   diagnose.WarningStatus,
+					Warnings: []string{"Telemetry is using default configuration"},
+				},
+			},
+		},
 	}
 
 	t.Run("validations", func(t *testing.T) {
diff --git a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl
index 49d1de056..6faecaab7 100644
--- a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl
+++ b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl
@@ -28,17 +28,6 @@ service_registration "consul" {
   tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem"
 }
 
-telemetry {
-  statsd_address = "bar"
-  usage_gauge_period = "5m"
-  maximum_gauge_cardinality = 100
-
-  statsite_address = "foo"
-  dogstatsd_addr = "127.0.0.1:7254"
-  dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"]
-  metrics_prefix = "myprefix"
-}
-
 sentinel {
   additional_enabled_modules = []
 }
diff --git a/command/server/test-fixtures/diagnose_bad_telemetry1.hcl b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl
new file mode 100644
index 000000000..f7629bdd0
--- /dev/null
+++ b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl
@@ -0,0 +1,18 @@
+disable_cache = true
+disable_mlock = true
+ui = true
+
+listener "tcp" {
+  address = "127.0.0.1:8200"
+}
+
+backend "consul" {
+  advertise_addr = "foo"
+  token = "foo"
+}
+
+telemetry {
+  circonus_check_id = "bar"
+}
+
+cluster_addr = "127.0.0.1:8201"
diff --git a/command/server/test-fixtures/diagnose_bad_telemetry2.hcl b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl
new file mode 100644
index 000000000..5c967e3ef
--- /dev/null
+++ b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl
@@ -0,0 +1,18 @@
+disable_cache = true
+disable_mlock = true
+ui = true
+
+listener "tcp" {
+  address = "127.0.0.1:8200"
+}
+
+backend "consul" {
+  advertise_addr = "foo"
+  token = "foo"
+}
+
+telemetry {
+  dogstatsd_tags = ["bar"]
+}
+
+cluster_addr = "127.0.0.1:8201"
diff --git a/command/server/test-fixtures/diagnose_bad_telemetry3.hcl b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl
new file mode 100644
index 000000000..f96692584
--- /dev/null
+++ b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl
@@ -0,0 +1,18 @@
+disable_cache = true
+disable_mlock = true
+ui = true
+
+listener "tcp" {
+  address = "127.0.0.1:8200"
+}
+
+backend "consul" {
+  advertise_addr = "foo"
+  token = "foo"
+}
+
+telemetry {
+  stackdriver_namespace = "bar"
+}
+
+cluster_addr = "127.0.0.1:8201"