package configutil import ( "context" "errors" "fmt" "time" "github.com/hashicorp/go-secure-stdlib/parseutil" monitoring "cloud.google.com/go/monitoring/apiv3" "github.com/armon/go-metrics" "github.com/armon/go-metrics/circonus" "github.com/armon/go-metrics/datadog" "github.com/armon/go-metrics/prometheus" stackdriver "github.com/google/go-metrics-stackdriver" stackdrivervault "github.com/google/go-metrics-stackdriver/vault" "github.com/hashicorp/go-multierror" "github.com/hashicorp/hcl" "github.com/hashicorp/hcl/hcl/ast" "github.com/hashicorp/vault/helper/metricsutil" "github.com/mitchellh/cli" "google.golang.org/api/option" ) const ( PrometheusDefaultRetentionTime = 24 * time.Hour UsageGaugeDefaultPeriod = 10 * time.Minute MaximumGaugeCardinalityDefault = 500 LeaseMetricsEpsilonDefault = time.Hour NumLeaseMetricsTimeBucketsDefault = 168 ) // Telemetry is the telemetry configuration for the server type Telemetry struct { FoundKeys []string `hcl:",decodedFields"` UnusedKeys UnusedKeyMap `hcl:",unusedKeyPositions"` StatsiteAddr string `hcl:"statsite_address"` StatsdAddr string `hcl:"statsd_address"` DisableHostname bool `hcl:"disable_hostname"` EnableHostnameLabel bool `hcl:"enable_hostname_label"` MetricsPrefix string `hcl:"metrics_prefix"` UsageGaugePeriod time.Duration UsageGaugePeriodRaw interface{} `hcl:"usage_gauge_period,alias:UsageGaugePeriod"` MaximumGaugeCardinality int `hcl:"maximum_gauge_cardinality"` // Circonus: see https://github.com/circonus-labs/circonus-gometrics // for more details on the various configuration options. // Valid configuration combinations: // - CirconusAPIToken // metric management enabled (search for existing check or create a new one) // - CirconusSubmissionUrl // metric management disabled (use check with specified submission_url, // broker must be using a public SSL certificate) // - CirconusAPIToken + CirconusCheckSubmissionURL // metric management enabled (use check with specified submission_url) // - CirconusAPIToken + CirconusCheckID // metric management enabled (use check with specified id) // CirconusAPIToken is a valid API Token used to create/manage check. If provided, // metric management is enabled. // Default: none CirconusAPIToken string `hcl:"circonus_api_token"` // CirconusAPIApp is an app name associated with API token. // Default: "consul" CirconusAPIApp string `hcl:"circonus_api_app"` // CirconusAPIURL is the base URL to use for contacting the Circonus API. // Default: "https://api.circonus.com/v2" CirconusAPIURL string `hcl:"circonus_api_url"` // CirconusSubmissionInterval is the interval at which metrics are submitted to Circonus. // Default: 10s CirconusSubmissionInterval string `hcl:"circonus_submission_interval"` // CirconusCheckSubmissionURL is the check.config.submission_url field from a // previously created HTTPTRAP check. // Default: none CirconusCheckSubmissionURL string `hcl:"circonus_submission_url"` // CirconusCheckID is the check id (not check bundle id) from a previously created // HTTPTRAP check. The numeric portion of the check._cid field. // Default: none CirconusCheckID string `hcl:"circonus_check_id"` // CirconusCheckForceMetricActivation will force enabling metrics, as they are encountered, // if the metric already exists and is NOT active. If check management is enabled, the default // behavior is to add new metrics as they are encountered. If the metric already exists in the // check, it will *NOT* be activated. This setting overrides that behavior. // Default: "false" CirconusCheckForceMetricActivation string `hcl:"circonus_check_force_metric_activation"` // CirconusCheckInstanceID serves to uniquely identify the metrics coming from this "instance". // It can be used to maintain metric continuity with transient or ephemeral instances as // they move around within an infrastructure. // Default: hostname:app CirconusCheckInstanceID string `hcl:"circonus_check_instance_id"` // CirconusCheckSearchTag is a special tag which, when coupled with the instance id, helps to // narrow down the search results when neither a Submission URL or Check ID is provided. // Default: service:app (e.g. service:consul) CirconusCheckSearchTag string `hcl:"circonus_check_search_tag"` // CirconusCheckTags is a comma separated list of tags to apply to the check. Note that // the value of CirconusCheckSearchTag will always be added to the check. // Default: none CirconusCheckTags string `hcl:"circonus_check_tags"` // CirconusCheckDisplayName is the name for the check which will be displayed in the Circonus UI. // Default: value of CirconusCheckInstanceID CirconusCheckDisplayName string `hcl:"circonus_check_display_name"` // CirconusBrokerID is an explicit broker to use when creating a new check. The numeric portion // of broker._cid. If metric management is enabled and neither a Submission URL nor Check ID // is provided, an attempt will be made to search for an existing check using Instance ID and // Search Tag. If one is not found, a new HTTPTRAP check will be created. // Default: use Select Tag if provided, otherwise, a random Enterprise Broker associated // with the specified API token or the default Circonus Broker. // Default: none CirconusBrokerID string `hcl:"circonus_broker_id"` // CirconusBrokerSelectTag is a special tag which will be used to select a broker when // a Broker ID is not provided. The best use of this is to as a hint for which broker // should be used based on *where* this particular instance is running. // (e.g. a specific geo location or datacenter, dc:sfo) // Default: none CirconusBrokerSelectTag string `hcl:"circonus_broker_select_tag"` // Dogstats: // DogStatsdAddr is the address of a dogstatsd instance. If provided, // metrics will be sent to that instance DogStatsDAddr string `hcl:"dogstatsd_addr"` // DogStatsdTags are the global tags that should be sent with each packet to dogstatsd // It is a list of strings, where each string looks like "my_tag_name:my_tag_value" DogStatsDTags []string `hcl:"dogstatsd_tags"` // Prometheus: // PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0. // Default: 24h PrometheusRetentionTime time.Duration `hcl:"-"` PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"` // Stackdriver: // StackdriverProjectID is the project to publish stackdriver metrics to. StackdriverProjectID string `hcl:"stackdriver_project_id"` // StackdriverLocation is the GCP or AWS region of the monitored resource. StackdriverLocation string `hcl:"stackdriver_location"` // StackdriverNamespace is the namespace identifier, such as a cluster name. StackdriverNamespace string `hcl:"stackdriver_namespace"` // StackdriverDebugLogs will write additional stackdriver related debug logs to stderr. StackdriverDebugLogs bool `hcl:"stackdriver_debug_logs"` // How often metrics for lease expiry will be aggregated LeaseMetricsEpsilon time.Duration LeaseMetricsEpsilonRaw interface{} `hcl:"lease_metrics_epsilon"` // Number of buckets by time that will be used in lease aggregation NumLeaseMetricsTimeBuckets int `hcl:"num_lease_metrics_buckets"` // Whether or not telemetry should add labels for namespaces LeaseMetricsNameSpaceLabels bool `hcl:"add_lease_metrics_namespace_labels"` // FilterDefault is the default for whether to allow a metric that's not // covered by the prefix filter. FilterDefault *bool `hcl:"filter_default"` // PrefixFilter is a list of filter rules to apply for allowing // or blocking metrics by prefix. PrefixFilter []string `hcl:"prefix_filter"` } func (t *Telemetry) Validate(source string) []ConfigError { return ValidateUnusedFields(t.UnusedKeys, source) } func (t *Telemetry) GoString() string { return fmt.Sprintf("*%#v", *t) } func parseTelemetry(result *SharedConfig, list *ast.ObjectList) error { if len(list.Items) > 1 { return fmt.Errorf("only one 'telemetry' block is permitted") } // Get our one item item := list.Items[0] if result.Telemetry == nil { result.Telemetry = &Telemetry{} } if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil { return multierror.Prefix(err, "telemetry:") } if result.Telemetry.PrometheusRetentionTimeRaw != nil { var err error if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil { return err } result.Telemetry.PrometheusRetentionTimeRaw = nil } else { result.Telemetry.PrometheusRetentionTime = PrometheusDefaultRetentionTime } if result.Telemetry.UsageGaugePeriodRaw != nil { if result.Telemetry.UsageGaugePeriodRaw == "none" { result.Telemetry.UsageGaugePeriod = 0 } else { var err error if result.Telemetry.UsageGaugePeriod, err = parseutil.ParseDurationSecond(result.Telemetry.UsageGaugePeriodRaw); err != nil { return err } result.Telemetry.UsageGaugePeriodRaw = nil } } else { result.Telemetry.UsageGaugePeriod = UsageGaugeDefaultPeriod } if result.Telemetry.MaximumGaugeCardinality == 0 { result.Telemetry.MaximumGaugeCardinality = MaximumGaugeCardinalityDefault } if result.Telemetry.LeaseMetricsEpsilonRaw != nil { if result.Telemetry.LeaseMetricsEpsilonRaw == "none" { result.Telemetry.LeaseMetricsEpsilonRaw = 0 } else { var err error if result.Telemetry.LeaseMetricsEpsilon, err = parseutil.ParseDurationSecond(result.Telemetry.LeaseMetricsEpsilonRaw); err != nil { return err } result.Telemetry.LeaseMetricsEpsilonRaw = nil } } else { result.Telemetry.LeaseMetricsEpsilon = LeaseMetricsEpsilonDefault } if result.Telemetry.NumLeaseMetricsTimeBuckets == 0 { result.Telemetry.NumLeaseMetricsTimeBuckets = NumLeaseMetricsTimeBucketsDefault } return nil } type SetupTelemetryOpts struct { Config *Telemetry Ui cli.Ui ServiceName string DisplayName string UserAgent string ClusterName string } // SetupTelemetry is used to setup the telemetry sub-systems and returns the // in-memory sink to be used in http configuration func SetupTelemetry(opts *SetupTelemetryOpts) (*metrics.InmemSink, *metricsutil.ClusterMetricSink, bool, error) { if opts == nil { return nil, nil, false, errors.New("nil opts passed into SetupTelemetry") } if opts.Config == nil { opts.Config = &Telemetry{} } /* Setup telemetry Aggregate on 10 second intervals for 1 minute. Expose the metrics over stderr when there is a SIGUSR1 received. */ inm := metrics.NewInmemSink(10*time.Second, time.Minute) metrics.DefaultInmemSignal(inm) if opts.Config.MetricsPrefix != "" { opts.ServiceName = opts.Config.MetricsPrefix } metricsConf := metrics.DefaultConfig(opts.ServiceName) metricsConf.EnableHostname = !opts.Config.DisableHostname metricsConf.EnableHostnameLabel = opts.Config.EnableHostnameLabel if opts.Config.FilterDefault != nil { metricsConf.FilterDefault = *opts.Config.FilterDefault } // Configure the statsite sink var fanout metrics.FanoutSink var prometheusEnabled bool // Configure the Prometheus sink if opts.Config.PrometheusRetentionTime != 0 { prometheusEnabled = true prometheusOpts := prometheus.PrometheusOpts{ Expiration: opts.Config.PrometheusRetentionTime, } sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts) if err != nil { return nil, nil, false, err } fanout = append(fanout, sink) } if opts.Config.StatsiteAddr != "" { sink, err := metrics.NewStatsiteSink(opts.Config.StatsiteAddr) if err != nil { return nil, nil, false, err } fanout = append(fanout, sink) } // Configure the statsd sink if opts.Config.StatsdAddr != "" { sink, err := metrics.NewStatsdSink(opts.Config.StatsdAddr) if err != nil { return nil, nil, false, err } fanout = append(fanout, sink) } // Configure the Circonus sink if opts.Config.CirconusAPIToken != "" || opts.Config.CirconusCheckSubmissionURL != "" { cfg := &circonus.Config{} cfg.Interval = opts.Config.CirconusSubmissionInterval cfg.CheckManager.API.TokenKey = opts.Config.CirconusAPIToken cfg.CheckManager.API.TokenApp = opts.Config.CirconusAPIApp cfg.CheckManager.API.URL = opts.Config.CirconusAPIURL cfg.CheckManager.Check.SubmissionURL = opts.Config.CirconusCheckSubmissionURL cfg.CheckManager.Check.ID = opts.Config.CirconusCheckID cfg.CheckManager.Check.ForceMetricActivation = opts.Config.CirconusCheckForceMetricActivation cfg.CheckManager.Check.InstanceID = opts.Config.CirconusCheckInstanceID cfg.CheckManager.Check.SearchTag = opts.Config.CirconusCheckSearchTag cfg.CheckManager.Check.DisplayName = opts.Config.CirconusCheckDisplayName cfg.CheckManager.Check.Tags = opts.Config.CirconusCheckTags cfg.CheckManager.Broker.ID = opts.Config.CirconusBrokerID cfg.CheckManager.Broker.SelectTag = opts.Config.CirconusBrokerSelectTag if cfg.CheckManager.API.TokenApp == "" { cfg.CheckManager.API.TokenApp = opts.ServiceName } if cfg.CheckManager.Check.DisplayName == "" { cfg.CheckManager.Check.DisplayName = opts.DisplayName } if cfg.CheckManager.Check.SearchTag == "" { cfg.CheckManager.Check.SearchTag = fmt.Sprintf("service:%s", opts.ServiceName) } sink, err := circonus.NewCirconusSink(cfg) if err != nil { return nil, nil, false, err } sink.Start() fanout = append(fanout, sink) } if opts.Config.DogStatsDAddr != "" { var tags []string if opts.Config.DogStatsDTags != nil { tags = opts.Config.DogStatsDTags } sink, err := datadog.NewDogStatsdSink(opts.Config.DogStatsDAddr, metricsConf.HostName) if err != nil { return nil, nil, false, fmt.Errorf("failed to start DogStatsD sink: %w", err) } sink.SetTags(tags) fanout = append(fanout, sink) } // Configure the stackdriver sink if opts.Config.StackdriverProjectID != "" { client, err := monitoring.NewMetricClient(context.Background(), option.WithUserAgent(opts.UserAgent)) if err != nil { return nil, nil, false, fmt.Errorf("Failed to create stackdriver client: %v", err) } sink := stackdriver.NewSink(client, &stackdriver.Config{ LabelExtractor: stackdrivervault.Extractor, Bucketer: stackdrivervault.Bucketer, ProjectID: opts.Config.StackdriverProjectID, Location: opts.Config.StackdriverLocation, Namespace: opts.Config.StackdriverNamespace, DebugLogs: opts.Config.StackdriverDebugLogs, }) fanout = append(fanout, sink) } // Initialize the global sink if len(fanout) > 1 { // Hostname enabled will create poor quality metrics name for prometheus if !opts.Config.DisableHostname { opts.Ui.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.") } } else { metricsConf.EnableHostname = false } fanout = append(fanout, inm) globalMetrics, err := metrics.NewGlobal(metricsConf, fanout) if err != nil { return nil, nil, false, err } // Intialize a wrapper around the global sink; this will be passed to Core // and to any backend. wrapper := metricsutil.NewClusterMetricSink(opts.ClusterName, globalMetrics) wrapper.MaxGaugeCardinality = opts.Config.MaximumGaugeCardinality wrapper.GaugeInterval = opts.Config.UsageGaugePeriod wrapper.TelemetryConsts.LeaseMetricsEpsilon = opts.Config.LeaseMetricsEpsilon wrapper.TelemetryConsts.LeaseMetricsNameSpaceLabels = opts.Config.LeaseMetricsNameSpaceLabels wrapper.TelemetryConsts.NumLeaseMetricsTimeBuckets = opts.Config.NumLeaseMetricsTimeBuckets // Parse the metric filters telemetryAllowedPrefixes, telemetryBlockedPrefixes, err := parsePrefixFilter(opts.Config.PrefixFilter) if err != nil { return nil, nil, false, err } metrics.UpdateFilter(telemetryAllowedPrefixes, telemetryBlockedPrefixes) return inm, wrapper, prometheusEnabled, nil } func parsePrefixFilter(prefixFilters []string) ([]string, []string, error) { var telemetryAllowedPrefixes, telemetryBlockedPrefixes []string for _, rule := range prefixFilters { if rule == "" { return nil, nil, fmt.Errorf("Cannot have empty filter rule in prefix_filter") } switch rule[0] { case '+': telemetryAllowedPrefixes = append(telemetryAllowedPrefixes, rule[1:]) case '-': telemetryBlockedPrefixes = append(telemetryBlockedPrefixes, rule[1:]) default: return nil, nil, fmt.Errorf("Filter rule must begin with either '+' or '-': %q", rule) } } return telemetryAllowedPrefixes, telemetryBlockedPrefixes, nil }