From 5e98e86c3bff7c7b4925615920b94ee48b1be58d Mon Sep 17 00:00:00 2001 From: Joshua Timmons Date: Mon, 28 Aug 2023 16:44:14 -0400 Subject: [PATCH] Reduce the frequency of metric exports to minutely (#18584) (#18601) --- .changelog/18584.txt | 3 +++ agent/hcp/deps.go | 2 +- agent/hcp/telemetry/otel_exporter.go | 20 ++++++++-------- agent/hcp/telemetry/otel_exporter_test.go | 12 +++++----- agent/hcp/telemetry/otel_sink.go | 29 ++++++++++++++++++----- 5 files changed, 43 insertions(+), 23 deletions(-) create mode 100644 .changelog/18584.txt diff --git a/.changelog/18584.txt b/.changelog/18584.txt new file mode 100644 index 000000000..e7329655b --- /dev/null +++ b/.changelog/18584.txt @@ -0,0 +1,3 @@ +```release-note:improvement +Reduce the frequency of metric exports from Consul to HCP from every 10s to every 1m +``` \ No newline at end of file diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 0d52c7b54..ab8288e34 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -82,7 +82,7 @@ func sink( return nil, fmt.Errorf("failed to init config provider: %w", err) } - reader := telemetry.NewOTELReader(metricsClient, cfgProvider, telemetry.DefaultExportInterval) + reader := telemetry.NewOTELReader(metricsClient, cfgProvider) sinkOpts := &telemetry.OTELSinkOpts{ Reader: reader, ConfigProvider: cfgProvider, diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 084657816..ba92d4419 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -24,17 +24,17 @@ type EndpointProvider interface { GetEndpoint() *url.URL } -// OTELExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. +// otelExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. // The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. // This allows us to use a custom client - HCP authenticated MetricsClient. -type OTELExporter struct { +type otelExporter struct { client MetricsClient endpointProvider EndpointProvider } -// NewOTELExporter returns a configured OTELExporter. -func NewOTELExporter(client MetricsClient, endpointProvider EndpointProvider) *OTELExporter { - return &OTELExporter{ +// newOTELExporter returns a configured OTELExporter. +func newOTELExporter(client MetricsClient, endpointProvider EndpointProvider) *otelExporter { + return &otelExporter{ client: client, endpointProvider: endpointProvider, } @@ -42,14 +42,14 @@ func NewOTELExporter(client MetricsClient, endpointProvider EndpointProvider) *O // Temporality returns the Cumulative temporality for metrics aggregation. // Telemetry Gateway stores metrics in Prometheus format, so use Cummulative aggregation as default. -func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { +func (e *otelExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { return metricdata.CumulativeTemporality } // Aggregation returns the Aggregation to use for an instrument kind. // The default implementation provided by the OTEL Metrics SDK library DefaultAggregationSelector panics. // This custom version replicates that logic, but removes the panic. -func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { +func (e *otelExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { switch kind { case metric.InstrumentKindObservableGauge: return aggregation.LastValue{} @@ -64,7 +64,7 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre } // Export serializes and transmits metric data to a receiver. -func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { +func (e *otelExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { endpoint := e.endpointProvider.GetEndpoint() if endpoint == nil { return nil @@ -86,13 +86,13 @@ func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceM } // ForceFlush is a no-op, as the MetricsClient client holds no state. -func (e *OTELExporter) ForceFlush(ctx context.Context) error { +func (e *otelExporter) ForceFlush(ctx context.Context) error { goMetrics.IncrCounter(internalMetricExporterForceFlush, 1) return ctx.Err() } // Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. -func (e *OTELExporter) Shutdown(ctx context.Context) error { +func (e *otelExporter) Shutdown(ctx context.Context) error { goMetrics.IncrCounter(internalMetricExporterShutdown, 1) return ctx.Err() } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 53b7bd316..704817f64 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -37,7 +37,7 @@ func (m *mockEndpointProvider) GetEndpoint() *url.URL { return m.endpoint } func TestTemporality(t *testing.T) { t.Parallel() - exp := &OTELExporter{} + exp := &otelExporter{} require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) } @@ -63,7 +63,7 @@ func TestAggregation(t *testing.T) { test := test t.Run(name, func(t *testing.T) { t.Parallel() - exp := &OTELExporter{} + exp := &otelExporter{} require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) }) } @@ -122,7 +122,7 @@ func TestExport(t *testing.T) { } } - exp := NewOTELExporter(test.client, provider) + exp := newOTELExporter(test.client, provider) err := exp.Export(context.Background(), test.metrics) if test.wantErr != "" { @@ -179,7 +179,7 @@ func TestExport_CustomMetrics(t *testing.T) { u, err := url.Parse(testExportEndpoint) require.NoError(t, err) - exp := NewOTELExporter(tc.client, &mockEndpointProvider{ + exp := newOTELExporter(tc.client, &mockEndpointProvider{ endpoint: u, }) @@ -209,7 +209,7 @@ func TestExport_CustomMetrics(t *testing.T) { func TestForceFlush(t *testing.T) { t.Parallel() - exp := &OTELExporter{} + exp := &otelExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -219,7 +219,7 @@ func TestForceFlush(t *testing.T) { func TestShutdown(t *testing.T) { t.Parallel() - exp := &OTELExporter{} + exp := &otelExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 49a6d5950..a35b1827d 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -10,20 +10,34 @@ import ( "time" gometrics "github.com/armon/go-metrics" - "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" otelsdk "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" + + "github.com/hashicorp/go-hclog" ) -// DefaultExportInterval is a default time interval between export of aggregated metrics. -const DefaultExportInterval = 10 * time.Second +const ( + // defaultExportInterval is a default time interval between export of aggregated metrics. + // At the time of writing this is the same as the otelsdk.Reader's default export interval. + defaultExportInterval = 60 * time.Second + + // defaultExportTimeout is the time the otelsdk.Reader waits on an export before cancelling it. + // At the time of writing this is the same as the otelsdk.Reader's default export timeout default. + // + // note: in practice we are more likely to hit the http.Client Timeout in telemetry.MetricsClient. + // That http.Client Timeout is 15 seconds (at the time of writing). The otelsdk.Reader will use + // defaultExportTimeout for the entire Export call, but since the http.Client's Timeout is 15s, + // we should hit that first before reaching the 30 second timeout set here. + defaultExportTimeout = 30 * time.Second +) // ConfigProvider is required to provide custom metrics processing. type ConfigProvider interface { // GetLabels should return a set of OTEL attributes added by default all metrics. GetLabels() map[string]string + // GetFilters should return filtesr that are required to enable metric processing. // Filters act as an allowlist to collect only the required metrics. GetFilters() *regexp.Regexp @@ -72,9 +86,12 @@ type OTELSink struct { // NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds. // It configures the reader with a custom OTELExporter with a MetricsClient to transform and export // metrics in OTLP format to an external url. -func NewOTELReader(client MetricsClient, endpointProvider EndpointProvider, exportInterval time.Duration) otelsdk.Reader { - exporter := NewOTELExporter(client, endpointProvider) - return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) +func NewOTELReader(client MetricsClient, endpointProvider EndpointProvider) otelsdk.Reader { + return otelsdk.NewPeriodicReader( + newOTELExporter(client, endpointProvider), + otelsdk.WithInterval(defaultExportInterval), + otelsdk.WithTimeout(defaultExportTimeout), + ) } // NewOTELSink returns a sink which fits the Go Metrics MetricsSink interface.