first pass on agent-configured prometheusDefs and adding defs for every consul metric

This commit is contained in:
Kit Patella 2020-11-12 18:12:12 -08:00
parent 9fdbc68c92
commit 9533372ded
23 changed files with 687 additions and 78 deletions

View File

@ -5,11 +5,128 @@ import (
"net/http"
"strings"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs"
)
// CatalogCounters declares the Prometheus counter definitions for the catalog
// HTTP endpoints. For each endpoint (register, deregister, datacenters, nodes,
// services, service_nodes, node_services, node_service_list, gateway_services)
// there is a request counter under consul.client.api, a success counter under
// consul.client.api.success and an RPC error counter under
// consul.client.rpc.error; catalog_service_nodes additionally has a
// consul.client.api.error counter. Only the first entry has a Help string.
//
// TODO(kit): Add help strings for each
var CatalogCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_register"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_register"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_deregister"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_datacenters"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_deregister"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_deregister"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_datacenters"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_datacenters"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_services"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_services"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_services"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_service_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_service_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_service_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "error", "catalog_service_nodes"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_node_services"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_node_services"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_node_services"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_node_service_list"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "catalog_gateway_services"},
Help: "",
},
{
Name: []string{"consul", "client", "rpc", "error", "catalog_gateway_services"},
Help: "",
},
{
Name: []string{"consul", "client", "api", "success", "catalog_gateway_services"},
Help: "",
},
}
func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1,
[]metrics.Label{{Name: "node", Value: s.nodeName()}})

View File

@ -6,7 +6,8 @@ import (
"sync"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging"
@ -15,6 +16,32 @@ import (
"golang.org/x/time/rate"
)
// ACLCounters declares the Prometheus counter definitions for ACL token cache
// hits and misses. Help strings have not been written yet.
var ACLCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "acl", "token", "cache_hit"},
Help: "",
},
{
Name: []string{"consul", "acl", "token", "cache_miss"},
Help: "",
},
}
// ACLSummaries declares the Prometheus summary definitions for the ACL token
// resolution metrics (resolveTokenLegacy, ResolveToken and
// ResolveTokenToIdentity). Help strings have not been written yet.
var ACLSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "acl", "resolveTokenLegacy"},
Help: "",
},
{
Name: []string{"consul", "acl", "ResolveToken"},
Help: "",
},
{
Name: []string{"consul", "acl", "ResolveTokenToIdentity"},
Help: "",
},
}
// These must be kept in sync with the constants in command/agent/acl.go.
const (
// anonymousToken is the token ID we re-write to if there is no token ID

View File

@ -11,7 +11,8 @@ import (
"regexp"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/state"
@ -30,6 +31,73 @@ const (
aclBootstrapReset = "acl-bootstrap-reset"
)
// ACLEndpointSummaries declares the Prometheus summary definitions for the ACL
// endpoint operations: token clone/upsert/delete, upsert/delete for policies,
// roles, binding rules and auth methods, and login/logout.
//
// Each metric name appears exactly once; a repeated entry adds nothing because
// it describes the same metric. Help strings have not been written yet.
var ACLEndpointSummaries = []prometheus.SummaryDefinition{
	{
		Name: []string{"consul", "acl", "token", "clone"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "token", "upsert"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "token", "delete"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "policy", "upsert"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "policy", "delete"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "role", "upsert"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "role", "delete"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "bindingrule", "upsert"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "bindingrule", "delete"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "authmethod", "upsert"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "authmethod", "delete"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "login"},
		Help: "",
	},
	{
		Name: []string{"consul", "acl", "logout"},
		Help: "",
	},
}
// Regex for matching
var (
validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`)

View File

@ -5,6 +5,7 @@ import (
"fmt"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/raft"
@ -12,6 +13,17 @@ import (
"github.com/hashicorp/serf/serf"
)
// AutopilotGauges declares the Prometheus gauge definitions for the autopilot
// metrics failure_tolerance and healthy. failure_tolerance has no Help string
// yet.
var AutopilotGauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "autopilot", "failure_tolerance"},
Help: "",
},
{
Name: []string{"consul", "autopilot", "healthy"},
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
},
}
// AutopilotDelegate is a Consul delegate for autopilot operations.
type AutopilotDelegate struct {
server *Server

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
@ -17,6 +18,52 @@ import (
"github.com/hashicorp/go-uuid"
)
// CatalogCounters declares the Prometheus counter definitions for catalog
// lookups. Each of the four kinds (query, query-tag, query-tags, not-found) is
// declared once for "service" and once for "connect". Help strings have not
// been written yet.
var CatalogCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "catalog", "service", "query"},
Help: "",
},
{
Name: []string{"consul", "catalog", "connect", "query"},
Help: "",
},
{
Name: []string{"consul", "catalog", "service", "query-tag"},
Help: "",
},
{
Name: []string{"consul", "catalog", "connect", "query-tag"},
Help: "",
},
{
Name: []string{"consul", "catalog", "service", "query-tags"},
Help: "",
},
{
Name: []string{"consul", "catalog", "connect", "query-tags"},
Help: "",
},
{
Name: []string{"consul", "catalog", "service", "not-found"},
Help: "",
},
{
Name: []string{"consul", "catalog", "connect", "not-found"},
Help: "",
},
}
// CatalogSummaries declares the Prometheus summary definitions for the catalog
// register and deregister metrics. Help strings have not been written yet.
var CatalogSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "catalog", "deregister"},
Help: "",
},
{
Name: []string{"consul", "catalog", "register"},
Help: "",
},
}
// Catalog endpoint is used to manipulate the service catalog
type Catalog struct {
srv *Server

View File

@ -9,6 +9,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/agent/router"
"github.com/hashicorp/consul/agent/structs"
@ -21,6 +22,21 @@ import (
"golang.org/x/time/rate"
)
// ClientCounters declares the Prometheus counter definitions for RPC requests
// made by an agent in client mode: the total count, the count that exceeded the
// agent's rate limit, and the count that failed.
var ClientCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "client", "rpc"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
},
{
Name: []string{"consul", "client", "rpc", "exceeded"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
},
{
Name: []string{"consul", "client", "rpc", "failed"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
},
}
const (
// serfEventBacklog is the maximum number of unprocessed Serf Events
// that will be held in queue before new serf events block. A

View File

@ -4,6 +4,8 @@ import (
"fmt"
"time"
"github.com/armon/go-metrics/prometheus"
metrics "github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
@ -12,6 +14,33 @@ import (
"github.com/mitchellh/copystructure"
)
// ConfigSummaries declares the Prometheus summary definitions for the
// config_entry metrics: apply, get, list, listAll, delete and
// resolve_service_config. Help strings have not been written yet.
var ConfigSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "config_entry", "apply"},
Help: "",
},
{
Name: []string{"consul", "config_entry", "get"},
Help: "",
},
{
Name: []string{"consul", "config_entry", "list"},
Help: "",
},
{
Name: []string{"consul", "config_entry", "listAll"},
Help: "",
},
{
Name: []string{"consul", "config_entry", "delete"},
Help: "",
},
{
Name: []string{"consul", "config_entry", "resolve_service_config"},
Help: "",
},
}
// The ConfigEntry endpoint is used to query centralized config information
type ConfigEntry struct {
srv *Server

View File

@ -5,13 +5,33 @@ import (
"fmt"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
memdb "github.com/hashicorp/go-memdb"
)
// FederationStateSummaries declares the Prometheus summary definitions for the
// federation_state metrics: apply, get, list and list_mesh_gateways. Help
// strings have not been written yet.
var FederationStateSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "federation_state", "apply"},
Help: "",
},
{
Name: []string{"consul", "federation_state", "get"},
Help: "",
},
{
Name: []string{"consul", "federation_state", "list"},
Help: "",
},
{
Name: []string{"consul", "federation_state", "list_mesh_gateways"},
Help: "",
},
}
var (
errFederationStatesNotEnabled = errors.New("Federation states are currently disabled until all servers in the datacenter support the feature")
)

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/state"
@ -16,6 +17,13 @@ import (
"github.com/hashicorp/go-memdb"
)
// IntentionSummaries declares the Prometheus summary definition for the
// consul.intention.apply metric. The Help string has not been written yet.
var IntentionSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "intention", "apply"},
Help: "",
},
}
var (
// ErrIntentionNotFound is returned if the intention lookup failed.
ErrIntentionNotFound = errors.New("Intention not found")
@ -252,6 +260,7 @@ func (s *Intention) Apply(
if done, err := s.srv.ForwardRPC("Intention.Apply", args, args, reply); done {
return err
}
// TODO(Kit): Why do we have summaries for intentions both with and without the consul namespace?
defer metrics.MeasureSince([]string{"consul", "intention", "apply"}, time.Now())
defer metrics.MeasureSince([]string{"intention", "apply"}, time.Now())

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
@ -14,6 +15,13 @@ import (
"github.com/hashicorp/go-memdb"
)
// KVSummaries declares the Prometheus summary definition for the
// consul.kvs.apply metric.
var KVSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "kvs", "apply"},
Help: "This measures the time it takes to complete an update to the KV store.",
},
}
// KVS endpoint is used to manipulate the Key-Value store
type KVS struct {
srv *Server

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
@ -15,6 +16,25 @@ import (
"github.com/hashicorp/go-uuid"
)
// PreparedQuerySummaries declares the Prometheus summary definitions for the
// prepared-query metrics: apply, explain, execute and execute_remote. Help
// strings have not been written yet.
var PreparedQuerySummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "prepared-query", "apply"},
Help: "",
},
{
Name: []string{"consul", "prepared-query", "explain"},
Help: "",
},
{
Name: []string{"consul", "prepared-query", "execute"},
Help: "",
},
{
Name: []string{"consul", "prepared-query", "execute_remote"},
Help: "",
},
}
// PreparedQuery manages the prepared query endpoint.
type PreparedQuery struct {
srv *Server

View File

@ -13,6 +13,7 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/consul/wanfed"
@ -31,6 +32,47 @@ import (
"github.com/hashicorp/yamux"
)
// RPCCounters declares the Prometheus counter definitions for the consul.rpc
// metrics: accept_conn, raft_handoff, request_error, request, cross-dc and
// query. Help strings have not been written yet.
var RPCCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "rpc", "accept_conn"},
Help: "",
},
{
Name: []string{"consul", "rpc", "raft_handoff"},
Help: "",
},
{
Name: []string{"consul", "rpc", "request_error"},
Help: "",
},
{
Name: []string{"consul", "rpc", "request"},
Help: "",
},
{
Name: []string{"consul", "rpc", "cross-dc"},
Help: "",
},
{
Name: []string{"consul", "rpc", "query"},
Help: "",
},
}
// RPCGauges declares the Prometheus gauge definition for the
// consul.rpc.queries_blocking metric. The Help string has not been written yet.
var RPCGauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "rpc", "queries_blocking"},
Help: "",
},
}
// RPCSummaries declares the Prometheus summary definition for the
// consul.rpc.consistentRead metric. The Help string has not been written yet.
var RPCSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "rpc", "consistentRead"},
Help: "",
},
}
const (
// jitterFraction is the limit to the amount of jitter we apply
// to a user specified MaxQueryTime. We divide the specified time by

View File

@ -17,7 +17,7 @@ import (
"sync/atomic"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
connlimit "github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
@ -50,6 +50,8 @@ import (
"github.com/hashicorp/consul/types"
)
// NOTE The "consul.client.rpc" and "consul.client.rpc.exceeded" counters are defined in consul/client.go
// These are the protocol versions that Consul can _understand_. These are
// Consul-level protocol versions, that are used to configure the Serf
// protocol versions.

View File

@ -4,10 +4,34 @@ import (
"fmt"
"time"
"github.com/armon/go-metrics/prometheus"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/structs"
)
// SessionGauges declares the Prometheus gauge definitions for the active
// session TTL count and for the raft applied_index and last_index metrics. Help
// strings have not been written yet.
var SessionGauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "session_ttl", "active"},
Help: "",
},
{
Name: []string{"consul", "raft", "applied_index"},
Help: "",
},
{
Name: []string{"consul", "raft", "last_index"},
Help: "",
},
}
// SessionSummaries declares the Prometheus summary definition for the
// consul.session_ttl.invalidate metric. The Help string has not been written
// yet.
var SessionSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "session_ttl", "invalidate"},
Help: "",
},
}
const (
// maxInvalidateAttempts limits how many invalidate attempts are made
maxInvalidateAttempts = 6

View File

@ -5,12 +5,24 @@ import (
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-hclog"
)
// TxnSummaries declares the Prometheus summary definitions for the transaction
// apply and read metrics. The read entry has no Help string yet.
var TxnSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "txn", "apply"},
Help: "This measures the time spent applying a transaction operation.",
},
{
Name: []string{"consul", "txn", "read"},
Help: "",
},
}
// Txn endpoint is used to perform multi-object atomic transactions.
type Txn struct {
srv *Server

View File

@ -5,12 +5,29 @@ import (
"errors"
"time"
"github.com/armon/go-metrics/prometheus"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog"
)
// Gauges declares the Prometheus gauge definitions for the consul.state
// metrics: nodes, services and service_instances. Help strings have not been
// written yet.
var Gauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "state", "nodes"},
Help: "",
},
{
Name: []string{"consul", "state", "services"},
Help: "",
},
{
Name: []string{"consul", "state", "service_instances"},
Help: "",
},
}
// Config holds the settings for various parameters for the
// UsageMetricsReporter
type Config struct {

View File

@ -10,6 +10,8 @@ import (
"sync/atomic"
"time"
"github.com/armon/go-metrics/prometheus"
metrics "github.com/armon/go-metrics"
radix "github.com/armon/go-radix"
"github.com/coredns/coredns/plugin/pkg/dnsutil"
@ -26,6 +28,24 @@ import (
"github.com/hashicorp/consul/logging"
)
// DNSCounters declares the Prometheus counter definition for the
// dns.stale_queries metric. Unlike most other definitions in this change, the
// name carries no leading "consul" element. The Help string has not been
// written yet.
var DNSCounters = []prometheus.CounterDefinition{
{
Name: []string{"dns", "stale_queries"},
Help: "",
},
}
// DNSSummaries declares the Prometheus summary definitions for the
// dns.ptr_query and dns.domain_query metrics. The names carry no leading
// "consul" element. Help strings have not been written yet.
var DNSSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"dns", "ptr_query"},
Help: "",
},
{
Name: []string{"dns", "domain_query"},
Help: "",
},
}
const (
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
// records. Limit further to prevent unintentional configuration

View File

@ -5,11 +5,48 @@ import (
"sync/atomic"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"google.golang.org/grpc"
"google.golang.org/grpc/stats"
)
// defaultMetrics holds the value returned by metrics.Default(), captured once
// when this package is initialized.
var defaultMetrics = metrics.Default()
// StatsGauges declares the Prometheus gauge definitions for the gRPC metrics
// server connections, client connections and server streams. Help strings have
// not been written yet.
var StatsGauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "grpc", "server", "connections"},
Help: "",
},
{
Name: []string{"consul", "grpc", "client", "connections"},
Help: "",
},
{
Name: []string{"consul", "grpc", "server", "streams"},
Help: "",
},
}
// StatsCounters declares the Prometheus counter definitions for the gRPC
// metrics: request counts and connection counts for both client and server, and
// the server stream count. Help strings have not been written yet.
var StatsCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "grpc", "client", "request", "count"},
Help: "",
},
{
Name: []string{"consul", "grpc", "server", "request", "count"},
Help: "",
},
{
Name: []string{"consul", "grpc", "client", "connection", "count"},
Help: "",
},
{
Name: []string{"consul", "grpc", "server", "connection", "count"},
Help: "",
},
{
Name: []string{"consul", "grpc", "server", "stream", "count"},
Help: "",
},
}
// statsHandler is a grpc/stats.StatsHandler which emits connection and
// request metrics to go-metrics.

View File

@ -17,6 +17,7 @@ import (
"github.com/NYTimes/gziphandler"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/config"
@ -31,6 +32,13 @@ import (
"github.com/pkg/errors"
)
// HTTPSummaries declares the Prometheus summary definition for the
// consul.api.http metric. The Help string has not been written yet.
var HTTPSummaries = []prometheus.SummaryDefinition{
{
Name: []string{"consul", "api", "http"},
Help: "",
},
}
// MethodNotAllowedError should be returned by a handler when the HTTP method is not allowed.
type MethodNotAllowedError struct {
Method string

View File

@ -9,8 +9,8 @@ import (
"sync/atomic"
"time"
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
@ -20,6 +20,29 @@ import (
"github.com/hashicorp/go-hclog"
)
// StateCounters declares the Prometheus counter definitions for the
// consul.acl.blocked metrics: service and check deregistration, and service,
// check and node registration. Help strings have not been written yet.
var StateCounters = []prometheus.CounterDefinition{
{
Name: []string{"consul", "acl", "blocked", "service", "deregistration"},
Help: "",
},
{
Name: []string{"consul", "acl", "blocked", "check", "deregistration"},
Help: "",
},
{
Name: []string{"consul", "acl", "blocked", "service", "registration"},
Help: "",
},
{
Name: []string{"consul", "acl", "blocked", "check", "registration"},
Help: "",
},
{
Name: []string{"consul", "acl", "blocked", "node", "registration"},
Help: "",
},
}
const fullSyncReadMaxStale = 2 * time.Second
// Config is the configuration for the State.

View File

@ -8,6 +8,10 @@ import (
"sync"
"time"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/consul/usagemetrics"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/go-hclog"
"google.golang.org/grpc/grpclog"
grpcresolver "google.golang.org/grpc/resolver"
@ -72,7 +76,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error)
return d, fmt.Errorf("failed to setup node ID: %w", err)
}
d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry)
d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry, getPrometheusDefs())
if err != nil {
return d, fmt.Errorf("failed to initialize telemetry: %w", err)
}
@ -177,3 +181,91 @@ func registerWithGRPC(b grpcresolver.Builder) {
defer registerLock.Unlock()
grpcresolver.Register(b)
}
// getPrometheusDefs gathers the Prometheus metric definitions declared by each
// part of the agent and flattens them into one slice per metric type (gauges,
// counters, summaries). The result is handed to lib.InitTelemetry so the
// definitions reach the Prometheus sink.
//
// Every definition slice declared by the agent must be listed here; a slice
// that is declared but not listed is never passed to the sink.
func getPrometheusDefs() lib.PrometheusDefs {
	gauges := [][]prometheus.GaugeDefinition{
		consul.AutopilotGauges,
		consul.RPCGauges,
		consul.SessionGauges,
		grpc.StatsGauges,
		usagemetrics.Gauges,
	}
	var gaugeDefs []prometheus.GaugeDefinition
	for _, g := range gauges {
		gaugeDefs = append(gaugeDefs, g...)
	}

	// TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry
	// package within. In the mean time, we're going to define them here because it's important that they're always
	// present for Consul users setting up dashboards.
	raftCounters := []prometheus.CounterDefinition{
		{
			Name: []string{"consul", "raft", "apply"},
			Help: "This counts the number of Raft transactions occurring over the interval.",
		},
		{
			Name: []string{"consul", "raft", "state", "candidate"},
			Help: "This increments whenever a Consul server starts an election.",
		},
		{
			Name: []string{"consul", "raft", "state", "leader"},
			Help: "This increments whenever a Consul server becomes a leader.",
		},
	}

	counters := [][]prometheus.CounterDefinition{
		CatalogCounters,
		DNSCounters, // previously declared but never aggregated
		consul.ACLCounters,
		consul.CatalogCounters,
		consul.ClientCounters,
		consul.RPCCounters,
		grpc.StatsCounters,
		local.StateCounters,
		raftCounters,
	}
	var counterDefs []prometheus.CounterDefinition
	for _, c := range counters {
		counterDefs = append(counterDefs, c...)
	}

	// TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry
	// package within. In the mean time, we're going to define them here because it's important that they're always
	// present for Consul users setting up dashboards.
	raftSummaries := []prometheus.SummaryDefinition{
		{
			Name: []string{"consul", "raft", "commitTime"},
			Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
		},
		{
			Name: []string{"consul", "raft", "leader", "lastContact"},
			Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
		},
	}

	summaries := [][]prometheus.SummaryDefinition{
		DNSSummaries, // previously declared but never aggregated
		HTTPSummaries,
		consul.ACLSummaries,
		consul.ACLEndpointSummaries,
		consul.CatalogSummaries,
		consul.ConfigSummaries, // previously declared but never aggregated
		consul.FederationStateSummaries,
		consul.IntentionSummaries,
		consul.KVSummaries,
		consul.PreparedQuerySummaries,
		consul.RPCSummaries,
		consul.SessionSummaries,
		consul.TxnSummaries,
		raftSummaries,
	}
	var summaryDefs []prometheus.SummaryDefinition
	for _, s := range summaries {
		summaryDefs = append(summaryDefs, s...)
	}

	return lib.PrometheusDefs{
		Gauges:    gaugeDefs,
		Counters:  counterDefs,
		Summaries: summaryDefs,
	}
}

View File

@ -54,7 +54,9 @@ func (p *Proxy) Serve() error {
// Initial setup
// Setup telemetry if configured
_, err := lib.InitTelemetry(newCfg.Telemetry)
// NOTE(kit): As far as I can tell, all of the metrics in the proxy are generated at runtime, so we
// don't have any static metrics we initialize at start.
_, err := lib.InitTelemetry(newCfg.Telemetry, lib.EmptyPrometheusDefs())
if err != nil {
p.logger.Error("proxy telemetry config error", "error", err)
}

View File

@ -276,79 +276,17 @@ func dogstatdSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err
return sink, nil
}
func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) {
func prometheusSink(cfg TelemetryConfig, hostname string, defs PrometheusDefs) (metrics.MetricSink, error) {
if cfg.PrometheusRetentionTime.Nanoseconds() < 1 {
return nil, nil
}
// TODO(kit) define these in vars in the package/file they're used
gaugeDefs := []prometheus.GaugeDefinition{
{
Name: []string{"consul", "autopilot", "healthy"},
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
},
}
// TODO(kit) define these in vars in the package/file they're used
counterDefs := []prometheus.CounterDefinition{
{
Name: []string{"consul", "raft", "apply"},
Help: "This counts the number of Raft transactions occurring over the interval.",
},
{
Name: []string{"consul", "raft", "state", "candidate"},
Help: "This increments whenever a Consul server starts an election.",
},
{
Name: []string{"consul", "raft", "state", "leader"},
Help: "This increments whenever a Consul server becomes a leader.",
},
{
Name: []string{"consul", "client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"consul", "runtime", "total_gc_pause_ns"},
Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.",
},
{
Name: []string{"consul", "client", "rpc"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
},
{
Name: []string{"consul", "client", "rpc", "exceeded"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
},
{
Name: []string{"consul", "client", "rpc", "failed"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
},
}
// TODO(kit) define these in vars in the package/file they're used
summaryDefs := []prometheus.SummaryDefinition{
{
Name: []string{"consul", "kvs", "apply"},
Help: "This measures the time it takes to complete an update to the KV store.",
},
{
Name: []string{"consul", "txn", "apply"},
Help: "This measures the time spent applying a transaction operation.",
},
{
Name: []string{"consul", "raft", "commitTime"},
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
},
{
Name: []string{"consul", "raft", "leader", "lastContact"},
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
},
}
prometheusOpts := prometheus.PrometheusOpts{
Expiration: cfg.PrometheusRetentionTime,
GaugeDefinitions: gaugeDefs,
CounterDefinitions: counterDefs,
SummaryDefinitions: summaryDefs,
GaugeDefinitions: defs.Gauges,
CounterDefinitions: defs.Counters,
SummaryDefinitions: defs.Summaries,
}
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
@ -399,9 +337,25 @@ func circonusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err
return sink, nil
}
// PrometheusDefs wraps collections of metric definitions to pass into the PrometheusSink.
// prometheusSink copies each field into the matching field of
// prometheus.PrometheusOpts when it builds the sink.
type PrometheusDefs struct {
// Gauges is used as PrometheusOpts.GaugeDefinitions.
Gauges []prometheus.GaugeDefinition
// Counters is used as PrometheusOpts.CounterDefinitions.
Counters []prometheus.CounterDefinition
// Summaries is used as PrometheusOpts.SummaryDefinitions.
Summaries []prometheus.SummaryDefinition
}
// EmptyPrometheusDefs builds a PrometheusDefs in which Gauges, Counters and
// Summaries are each an allocated slice of length zero rather than nil.
func EmptyPrometheusDefs() PrometheusDefs {
	var defs PrometheusDefs
	defs.Gauges = []prometheus.GaugeDefinition{}
	defs.Counters = []prometheus.CounterDefinition{}
	defs.Summaries = []prometheus.SummaryDefinition{}
	return defs
}
// InitTelemetry configures go-metrics based on map of telemetry config
// values as returned by Runtimecfg.Config().
func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) {
func InitTelemetry(cfg TelemetryConfig, defs PrometheusDefs) (*metrics.InmemSink, error) {
if cfg.Disable {
return nil, nil
}
@ -440,9 +394,12 @@ func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) {
if err := addSink(circonusSink); err != nil {
return nil, err
}
if err := addSink(prometheusSink); err != nil {
promSink, err := prometheusSink(cfg, metricsConf.HostName, defs)
if err != nil {
return nil, err
}
sinks = append(sinks, promSink)
if len(sinks) > 0 {
sinks = append(sinks, memSink)