Merge pull request #9261 from hashicorp/telemetry/fix-missing-and-stale-docs-2
Telemetry/fix missing and stale docs
This commit is contained in:
commit
669783f965
|
@ -37,7 +37,7 @@ import (
|
||||||
var Gauges = []prometheus.GaugeDefinition{
|
var Gauges = []prometheus.GaugeDefinition{
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "cache", "entries_count"},
|
Name: []string{"consul", "cache", "entries_count"},
|
||||||
Help: "",
|
Help: "Represents the number of entries in this cache.",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,19 +45,19 @@ var Gauges = []prometheus.GaugeDefinition{
|
||||||
var Counters = []prometheus.CounterDefinition{
|
var Counters = []prometheus.CounterDefinition{
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "cache", "bypass"},
|
Name: []string{"consul", "cache", "bypass"},
|
||||||
Help: "",
|
Help: "Counts how many times a request bypassed the cache because no cache-key was provided.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "cache", "fetch_success"},
|
Name: []string{"consul", "cache", "fetch_success"},
|
||||||
Help: "",
|
Help: "Counts the number of successful fetches by the cache.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "cache", "fetch_error"},
|
Name: []string{"consul", "cache", "fetch_error"},
|
||||||
Help: "",
|
Help: "Counts the number of failed fetches by the cache.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "cache", "evict_expired"},
|
Name: []string{"consul", "cache", "evict_expired"},
|
||||||
Help: "",
|
Help: "Counts the number of expired entries that are evicted.",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "connect", "query"},
|
Name: []string{"catalog", "connect", "query"},
|
||||||
Help: "",
|
Help: "Increments for each connect-based catalog query for the given service.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "service", "query-tag"},
|
Name: []string{"catalog", "service", "query-tag"},
|
||||||
|
@ -33,7 +33,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "connect", "query-tag"},
|
Name: []string{"catalog", "connect", "query-tag"},
|
||||||
Help: "",
|
Help: "Increments for each connect-based catalog query for the given service with the given tag.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "service", "query-tags"},
|
Name: []string{"catalog", "service", "query-tags"},
|
||||||
|
@ -41,7 +41,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "connect", "query-tags"},
|
Name: []string{"catalog", "connect", "query-tags"},
|
||||||
Help: "",
|
Help: "Increments for each connect-based catalog query for the given service with the given tags.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "service", "not-found"},
|
Name: []string{"catalog", "service", "not-found"},
|
||||||
|
@ -49,7 +49,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"catalog", "connect", "not-found"},
|
Name: []string{"catalog", "connect", "not-found"},
|
||||||
Help: "",
|
Help: "Increments for each connect-based catalog query where the given service could not be found.",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,43 +53,43 @@ var CommandsSummaries = []prometheus.SummaryDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "fsm", "intention"},
|
Name: []string{"consul", "fsm", "intention"},
|
||||||
Help: "",
|
Help: "Deprecated - use fsm_intention instead",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "intention"},
|
Name: []string{"fsm", "intention"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an intention operation to the FSM.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"consul", "fsm", "ca"},
|
Name: []string{"consul", "fsm", "ca"},
|
||||||
Help: "",
|
Help: "Deprecated - use fsm_ca instead",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: []string{"fsm", "ca"},
|
||||||
|
Help: "Measures the time it takes to apply CA configuration operations to the FSM.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "ca", "leaf"},
|
Name: []string{"fsm", "ca", "leaf"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an operation while signing a leaf certificate.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "acl", "token"},
|
Name: []string{"fsm", "acl", "token"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an ACL token operation to the FSM.",
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: []string{"fsm", "ca", "leaf"},
|
|
||||||
Help: "",
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "acl", "policy"},
|
Name: []string{"fsm", "acl", "policy"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an ACL policy operation to the FSM.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "acl", "bindingrule"},
|
Name: []string{"fsm", "acl", "bindingrule"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an ACL binding rule operation to the FSM.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "acl", "authmethod"},
|
Name: []string{"fsm", "acl", "authmethod"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply an ACL authmethod operation to the FSM.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"fsm", "system_metadata"},
|
Name: []string{"fsm", "system_metadata"},
|
||||||
Help: "",
|
Help: "Measures the time it takes to apply a system metadata operation to the FSM.",
|
||||||
},
|
},
|
||||||
// TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is
|
// TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is
|
||||||
// possible to statically declare these when we know all of the names, but I didn't get to it
|
// possible to statically declare these when we know all of the names, but I didn't get to it
|
||||||
|
@ -378,8 +378,12 @@ func (c *FSM) applyIntentionOperation(buf []byte, index uint64) interface{} {
|
||||||
panic(fmt.Errorf("failed to decode request: %v", err))
|
panic(fmt.Errorf("failed to decode request: %v", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(kit): We should deprecate this first metric that writes the metrics_prefix itself,
|
||||||
|
// the config we use to flag this out, telemetry.disable_compat_1.9 is on the agent - how do
|
||||||
|
// we access it here?
|
||||||
defer metrics.MeasureSinceWithLabels([]string{"consul", "fsm", "intention"}, time.Now(),
|
defer metrics.MeasureSinceWithLabels([]string{"consul", "fsm", "intention"}, time.Now(),
|
||||||
[]metrics.Label{{Name: "op", Value: string(req.Op)}})
|
[]metrics.Label{{Name: "op", Value: string(req.Op)}})
|
||||||
|
|
||||||
defer metrics.MeasureSinceWithLabels([]string{"fsm", "intention"}, time.Now(),
|
defer metrics.MeasureSinceWithLabels([]string{"fsm", "intention"}, time.Now(),
|
||||||
[]metrics.Label{{Name: "op", Value: string(req.Op)}})
|
[]metrics.Label{{Name: "op", Value: string(req.Op)}})
|
||||||
|
|
||||||
|
@ -474,6 +478,7 @@ func (c *FSM) applyConnectCAOperation(buf []byte, index uint64) interface{} {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyConnectCALeafOperation applies an operation while signing a leaf certificate.
|
||||||
func (c *FSM) applyConnectCALeafOperation(buf []byte, index uint64) interface{} {
|
func (c *FSM) applyConnectCALeafOperation(buf []byte, index uint64) interface{} {
|
||||||
var req structs.CALeafRequest
|
var req structs.CALeafRequest
|
||||||
if err := structs.Decode(buf, &req); err != nil {
|
if err := structs.Decode(buf, &req); err != nil {
|
||||||
|
|
|
@ -16,11 +16,11 @@ var SessionGauges = []prometheus.GaugeDefinition{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"raft", "applied_index"},
|
Name: []string{"raft", "applied_index"},
|
||||||
Help: "",
|
Help: "Represents the raft applied index.",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: []string{"raft", "last_index"},
|
Name: []string{"raft", "last_index"},
|
||||||
Help: "",
|
Help: "Represents the raft last index.",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ func (s *Server) clearAllSessionTimers() {
|
||||||
s.sessionTimers.StopAll()
|
s.sessionTimers.StopAll()
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateMetrics is a long running routine used to uddate a
|
// updateMetrics is a long running routine used to update a
|
||||||
// number of server periodic metrics
|
// number of server periodic metrics
|
||||||
func (s *Server) updateMetrics() {
|
func (s *Server) updateMetrics() {
|
||||||
for {
|
for {
|
||||||
|
|
|
@ -194,8 +194,12 @@ These metrics are used to monitor the health of the Consul servers.
|
||||||
| `consul.acl.resolveTokenLegacy` | This measures the time it takes to resolve an ACL token using the legacy ACL system. | ms | timer |
|
| `consul.acl.resolveTokenLegacy` | This measures the time it takes to resolve an ACL token using the legacy ACL system. | ms | timer |
|
||||||
| `consul.acl.ResolveToken` | This measures the time it takes to resolve an ACL token. | ms | timer |
|
| `consul.acl.ResolveToken` | This measures the time it takes to resolve an ACL token. | ms | timer |
|
||||||
| `consul.acl.ResolveTokenToIdentity` | This measures the time it takes to resolve an ACL token to an Identity. | ms | timer |
|
| `consul.acl.ResolveTokenToIdentity` | This measures the time it takes to resolve an ACL token to an Identity. | ms | timer |
|
||||||
| `consul.acl.token.cache_hit` | Increments if Consul is able to resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
|
| `consul.acl.token.cache_hit` | Increments if Consul is able to resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
|
||||||
| `consul.acl.token.cache_miss` | Increments if Consul cannot resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
|
| `consul.acl.token.cache_miss` | Increments if Consul cannot resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
|
||||||
|
| `consul.cache.bypass` | Counts how many times a request bypassed the cache because no cache-key was provided. | counter | counter |
|
||||||
|
| `consul.cache.fetch_success` | Counts the number of successful fetches by the cache. | counter | counter |
|
||||||
|
| `consul.cache.fetch_error` | Counts the number of failed fetches by the cache. | counter | counter |
|
||||||
|
| `consul.cache.evict_expired` | Counts the number of expired entries that are evicted. | counter | counter |
|
||||||
| `consul.raft.fsm.snapshot` | This metric measures the time taken by the FSM to record the current state for the snapshot. | ms | timer |
|
| `consul.raft.fsm.snapshot` | This metric measures the time taken by the FSM to record the current state for the snapshot. | ms | timer |
|
||||||
| `consul.raft.fsm.apply` | This metric gives the number of logs committed since the last interval. | commit logs / interval | counter |
|
| `consul.raft.fsm.apply` | This metric gives the number of logs committed since the last interval. | commit logs / interval | counter |
|
||||||
| `consul.raft.commitNumLogs` | This metric measures the count of logs processed for application to the FSM in a single batch. | logs | gauge |
|
| `consul.raft.commitNumLogs` | This metric measures the count of logs processed for application to the FSM in a single batch. | logs | gauge |
|
||||||
|
@ -207,6 +211,8 @@ These metrics are used to monitor the health of the Consul servers.
|
||||||
| `consul.raft.replication.heartbeat` | This metric measures the time taken to invoke appendEntries on a peer, so that it doesn’t time out on a periodic basis. | ms | timer |
|
| `consul.raft.replication.heartbeat` | This metric measures the time taken to invoke appendEntries on a peer, so that it doesn’t time out on a periodic basis. | ms | timer |
|
||||||
| `consul.serf.snapshot.appendLine` | This metric measures the time taken by the Consul agent to append an entry into the existing log. | ms | timer |
|
| `consul.serf.snapshot.appendLine` | This metric measures the time taken by the Consul agent to append an entry into the existing log. | ms | timer |
|
||||||
| `consul.serf.snapshot.compact` | This metric measures the time taken by the Consul agent to compact a log. This operation occurs only when the snapshot becomes large enough to justify the compaction. | ms | timer |
|
| `consul.serf.snapshot.compact` | This metric measures the time taken by the Consul agent to compact a log. This operation occurs only when the snapshot becomes large enough to justify the compaction. | ms | timer |
|
||||||
|
| `consul.raft.applied_index` | Represents the raft applied index. | index | gauge |
|
||||||
|
| `consul.raft.last_index` | Represents the raft last index. | index | gauge |
|
||||||
| `consul.raft.state.leader` | This increments whenever a Consul server becomes a leader. If there are frequent leadership changes this may be an indication that the servers are overloaded and aren't meeting the soft real-time requirements for Raft, or that there are networking problems between the servers. | leadership transitions / interval | counter |
|
| `consul.raft.state.leader` | This increments whenever a Consul server becomes a leader. If there are frequent leadership changes this may be an indication that the servers are overloaded and aren't meeting the soft real-time requirements for Raft, or that there are networking problems between the servers. | leadership transitions / interval | counter |
|
||||||
| `consul.raft.state.candidate` | This increments whenever a Consul server starts an election. If this increments without a leadership change occurring it could indicate that a single server is overloaded or is experiencing network connectivity issues. | election attempts / interval | counter |
|
| `consul.raft.state.candidate` | This increments whenever a Consul server starts an election. If this increments without a leadership change occurring it could indicate that a single server is overloaded or is experiencing network connectivity issues. | election attempts / interval | counter |
|
||||||
| `consul.raft.apply` | This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers. | raft transactions / interval | counter |
|
| `consul.raft.apply` | This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers. | raft transactions / interval | counter |
|
||||||
|
@ -243,6 +249,14 @@ These metrics are used to monitor the health of the Consul servers.
|
||||||
| `consul.fsm.txn` | This measures the time it takes to apply the given transaction update to the FSM. | ms | timer |
|
| `consul.fsm.txn` | This measures the time it takes to apply the given transaction update to the FSM. | ms | timer |
|
||||||
| `consul.fsm.autopilot` | This measures the time it takes to apply the given autopilot update to the FSM. | ms | timer |
|
| `consul.fsm.autopilot` | This measures the time it takes to apply the given autopilot update to the FSM. | ms | timer |
|
||||||
| `consul.fsm.persist` | This measures the time it takes to persist the FSM to a raft snapshot. | ms | timer |
|
| `consul.fsm.persist` | This measures the time it takes to persist the FSM to a raft snapshot. | ms | timer |
|
||||||
|
| `consul.fsm.intention` | Measures the time it takes to apply an intention operation to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.ca` | Measures the time it takes to apply CA configuration operations to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.ca.leaf` | Measures the time it takes to apply an operation while signing a leaf certificate. | ms | timer |
|
||||||
|
| `consul.fsm.acl.token` | Measures the time it takes to apply an ACL token operation to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.acl.policy` | Measures the time it takes to apply an ACL policy operation to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.acl.bindingrule` | Measures the time it takes to apply an ACL binding rule operation to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.acl.authmethod` | Measures the time it takes to apply an ACL authmethod operation to the FSM. | ms | timer |
|
||||||
|
| `consul.fsm.system_metadata` | Measures the time it takes to apply a system metadata operation to the FSM. | ms | timer |
|
||||||
| `consul.kvs.apply` | This measures the time it takes to complete an update to the KV store. | ms | timer |
|
| `consul.kvs.apply` | This measures the time it takes to complete an update to the KV store. | ms | timer |
|
||||||
| `consul.leader.barrier` | This measures the time spent waiting for the raft barrier upon gaining leadership. | ms | timer |
|
| `consul.leader.barrier` | This measures the time spent waiting for the raft barrier upon gaining leadership. | ms | timer |
|
||||||
| `consul.leader.reconcile` | This measures the time spent updating the raft store from the serf member information. | ms | timer |
|
| `consul.leader.reconcile` | This measures the time spent updating the raft store from the serf member information. | ms | timer |
|
||||||
|
@ -306,6 +320,10 @@ These metrics give insight into the health of the cluster as a whole.
|
||||||
| `consul.catalog.service.query-tag..` | This increments for each catalog query for the given service with the given tag. | queries | counter |
|
| `consul.catalog.service.query-tag..` | This increments for each catalog query for the given service with the given tag. | queries | counter |
|
||||||
| `consul.catalog.service.query-tags..` | This increments for each catalog query for the given service with the given tags. | queries | counter |
|
| `consul.catalog.service.query-tags..` | This increments for each catalog query for the given service with the given tags. | queries | counter |
|
||||||
| `consul.catalog.service.not-found.` | This increments for each catalog query where the given service could not be found. | queries | counter |
|
| `consul.catalog.service.not-found.` | This increments for each catalog query where the given service could not be found. | queries | counter |
|
||||||
|
| `consul.catalog.connect.query.` | This increments for each connect-based catalog query for the given service. | queries | counter |
|
||||||
|
| `consul.catalog.connect.query-tag..` | This increments for each connect-based catalog query for the given service with the given tag. | queries | counter |
|
||||||
|
| `consul.catalog.connect.query-tags..` | This increments for each connect-based catalog query for the given service with the given tags. | queries | counter |
|
||||||
|
| `consul.catalog.connect.not-found.` | This increments for each connect-based catalog query where the given service could not be found. | queries | counter |
|
||||||
|
|
||||||
## Connect Built-in Proxy Metrics
|
## Connect Built-in Proxy Metrics
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue