Merge pull request #11090 from hashicorp/clly/kv-usage-metrics
Add KVUsage to consul state usage metrics
This commit is contained in:
commit
bc04a155fb
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
telemetry: Add new metrics for the count of KV entries in the Consul store.
|
||||
```
|
|
@ -10,6 +10,7 @@ import (
|
|||
|
||||
// Identifiers used by the usage-tracking code.
const (
	// serviceNamesUsageTable is the identifier for service-name usage
	// (presumably the ID of its usage entry — confirm against its users).
	serviceNamesUsageTable = "service-names"

	// kvUsageTable is the identifier for KV entry usage.
	// NOTE(review): the KV usage code visible alongside this constant keys
	// its usage entry by the literal "kvs", not by this value — confirm
	// whether this constant is actually referenced anywhere.
	kvUsageTable = "kv-entries"

	// tableUsage is the name of the table that usage entries are read from.
	tableUsage = "usage"
)
|
||||
|
@ -54,6 +55,11 @@ type NodeUsage struct {
|
|||
EnterpriseNodeUsage
|
||||
}
|
||||
|
||||
type KVUsage struct {
|
||||
KVCount int
|
||||
EnterpriseKVUsage
|
||||
}
|
||||
|
||||
type uniqueServiceState int
|
||||
|
||||
const (
|
||||
|
@ -95,6 +101,9 @@ func updateUsage(tx WriteTxn, changes Changes) error {
|
|||
} else {
|
||||
serviceNameChanges[svc.CompoundServiceName()] += delta
|
||||
}
|
||||
case "kvs":
|
||||
usageDeltas[change.Table] += delta
|
||||
addEnterpriseKVUsage(usageDeltas, change)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -269,6 +278,26 @@ func (s *Store) ServiceUsage() (uint64, ServiceUsage, error) {
|
|||
return serviceInstances.Index, results, nil
|
||||
}
|
||||
|
||||
func (s *Store) KVUsage() (uint64, KVUsage, error) {
|
||||
tx := s.db.ReadTxn()
|
||||
defer tx.Abort()
|
||||
|
||||
kvs, err := firstUsageEntry(tx, "kvs")
|
||||
if err != nil {
|
||||
return 0, KVUsage{}, fmt.Errorf("failed kvs lookup: %s", err)
|
||||
}
|
||||
|
||||
usage := KVUsage{
|
||||
KVCount: kvs.Count,
|
||||
}
|
||||
results, err := compileEnterpriseKVUsage(tx, usage)
|
||||
if err != nil {
|
||||
return 0, KVUsage{}, fmt.Errorf("failed kvs lookup: %s", err)
|
||||
}
|
||||
|
||||
return kvs.Index, results, nil
|
||||
}
|
||||
|
||||
func firstUsageEntry(tx ReadTxn, id string) (*UsageEntry, error) {
|
||||
usage, err := tx.First(tableUsage, indexID, id)
|
||||
if err != nil {
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
|
||||
// EnterpriseServiceUsage declares no fields here; it exists so that service
// usage data has a slot for enterprise-specific fields.
type EnterpriseServiceUsage struct{}

// EnterpriseNodeUsage declares no fields here; it is embedded in NodeUsage.
type EnterpriseNodeUsage struct{}

// EnterpriseKVUsage declares no fields here; it is embedded in KVUsage.
type EnterpriseKVUsage struct{}
|
||||
|
||||
// addEnterpriseNodeUsage is a no-op here: it ignores both the usage-delta map
// and the change it is given.
func addEnterpriseNodeUsage(map[string]int, memdb.Change) {}
|
||||
|
||||
|
@ -17,6 +18,8 @@ func addEnterpriseServiceInstanceUsage(map[string]int, memdb.Change) {}
|
|||
|
||||
// addEnterpriseServiceUsage is a no-op here: it ignores both the usage-delta
// map and the per-service-name state map it is given.
func addEnterpriseServiceUsage(map[string]int, map[structs.ServiceName]uniqueServiceState) {}
|
||||
|
||||
// addEnterpriseKVUsage is a no-op here: it ignores both the usage-delta map
// and the KV change it is given. updateUsage calls it for every "kvs" change.
func addEnterpriseKVUsage(map[string]int, memdb.Change) {}
|
||||
|
||||
func compileEnterpriseServiceUsage(tx ReadTxn, usage ServiceUsage) (ServiceUsage, error) {
|
||||
return usage, nil
|
||||
}
|
||||
|
@ -24,3 +27,7 @@ func compileEnterpriseServiceUsage(tx ReadTxn, usage ServiceUsage) (ServiceUsage
|
|||
func compileEnterpriseNodeUsage(tx ReadTxn, usage NodeUsage) (NodeUsage, error) {
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
func compileEnterpriseKVUsage(tx ReadTxn, usage KVUsage) (KVUsage, error) {
|
||||
return usage, nil
|
||||
}
|
||||
|
|
|
@ -45,6 +45,44 @@ func TestStateStore_Usage_NodeUsage_Delete(t *testing.T) {
|
|||
require.Equal(t, usage.Nodes, 1)
|
||||
}
|
||||
|
||||
func TestStateStore_Usage_KVUsage(t *testing.T) {
|
||||
s := testStateStore(t)
|
||||
|
||||
// No keys have been registered, and thus no usage entry exists
|
||||
idx, usage, err := s.KVUsage()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, idx, uint64(0))
|
||||
require.Equal(t, usage.KVCount, 0)
|
||||
|
||||
testSetKey(t, s, 0, "key-1", "0", nil)
|
||||
testSetKey(t, s, 1, "key-2", "0", nil)
|
||||
testSetKey(t, s, 2, "key-2", "1", nil)
|
||||
|
||||
idx, usage, err = s.KVUsage()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, idx, uint64(2))
|
||||
require.Equal(t, usage.KVCount, 2)
|
||||
}
|
||||
|
||||
func TestStateStore_Usage_KVUsage_Delete(t *testing.T) {
|
||||
s := testStateStore(t)
|
||||
|
||||
testSetKey(t, s, 0, "key-1", "0", nil)
|
||||
testSetKey(t, s, 1, "key-2", "0", nil)
|
||||
testSetKey(t, s, 2, "key-2", "1", nil)
|
||||
|
||||
idx, usage, err := s.KVUsage()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, idx, uint64(2))
|
||||
require.Equal(t, usage.KVCount, 2)
|
||||
|
||||
require.NoError(t, s.KVSDelete(3, "key-2", nil))
|
||||
idx, usage, err = s.KVUsage()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, idx, uint64(3))
|
||||
require.Equal(t, usage.KVCount, 1)
|
||||
}
|
||||
|
||||
func TestStateStore_Usage_ServiceUsageEmpty(t *testing.T) {
|
||||
s := testStateStore(t)
|
||||
|
||||
|
|
|
@ -36,6 +36,10 @@ var Gauges = []prometheus.GaugeDefinition{
|
|||
Name: []string{"consul", "members", "servers"},
|
||||
Help: "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
|
||||
},
|
||||
{
	// The name must match the gauge emitted by emitKVUsage
	// (consul.state.kv_entries), otherwise this definition describes a
	// metric that is never emitted.
	Name: []string{"consul", "state", "kv_entries"},
	Help: "Measures the current number of unique KV entries written in Consul. It is only emitted by Consul servers. Added in v1.10.3.",
},
|
||||
}
|
||||
|
||||
type getMembersFunc func() []serf.Member
|
||||
|
@ -145,6 +149,7 @@ func (u *UsageMetricsReporter) Run(ctx context.Context) {
|
|||
}
|
||||
|
||||
func (u *UsageMetricsReporter) runOnce() {
|
||||
u.logger.Trace("Starting usage run")
|
||||
state := u.stateProvider.State()
|
||||
|
||||
_, nodeUsage, err := state.NodeUsage()
|
||||
|
@ -163,6 +168,14 @@ func (u *UsageMetricsReporter) runOnce() {
|
|||
|
||||
members := u.memberUsage()
|
||||
u.emitMemberUsage(members)
|
||||
|
||||
_, kvUsage, err := state.KVUsage()
|
||||
if err != nil {
|
||||
u.logger.Warn("failed to retrieve kv entry usage from state store", "error", err)
|
||||
}
|
||||
|
||||
u.emitKVUsage(kvUsage)
|
||||
|
||||
}
|
||||
|
||||
func (u *UsageMetricsReporter) memberUsage() []serf.Member {
|
||||
|
|
|
@ -58,3 +58,11 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage state.ServiceUsage)
|
|||
u.metricLabels,
|
||||
)
|
||||
}
|
||||
|
||||
func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {
|
||||
metrics.SetGaugeWithLabels(
|
||||
[]string{"consul", "state", "kv_entries"},
|
||||
float32(kvUsage.KVCount),
|
||||
u.metricLabels,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -57,6 +57,11 @@ func TestUsageReporter_emitNodeUsage_OSS(t *testing.T) {
|
|||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
getMembersFunc: func() []serf.Member { return []serf.Member{} },
|
||||
},
|
||||
|
@ -114,6 +119,11 @@ func TestUsageReporter_emitNodeUsage_OSS(t *testing.T) {
|
|||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -199,6 +209,11 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
|
|||
{Name: "datacenter", Value: "dc1"},
|
||||
},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
getMembersFunc: func() []serf.Member { return []serf.Member{} },
|
||||
},
|
||||
|
@ -276,6 +291,11 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
|
|||
{Name: "datacenter", Value: "dc1"},
|
||||
},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -314,3 +334,156 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsageReporter_emitKVUsage_OSS(t *testing.T) {
|
||||
type testCase struct {
|
||||
modfiyStateStore func(t *testing.T, s *state.Store)
|
||||
getMembersFunc getMembersFunc
|
||||
expectedGauges map[string]metrics.GaugeValue
|
||||
}
|
||||
cases := map[string]testCase{
|
||||
"empty-state": {
|
||||
expectedGauges: map[string]metrics.GaugeValue{
|
||||
// --- node ---
|
||||
"consul.usage.test.consul.state.nodes;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.nodes",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
// --- member ---
|
||||
"consul.usage.test.consul.members.clients;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.members.clients",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.members.servers;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.members.servers",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
// --- service ---
|
||||
"consul.usage.test.consul.state.services;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.services",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.service_instances;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.service_instances",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
getMembersFunc: func() []serf.Member { return []serf.Member{} },
|
||||
},
|
||||
"nodes": {
|
||||
modfiyStateStore: func(t *testing.T, s *state.Store) {
|
||||
require.NoError(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"}))
|
||||
require.NoError(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"}))
|
||||
require.NoError(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"}))
|
||||
|
||||
require.NoError(t, s.KVSSet(4, &structs.DirEntry{Key: "a", Value: []byte{1}}))
|
||||
require.NoError(t, s.KVSSet(5, &structs.DirEntry{Key: "b", Value: []byte{1}}))
|
||||
require.NoError(t, s.KVSSet(6, &structs.DirEntry{Key: "c", Value: []byte{1}}))
|
||||
require.NoError(t, s.KVSSet(7, &structs.DirEntry{Key: "d", Value: []byte{1}}))
|
||||
require.NoError(t, s.KVSDelete(8, "d", &structs.EnterpriseMeta{}))
|
||||
require.NoError(t, s.KVSDelete(9, "c", &structs.EnterpriseMeta{}))
|
||||
require.NoError(t, s.KVSSet(10, &structs.DirEntry{Key: "e", Value: []byte{1}}))
|
||||
require.NoError(t, s.KVSSet(11, &structs.DirEntry{Key: "f", Value: []byte{1}}))
|
||||
},
|
||||
getMembersFunc: func() []serf.Member {
|
||||
return []serf.Member{
|
||||
{
|
||||
Name: "foo",
|
||||
Tags: map[string]string{"role": "consul"},
|
||||
Status: serf.StatusAlive,
|
||||
},
|
||||
{
|
||||
Name: "bar",
|
||||
Tags: map[string]string{"role": "consul"},
|
||||
Status: serf.StatusAlive,
|
||||
},
|
||||
{
|
||||
Name: "baz",
|
||||
Tags: map[string]string{"role": "node"},
|
||||
Status: serf.StatusAlive,
|
||||
},
|
||||
}
|
||||
},
|
||||
expectedGauges: map[string]metrics.GaugeValue{
|
||||
// --- node ---
|
||||
"consul.usage.test.consul.state.nodes;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.nodes",
|
||||
Value: 3,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
// --- member ---
|
||||
"consul.usage.test.consul.members.servers;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.members.servers",
|
||||
Value: 2,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.members.clients;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.members.clients",
|
||||
Value: 1,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
// --- service ---
|
||||
"consul.usage.test.consul.state.services;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.services",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.service_instances;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.service_instances",
|
||||
Value: 0,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
|
||||
Name: "consul.usage.test.consul.state.kv_entries",
|
||||
Value: 4,
|
||||
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for name, tcase := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
// Only have a single interval for the test
|
||||
sink := metrics.NewInmemSink(1*time.Minute, 1*time.Minute)
|
||||
cfg := metrics.DefaultConfig("consul.usage.test")
|
||||
cfg.EnableHostname = false
|
||||
metrics.NewGlobal(cfg, sink)
|
||||
|
||||
mockStateProvider := &mockStateProvider{}
|
||||
s, err := newStateStore()
|
||||
require.NoError(t, err)
|
||||
if tcase.modfiyStateStore != nil {
|
||||
tcase.modfiyStateStore(t, s)
|
||||
}
|
||||
mockStateProvider.On("State").Return(s)
|
||||
|
||||
reporter, err := NewUsageMetricsReporter(
|
||||
new(Config).
|
||||
WithStateProvider(mockStateProvider).
|
||||
WithLogger(testutil.Logger(t)).
|
||||
WithDatacenter("dc1").
|
||||
WithGetMembersFunc(tcase.getMembersFunc),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
reporter.runOnce()
|
||||
|
||||
intervals := sink.Data()
|
||||
require.Len(t, intervals, 1)
|
||||
intv := intervals[0]
|
||||
|
||||
assertEqualGaugeMaps(t, tcase.expectedGauges, intv.Gauges)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -313,6 +313,7 @@ This is a full list of metrics emitted by Consul.
|
|||
| `consul.state.nodes` | Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0. | number of objects | gauge |
|
||||
| `consul.state.services` | Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0. | number of objects | gauge |
|
||||
| `consul.state.service_instances` | Measures the current number of unique service instances registered with Consul. It is only emitted by Consul servers. Added in v1.9.0. | number of objects | gauge |
|
||||
| `consul.state.kv_entries` | Measures the current number of unique KV entries written in Consul. It is only emitted by Consul servers. Added in v1.10.3. | number of objects | gauge |
|
||||
| `consul.members.clients` | Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6. | number of clients | gauge |
|
||||
| `consul.members.servers` | Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6. | number of servers | gauge |
|
||||
| `consul.dns.stale_queries` | Increments when an agent serves a query within the allowed stale threshold. | queries | counter |
|
||||
|
|
Loading…
Reference in New Issue