metrics: activity log (#10514)

* core: add vault.identity.entity.active.monthly log
* Fixed end-of-month metrics and unit test.
* Added metric covering month-to-date (not broken down by namespace.)
* Updated documentation
* Added changelog.

Co-authored-by: mgritter <mgritter@hashicorp.com>
This commit is contained in:
Aleksandr Bezobchuk 2021-01-26 17:37:07 -05:00 committed by GitHub
parent 160c8e4dbe
commit 2ec8f9a222
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 159 additions and 3 deletions

3
changelog/10514.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
core: add metrics for active entity count
```

View File

@ -1499,6 +1499,17 @@ func (a *ActivityLog) HandleTokenCreation(entry *logical.TokenEntry) {
}
}
// namespaceToLabel maps a namespace ID to the string used as the value of the
// "namespace" metric label.
//
// The namespace is looked up via NamespaceByID. If the lookup returns an error
// or no namespace, the label is "deleted-<nsID>". A namespace whose path is
// empty is labelled "root"; any other namespace is labelled with its path.
func (a *ActivityLog) namespaceToLabel(ctx context.Context, nsID string) string {
	ns, err := NamespaceByID(ctx, nsID, a.core)
	switch {
	case err != nil, ns == nil:
		// Lookup failed or found nothing: still emit a label that carries the ID.
		return fmt.Sprintf("deleted-%v", nsID)
	case ns.Path == "":
		return "root"
	default:
		return ns.Path
	}
}
// goroutine to process the request in the intent log, creating precomputed queries.
// We expect the return value won't be checked, so log errors as they occur
// (but for unit testing having the error return should help.)
@ -1601,7 +1612,13 @@ func (a *ActivityLog) precomputedQueryWorker() error {
byNamespace[nsID].Tokens += v
}
}
endTime := timeutil.EndOfMonth(time.Unix(lastMonth, 0).UTC())
activePeriodStart := timeutil.MonthsPreviousTo(a.defaultReportMonths, endTime)
// If not enough data, report as much as we have in the window
if activePeriodStart.Before(times[len(times)-1]) {
activePeriodStart = times[len(times)-1]
}
for _, startTime := range times {
// Do not work back further than the current retention window,
@ -1627,12 +1644,33 @@ func (a *ActivityLog) precomputedQueryWorker() error {
EndTime: endTime,
Namespaces: make([]*activity.NamespaceRecord, 0, len(byNamespace)),
}
for nsID, counts := range byNamespace {
pq.Namespaces = append(pq.Namespaces, &activity.NamespaceRecord{
NamespaceID: nsID,
Entities: uint64(len(counts.Entities)),
NonEntityTokens: counts.Tokens,
})
// If this is the most recent month, or the start of the reporting period, output
// a metric for each namespace.
if startTime == times[0] {
a.metrics.SetGaugeWithLabels(
[]string{"identity", "entity", "active", "monthly"},
float32(len(counts.Entities)),
[]metricsutil.Label{
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
},
)
} else if startTime == activePeriodStart {
a.metrics.SetGaugeWithLabels(
[]string{"identity", "entity", "active", "reporting_period"},
float32(len(counts.Entities)),
[]metricsutil.Label{
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
},
)
}
}
err = a.queryStore.Put(ctx, pq)
@ -1641,7 +1679,7 @@ func (a *ActivityLog) precomputedQueryWorker() error {
}
}
// Delete the intent log
// delete the intent log
a.view.Delete(ctx, activityIntentLogKey)
a.logger.Info("finished computing queries", "month", endTime)
@ -1694,3 +1732,33 @@ func (a *ActivityLog) retentionWorker(currentTime time.Time, retentionMonths int
return nil
}
// PartialMonthMetrics periodically reports the number of active entities seen
// so far in the current month, as a single gauge value with no labels.
//
// The count is not broken down by namespace because that would require going
// to storage (that information is not currently stored in memory.)
//
// When the activity log is not enabled, an empty (non-nil) list is returned.
// The returned error is always nil.
func (a *ActivityLog) PartialMonthMetrics(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) {
	// Hold the fragment read lock for the whole call; both a.enabled and
	// a.activeEntities are read under it.
	a.fragmentLock.RLock()
	defer a.fragmentLock.RUnlock()

	gauges := []metricsutil.GaugeLabelValues{}
	if a.enabled {
		gauges = append(gauges, metricsutil.GaugeLabelValues{
			Labels: []metricsutil.Label{},
			Value:  float32(len(a.activeEntities)),
		})
	}
	return gauges, nil
}
// activeEntityGaugeCollector is a gauge collector that forwards to the
// activity log's PartialMonthMetrics.
//
// The activity log pointer is copied while holding the core's state read
// lock, and the lock is released before the activity log is called. If the
// core has no activity log, an empty (non-nil) list and a nil error are
// returned.
func (c *Core) activeEntityGaugeCollector(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) {
	c.stateLock.RLock()
	activityLog := c.activityLog
	c.stateLock.RUnlock()

	if activityLog != nil {
		return activityLog.PartialMonthMetrics(ctx)
	}
	return []metricsutil.GaugeLabelValues{}, nil
}

View File

@ -1895,7 +1895,7 @@ func TestActivityLog_Precompute(t *testing.T) {
october := timeutil.StartOfMonth(time.Date(2020, 10, 1, 0, 0, 0, 0, time.UTC))
november := timeutil.StartOfMonth(time.Date(2020, 11, 1, 0, 0, 0, 0, time.UTC))
core, _, _ := TestCoreUnsealed(t)
core, _, _, sink := TestCoreUnsealedWithMetrics(t)
a := core.activityLog
ctx := namespace.RootContext(nil)
@ -2137,8 +2137,84 @@ func TestActivityLog_Precompute(t *testing.T) {
for i := 0; i <= tc.ExpectedUpTo; i++ {
checkPrecomputedQuery(i)
}
}
// Check metrics on the last precomputed query
// (otherwise we need a way to reset the in-memory metrics between test cases.)
intervals := sink.Data()
// Test crossed an interval boundary, don't try to deal with it.
if len(intervals) > 1 {
t.Skip("Detected interval crossing.")
}
expectedGauges := []struct {
Name string
NamespaceLabel string
Value float32
}{
// october values
{
"identity.entity.active.monthly",
"root",
15.0,
},
{
"identity.entity.active.monthly",
"deleted-bbbbb", // No namespace entry for this fake ID
5.0,
},
{
"identity.entity.active.monthly",
"deleted-ccccc",
5.0,
},
// august-september values
{
"identity.entity.active.reporting_period",
"root",
20.0,
},
{
"identity.entity.active.reporting_period",
"deleted-aaaaa",
5.0,
},
{
"identity.entity.active.reporting_period",
"deleted-bbbbb",
10.0,
},
{
"identity.entity.active.reporting_period",
"deleted-ccccc",
5.0,
},
}
for _, g := range expectedGauges {
found := false
for _, actual := range intervals[0].Gauges {
actualNamespaceLabel := ""
for _, l := range actual.Labels {
if l.Name == "namespace" {
actualNamespaceLabel = l.Value
break
}
}
if actual.Name == g.Name && actualNamespaceLabel == g.NamespaceLabel {
found = true
if actual.Value != g.Value {
t.Errorf("Mismatched value for %v %v %v != %v",
g.Name, g.NamespaceLabel, actual.Value, g.Value)
}
break
}
}
if !found {
t.Errorf("No guage found for %v %v",
g.Name, g.NamespaceLabel)
}
}
}
type BlockingInmemStorage struct {

View File

@ -211,6 +211,12 @@ func (c *Core) emitMetrics(stopCh chan struct{}) {
c.entityGaugeCollectorByMount,
"",
},
{
[]string{"identity", "entity", "active", "partial_month"},
[]metrics.Label{{"gauge", "identity_active_month"}},
c.activeEntityGaugeCollector,
"",
},
}
// Disable collection if configured, or if we're a performance standby

View File

@ -162,6 +162,9 @@ These metrics cover measurement of token, identity, and lease operations, and co
| `vault.expire.register` | Time taken for register operations | ms | summary |
| `vault.expire.register-auth` | Time taken for register authentication operations which create lease entries without lease ID | ms | summary |
| `vault.identity.num_entities` | Number of identity entities stored in Vault | entities | gauge |
| `vault.identity.entity.active.monthly` (cluster, namespace) | Number of distinct entities that created a token during the past month, per namespace. Only available if client count is enabled. Reported at the start of each month. | entities | gauge |
| `vault.identity.entity.active.partial_month` (cluster) | Total number of distinct entities that created a token during the current month. Only available if client count is enabled. Reported periodically within each month. | entities | gauge |
| `vault.identity.entity.active.reporting_period` (cluster, namespace) | Number of distinct entities that created a token in the past N months, as defined by the client count default reporting period. Only available if client count is enabled. Reported at the start of each month. | entities | gauge |
| `vault.identity.entity.alias.count` (cluster, namespace, auth_method, mount_point) | Number of identity entities aliases stored in Vault, grouped by the auth mount that created them. This gauge is computed every 10 minutes. | aliases | gauge |
| `vault.identity.entity.count` (cluster, namespace) | Number of identity entities stored in Vault, grouped by namespace. | entities | gauge |
| `vault.identity.entity.creation` (cluster, namespace, auth_method, mount_point) | Number of identity entities created, grouped by the auth mount that created them. | entities | counter |