metrics: activity log (#10514)
* core: add vault.identity.entity.active.monthly log * Fixed end-of-month metrics and unit test. * Added metric covering month-to-date (not broken down by namespace.) * Updated documentation * Added changelog. Co-authored-by: mgritter <mgritter@hashicorp.com>
This commit is contained in:
parent
160c8e4dbe
commit
2ec8f9a222
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
core: add metrics for active entity count
|
||||
```
|
|
@ -1499,6 +1499,17 @@ func (a *ActivityLog) HandleTokenCreation(entry *logical.TokenEntry) {
|
|||
}
|
||||
}
|
||||
|
||||
func (a *ActivityLog) namespaceToLabel(ctx context.Context, nsID string) string {
|
||||
ns, err := NamespaceByID(ctx, nsID, a.core)
|
||||
if err != nil || ns == nil {
|
||||
return fmt.Sprintf("deleted-%v", nsID)
|
||||
}
|
||||
if ns.Path == "" {
|
||||
return "root"
|
||||
}
|
||||
return ns.Path
|
||||
}
|
||||
|
||||
// goroutine to process the request in the intent log, creating precomputed queries.
|
||||
// We expect the return value won't be checked, so log errors as they occur
|
||||
// (but for unit testing having the error return should help.)
|
||||
|
@ -1601,7 +1612,13 @@ func (a *ActivityLog) precomputedQueryWorker() error {
|
|||
byNamespace[nsID].Tokens += v
|
||||
}
|
||||
}
|
||||
|
||||
endTime := timeutil.EndOfMonth(time.Unix(lastMonth, 0).UTC())
|
||||
activePeriodStart := timeutil.MonthsPreviousTo(a.defaultReportMonths, endTime)
|
||||
// If not enough data, report as much as we have in the window
|
||||
if activePeriodStart.Before(times[len(times)-1]) {
|
||||
activePeriodStart = times[len(times)-1]
|
||||
}
|
||||
|
||||
for _, startTime := range times {
|
||||
// Do not work back further than the current retention window,
|
||||
|
@ -1627,12 +1644,33 @@ func (a *ActivityLog) precomputedQueryWorker() error {
|
|||
EndTime: endTime,
|
||||
Namespaces: make([]*activity.NamespaceRecord, 0, len(byNamespace)),
|
||||
}
|
||||
|
||||
for nsID, counts := range byNamespace {
|
||||
pq.Namespaces = append(pq.Namespaces, &activity.NamespaceRecord{
|
||||
NamespaceID: nsID,
|
||||
Entities: uint64(len(counts.Entities)),
|
||||
NonEntityTokens: counts.Tokens,
|
||||
})
|
||||
|
||||
// If this is the most recent month, or the start of the reporting period, output
|
||||
// a metric for each namespace.
|
||||
if startTime == times[0] {
|
||||
a.metrics.SetGaugeWithLabels(
|
||||
[]string{"identity", "entity", "active", "monthly"},
|
||||
float32(len(counts.Entities)),
|
||||
[]metricsutil.Label{
|
||||
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
|
||||
},
|
||||
)
|
||||
} else if startTime == activePeriodStart {
|
||||
a.metrics.SetGaugeWithLabels(
|
||||
[]string{"identity", "entity", "active", "reporting_period"},
|
||||
float32(len(counts.Entities)),
|
||||
[]metricsutil.Label{
|
||||
{Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)},
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
err = a.queryStore.Put(ctx, pq)
|
||||
|
@ -1641,7 +1679,7 @@ func (a *ActivityLog) precomputedQueryWorker() error {
|
|||
}
|
||||
}
|
||||
|
||||
// Delete the intent log
|
||||
// delete the intent log
|
||||
a.view.Delete(ctx, activityIntentLogKey)
|
||||
|
||||
a.logger.Info("finished computing queries", "month", endTime)
|
||||
|
@ -1694,3 +1732,33 @@ func (a *ActivityLog) retentionWorker(currentTime time.Time, retentionMonths int
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Periodic report of number of active entities, with the current month.
|
||||
// We don't break this down by namespace because that would require going to storage (that information
|
||||
// is not currently stored in memory.)
|
||||
func (a *ActivityLog) PartialMonthMetrics(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) {
|
||||
a.fragmentLock.RLock()
|
||||
defer a.fragmentLock.RUnlock()
|
||||
if !a.enabled {
|
||||
// Empty list
|
||||
return []metricsutil.GaugeLabelValues{}, nil
|
||||
}
|
||||
count := len(a.activeEntities)
|
||||
|
||||
return []metricsutil.GaugeLabelValues{
|
||||
{
|
||||
Labels: []metricsutil.Label{},
|
||||
Value: float32(count),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Core) activeEntityGaugeCollector(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) {
|
||||
c.stateLock.RLock()
|
||||
a := c.activityLog
|
||||
c.stateLock.RUnlock()
|
||||
if a == nil {
|
||||
return []metricsutil.GaugeLabelValues{}, nil
|
||||
}
|
||||
return a.PartialMonthMetrics(ctx)
|
||||
}
|
||||
|
|
|
@ -1895,7 +1895,7 @@ func TestActivityLog_Precompute(t *testing.T) {
|
|||
october := timeutil.StartOfMonth(time.Date(2020, 10, 1, 0, 0, 0, 0, time.UTC))
|
||||
november := timeutil.StartOfMonth(time.Date(2020, 11, 1, 0, 0, 0, 0, time.UTC))
|
||||
|
||||
core, _, _ := TestCoreUnsealed(t)
|
||||
core, _, _, sink := TestCoreUnsealedWithMetrics(t)
|
||||
a := core.activityLog
|
||||
ctx := namespace.RootContext(nil)
|
||||
|
||||
|
@ -2137,8 +2137,84 @@ func TestActivityLog_Precompute(t *testing.T) {
|
|||
for i := 0; i <= tc.ExpectedUpTo; i++ {
|
||||
checkPrecomputedQuery(i)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Check metrics on the last precomputed query
|
||||
// (otherwise we need a way to reset the in-memory metrics between test cases.)
|
||||
|
||||
intervals := sink.Data()
|
||||
// Test crossed an interval boundary, don't try to deal with it.
|
||||
if len(intervals) > 1 {
|
||||
t.Skip("Detected interval crossing.")
|
||||
}
|
||||
expectedGauges := []struct {
|
||||
Name string
|
||||
NamespaceLabel string
|
||||
Value float32
|
||||
}{
|
||||
// october values
|
||||
{
|
||||
"identity.entity.active.monthly",
|
||||
"root",
|
||||
15.0,
|
||||
},
|
||||
{
|
||||
"identity.entity.active.monthly",
|
||||
"deleted-bbbbb", // No namespace entry for this fake ID
|
||||
5.0,
|
||||
},
|
||||
{
|
||||
"identity.entity.active.monthly",
|
||||
"deleted-ccccc",
|
||||
5.0,
|
||||
},
|
||||
// august-september values
|
||||
{
|
||||
"identity.entity.active.reporting_period",
|
||||
"root",
|
||||
20.0,
|
||||
},
|
||||
{
|
||||
"identity.entity.active.reporting_period",
|
||||
"deleted-aaaaa",
|
||||
5.0,
|
||||
},
|
||||
{
|
||||
"identity.entity.active.reporting_period",
|
||||
"deleted-bbbbb",
|
||||
10.0,
|
||||
},
|
||||
{
|
||||
"identity.entity.active.reporting_period",
|
||||
"deleted-ccccc",
|
||||
5.0,
|
||||
},
|
||||
}
|
||||
for _, g := range expectedGauges {
|
||||
found := false
|
||||
for _, actual := range intervals[0].Gauges {
|
||||
actualNamespaceLabel := ""
|
||||
for _, l := range actual.Labels {
|
||||
if l.Name == "namespace" {
|
||||
actualNamespaceLabel = l.Value
|
||||
break
|
||||
}
|
||||
}
|
||||
if actual.Name == g.Name && actualNamespaceLabel == g.NamespaceLabel {
|
||||
found = true
|
||||
if actual.Value != g.Value {
|
||||
t.Errorf("Mismatched value for %v %v %v != %v",
|
||||
g.Name, g.NamespaceLabel, actual.Value, g.Value)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("No guage found for %v %v",
|
||||
g.Name, g.NamespaceLabel)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type BlockingInmemStorage struct {
|
||||
|
|
|
@ -211,6 +211,12 @@ func (c *Core) emitMetrics(stopCh chan struct{}) {
|
|||
c.entityGaugeCollectorByMount,
|
||||
"",
|
||||
},
|
||||
{
|
||||
[]string{"identity", "entity", "active", "partial_month"},
|
||||
[]metrics.Label{{"gauge", "identity_active_month"}},
|
||||
c.activeEntityGaugeCollector,
|
||||
"",
|
||||
},
|
||||
}
|
||||
|
||||
// Disable collection if configured, or if we're a performance standby
|
||||
|
|
|
@ -162,6 +162,9 @@ These metrics cover measurement of token, identity, and lease operations, and co
|
|||
| `vault.expire.register` | Time taken for register operations | ms | summary |
|
||||
| `vault.expire.register-auth` | Time taken for register authentication operations which create lease entries without lease ID | ms | summary |
|
||||
| `vault.identity.num_entities` | Number of identity entities stored in Vault | entities | gauge |
|
||||
| `vault.identity.entity.active.monthly` (cluster, namespace) | Number of distinct entities that created a token during the past month, per namespace. Only available if client count is enabled. Reported at the start of each month. | entities | gauge |
|
||||
| `vault.identity.entity.active.partial_month` (cluster) | Total number of distinct entities that created a token during the current month. Only available if client count is enabled. Reported periodically within each month. | entities | gauge |
|
||||
| `vault.identity.entity.active.reporting_period` (cluster, namespace) | Number of distinct entities that created a token in the past N months, as defined by the client count default reporting period. Only available if client count is enabled. Reported at the start of each month. | entities | gauge |
|
||||
| `vault.identity.entity.alias.count` (cluster, namespace, auth_method, mount_point) | Number of identity entity aliases stored in Vault, grouped by the auth mount that created them. This gauge is computed every 10 minutes. | aliases | gauge |
|
||||
| `vault.identity.entity.count` (cluster, namespace) | Number of identity entities stored in Vault, grouped by namespace. | entities | gauge |
|
||||
| `vault.identity.entity.creation` (cluster, namespace, auth_method, mount_point) | Number of identity entities created, grouped by the auth mount that created them. | entities | counter |
|
||||
|
|
Loading…
Reference in New Issue