From 7aa1bce6fb563280428fb616cde7abbe6d30ac7b Mon Sep 17 00:00:00 2001 From: miagilepner Date: Fri, 19 May 2023 16:42:50 +0200 Subject: [PATCH] VAULT-15703: Reload automated reporting (#20680) * support config reloading for census * changelog * second changelog entry for license updates * correct changelog PR --- changelog/20680.txt | 6 ++++++ command/server.go | 3 +++ vault/acme_billing_system_view.go | 10 ++++++++-- vault/activity_log.go | 32 ++++++++++++++++++++++++++----- vault/activity_log_test.go | 12 ++++++------ vault/census.go | 11 +++++++++-- vault/core.go | 17 ++++++++-------- vault/logical_system_activity.go | 23 +++++++++++++++++----- vault/request_handling.go | 7 +++++-- 9 files changed, 91 insertions(+), 30 deletions(-) create mode 100644 changelog/20680.txt diff --git a/changelog/20680.txt b/changelog/20680.txt new file mode 100644 index 000000000..ff80ac466 --- /dev/null +++ b/changelog/20680.txt @@ -0,0 +1,6 @@ +```release-note:improvement +core (enterprise): support reloading configuration for automated reporting via SIGHUP +``` +```release-note:improvement +core (enterprise): license updates trigger a reload of reporting and the activity log +``` \ No newline at end of file diff --git a/command/server.go b/command/server.go index 1eccb3b5e..f950cfef9 100644 --- a/command/server.go +++ b/command/server.go @@ -1670,6 +1670,9 @@ func (c *ServerCommand) Run(args []string) int { c.UI.Error(err.Error()) } + if err := core.ReloadCensus(); err != nil { + c.UI.Error(err.Error()) + } select { case c.licenseReloadedCh <- err: default: diff --git a/vault/acme_billing_system_view.go b/vault/acme_billing_system_view.go index cf87833e4..6b05717f3 100644 --- a/vault/acme_billing_system_view.go +++ b/vault/acme_billing_system_view.go @@ -53,8 +53,14 @@ func (a *acmeBillingSystemViewImpl) CreateActivityCountEventForIdentifiers(ctx c // Log so users can correlate ACME requests to client count tokens. activityType := "acme" - a.core.activityLog.logger.Debug(fmt.Sprintf("Handling ACME client count event for [%v] -> %v", identifiers, clientID)) - a.core.activityLog.AddActivityToFragment(clientID, a.entry.NamespaceID, time.Now().Unix(), activityType, a.entry.Accessor) + a.core.activityLogLock.RLock() + activityLog := a.core.activityLog + a.core.activityLogLock.RUnlock() + if activityLog == nil { + return nil + } + activityLog.logger.Debug(fmt.Sprintf("Handling ACME client count event for [%v] -> %v", identifiers, clientID)) + activityLog.AddActivityToFragment(clientID, a.entry.NamespaceID, time.Now().Unix(), activityType, a.entry.Accessor) return nil } diff --git a/vault/activity_log.go b/vault/activity_log.go index 73ddb33b7..c4dedf977 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1067,12 +1067,25 @@ func (a *ActivityLog) queriesAvailable(ctx context.Context) (bool, error) { // setupActivityLog hooks up the singleton ActivityLog into Core. func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { + c.activityLogLock.Lock() + defer c.activityLogLock.Unlock() + return c.setupActivityLogLocked(ctx, wg) +} + +// setupActivityLogLocked hooks up the singleton ActivityLog into Core. +// this function should be called with activityLogLock. +func (c *Core) setupActivityLogLocked(ctx context.Context, wg *sync.WaitGroup) error { logger := c.baseLogger.Named("activity") c.AddLogger(logger) if os.Getenv("VAULT_DISABLE_ACTIVITY_LOG") != "" { - logger.Info("activity log disabled via environment variable") - return nil + if c.CensusLicensingEnabled() { + logger.Warn("activity log disabled via environment variable while reporting is enabled. " + + "Reporting will override, and the activity log will be enabled") + } else { + logger.Info("activity log disabled via environment variable") + return nil + } } view := c.systemBarrierView.SubView(activitySubPath) @@ -1113,15 +1126,16 @@ func (c *Core) setupActivityLog(ctx context.Context, wg *sync.WaitGroup) error { }(manager.retentionMonths) manager.CensusReportDone = make(chan bool) - go c.activityLog.CensusReport(ctx, c.censusAgent, c.billingStart) + go c.activityLog.CensusReport(ctx, c.CensusAgent(), c.BillingStart()) } return nil } -// stopActivityLog removes the ActivityLog from Core +// stopActivityLogLocked removes the ActivityLog from Core // and frees any resources. -func (c *Core) stopActivityLog() { +// this function should be called with activityLogLock +func (c *Core) stopActivityLogLocked() { // preSeal may run before startActivityLog got a chance to complete. if c.activityLog != nil { // Shut down background worker @@ -1131,6 +1145,14 @@ func (c *Core) stopActivityLog() { c.activityLog = nil } +// stopActivityLog removes the ActivityLog from Core +// and frees any resources. +func (c *Core) stopActivityLog() { + c.activityLogLock.Lock() + defer c.activityLogLock.Unlock() + c.stopActivityLogLocked() +} + func (a *ActivityLog) StartOfNextMonth() time.Time { a.l.RLock() defer a.l.RUnlock() diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index 2ea84a209..19316611b 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -838,8 +838,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 24, "enabled": activityLogEnabledDefaultValue, "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -922,8 +922,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 2, "enabled": "enable", "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } @@ -961,8 +961,8 @@ func TestActivityLog_API_ConfigCRUD(t *testing.T) { "retention_months": 24, "enabled": activityLogEnabledDefaultValue, "queries_available": false, - "reporting_enabled": core.censusLicensingEnabled, - "billing_start_timestamp": core.billingStart, + "reporting_enabled": core.CensusLicensingEnabled(), + "billing_start_timestamp": core.BillingStart(), "minimum_retention_months": core.activityLog.configOverrides.MinimumRetentionMonths, } diff --git a/vault/census.go b/vault/census.go index 9d916dc14..bb1f4bc61 100644 --- a/vault/census.go +++ b/vault/census.go @@ -2,8 +2,15 @@ package vault +import "time" + // CensusAgent is a stub for OSS -type CensusReporter struct{} +type CensusReporter interface{} // setupCensusAgent is a stub for OSS. -func (c *Core) setupCensusAgent() error { return nil } +func (c *Core) setupCensusAgent() error { return nil } +func (c *Core) BillingStart() time.Time { return time.Time{} } +func (c *Core) CensusLicensingEnabled() bool { return false } +func (c *Core) CensusAgent() CensusReporter { return nil } +func (c *Core) ReloadCensus() error { return nil } +func (c *Core) teardownCensusAgent() error { return nil } diff --git a/vault/core.go b/vault/core.go index b870e70e5..29d8370e4 100644 --- a/vault/core.go +++ b/vault/core.go @@ -419,6 +419,8 @@ type Core struct { // activityLog is used to track active client count activityLog *ActivityLog + // activityLogLock protects the activityLog and activityLogConfig + activityLogLock sync.RWMutex // metricsCh is used to stop the metrics streaming metricsCh chan struct{} @@ -633,16 +635,11 @@ type Core struct { clusterHeartbeatInterval time.Duration + // activityLogConfig contains override values for the activity log + // it is protected by activityLogLock activityLogConfig ActivityLogCoreConfig - // censusAgent is the mechanism used for reporting Vault's billing data. - censusAgent CensusReporter - - // censusLicensingEnabled records whether Vault is exporting census metrics - censusLicensingEnabled bool - - // billingStart keeps track of the billing start time for exporting census metrics - billingStart time.Time + censusConfig atomic.Value // activeTime is set on active nodes indicating the time at which this node // became active. @@ -2575,6 +2572,10 @@ func (c *Core) preSeal() error { result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err)) } c.stopActivityLog() + // Clean up the censusAgent on seal + if err := c.teardownCensusAgent(); err != nil { + result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err)) + } if err := c.teardownCredentials(context.Background()); err != nil { result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err)) diff --git a/vault/logical_system_activity.go b/vault/logical_system_activity.go index cf06420a7..607df3014 100644 --- a/vault/logical_system_activity.go +++ b/vault/logical_system_activity.go @@ -203,7 +203,9 @@ func parseStartEndTimes(a *ActivityLog, d *framework.FieldData) (time.Time, time // This endpoint is not used by the UI. The UI's "export" feature is entirely client-side. func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -234,7 +236,9 @@ func (b *SystemBackend) handleClientExport(ctx context.Context, req *logical.Req } func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -264,7 +268,9 @@ func (b *SystemBackend) handleClientMetricQuery(ctx context.Context, req *logica } func (b *SystemBackend) handleMonthlyActivityCount(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -283,7 +289,9 @@ func (b *SystemBackend) handleMonthlyActivityCount(ctx context.Context, req *log } func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -308,15 +316,17 @@ func (b *SystemBackend) handleActivityConfigRead(ctx context.Context, req *logic "retention_months": config.RetentionMonths, "enabled": config.Enabled, "queries_available": qa, - "reporting_enabled": b.Core.censusLicensingEnabled, - "billing_start_timestamp": b.Core.billingStart, + "reporting_enabled": b.Core.CensusLicensingEnabled(), + "billing_start_timestamp": b.Core.BillingStart(), "minimum_retention_months": a.configOverrides.MinimumRetentionMonths, }, }, nil } func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *logical.Request, d *framework.FieldData) (*logical.Response, error) { + b.Core.activityLogLock.RLock() a := b.Core.activityLog + b.Core.activityLogLock.RUnlock() if a == nil { return logical.ErrorResponse("no activity log present"), nil } @@ -367,7 +377,7 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log activityLogEnabledDefault && config.Enabled == "default" && enabledStr == "disable" { // if census is enabled, the activity log cannot be disabled - if a.core.censusLicensingEnabled { + if a.core.CensusLicensingEnabled() { return logical.ErrorResponse("cannot disable the activity log while Reporting is enabled"), logical.ErrInvalidRequest } warnings = append(warnings, "the current monthly segment will be deleted because the activity log was disabled") @@ -382,6 +392,9 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log } } + a.core.activityLogLock.RLock() + minimumRetentionMonths := a.configOverrides.MinimumRetentionMonths + a.core.activityLogLock.RUnlock() enabled := config.Enabled == "enable" if !enabled && config.Enabled == "default" { enabled = activityLogEnabledDefault @@ -391,8 +404,8 @@ func (b *SystemBackend) handleActivityConfigUpdate(ctx context.Context, req *log return logical.ErrorResponse("retention_months cannot be 0 while enabled"), logical.ErrInvalidRequest } - if a.core.censusLicensingEnabled && config.RetentionMonths < a.configOverrides.MinimumRetentionMonths { - return logical.ErrorResponse("retention_months must be at least %d while Reporting is enabled", a.configOverrides.MinimumRetentionMonths), logical.ErrInvalidRequest + if a.core.CensusLicensingEnabled() && config.RetentionMonths < minimumRetentionMonths { + return logical.ErrorResponse("retention_months must be at least %d while Reporting is enabled", minimumRetentionMonths), logical.ErrInvalidRequest } // Store the config diff --git a/vault/request_handling.go b/vault/request_handling.go index a415dca41..29b227774 100644 --- a/vault/request_handling.go +++ b/vault/request_handling.go @@ -434,9 +434,12 @@ func (c *Core) CheckToken(ctx context.Context, req *logical.Request, unauth bool auth.PolicyResults.GrantingPolicies = append(auth.PolicyResults.GrantingPolicies, authResults.SentinelResults.GrantingPolicies...) } + c.activityLogLock.RLock() + activityLog := c.activityLog + c.activityLogLock.RUnlock() // If it is an authenticated ( i.e with vault token ) request, increment client count - if !unauth && c.activityLog != nil { - c.activityLog.HandleTokenUsage(ctx, te, clientID, isTWE) + if !unauth && activityLog != nil { + activityLog.HandleTokenUsage(ctx, te, clientID, isTWE) } return auth, te, nil }