2020-09-02 15:24:17 +00:00
package usagemetrics
import (
"context"
"errors"
"time"
2020-11-13 02:12:12 +00:00
"github.com/armon/go-metrics/prometheus"
2020-09-02 15:24:17 +00:00
"github.com/armon/go-metrics"
"github.com/hashicorp/go-hclog"
2021-06-03 15:25:53 +00:00
"github.com/hashicorp/serf/serf"
2021-08-18 14:27:15 +00:00
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/logging"
2020-09-02 15:24:17 +00:00
)
2020-11-13 02:12:12 +00:00
var Gauges = [ ] prometheus . GaugeDefinition {
{
2020-11-14 00:26:08 +00:00
Name : [ ] string { "consul" , "state" , "nodes" } ,
2020-11-16 19:02:11 +00:00
Help : "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0." ,
2020-11-13 02:12:12 +00:00
} ,
{
2020-11-14 00:26:08 +00:00
Name : [ ] string { "consul" , "state" , "services" } ,
2020-11-16 19:02:11 +00:00
Help : "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0." ,
2020-11-13 02:12:12 +00:00
} ,
{
2020-11-14 00:26:08 +00:00
Name : [ ] string { "consul" , "state" , "service_instances" } ,
2020-11-16 19:02:11 +00:00
Help : "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0." ,
2020-11-13 02:12:12 +00:00
} ,
2021-06-03 15:25:53 +00:00
{
Name : [ ] string { "consul" , "members" , "clients" } ,
Help : "Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6." ,
} ,
{
Name : [ ] string { "consul" , "members" , "servers" } ,
Help : "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6." ,
} ,
2021-09-17 19:36:34 +00:00
{
Name : [ ] string { "consul" , "kv" , "entries" } ,
Help : "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.10.3." ,
} ,
2021-10-05 18:34:24 +00:00
{
Name : [ ] string { "consul" , "state" , "connect_instances" } ,
Help : "Measures the current number of unique connect service instances registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4." ,
} ,
{
Name : [ ] string { "consul" , "state" , "config_entries" } ,
Help : "Measures the current number of unique configuration entries registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4." ,
} ,
2020-11-13 02:12:12 +00:00
}
2021-06-03 15:25:53 +00:00
// getMembersFunc is the signature of the callback that returns a list of serf
// members; memberUsage calls it and keeps only the members whose status is
// serf.StatusAlive.
type getMembersFunc func ( ) [ ] serf . Member
2020-09-02 15:24:17 +00:00
// Config holds the settings for various parameters for the
// UsageMetricsReporter. Its fields are unexported and are set through the
// With* builder methods; NewUsageMetricsReporter validates the required ones
// and fills in defaults for an unset logger and reporting interval.
type Config struct {
// logger is the logger handed to the reporter; a nil logger is replaced by a
// null logger in NewUsageMetricsReporter.
logger hclog . Logger
// metricLabels is the label set accumulated by the builder methods
// (WithDatacenter appends the "datacenter" label).
metricLabels [ ] metrics . Label
// stateProvider supplies the state store; NewUsageMetricsReporter rejects a
// nil value.
stateProvider StateProvider
// tickerInterval is the reporting period; zero selects the 10 second default.
tickerInterval time . Duration
2021-06-03 15:25:53 +00:00
// getMembersFunc returns the cluster members; NewUsageMetricsReporter rejects
// a nil value.
getMembersFunc getMembersFunc
2020-09-02 15:24:17 +00:00
}
// WithDatacenter appends a "datacenter" label with value dc to the labels
// stored on the Config, so that it can be attached to the metrics emitted by
// the UsageMetricsReporter. It returns the receiver to allow chaining.
func (c *Config) WithDatacenter(dc string) *Config {
	dcLabel := metrics.Label{Name: "datacenter", Value: dc}
	c.metricLabels = append(c.metricLabels, dcLabel)
	return c
}
// WithLogger derives a sub-logger named logging.UsageMetrics from the given
// logger and stores it on the Config for the reporter to use while running.
// It returns the receiver to allow chaining.
func (c *Config) WithLogger(logger hclog.Logger) *Config {
	named := logger.Named(logging.UsageMetrics)
	c.logger = named
	return c
}
// WithReportingInterval sets how often the UsageMetricsReporter should emit
// metrics. It returns the receiver to allow chaining.
func (c *Config) WithReportingInterval(dur time.Duration) *Config {
	c.tickerInterval = dur

	return c
}
// WithStateProvider sets the StateProvider from which the reporter obtains
// the state store. It returns the receiver to allow chaining.
func (c *Config) WithStateProvider(sp StateProvider) *Config {
	c.stateProvider = sp

	return c
}
2021-06-03 15:25:53 +00:00
// WithGetMembersFunc sets the callback the reporter uses to list the cluster
// members. It returns the receiver to allow chaining.
func (c *Config) WithGetMembersFunc(fn getMembersFunc) *Config {
	c.getMembersFunc = fn

	return c
}
2020-09-02 15:24:17 +00:00
// StateProvider defines an interface for retrieving a state.Store handle. In
// non-test code, this is satisfied by the fsm.FSM struct.
type StateProvider interface {
// State returns the state store handle; runOnce calls it once at the start
// of every reporting run.
State ( ) * state . Store
}
// UsageMetricsReporter provides functionality for emitting usage metrics into
// the metrics stream. This makes it essentially a translation layer
// between the state store and metrics stream.
//
// Instances are built by NewUsageMetricsReporter, which copies every field
// from a Config.
type UsageMetricsReporter struct {
// logger receives trace, debug and warning messages from the reporter.
logger hclog . Logger
// metricLabels is the label set copied from the Config; presumably attached
// to emitted metrics by the emit helpers, which are not visible here.
metricLabels [ ] metrics . Label
// stateProvider supplies the state store queried on every reporting run.
stateProvider StateProvider
// tickerInterval is the period of the ticker that drives Run.
tickerInterval time . Duration
2021-06-03 15:25:53 +00:00
// getMembersFunc returns the cluster members inspected by memberUsage.
getMembersFunc getMembersFunc
2020-09-02 15:24:17 +00:00
}
func NewUsageMetricsReporter ( cfg * Config ) ( * UsageMetricsReporter , error ) {
if cfg . stateProvider == nil {
return nil , errors . New ( "must provide a StateProvider to usage reporter" )
}
2021-06-03 15:25:53 +00:00
if cfg . getMembersFunc == nil {
return nil , errors . New ( "must provide a getMembersFunc to usage reporter" )
}
2020-09-02 15:24:17 +00:00
if cfg . logger == nil {
cfg . logger = hclog . NewNullLogger ( )
}
if cfg . tickerInterval == 0 {
// Metrics are aggregated every 10 seconds, so we default to that.
cfg . tickerInterval = 10 * time . Second
}
u := & UsageMetricsReporter {
logger : cfg . logger ,
stateProvider : cfg . stateProvider ,
metricLabels : cfg . metricLabels ,
tickerInterval : cfg . tickerInterval ,
2021-06-03 15:25:53 +00:00
getMembersFunc : cfg . getMembersFunc ,
2020-09-02 15:24:17 +00:00
}
return u , nil
}
// Run emits usage metrics once per reporting interval until ctx is done. It
// blocks, so it must be run in its own goroutine; when ctx is cancelled it
// logs a shutdown message, stops its ticker and returns.
func (u *UsageMetricsReporter) Run(ctx context.Context) {
	ticker := time.NewTicker(u.tickerInterval)
	defer ticker.Stop()

	done := ctx.Done()
	for {
		select {
		case <-ticker.C:
			u.runOnce()
		case <-done:
			u.logger.Debug("usage metrics reporter shutting down")
			return
		}
	}
}
func ( u * UsageMetricsReporter ) runOnce ( ) {
2021-09-21 15:52:46 +00:00
u . logger . Trace ( "Starting usage run" )
2020-09-02 15:24:17 +00:00
state := u . stateProvider . State ( )
2021-08-18 14:27:15 +00:00
_ , nodeUsage , err := state . NodeUsage ( )
2020-09-02 15:24:17 +00:00
if err != nil {
u . logger . Warn ( "failed to retrieve nodes from state store" , "error" , err )
}
2021-08-18 14:27:15 +00:00
u . emitNodeUsage ( nodeUsage )
2020-09-02 15:24:17 +00:00
_ , serviceUsage , err := state . ServiceUsage ( )
if err != nil {
u . logger . Warn ( "failed to retrieve services from state store" , "error" , err )
}
2020-10-09 16:01:45 +00:00
u . emitServiceUsage ( serviceUsage )
2021-06-03 15:25:53 +00:00
2021-08-18 14:27:15 +00:00
members := u . memberUsage ( )
u . emitMemberUsage ( members )
2021-09-17 19:36:34 +00:00
_ , kvUsage , err := state . KVUsage ( )
if err != nil {
2021-09-21 15:52:46 +00:00
u . logger . Warn ( "failed to retrieve kv entry usage from state store" , "error" , err )
2021-09-17 19:36:34 +00:00
}
u . emitKVUsage ( kvUsage )
2021-10-07 21:19:55 +00:00
_ , configUsage , err := state . ConfigEntryUsage ( )
2021-10-01 18:22:30 +00:00
if err != nil {
u . logger . Warn ( "failed to retrieve config usage from state store" , "error" , err )
}
2021-10-07 21:19:55 +00:00
u . emitConfigEntryUsage ( configUsage )
2021-06-03 15:25:53 +00:00
}
2021-08-18 14:27:15 +00:00
func ( u * UsageMetricsReporter ) memberUsage ( ) [ ] serf . Member {
2021-06-03 15:25:53 +00:00
if u . getMembersFunc == nil {
2021-08-18 14:27:15 +00:00
return nil
2021-06-03 15:25:53 +00:00
}
mems := u . getMembersFunc ( )
if len ( mems ) <= 0 {
u . logger . Warn ( "cluster reported zero members" )
}
2021-08-18 14:27:15 +00:00
out := make ( [ ] serf . Member , 0 , len ( mems ) )
2021-06-03 15:25:53 +00:00
for _ , m := range mems {
if m . Status != serf . StatusAlive {
continue
}
2021-08-18 14:27:15 +00:00
out = append ( out , m )
2021-06-03 15:25:53 +00:00
}
2021-08-18 14:27:15 +00:00
return out
2020-09-02 15:24:17 +00:00
}