tagged metrics config options should be on telemetry config

better api example, add telemetry documentation
This commit is contained in:
Chelsea Holland Komlo 2017-09-05 21:58:35 +00:00
parent 11a676c901
commit 66fa05405a
12 changed files with 107 additions and 91 deletions

View File

@ -157,6 +157,9 @@ type Client struct {
// baseLabels are used when emitting tagged metrics. All client metrics will
// have these tags, and optionally more.
baseLabels []metrics.Label
// Subset of global telemetry configuration options for the client
clientTelemetry *ClientTelemetry
}
var (
@ -166,8 +169,15 @@ var (
noServersErr = errors.New("no servers")
)
// ClientTelemetry is a subset of global telemetry configuration options that
// are relevant for the client. It is handed to NewClient and consulted each
// time the client emits host, allocation and uptime gauges.
type ClientTelemetry struct {
// DisableTaggedMetrics, when true, stops the client from emitting gauges
// through metrics.SetGaugeWithLabels (the tagged form that carries the
// client's base labels). Tagged gauges are emitted when this is false.
DisableTaggedMetrics bool
// BackwardsCompatibleMetrics, when true, makes the client emit gauges
// through metrics.SetGauge, where identifiers such as the node ID are
// embedded in the metric key instead of being attached as labels. This is
// checked independently of DisableTaggedMetrics.
BackwardsCompatibleMetrics bool
}
// NewClient is used to create a new client from the given configuration
func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulService ConsulServiceAPI, logger *log.Logger) (*Client, error) {
func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulService ConsulServiceAPI, logger *log.Logger, telemetry *ClientTelemetry) (*Client, error) {
// Create the tls wrapper
var tlsWrap tlsutil.RegionWrapper
if cfg.TLSConfig.EnableRPC {
@ -180,6 +190,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
// Create the client
c := &Client{
clientTelemetry: telemetry,
config: cfg,
consulCatalog: consulCatalog,
consulService: consulService,
@ -1879,14 +1890,14 @@ func (c *Client) emitStats() {
// setGaugeForMemoryStats proxies metrics for memory specific statistics
func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats) {
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), c.baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total))
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available))
metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, float32(hStats.Memory.Used))
@ -1897,7 +1908,7 @@ func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats)
// setGaugeForCPUStats proxies metrics for CPU specific statistics
func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats) {
for _, cpu := range hStats.CPU {
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{"cpu", cpu.CPU})
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels)
@ -1906,7 +1917,7 @@ func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats) {
metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total))
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, float32(cpu.User))
metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle))
@ -1918,7 +1929,7 @@ func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats) {
// setGaugeForDiskStats proxies metrics for disk specific statistics
func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats) {
for _, disk := range hStats.DiskStats {
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{"disk", disk.Device})
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels)
@ -1928,7 +1939,7 @@ func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats) {
metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used))
metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available))
@ -1947,14 +1958,14 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
allocated := c.getAllocatedResources(node)
// Emit allocated
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.MemoryMB), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.DiskMB), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.CPU), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "iops"}, float32(allocated.IOPS), c.baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocated", "memory", nodeID}, float32(allocated.MemoryMB))
metrics.SetGauge([]string{"client", "allocated", "disk", nodeID}, float32(allocated.DiskMB))
metrics.SetGauge([]string{"client", "allocated", "cpu", nodeID}, float32(allocated.CPU))
@ -1962,12 +1973,12 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
}
for _, n := range allocated.Networks {
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{"device", n.Device})
metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocated", "network", n.Device, nodeID}, float32(n.MBits))
}
}
@ -1978,14 +1989,14 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
unallocatedCpu := total.CPU - res.CPU - allocated.CPU
unallocatedIops := total.IOPS - res.IOPS - allocated.IOPS
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "iops"}, float32(unallocatedIops), c.baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "unallocated", "memory", nodeID}, float32(unallocatedMem))
metrics.SetGauge([]string{"client", "unallocated", "disk", nodeID}, float32(unallocatedDisk))
metrics.SetGauge([]string{"client", "unallocated", "cpu", nodeID}, float32(unallocatedCpu))
@ -2003,7 +2014,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
unallocatedMbits := totalMbits - n.MBits
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{"device", n.Device})
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels)
}
@ -2016,10 +2027,10 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
// No labels are required so we emit with only a key/value syntax
func (c *Client) setGaugeForUptime(hStats *stats.HostStats) {
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"uptime"}, float32(hStats.Uptime), c.baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"uptime"}, float32(hStats.Uptime))
}
}
@ -2062,7 +2073,7 @@ func (c *Client) emitClientMetrics() {
}
}
if !c.config.DisableTaggedMetrics {
if !c.clientTelemetry.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), c.baseLabels)
@ -2070,7 +2081,7 @@ func (c *Client) emitClientMetrics() {
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), c.baseLabels)
}
if c.config.BackwardsCompatibleMetrics {
if c.clientTelemetry.BackwardsCompatibleMetrics {
metrics.SetGauge([]string{"client", "allocations", "migrating", nodeID}, float32(migrating))
metrics.SetGauge([]string{"client", "allocations", "blocked", nodeID}, float32(blocked))
metrics.SetGauge([]string{"client", "allocations", "pending", nodeID}, float32(pending))

View File

@ -122,7 +122,8 @@ func testClient(t *testing.T, cb func(c *config.Config)) *Client {
catalog := consul.NewMockCatalog(logger)
mockService := newMockConsulServiceClient()
mockService.logger = logger
client, err := NewClient(conf, catalog, mockService, logger)
telemetry := &ClientTelemetry{DisableTaggedMetrics: false, BackwardsCompatibleMetrics: false}
client, err := NewClient(conf, catalog, mockService, logger, telemetry)
if err != nil {
t.Fatalf("err: %v", err)
}
@ -794,7 +795,8 @@ func TestClient_SaveRestoreState(t *testing.T) {
catalog := consul.NewMockCatalog(logger)
mockService := newMockConsulServiceClient()
mockService.logger = logger
c2, err := NewClient(c1.config, catalog, mockService, logger)
telemetry := &ClientTelemetry{DisableTaggedMetrics: false, BackwardsCompatibleMetrics: false}
c2, err := NewClient(c1.config, catalog, mockService, logger, telemetry)
if err != nil {
t.Fatalf("err: %v", err)
}

View File

@ -494,7 +494,12 @@ func (a *Agent) setupClient() error {
}
// Create the client
client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger)
clientTelemetry := &client.ClientTelemetry{
DisableTaggedMetrics: a.config.Telemetry.DisableTaggedMetrics,
BackwardsCompatibleMetrics: a.config.Telemetry.BackwardsCompatibleMetrics,
}
client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger, clientTelemetry)
if err != nil {
return fmt.Errorf("client setup failed: %v", err)
}

View File

@ -60,8 +60,6 @@ client {
gc_inode_usage_threshold = 91
gc_max_allocs = 50
no_host_uuid = false
disable_tagged_metrics = true
backwards_compatible_metrics = true
}
server {
enabled = true
@ -98,6 +96,8 @@ telemetry {
collection_interval = "3s"
publish_allocation_metrics = true
publish_node_metrics = true
disable_tagged_metrics = true
backwards_compatible_metrics = true
}
leave_on_interrupt = true
leave_on_terminate = true

View File

@ -229,14 +229,6 @@ type ClientConfig struct {
// NoHostUUID disables using the host's UUID and will force generation of a
// random UUID.
NoHostUUID *bool `mapstructure:"no_host_uuid"`
// DisableTaggedMetrics disables a new version of generating metrics which
// uses tags
DisableTaggedMetrics bool `mapstructure:"disable_tagged_metrics"`
// BackwardsCompatibleMetrics allows for generating metrics in a simple
// key/value structure as done in older versions of Nomad
BackwardsCompatibleMetrics bool `mapstructure:"backwards_compatible_metrics"`
}
// ACLConfig is configuration specific to the ACL system
@ -371,6 +363,14 @@ type Telemetry struct {
PublishAllocationMetrics bool `mapstructure:"publish_allocation_metrics"`
PublishNodeMetrics bool `mapstructure:"publish_node_metrics"`
// DisableTaggedMetrics disables a new version of generating metrics which
// uses tags
DisableTaggedMetrics bool `mapstructure:"disable_tagged_metrics"`
// BackwardsCompatibleMetrics allows for generating metrics in a simple
// key/value structure as done in older versions of Nomad
BackwardsCompatibleMetrics bool `mapstructure:"backwards_compatible_metrics"`
// Circonus: see https://github.com/circonus-labs/circonus-gometrics
// for more details on the various configuration options.
// Valid configuration combinations:
@ -1105,14 +1105,6 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
result.NoHostUUID = b.NoHostUUID
}
if b.DisableTaggedMetrics {
result.DisableTaggedMetrics = b.DisableTaggedMetrics
}
if b.BackwardsCompatibleMetrics {
result.BackwardsCompatibleMetrics = b.BackwardsCompatibleMetrics
}
// Add the servers
result.Servers = append(result.Servers, b.Servers...)
@ -1214,6 +1206,15 @@ func (a *Telemetry) Merge(b *Telemetry) *Telemetry {
if b.CirconusBrokerSelectTag != "" {
result.CirconusBrokerSelectTag = b.CirconusBrokerSelectTag
}
if b.DisableTaggedMetrics {
result.DisableTaggedMetrics = b.DisableTaggedMetrics
}
if b.BackwardsCompatibleMetrics {
result.BackwardsCompatibleMetrics = b.BackwardsCompatibleMetrics
}
return &result
}

View File

@ -357,8 +357,6 @@ func parseClient(result **ClientConfig, list *ast.ObjectList) error {
"gc_parallel_destroys",
"gc_max_allocs",
"no_host_uuid",
"disable_tagged_metrics",
"backwards_compatible_metrics",
}
if err := checkHCLKeys(listVal, valid); err != nil {
return err
@ -635,6 +633,8 @@ func parseTelemetry(result **Telemetry, list *ast.ObjectList) error {
"circonus_check_tags",
"circonus_broker_id",
"circonus_broker_select_tag",
"disable_tagged_metrics",
"backwards_compatible_metrics",
}
if err := checkHCLKeys(listVal, valid); err != nil {
return err

View File

@ -75,14 +75,12 @@ func TestConfig_Parse(t *testing.T) {
ReservedPorts: "1,100,10-12",
ParsedReservedPorts: []int{1, 10, 11, 12, 100},
},
GCInterval: 6 * time.Second,
GCParallelDestroys: 6,
GCDiskUsageThreshold: 82,
GCInodeUsageThreshold: 91,
GCMaxAllocs: 50,
NoHostUUID: helper.BoolToPtr(false),
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
GCInterval: 6 * time.Second,
GCParallelDestroys: 6,
GCDiskUsageThreshold: 82,
GCInodeUsageThreshold: 91,
GCMaxAllocs: 50,
NoHostUUID: helper.BoolToPtr(false),
},
Server: &ServerConfig{
Enabled: true,
@ -113,14 +111,16 @@ func TestConfig_Parse(t *testing.T) {
ReplicationToken: "foobar",
},
Telemetry: &Telemetry{
StatsiteAddr: "127.0.0.1:1234",
StatsdAddr: "127.0.0.1:2345",
DisableHostname: true,
UseNodeName: false,
CollectionInterval: "3s",
collectionInterval: 3 * time.Second,
PublishAllocationMetrics: true,
PublishNodeMetrics: true,
StatsiteAddr: "127.0.0.1:1234",
StatsdAddr: "127.0.0.1:2345",
DisableHostname: true,
UseNodeName: false,
CollectionInterval: "3s",
collectionInterval: 3 * time.Second,
PublishAllocationMetrics: true,
PublishNodeMetrics: true,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
},
LeaveOnInt: true,
LeaveOnTerm: true,

View File

@ -55,6 +55,8 @@ func TestConfig_Merge(t *testing.T) {
StatsdAddr: "127.0.0.1:8125",
DataDogAddr: "127.0.0.1:8125",
DisableHostname: false,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
CirconusAPIToken: "0",
CirconusAPIApp: "nomadic",
CirconusAPIURL: "http://api.circonus.com/v2",
@ -89,8 +91,6 @@ func TestConfig_Merge(t *testing.T) {
ReservedPorts: "1,10-30,55",
ParsedReservedPorts: []int{1, 2, 4},
},
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
},
Server: &ServerConfig{
Enabled: false,
@ -185,6 +185,8 @@ func TestConfig_Merge(t *testing.T) {
DisableHostname: true,
PublishNodeMetrics: true,
PublishAllocationMetrics: true,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
CirconusAPIToken: "1",
CirconusAPIApp: "nomad",
CirconusAPIURL: "https://api.circonus.com/v2",
@ -226,12 +228,10 @@ func TestConfig_Merge(t *testing.T) {
ReservedPorts: "2,10-30,55",
ParsedReservedPorts: []int{1, 2, 3},
},
GCInterval: 6 * time.Second,
GCParallelDestroys: 6,
GCDiskUsageThreshold: 71,
GCInodeUsageThreshold: 86,
DisableTaggedMetrics: true,
BackwardsCompatibleMetrics: true,
GCInterval: 6 * time.Second,
GCParallelDestroys: 6,
GCDiskUsageThreshold: 71,
GCInodeUsageThreshold: 86,
},
Server: &ServerConfig{
Enabled: true,

View File

@ -1,5 +1,5 @@
---
layout: metrics
layout: api
page_title: Metrics - HTTP API
sidebar_current: api-metrics
description: |-
@ -25,8 +25,7 @@ The table below shows this endpoint's support for
### Sample Request
```text
$ curl \
https://nomad.rocks/v1/metrics
$ curl https://nomad.rocks/v1/metrics
```
### Sample Response
@ -35,16 +34,14 @@ $ curl \
{
"Counters":[
{
"Count":1,
"Labels":{
},
"Count":11,
"Labels":{},
"Max":1.0,
"Mean":1.0,
"Min":1.0,
"Name":"nomad.client.consul.sync_failure",
"Name":"nomad.nomad.rpc.query",
"Stddev":0.0,
"Sum":1.0
"Sum":11.0
}
],
"Gauges":[
@ -68,11 +65,8 @@ $ curl \
"Samples":[
{
"Count":20,
"Labels":{
},
"Max":0.03
544100001454353,
"Labels":{},
"Max":0.03544100001454353,
"Mean":0.023678050097078084,
"Min":0.00956599973142147,
"Name":"nomad.memberlist.gossip",
@ -81,12 +75,9 @@ $ curl \
},
{
"Count":1,
"Labels":{
},
"Labels":{},
"Max":0.0964059978723526,
"Mean
":0.0964059978723526,
"Mean":0.0964059978723526,
"Min":0.0964059978723526,
"Name":"nomad.nomad.client.update_status",
"Stddev":0.0,

View File

@ -58,6 +58,14 @@ The following options are available on all telemetry configurations.
- `publish_node_metrics` `(bool: false)` - Specifies if Nomad should publish
runtime metrics of nodes.
- `backwards_compatible_metrics` `(bool: false)` - Specifies if Nomad should
publish metrics in the untagged key/value format used by versions prior to
0.7. As of version 0.7, Nomad emits tagged metrics by default.
- `disable_tagged_metrics` `(bool: false)` - Specifies if Nomad should stop
emitting tagged metrics. To keep publishing metrics in the format used by
versions prior to 0.7, also set `backwards_compatible_metrics` to `true`.
### `statsite`
These `telemetry` parameters apply to
@ -164,10 +172,4 @@ These `telemetry` parameters apply to
datacenter, dc:sfo).
### Tagged Metrics
We are slowly moving towards supporting tagged metrics in Nomad. Currently,
tagged metrics are available on Nomad clients. For more information about how
tagged metrics are useful for analysis, see this article by DataDog on [tagged
metrics](https://www.datadoghq.com/blog/the-power-of-tagged-metrics/).

View File

@ -16,7 +16,7 @@ This data can be accessed via an HTTP endpoint or via sending a signal to the
Nomad process.
Via HTTP, this data is available at `/metrics`. See
[Metrics](/api/metrics_endpoint.html) for more information.
[Metrics](/api/metrics.html) for more information.
To view this data via sending a signal to the Nomad process: on Unix,

View File

@ -51,6 +51,10 @@
<a href="/api/nodes.html">Nodes</a>
</li>
<li<%= sidebar_current("api-metrics") %>>
<a href="/api/metrics.html">Metrics</a>
</li>
<li<%= sidebar_current("api-operator") %>>
<a href="/api/operator.html">Operator</a>
</li>