501 lines
13 KiB
Go
501 lines
13 KiB
Go
// Copyright 2016 Circonus, Inc. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package checkmgr provides a check management interface to circonus-gometrics
|
|
package checkmgr
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"net/url"
|
|
"os"
|
|
"path"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/circonus-labs/circonus-gometrics/api"
|
|
"github.com/pkg/errors"
|
|
"github.com/tv42/httpunix"
|
|
)
|
|
|
|
// Check management offers:
|
|
//
|
|
// Create a check if one cannot be found matching specific criteria
|
|
// Manage metrics in the supplied check (enabling new metrics as they are submitted)
|
|
//
|
|
// To disable check management, leave Config.Api.Token.Key blank
|
|
//
|
|
// use cases:
|
|
// configure without api token - check management disabled
|
|
// - configuration parameters other than Check.SubmissionUrl, Debug and Log are ignored
|
|
// - note: SubmissionUrl is **required** in this case as there is no way to derive w/o api
|
|
// configure with api token - check management enabled
|
|
// - all other configuration parameters affect how the trap url is obtained
|
|
// 1. provided (Check.SubmissionUrl)
|
|
// 2. via check lookup (CheckConfig.Id)
|
|
// 3. via a search using CheckConfig.InstanceId + CheckConfig.SearchTag
|
|
// 4. a new check is created
|
|
|
|
const (
|
|
defaultCheckType = "httptrap"
|
|
defaultTrapMaxURLAge = "60s" // 60 seconds
|
|
defaultBrokerMaxResponseTime = "500ms" // 500 milliseconds
|
|
defaultForceMetricActivation = "false"
|
|
statusActive = "active"
|
|
)
|
|
|
|
// CheckConfig options for check
|
|
type CheckConfig struct {
|
|
// a specific submission url
|
|
SubmissionURL string
|
|
// a specific check id (not check bundle id)
|
|
ID string
|
|
// unique instance id string
|
|
// used to search for a check to use
|
|
// used as check.target when creating a check
|
|
InstanceID string
|
|
// explicitly set check.target (default: instance id)
|
|
TargetHost string
|
|
// a custom display name for the check (as viewed in UI Checks)
|
|
// default: instance id
|
|
DisplayName string
|
|
// unique check searching tag (or tags)
|
|
// used to search for a check to use (combined with instanceid)
|
|
// used as a regular tag when creating a check
|
|
SearchTag string
|
|
// httptrap check secret (for creating a check)
|
|
Secret string
|
|
// additional tags to add to a check (when creating a check)
|
|
// these tags will not be added to an existing check
|
|
Tags string
|
|
// max amount of time to to hold on to a submission url
|
|
// when a given submission fails (due to retries) if the
|
|
// time the url was last updated is > than this, the trap
|
|
// url will be refreshed (e.g. if the broker is changed
|
|
// in the UI) **only relevant when check management is enabled**
|
|
// e.g. 5m, 30m, 1h, etc.
|
|
MaxURLAge string
|
|
// force metric activation - if a metric has been disabled via the UI
|
|
// the default behavior is to *not* re-activate the metric; this setting
|
|
// overrides the behavior and will re-activate the metric when it is
|
|
// encountered. "(true|false)", default "false"
|
|
ForceMetricActivation string
|
|
// Type of check to use (default: httptrap)
|
|
Type string
|
|
// Custom check config fields (default: none)
|
|
CustomConfigFields map[string]string
|
|
}
|
|
|
|
// BrokerConfig options for broker
|
|
type BrokerConfig struct {
|
|
// a specific broker id (numeric portion of cid)
|
|
ID string
|
|
// one or more tags used to select 1-n brokers from which to select
|
|
// when creating a new check (e.g. datacenter:abc or loc:dfw,dc:abc)
|
|
SelectTag string
|
|
// for a broker to be considered viable it must respond to a
|
|
// connection attempt within this amount of time e.g. 200ms, 2s, 1m
|
|
MaxResponseTime string
|
|
// TLS configuration to use when communicating within broker
|
|
TLSConfig *tls.Config
|
|
}
|
|
|
|
// Config options
|
|
type Config struct {
|
|
Log *log.Logger
|
|
Debug bool
|
|
|
|
// Circonus API config
|
|
API api.Config
|
|
// Check specific configuration options
|
|
Check CheckConfig
|
|
// Broker specific configuration options
|
|
Broker BrokerConfig
|
|
}
|
|
|
|
// CheckTypeType check type
|
|
type CheckTypeType string
|
|
|
|
// CheckInstanceIDType check instance id
|
|
type CheckInstanceIDType string
|
|
|
|
// CheckTargetType check target/host
|
|
type CheckTargetType string
|
|
|
|
// CheckSecretType check secret
|
|
type CheckSecretType string
|
|
|
|
// CheckTagsType check tags
|
|
type CheckTagsType string
|
|
|
|
// CheckDisplayNameType check display name
|
|
type CheckDisplayNameType string
|
|
|
|
// BrokerCNType broker common name
|
|
type BrokerCNType string
|
|
|
|
// CheckManager settings
|
|
type CheckManager struct {
|
|
enabled bool
|
|
Log *log.Logger
|
|
Debug bool
|
|
apih *api.API
|
|
|
|
initialized bool
|
|
initializedmu sync.RWMutex
|
|
|
|
// check
|
|
checkType CheckTypeType
|
|
checkID api.IDType
|
|
checkInstanceID CheckInstanceIDType
|
|
checkTarget CheckTargetType
|
|
checkSearchTag api.TagType
|
|
checkSecret CheckSecretType
|
|
checkTags api.TagType
|
|
customConfigFields map[string]string
|
|
checkSubmissionURL api.URLType
|
|
checkDisplayName CheckDisplayNameType
|
|
forceMetricActivation bool
|
|
forceCheckUpdate bool
|
|
|
|
// metric tags
|
|
metricTags map[string][]string
|
|
mtmu sync.Mutex
|
|
|
|
// broker
|
|
brokerID api.IDType
|
|
brokerSelectTag api.TagType
|
|
brokerMaxResponseTime time.Duration
|
|
brokerTLS *tls.Config
|
|
|
|
// state
|
|
checkBundle *api.CheckBundle
|
|
cbmu sync.Mutex
|
|
availableMetrics map[string]bool
|
|
availableMetricsmu sync.Mutex
|
|
trapURL api.URLType
|
|
trapCN BrokerCNType
|
|
trapLastUpdate time.Time
|
|
trapMaxURLAge time.Duration
|
|
trapmu sync.Mutex
|
|
certPool *x509.CertPool
|
|
sockRx *regexp.Regexp
|
|
}
|
|
|
|
// Trap config
|
|
type Trap struct {
|
|
URL *url.URL
|
|
TLS *tls.Config
|
|
IsSocket bool
|
|
SockTransport *httpunix.Transport
|
|
}
|
|
|
|
// NewCheckManager returns a new check manager
|
|
func NewCheckManager(cfg *Config) (*CheckManager, error) {
|
|
return New(cfg)
|
|
}
|
|
|
|
// New returns a new check manager
|
|
func New(cfg *Config) (*CheckManager, error) {
|
|
|
|
if cfg == nil {
|
|
return nil, errors.New("invalid Check Manager configuration (nil)")
|
|
}
|
|
|
|
cm := &CheckManager{enabled: true, initialized: false}
|
|
|
|
// Setup logging for check manager
|
|
cm.Debug = cfg.Debug
|
|
cm.Log = cfg.Log
|
|
if cm.Debug && cm.Log == nil {
|
|
cm.Log = log.New(os.Stderr, "", log.LstdFlags)
|
|
}
|
|
if cm.Log == nil {
|
|
cm.Log = log.New(ioutil.Discard, "", log.LstdFlags)
|
|
}
|
|
|
|
{
|
|
rx, err := regexp.Compile(`^http\+unix://(?P<sockfile>.+)/write/(?P<id>.+)$`)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "compiling socket regex")
|
|
}
|
|
cm.sockRx = rx
|
|
}
|
|
|
|
if cfg.Check.SubmissionURL != "" {
|
|
cm.checkSubmissionURL = api.URLType(cfg.Check.SubmissionURL)
|
|
}
|
|
|
|
// Blank API Token *disables* check management
|
|
if cfg.API.TokenKey == "" {
|
|
cm.enabled = false
|
|
}
|
|
|
|
if !cm.enabled && cm.checkSubmissionURL == "" {
|
|
return nil, errors.New("invalid check manager configuration (no API token AND no submission url)")
|
|
}
|
|
|
|
if cm.enabled {
|
|
// initialize api handle
|
|
cfg.API.Debug = cm.Debug
|
|
cfg.API.Log = cm.Log
|
|
apih, err := api.New(&cfg.API)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "initializing api client")
|
|
}
|
|
cm.apih = apih
|
|
}
|
|
|
|
// initialize check related data
|
|
if cfg.Check.Type != "" {
|
|
cm.checkType = CheckTypeType(cfg.Check.Type)
|
|
} else {
|
|
cm.checkType = defaultCheckType
|
|
}
|
|
|
|
idSetting := "0"
|
|
if cfg.Check.ID != "" {
|
|
idSetting = cfg.Check.ID
|
|
}
|
|
id, err := strconv.Atoi(idSetting)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "converting check id")
|
|
}
|
|
cm.checkID = api.IDType(id)
|
|
|
|
cm.checkInstanceID = CheckInstanceIDType(cfg.Check.InstanceID)
|
|
cm.checkTarget = CheckTargetType(cfg.Check.TargetHost)
|
|
cm.checkDisplayName = CheckDisplayNameType(cfg.Check.DisplayName)
|
|
cm.checkSecret = CheckSecretType(cfg.Check.Secret)
|
|
|
|
fma := defaultForceMetricActivation
|
|
if cfg.Check.ForceMetricActivation != "" {
|
|
fma = cfg.Check.ForceMetricActivation
|
|
}
|
|
fm, err := strconv.ParseBool(fma)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "parsing force metric activation")
|
|
}
|
|
cm.forceMetricActivation = fm
|
|
|
|
_, an := path.Split(os.Args[0])
|
|
hn, err := os.Hostname()
|
|
if err != nil {
|
|
hn = "unknown"
|
|
}
|
|
if cm.checkInstanceID == "" {
|
|
cm.checkInstanceID = CheckInstanceIDType(fmt.Sprintf("%s:%s", hn, an))
|
|
}
|
|
if cm.checkDisplayName == "" {
|
|
cm.checkDisplayName = CheckDisplayNameType(cm.checkInstanceID)
|
|
}
|
|
if cm.checkTarget == "" {
|
|
cm.checkTarget = CheckTargetType(cm.checkInstanceID)
|
|
}
|
|
|
|
if cfg.Check.SearchTag == "" {
|
|
cm.checkSearchTag = []string{fmt.Sprintf("service:%s", an)}
|
|
} else {
|
|
cm.checkSearchTag = strings.Split(strings.Replace(cfg.Check.SearchTag, " ", "", -1), ",")
|
|
}
|
|
|
|
if cfg.Check.Tags != "" {
|
|
cm.checkTags = strings.Split(strings.Replace(cfg.Check.Tags, " ", "", -1), ",")
|
|
}
|
|
|
|
cm.customConfigFields = make(map[string]string)
|
|
if len(cfg.Check.CustomConfigFields) > 0 {
|
|
for fld, val := range cfg.Check.CustomConfigFields {
|
|
cm.customConfigFields[fld] = val
|
|
}
|
|
}
|
|
|
|
dur := cfg.Check.MaxURLAge
|
|
if dur == "" {
|
|
dur = defaultTrapMaxURLAge
|
|
}
|
|
maxDur, err := time.ParseDuration(dur)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "parsing max url age")
|
|
}
|
|
cm.trapMaxURLAge = maxDur
|
|
|
|
// setup broker
|
|
idSetting = "0"
|
|
if cfg.Broker.ID != "" {
|
|
idSetting = cfg.Broker.ID
|
|
}
|
|
id, err = strconv.Atoi(idSetting)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "parsing broker id")
|
|
}
|
|
cm.brokerID = api.IDType(id)
|
|
|
|
if cfg.Broker.SelectTag != "" {
|
|
cm.brokerSelectTag = strings.Split(strings.Replace(cfg.Broker.SelectTag, " ", "", -1), ",")
|
|
}
|
|
|
|
dur = cfg.Broker.MaxResponseTime
|
|
if dur == "" {
|
|
dur = defaultBrokerMaxResponseTime
|
|
}
|
|
maxDur, err = time.ParseDuration(dur)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "parsing broker max response time")
|
|
}
|
|
cm.brokerMaxResponseTime = maxDur
|
|
|
|
// add user specified tls config for broker if provided
|
|
cm.brokerTLS = cfg.Broker.TLSConfig
|
|
|
|
// metrics
|
|
cm.availableMetrics = make(map[string]bool)
|
|
cm.metricTags = make(map[string][]string)
|
|
|
|
return cm, nil
|
|
}
|
|
|
|
// Initialize for sending metrics
|
|
func (cm *CheckManager) Initialize() {
|
|
|
|
// if not managing the check, quicker initialization
|
|
if !cm.enabled {
|
|
err := cm.initializeTrapURL()
|
|
if err == nil {
|
|
cm.initializedmu.Lock()
|
|
cm.initialized = true
|
|
cm.initializedmu.Unlock()
|
|
} else {
|
|
cm.Log.Printf("[WARN] error initializing trap %s", err.Error())
|
|
}
|
|
return
|
|
}
|
|
|
|
// background initialization when we have to reach out to the api
|
|
go func() {
|
|
cm.apih.EnableExponentialBackoff()
|
|
err := cm.initializeTrapURL()
|
|
if err == nil {
|
|
cm.initializedmu.Lock()
|
|
cm.initialized = true
|
|
cm.initializedmu.Unlock()
|
|
} else {
|
|
cm.Log.Printf("[WARN] error initializing trap %s", err.Error())
|
|
}
|
|
cm.apih.DisableExponentialBackoff()
|
|
}()
|
|
}
|
|
|
|
// IsReady reflects if the check has been initialied and metrics can be sent to Circonus
|
|
func (cm *CheckManager) IsReady() bool {
|
|
cm.initializedmu.RLock()
|
|
defer cm.initializedmu.RUnlock()
|
|
return cm.initialized
|
|
}
|
|
|
|
// GetSubmissionURL returns submission url for circonus
|
|
func (cm *CheckManager) GetSubmissionURL() (*Trap, error) {
|
|
if cm.trapURL == "" {
|
|
return nil, errors.Errorf("get submission url - submission url unavailable")
|
|
}
|
|
|
|
trap := &Trap{}
|
|
|
|
u, err := url.Parse(string(cm.trapURL))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "get submission url")
|
|
}
|
|
trap.URL = u
|
|
|
|
if u.Scheme == "http+unix" {
|
|
service := "circonus-agent"
|
|
sockPath := ""
|
|
metricID := ""
|
|
|
|
subNames := cm.sockRx.SubexpNames()
|
|
matches := cm.sockRx.FindAllStringSubmatch(string(cm.trapURL), -1)
|
|
for _, match := range matches {
|
|
for idx, val := range match {
|
|
switch subNames[idx] {
|
|
case "sockfile":
|
|
sockPath = val
|
|
case "id":
|
|
metricID = val
|
|
}
|
|
}
|
|
}
|
|
|
|
if sockPath == "" || metricID == "" {
|
|
return nil, errors.Errorf("get submission url - invalid socket url (%s)", cm.trapURL)
|
|
}
|
|
|
|
u, err = url.Parse(fmt.Sprintf("http+unix://%s/write/%s", service, metricID))
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "get submission url")
|
|
}
|
|
trap.URL = u
|
|
|
|
trap.SockTransport = &httpunix.Transport{
|
|
DialTimeout: 100 * time.Millisecond,
|
|
RequestTimeout: 1 * time.Second,
|
|
ResponseHeaderTimeout: 1 * time.Second,
|
|
}
|
|
trap.SockTransport.RegisterLocation(service, sockPath)
|
|
trap.IsSocket = true
|
|
}
|
|
|
|
if u.Scheme == "https" {
|
|
// preference user-supplied TLS configuration
|
|
if cm.brokerTLS != nil {
|
|
trap.TLS = cm.brokerTLS
|
|
return trap, nil
|
|
}
|
|
|
|
if cm.certPool == nil {
|
|
if err := cm.loadCACert(); err != nil {
|
|
return nil, errors.Wrap(err, "get submission url")
|
|
}
|
|
}
|
|
t := &tls.Config{
|
|
RootCAs: cm.certPool,
|
|
}
|
|
if cm.trapCN != "" {
|
|
t.ServerName = string(cm.trapCN)
|
|
}
|
|
trap.TLS = t
|
|
}
|
|
|
|
return trap, nil
|
|
}
|
|
|
|
// ResetTrap URL, force request to the API for the submission URL and broker ca cert
|
|
func (cm *CheckManager) ResetTrap() error {
|
|
if cm.trapURL == "" {
|
|
return nil
|
|
}
|
|
|
|
cm.trapURL = ""
|
|
cm.certPool = nil // force re-fetching CA cert (if custom TLS config not supplied)
|
|
return cm.initializeTrapURL()
|
|
}
|
|
|
|
// RefreshTrap check when the last time the URL was reset, reset if needed
|
|
func (cm *CheckManager) RefreshTrap() error {
|
|
if cm.trapURL == "" {
|
|
return nil
|
|
}
|
|
|
|
if time.Since(cm.trapLastUpdate) >= cm.trapMaxURLAge {
|
|
return cm.ResetTrap()
|
|
}
|
|
|
|
return nil
|
|
}
|