409 lines
13 KiB
Go
409 lines
13 KiB
Go
package client
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/hashicorp/nomad/client/config"
|
|
"github.com/hashicorp/nomad/client/driver"
|
|
"github.com/hashicorp/nomad/client/fingerprint"
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// FingerprintManager runs a client fingerprinters on a continuous basis, and
|
|
// updates the client when the node has changed
|
|
type FingerprintManager struct {
|
|
getConfig func() *config.Config
|
|
node *structs.Node
|
|
nodeLock sync.Mutex
|
|
shutdownCh chan struct{}
|
|
|
|
// updateNodeAttributes is a callback to the client to update the state of its
|
|
// associated node
|
|
updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node
|
|
|
|
// updateNodeFromDriver is a callback to the client to update the state of a
|
|
// specific driver for the node
|
|
updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node
|
|
logger *log.Logger
|
|
}
|
|
|
|
// NewFingerprintManager is a constructor that creates and returns an instance
|
|
// of FingerprintManager
|
|
func NewFingerprintManager(getConfig func() *config.Config,
|
|
node *structs.Node,
|
|
shutdownCh chan struct{},
|
|
updateNodeAttributes func(*cstructs.FingerprintResponse) *structs.Node,
|
|
updateNodeFromDriver func(string, *structs.DriverInfo, *structs.DriverInfo) *structs.Node,
|
|
logger *log.Logger) *FingerprintManager {
|
|
return &FingerprintManager{
|
|
getConfig: getConfig,
|
|
updateNodeAttributes: updateNodeAttributes,
|
|
updateNodeFromDriver: updateNodeFromDriver,
|
|
node: node,
|
|
shutdownCh: shutdownCh,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
// setNode updates the current client node
|
|
func (fm *FingerprintManager) setNode(node *structs.Node) {
|
|
fm.nodeLock.Lock()
|
|
defer fm.nodeLock.Unlock()
|
|
fm.node = node
|
|
}
|
|
|
|
// getNode returns the current client node
|
|
func (fm *FingerprintManager) getNode() *structs.Node {
|
|
fm.nodeLock.Lock()
|
|
defer fm.nodeLock.Unlock()
|
|
return fm.node
|
|
}
|
|
|
|
// Run starts the process of fingerprinting the node. It does an initial pass,
|
|
// identifying whitelisted and blacklisted fingerprints/drivers. Then, for
|
|
// those which require periotic checking, it starts a periodic process for
|
|
// each.
|
|
func (fp *FingerprintManager) Run() error {
|
|
// First, set up all fingerprints
|
|
cfg := fp.getConfig()
|
|
whitelistFingerprints := cfg.ReadStringListToMap("fingerprint.whitelist")
|
|
whitelistFingerprintsEnabled := len(whitelistFingerprints) > 0
|
|
blacklistFingerprints := cfg.ReadStringListToMap("fingerprint.blacklist")
|
|
|
|
fp.logger.Printf("[DEBUG] client.fingerprint_manager: built-in fingerprints: %v", fingerprint.BuiltinFingerprints())
|
|
|
|
var availableFingerprints []string
|
|
var skippedFingerprints []string
|
|
for _, name := range fingerprint.BuiltinFingerprints() {
|
|
// Skip modules that are not in the whitelist if it is enabled.
|
|
if _, ok := whitelistFingerprints[name]; whitelistFingerprintsEnabled && !ok {
|
|
skippedFingerprints = append(skippedFingerprints, name)
|
|
continue
|
|
}
|
|
// Skip modules that are in the blacklist
|
|
if _, ok := blacklistFingerprints[name]; ok {
|
|
skippedFingerprints = append(skippedFingerprints, name)
|
|
continue
|
|
}
|
|
|
|
availableFingerprints = append(availableFingerprints, name)
|
|
}
|
|
|
|
if err := fp.setupFingerprinters(availableFingerprints); err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(skippedFingerprints) != 0 {
|
|
fp.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprint modules skipped due to white/blacklist: %v", skippedFingerprints)
|
|
}
|
|
|
|
// Next, set up drivers
|
|
// Build the white/blacklists of drivers.
|
|
whitelistDrivers := cfg.ReadStringListToMap("driver.whitelist")
|
|
whitelistDriversEnabled := len(whitelistDrivers) > 0
|
|
blacklistDrivers := cfg.ReadStringListToMap("driver.blacklist")
|
|
|
|
var availDrivers []string
|
|
var skippedDrivers []string
|
|
|
|
for name := range driver.BuiltinDrivers {
|
|
// Skip fingerprinting drivers that are not in the whitelist if it is
|
|
// enabled.
|
|
if _, ok := whitelistDrivers[name]; whitelistDriversEnabled && !ok {
|
|
skippedDrivers = append(skippedDrivers, name)
|
|
continue
|
|
}
|
|
// Skip fingerprinting drivers that are in the blacklist
|
|
if _, ok := blacklistDrivers[name]; ok {
|
|
skippedDrivers = append(skippedDrivers, name)
|
|
continue
|
|
}
|
|
|
|
availDrivers = append(availDrivers, name)
|
|
}
|
|
|
|
if err := fp.setupDrivers(availDrivers); err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(skippedDrivers) > 0 {
|
|
fp.logger.Printf("[DEBUG] client.fingerprint_manager: drivers skipped due to white/blacklist: %v", skippedDrivers)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// setupFingerprints is used to fingerprint the node to see if these attributes are
|
|
// supported
|
|
func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
|
|
var appliedFingerprints []string
|
|
|
|
for _, name := range fingerprints {
|
|
f, err := fingerprint.NewFingerprint(name, fm.logger)
|
|
|
|
if err != nil {
|
|
fm.logger.Printf("[ERR] client.fingerprint_manager: fingerprinting for %v failed: %+v", name, err)
|
|
return err
|
|
}
|
|
|
|
detected, err := fm.fingerprint(name, f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// log the fingerprinters which have been applied
|
|
if detected {
|
|
appliedFingerprints = append(appliedFingerprints, name)
|
|
}
|
|
|
|
p, period := f.Periodic()
|
|
if p {
|
|
go fm.runFingerprint(f, period, name)
|
|
}
|
|
}
|
|
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected fingerprints %v", appliedFingerprints)
|
|
return nil
|
|
}
|
|
|
|
// setupDrivers is used to fingerprint the node to see if these drivers are
|
|
// supported
|
|
func (fm *FingerprintManager) setupDrivers(drivers []string) error {
|
|
var availDrivers []string
|
|
driverCtx := driver.NewDriverContext("", "", "", "", fm.getConfig(), fm.getNode(), fm.logger, nil)
|
|
for _, name := range drivers {
|
|
|
|
d, err := driver.NewDriver(name, driverCtx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Pass true for whether the health check is periodic here, so that the
|
|
// fingerprinter will not set the initial health check status (this is set
|
|
// below, with an empty health status so that a node event is not
|
|
// triggered)
|
|
// Later, the periodic health checker will update this value for drivers
|
|
// where health checks are enabled.
|
|
detected, err := fm.fingerprintDriver(name, d, true)
|
|
if err != nil {
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %v failed: %+v", name, err)
|
|
return err
|
|
}
|
|
|
|
// Start a periodic watcher to detect changes to a drivers health and
|
|
// attributes.
|
|
go fm.watchDriver(d, name)
|
|
|
|
// Log the fingerprinters which have been applied
|
|
if detected {
|
|
availDrivers = append(availDrivers, name)
|
|
}
|
|
}
|
|
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: detected drivers %v", availDrivers)
|
|
return nil
|
|
}
|
|
|
|
// runFingerprint runs each fingerprinter individually on an ongoing basis
|
|
func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period time.Duration, name string) {
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting %s every %v", name, period)
|
|
|
|
timer := time.NewTimer(period)
|
|
defer timer.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-timer.C:
|
|
timer.Reset(period)
|
|
|
|
_, err := fm.fingerprint(name, f)
|
|
if err != nil {
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for %v failed: %+v", name, err)
|
|
continue
|
|
}
|
|
|
|
case <-fm.shutdownCh:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// fingerprint does an initial fingerprint of the client. If the fingerprinter
|
|
// is meant to be run continuously, a process is launched to perform this
|
|
// fingerprint on an ongoing basis in the background.
|
|
func (fm *FingerprintManager) fingerprint(name string, f fingerprint.Fingerprint) (bool, error) {
|
|
var response cstructs.FingerprintResponse
|
|
|
|
fm.nodeLock.Lock()
|
|
request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node}
|
|
err := f.Fingerprint(request, &response)
|
|
fm.nodeLock.Unlock()
|
|
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
if node := fm.updateNodeAttributes(&response); node != nil {
|
|
fm.setNode(node)
|
|
}
|
|
|
|
return response.Detected, nil
|
|
}
|
|
|
|
// watchDrivers facilitates the different periods between fingerprint and
|
|
// health checking a driver
|
|
func (fm *FingerprintManager) watchDriver(d driver.Driver, name string) {
|
|
var fingerprintTicker, healthTicker <-chan time.Time
|
|
|
|
// Determine whether the fingerprinter is periodic and health checking
|
|
isPeriodic, fingerprintPeriod := d.Periodic()
|
|
hc, isHealthCheck := d.(fingerprint.HealthCheck)
|
|
|
|
// Nothing to do since the state of this driver will never change
|
|
if !isPeriodic && !isHealthCheck {
|
|
return
|
|
}
|
|
|
|
// Setup the required tickers
|
|
if isPeriodic {
|
|
ticker := time.NewTicker(fingerprintPeriod)
|
|
fingerprintTicker = ticker.C
|
|
defer ticker.Stop()
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: fingerprinting driver %s every %v", name, fingerprintPeriod)
|
|
}
|
|
|
|
var isHealthCheckPeriodic bool
|
|
if isHealthCheck {
|
|
// Determine the interval at which to health check
|
|
req := &cstructs.HealthCheckIntervalRequest{}
|
|
var healthCheckResp cstructs.HealthCheckIntervalResponse
|
|
|
|
if err := hc.GetHealthCheckInterval(req, &healthCheckResp); err != nil {
|
|
fm.logger.Printf("[ERR] client.fingerprint_manager: error getting health check interval for driver %s: %v", name, err)
|
|
} else if healthCheckResp.Eligible {
|
|
isHealthCheckPeriodic = true
|
|
ticker := time.NewTicker(healthCheckResp.Period)
|
|
healthTicker = ticker.C
|
|
defer ticker.Stop()
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking driver %s every %v", name, healthCheckResp.Period)
|
|
}
|
|
}
|
|
|
|
driverEverDetected := false
|
|
for {
|
|
select {
|
|
case <-fm.shutdownCh:
|
|
return
|
|
case <-fingerprintTicker:
|
|
if _, err := fm.fingerprintDriver(name, d, isHealthCheckPeriodic); err != nil {
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: periodic fingerprinting for driver %v failed: %+v", name, err)
|
|
}
|
|
|
|
fm.nodeLock.Lock()
|
|
driver, detected := fm.node.Drivers[name]
|
|
|
|
// Memoize the driver detected status, so that we know whether to run the
|
|
// health check or not.
|
|
if detected && driver != nil && driver.Detected {
|
|
if !driverEverDetected {
|
|
driverEverDetected = true
|
|
}
|
|
}
|
|
fm.nodeLock.Unlock()
|
|
case <-healthTicker:
|
|
if driverEverDetected {
|
|
if err := fm.runDriverHealthCheck(name, hc); err != nil {
|
|
fm.logger.Printf("[DEBUG] client.fingerprint_manager: health checking for %v failed: %v", name, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// fingerprintDriver is a temporary solution to move towards DriverInfo and
|
|
// away from annotating a node's attributes to demonstrate support for a
|
|
// particular driver. Takes the FingerprintResponse and converts it to the
|
|
// proper DriverInfo update and then sets the prefix attributes as well
|
|
func (fm *FingerprintManager) fingerprintDriver(name string, f fingerprint.Fingerprint, hasPeriodicHealthCheck bool) (bool, error) {
|
|
var response cstructs.FingerprintResponse
|
|
|
|
fm.nodeLock.Lock()
|
|
|
|
// Determine if the driver has been detected before.
|
|
originalNode, haveDriver := fm.node.Drivers[name]
|
|
firstDetection := !haveDriver
|
|
|
|
// Determine if the driver is healthy
|
|
var driverIsHealthy bool
|
|
if haveDriver && originalNode.Healthy {
|
|
driverIsHealthy = true
|
|
}
|
|
|
|
// Fingerprint the driver.
|
|
request := &cstructs.FingerprintRequest{Config: fm.getConfig(), Node: fm.node}
|
|
err := f.Fingerprint(request, &response)
|
|
fm.nodeLock.Unlock()
|
|
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// Remove the health check attribute indicating the status of the driver,
|
|
// as the overall driver info object should indicate this.
|
|
delete(response.Attributes, fmt.Sprintf("driver.%s", name))
|
|
|
|
fingerprintInfo := &structs.DriverInfo{
|
|
Attributes: response.Attributes,
|
|
Detected: response.Detected,
|
|
}
|
|
|
|
// We set the health status based on the detection state of the driver if:
|
|
// * It is the first time we are fingerprinting the driver. This gives all
|
|
// drivers an initial health.
|
|
// * If the driver becomes undetected. This gives us an immediate unhealthy
|
|
// state and description when it transistions from detected and healthy to
|
|
// undetected.
|
|
// * If the driver does not have its own health checks. Then we always
|
|
// couple the states.
|
|
var healthInfo *structs.DriverInfo
|
|
if firstDetection || !hasPeriodicHealthCheck || !response.Detected && driverIsHealthy {
|
|
state := " "
|
|
if !response.Detected {
|
|
state = " not "
|
|
}
|
|
|
|
healthInfo = &structs.DriverInfo{
|
|
Healthy: response.Detected,
|
|
HealthDescription: fmt.Sprintf("Driver %s is%sdetected", name, state),
|
|
UpdateTime: time.Now(),
|
|
}
|
|
}
|
|
|
|
if node := fm.updateNodeFromDriver(name, fingerprintInfo, healthInfo); node != nil {
|
|
fm.setNode(node)
|
|
}
|
|
|
|
return response.Detected, nil
|
|
}
|
|
|
|
// runDriverHealthCheck checks the health of the specified resource.
|
|
func (fm *FingerprintManager) runDriverHealthCheck(name string, hc fingerprint.HealthCheck) error {
|
|
request := &cstructs.HealthCheckRequest{}
|
|
var response cstructs.HealthCheckResponse
|
|
if err := hc.HealthCheck(request, &response); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Update the status of the node irregardless if there was an error- in the
|
|
// case of periodic health checks, an error will occur if a health check
|
|
// fails
|
|
if node := fm.updateNodeFromDriver(name, nil, response.Drivers[name]); node != nil {
|
|
fm.setNode(node)
|
|
}
|
|
|
|
return nil
|
|
}
|