open-nomad/command/monitor.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package command

import (
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/nomad/api"
	"github.com/mitchellh/cli"
)

const (
	// updateWait is the amount of time to wait between status
	// updates. Because the monitor is poll-based, we use this
	// delay to avoid overwhelming the API server.
	updateWait = time.Second
)

// evalState is used to store the current "state of the world"
// in the context of monitoring an evaluation.
type evalState struct {
	status     string
	desc       string
	node       string
	deployment string
	job        string
	allocs     map[string]*allocState
	wait       time.Duration
	index      uint64
}

// newEvalState creates and initializes a new monitorState
func newEvalState() *evalState {
	return &evalState{
		status: api.EvalStatusPending,
		allocs: make(map[string]*allocState),
	}
}

// allocState is used to track the state of an allocation
type allocState struct {
	id          string
	group       string
	node        string
	desired     string
	desiredDesc string
	client      string
	clientDesc  string
	index       uint64
}

// monitor wraps an evaluation monitor and holds metadata and
// state information.
type monitor struct {
	ui     cli.Ui
	client *api.Client
	state  *evalState

	// length determines the number of characters for identifiers in the ui.
	length int

	sync.Mutex
}

// newMonitor returns a new monitor. The returned monitor will
// write output information to the provided ui. The length parameter determines
// the number of characters for identifiers in the ui.
func newMonitor(ui cli.Ui, client *api.Client, length int) *monitor {
	if colorUi, ok := ui.(*cli.ColoredUi); ok {
		// Disable Info color for monitored output
		ui = &cli.ColoredUi{
			ErrorColor: colorUi.ErrorColor,
			WarnColor:  colorUi.WarnColor,
			InfoColor:  cli.UiColorNone,
			Ui:         colorUi.Ui,
		}
	}
	mon := &monitor{
		ui: &cli.PrefixedUi{
			InfoPrefix:   "==> ",
			OutputPrefix: "    ",
			ErrorPrefix:  "==> ",
			Ui:           ui,
		},
		client: client,
		state:  newEvalState(),
		length: length,
	}
	return mon
}

// update is used to update our monitor with new state. It can be
// called whether the passed information is new or not, and will
// only dump update messages when state changes.
func (m *monitor) update(update *evalState) {
	m.Lock()
	defer m.Unlock()

	existing := m.state

	// Swap in the new state at the end
	defer func() {
		m.state = update
	}()

	// Check if the evaluation was triggered by a node
	if existing.node == "" && update.node != "" {
		m.ui.Output(fmt.Sprintf("%s: Evaluation triggered by node %q",
			formatTime(time.Now()), limit(update.node, m.length)))
	}

	// Check if the evaluation was triggered by a job
	if existing.job == "" && update.job != "" {
		m.ui.Output(fmt.Sprintf("%s: Evaluation triggered by job %q",
			formatTime(time.Now()), update.job))
	}

	// Check if the evaluation was triggered by a deployment
	if existing.deployment == "" && update.deployment != "" {
		m.ui.Output(fmt.Sprintf("%s: Evaluation within deployment: %q",
			formatTime(time.Now()), limit(update.deployment, m.length)))
	}

	// Check the allocations
	for allocID, alloc := range update.allocs {
		if existing, ok := existing.allocs[allocID]; !ok {
			switch {
			case alloc.index < update.index:
				// New alloc with create index lower than the eval
				// create index indicates modification
				m.ui.Output(fmt.Sprintf(
					"%s: Allocation %q modified: node %q, group %q",
					formatTime(time.Now()), limit(alloc.id, m.length),
					limit(alloc.node, m.length), alloc.group))

			case alloc.desired == api.AllocDesiredStatusRun:
				// New allocation with desired status running
				m.ui.Output(fmt.Sprintf(
					"%s: Allocation %q created: node %q, group %q",
					formatTime(time.Now()), limit(alloc.id, m.length),
					limit(alloc.node, m.length), alloc.group))
			}
		} else {
			switch {
			case existing.client != alloc.client:
				description := ""
				if alloc.clientDesc != "" {
					description = fmt.Sprintf(" (%s)", alloc.clientDesc)
				}
				// Allocation status has changed
				m.ui.Output(fmt.Sprintf(
					"%s: Allocation %q status changed: %q -> %q%s",
					formatTime(time.Now()), limit(alloc.id, m.length),
					existing.client, alloc.client, description))
			}
		}
	}

	// Check if the status changed. We skip any transitions to pending status.
	if existing.status != "" &&
		update.status != api.AllocClientStatusPending &&
		existing.status != update.status {
		m.ui.Output(fmt.Sprintf("%s: Evaluation status changed: %q -> %q",
			formatTime(time.Now()), existing.status, update.status))
	}
}

// monitor is used to start monitoring the given evaluation ID. It
// writes output directly to the monitor's ui, and returns the
// exit code for the command.
//
// The return code will be 0 on successful evaluation. If there are
// problems scheduling the job (impossible constraints, resources
// exhausted, etc), then the return code will be 2. For any other
// failures (API connectivity, internal errors, etc), the return code
// will be 1.
func (m *monitor) monitor(evalID string) int {
	// Track if we encounter a scheduling failure. This can only be
	// detected while querying allocations, so we use this bool to
	// carry that status into the return code.
	var schedFailure bool

	// Add the initial pending state
	m.update(newEvalState())

	m.ui.Info(fmt.Sprintf("%s: Monitoring evaluation %q",
		formatTime(time.Now()), limit(evalID, m.length)))

	for {
		// Query the evaluation
		eval, _, err := m.client.Evaluations().Info(evalID, nil)
		if err != nil {
			m.ui.Error(fmt.Sprintf("No evaluation with id %q found", evalID))
			return 1
		}

		// Create the new eval state.
		state := newEvalState()
		state.status = eval.Status
		state.desc = eval.StatusDescription
		state.node = eval.NodeID
		state.job = eval.JobID
		state.deployment = eval.DeploymentID
		state.wait = eval.Wait
		state.index = eval.CreateIndex

		// Query the allocations associated with the evaluation
		allocs, _, err := m.client.Evaluations().Allocations(eval.ID, nil)
		if err != nil {
			m.ui.Error(fmt.Sprintf("%s: Error reading allocations: %s", formatTime(time.Now()), err))
			return 1
		}

		// Add the allocs to the state
		for _, alloc := range allocs {
			state.allocs[alloc.ID] = &allocState{
				id:          alloc.ID,
				group:       alloc.TaskGroup,
				node:        alloc.NodeID,
				desired:     alloc.DesiredStatus,
				desiredDesc: alloc.DesiredDescription,
				client:      alloc.ClientStatus,
				clientDesc:  alloc.ClientDescription,
				index:       alloc.CreateIndex,
			}
		}

		// Update the state
		m.update(state)

		switch eval.Status {
		case api.EvalStatusComplete, api.EvalStatusFailed, api.EvalStatusCancelled:
			if len(eval.FailedTGAllocs) == 0 {
				m.ui.Info(fmt.Sprintf("%s: Evaluation %q finished with status %q",
					formatTime(time.Now()), limit(eval.ID, m.length), eval.Status))
			} else {
				// There were failures making the allocations
				schedFailure = true
				m.ui.Info(fmt.Sprintf("%s: Evaluation %q finished with status %q but failed to place all allocations:",
					formatTime(time.Now()), limit(eval.ID, m.length), eval.Status))

				// Print the failures per task group
				for tg, metrics := range eval.FailedTGAllocs {
					noun := "allocation"
					if metrics.CoalescedFailures > 0 {
						noun += "s"
					}
					m.ui.Output(fmt.Sprintf("%s: Task Group %q (failed to place %d %s):",
						formatTime(time.Now()), tg, metrics.CoalescedFailures+1, noun))
					metrics := formatAllocMetrics(metrics, false, "  ")
					for _, line := range strings.Split(metrics, "\n") {
						m.ui.Output(line)
					}
				}

				if eval.BlockedEval != "" {
					m.ui.Output(fmt.Sprintf("%s: Evaluation %q waiting for additional capacity to place remainder",
						formatTime(time.Now()), limit(eval.BlockedEval, m.length)))
				}
			}
		default:
			// Wait for the next update
			time.Sleep(updateWait)
			continue
		}

		// Monitor the next eval in the chain, if present
		if eval.NextEval != "" {
			if eval.Wait.Nanoseconds() != 0 {
				m.ui.Info(fmt.Sprintf(
					"%s: Monitoring next evaluation %q in %s",
					formatTime(time.Now()), limit(eval.NextEval, m.length), eval.Wait))

				// Skip some unnecessary polling
				time.Sleep(eval.Wait)
			}

			// Reset the state and monitor the new eval
			m.state = newEvalState()
			return m.monitor(eval.NextEval)
		}
		break
	}

	// Monitor the deployment if it exists
	dID := m.state.deployment
	if dID != "" {
		m.ui.Info(fmt.Sprintf("%s: Monitoring deployment %q", formatTime(time.Now()), limit(dID, m.length)))

		var verbose bool
		if m.length == fullId {
			verbose = true
		} else {
			verbose = false
		}

		meta := new(Meta)
		meta.Ui = m.ui
		cmd := &DeploymentStatusCommand{Meta: *meta}
		status, err := cmd.monitor(m.client, dID, 0, m.state.wait, verbose)
		if err != nil || status != api.DeploymentStatusSuccessful {
			return 1
		}
	}

	// Treat scheduling failures specially using a dedicated exit code.
	// This makes it easier to detect failures from the CLI.
	if schedFailure {
		return 2
	}

	return 0
}

func formatAllocMetrics(metrics *api.AllocationMetric, scores bool, prefix string) string {
	// Print a helpful message if we have an eligibility problem
	var out string
	if metrics.NodesEvaluated == 0 {
		out += fmt.Sprintf("%s* No nodes were eligible for evaluation\n", prefix)
	}

	// Print a helpful message if the user has asked for a DC that has no
	// available nodes.
	for dc, available := range metrics.NodesAvailable {
		if available == 0 {
			out += fmt.Sprintf("%s* No nodes are available in datacenter %q\n", prefix, dc)
		}
	}

	// Print filter info
	for class, num := range metrics.ClassFiltered {
		out += fmt.Sprintf("%s* Class %q: %d nodes excluded by filter\n", prefix, class, num)
	}
	for cs, num := range metrics.ConstraintFiltered {
		out += fmt.Sprintf("%s* Constraint %q: %d nodes excluded by filter\n", prefix, cs, num)
	}

	// Print exhaustion info
	if ne := metrics.NodesExhausted; ne > 0 {
		out += fmt.Sprintf("%s* Resources exhausted on %d nodes\n", prefix, ne)
	}
	for class, num := range metrics.ClassExhausted {
		out += fmt.Sprintf("%s* Class %q exhausted on %d nodes\n", prefix, class, num)
	}
	for dim, num := range metrics.DimensionExhausted {
		out += fmt.Sprintf("%s* Dimension %q exhausted on %d nodes\n", prefix, dim, num)
	}

	// Print quota info
	for _, dim := range metrics.QuotaExhausted {
		out += fmt.Sprintf("%s* Quota limit hit %q\n", prefix, dim)
	}

	// Print scores
	if scores {
		if len(metrics.ScoreMetaData) > 0 {
			scoreOutput := make([]string, len(metrics.ScoreMetaData)+1)

			// Find all possible scores and build header row.
			allScores := make(map[string]struct{})
			for _, scoreMeta := range metrics.ScoreMetaData {
				for score := range scoreMeta.Scores {
					allScores[score] = struct{}{}
				}
			}
			// Sort scores alphabetically.
			scores := make([]string, 0, len(allScores))
			for score := range allScores {
				scores = append(scores, score)
			}
			sort.Strings(scores)
			scoreOutput[0] = fmt.Sprintf("Node|%s|final score", strings.Join(scores, "|"))

			// Build row for each score.
			for i, scoreMeta := range metrics.ScoreMetaData {
				scoreOutput[i+1] = fmt.Sprintf("%v|", scoreMeta.NodeID)
				for _, scorerName := range scores {
					scoreVal := scoreMeta.Scores[scorerName]
					scoreOutput[i+1] += fmt.Sprintf("%.3g|", scoreVal)
				}
				scoreOutput[i+1] += fmt.Sprintf("%.3g", scoreMeta.NormScore)
			}

			out += formatList(scoreOutput)
		} else {
			// Backwards compatibility for old allocs
			for name, score := range metrics.Scores {
				out += fmt.Sprintf("%s* Score %q = %f\n", prefix, name, score)
			}
		}
	}

	out = strings.TrimSuffix(out, "\n")
	return out
}