2015-09-16 20:58:33 +00:00
|
|
|
package command
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/hashicorp/nomad/api"
|
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
"github.com/mitchellh/cli"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// updateWait is the amount of time to wait between status
	// updates. Because the monitor is poll-based, we use this
	// delay to avoid overwhelming the API server.
	updateWait = time.Second
)
|
|
|
|
|
2015-09-18 17:03:23 +00:00
|
|
|
// evalState is used to store the current "state of the world"
// in the context of monitoring an evaluation.
type evalState struct {
	// status is the evaluation status, and desc is its
	// human-readable status description.
	status string
	desc   string

	// node is the ID of the node the evaluation was assigned to,
	// if any (empty until assignment).
	node string

	// allocs tracks the allocations associated with the evaluation,
	// keyed by allocation ID.
	allocs map[string]*allocState

	// wait is the delay before the evaluation runs, if any.
	wait time.Duration

	// index is the create index of the evaluation, used to tell
	// newly-created allocations apart from modified ones.
	index uint64
}
|
|
|
|
|
|
|
|
// allocState is used to track the state of an allocation
type allocState struct {
	// id is the allocation ID.
	id string

	// group is the task group the allocation belongs to.
	group string

	// node is the ID of the node the allocation was placed on.
	node string

	// desired is the desired status of the allocation, and
	// desiredDesc is its human-readable description.
	desired     string
	desiredDesc string

	// client is the allocation status reported by the client.
	client string

	// index is the create index of the allocation.
	index uint64

	// full is the allocation struct with full details. This
	// must be queried for explicitly so it is only included
	// if there is important error information inside.
	full *api.Allocation
}
|
|
|
|
|
2015-09-16 20:58:33 +00:00
|
|
|
// monitor wraps an evaluation monitor and holds metadata and
// state information.
type monitor struct {
	// ui is used to write status and error output.
	ui cli.Ui

	// client is the API client used to poll evaluation state.
	client *api.Client

	// state is the last-observed evaluation state; update compares
	// new state against it to print only what changed.
	state *evalState

	// The embedded mutex guards state across calls to update.
	sync.Mutex
}
|
|
|
|
|
|
|
|
// newMonitor returns a new monitor. The returned monitor will
|
|
|
|
// write output information to the provided ui.
|
|
|
|
func newMonitor(ui cli.Ui, client *api.Client) *monitor {
|
2015-09-18 17:03:23 +00:00
|
|
|
mon := &monitor{
|
2015-09-17 00:36:14 +00:00
|
|
|
ui: &cli.PrefixedUi{
|
|
|
|
InfoPrefix: "==> ",
|
|
|
|
OutputPrefix: " ",
|
|
|
|
ErrorPrefix: "==> ",
|
|
|
|
Ui: ui,
|
|
|
|
},
|
2015-09-16 20:58:33 +00:00
|
|
|
client: client,
|
|
|
|
}
|
2015-09-18 17:03:23 +00:00
|
|
|
mon.init()
|
|
|
|
return mon
|
2015-09-16 20:58:33 +00:00
|
|
|
}
|
|
|
|
|
2015-09-18 17:03:23 +00:00
|
|
|
// init allocates substructures
|
|
|
|
func (m *monitor) init() {
|
|
|
|
m.state = &evalState{
|
|
|
|
allocs: make(map[string]*allocState),
|
|
|
|
}
|
2015-09-16 21:45:21 +00:00
|
|
|
}
|
|
|
|
|
2015-09-16 20:58:33 +00:00
|
|
|
// update is used to update our monitor with new state. It can be
|
|
|
|
// called whether the passed information is new or not, and will
|
|
|
|
// only dump update messages when state changes.
|
2015-09-21 18:25:22 +00:00
|
|
|
func (m *monitor) update(update *evalState) {
|
2015-09-16 20:58:33 +00:00
|
|
|
m.Lock()
|
|
|
|
defer m.Unlock()
|
|
|
|
|
|
|
|
existing := m.state
|
|
|
|
|
2015-09-21 18:25:22 +00:00
|
|
|
// Swap in the new state at the end
|
|
|
|
defer func() {
|
|
|
|
m.state = update
|
|
|
|
}()
|
2015-09-16 20:58:33 +00:00
|
|
|
|
2015-09-16 21:45:21 +00:00
|
|
|
// Check the allocations
|
|
|
|
for allocID, alloc := range update.allocs {
|
2015-09-16 22:37:08 +00:00
|
|
|
if existing, ok := existing.allocs[allocID]; !ok {
|
2015-09-18 03:18:33 +00:00
|
|
|
switch {
|
|
|
|
case alloc.desired == structs.AllocDesiredStatusFailed:
|
|
|
|
// New allocs with desired state failed indicate
|
|
|
|
// scheduling failure.
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf("Scheduling error for group %q (%s)",
|
2015-09-18 16:37:33 +00:00
|
|
|
alloc.group, alloc.desiredDesc))
|
2015-09-18 03:18:33 +00:00
|
|
|
|
2015-09-21 00:38:25 +00:00
|
|
|
// Generate a more descriptive error for why the allocation
|
|
|
|
// failed and dump it to the screen
|
2015-09-21 18:25:22 +00:00
|
|
|
if alloc.full != nil {
|
|
|
|
dumpAllocStatus(m.ui, alloc.full)
|
2015-09-21 00:38:25 +00:00
|
|
|
}
|
|
|
|
|
2015-09-18 03:18:33 +00:00
|
|
|
case alloc.index < update.index:
|
|
|
|
// New alloc with create index lower than the eval
|
|
|
|
// create index indicates modification
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf(
|
2015-09-18 03:18:33 +00:00
|
|
|
"Allocation %q modified: node %q, group %q",
|
|
|
|
alloc.id, alloc.node, alloc.group))
|
|
|
|
|
|
|
|
case alloc.desired == structs.AllocDesiredStatusRun:
|
|
|
|
// New allocation with desired status running
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf(
|
2015-09-18 03:18:33 +00:00
|
|
|
"Allocation %q created: node %q, group %q",
|
|
|
|
alloc.id, alloc.node, alloc.group))
|
2015-09-16 22:37:08 +00:00
|
|
|
}
|
|
|
|
} else {
|
2015-09-18 03:18:33 +00:00
|
|
|
switch {
|
|
|
|
case existing.client != alloc.client:
|
|
|
|
// Allocation status has changed
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf(
|
2015-09-18 03:18:33 +00:00
|
|
|
"Allocation %q status changed: %q -> %q",
|
2015-09-16 22:37:08 +00:00
|
|
|
alloc.id, existing.client, alloc.client))
|
2015-09-16 21:45:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-16 20:58:33 +00:00
|
|
|
// Check if the status changed
|
|
|
|
if existing.status != update.status {
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf("Evaluation status changed: %q -> %q",
|
2015-09-21 18:25:22 +00:00
|
|
|
existing.status, update.status))
|
2015-09-16 20:58:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the wait time is different
|
|
|
|
if existing.wait == 0 && update.wait != 0 {
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf("Waiting %s before running eval",
|
2015-09-21 18:25:22 +00:00
|
|
|
update.wait))
|
2015-09-16 20:58:33 +00:00
|
|
|
}
|
|
|
|
|
2015-09-21 18:25:22 +00:00
|
|
|
// Check if the node changed
|
|
|
|
if existing.node == "" && update.node != "" {
|
2015-09-18 17:03:23 +00:00
|
|
|
m.ui.Output(fmt.Sprintf("Evaluation was assigned node ID %q",
|
2015-09-21 18:25:22 +00:00
|
|
|
update.node))
|
2015-09-16 20:58:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// monitor is used to start monitoring the given evaluation ID. It
|
|
|
|
// writes output directly to the monitor's ui, and returns the
|
2015-09-21 19:19:34 +00:00
|
|
|
// exit code for the command.
|
|
|
|
//
|
|
|
|
// The return code will be 0 on successful evaluation. If there are
|
|
|
|
// problems scheduling the job (impossible constraints, resources
|
|
|
|
// exhausted, etc), then the return code will be 2. For any other
|
|
|
|
// failures (API connectivity, internal errors, etc), the return code
|
|
|
|
// will be 1.
|
2015-09-16 20:58:33 +00:00
|
|
|
func (m *monitor) monitor(evalID string) int {
|
2015-09-21 19:19:34 +00:00
|
|
|
// Track if we encounter a scheduling failure. This can only be
|
|
|
|
// detected while querying allocations, so we use this bool to
|
|
|
|
// carry that status into the return code.
|
|
|
|
var schedFailure bool
|
|
|
|
|
2015-09-16 21:45:21 +00:00
|
|
|
m.ui.Info(fmt.Sprintf("Monitoring evaluation %q", evalID))
|
2015-09-16 20:58:33 +00:00
|
|
|
for {
|
2015-09-16 23:27:55 +00:00
|
|
|
// Query the evaluation
|
2015-09-16 20:58:33 +00:00
|
|
|
eval, _, err := m.client.Evaluations().Info(evalID, nil)
|
|
|
|
if err != nil {
|
|
|
|
m.ui.Error(fmt.Sprintf("Error reading evaluation: %s", err))
|
|
|
|
return 1
|
|
|
|
}
|
|
|
|
|
2015-09-21 18:25:22 +00:00
|
|
|
// Create the new eval state.
|
|
|
|
state := &evalState{
|
|
|
|
status: eval.Status,
|
|
|
|
desc: eval.StatusDescription,
|
|
|
|
node: eval.NodeID,
|
|
|
|
allocs: make(map[string]*allocState),
|
|
|
|
wait: eval.Wait,
|
|
|
|
index: eval.CreateIndex,
|
|
|
|
}
|
|
|
|
|
2015-09-16 23:27:55 +00:00
|
|
|
// Query the allocations associated with the evaluation
|
2015-09-16 21:45:21 +00:00
|
|
|
allocs, _, err := m.client.Evaluations().Allocations(evalID, nil)
|
|
|
|
if err != nil {
|
|
|
|
m.ui.Error(fmt.Sprintf("Error reading allocations: %s", err))
|
|
|
|
return 1
|
|
|
|
}
|
|
|
|
|
2015-09-21 18:25:22 +00:00
|
|
|
// Add the allocs to the state
|
|
|
|
for _, alloc := range allocs {
|
|
|
|
state.allocs[alloc.ID] = &allocState{
|
|
|
|
id: alloc.ID,
|
|
|
|
group: alloc.TaskGroup,
|
|
|
|
node: alloc.NodeID,
|
|
|
|
desired: alloc.DesiredStatus,
|
|
|
|
desiredDesc: alloc.DesiredDescription,
|
|
|
|
client: alloc.ClientStatus,
|
|
|
|
index: alloc.CreateIndex,
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have a scheduling error, query the full allocation
|
|
|
|
// to get the details.
|
|
|
|
if alloc.DesiredStatus == structs.AllocDesiredStatusFailed {
|
2015-09-21 19:19:34 +00:00
|
|
|
schedFailure = true
|
2015-09-21 18:25:22 +00:00
|
|
|
failed, _, err := m.client.Allocations().Info(alloc.ID, nil)
|
|
|
|
if err != nil {
|
|
|
|
m.ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
|
|
|
|
return 1
|
|
|
|
}
|
|
|
|
state.allocs[alloc.ID].full = failed
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-16 20:58:33 +00:00
|
|
|
// Update the state
|
2015-09-21 18:25:22 +00:00
|
|
|
m.update(state)
|
2015-09-16 21:45:21 +00:00
|
|
|
|
|
|
|
switch eval.Status {
|
|
|
|
case structs.EvalStatusComplete, structs.EvalStatusFailed:
|
|
|
|
m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q",
|
|
|
|
eval.ID, eval.Status))
|
|
|
|
default:
|
|
|
|
// Wait for the next update
|
2015-09-18 17:03:23 +00:00
|
|
|
time.Sleep(updateWait)
|
2015-09-16 21:45:21 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Monitor the next eval, if it exists.
|
|
|
|
if eval.NextEval != "" {
|
2015-09-18 17:03:23 +00:00
|
|
|
m.init()
|
|
|
|
return m.monitor(eval.NextEval)
|
2015-09-16 21:45:21 +00:00
|
|
|
}
|
2015-09-16 23:20:19 +00:00
|
|
|
break
|
2015-09-16 20:58:33 +00:00
|
|
|
}
|
|
|
|
|
2015-09-21 19:19:34 +00:00
|
|
|
// Treat scheduling failures specially using a dedicated exit code.
|
|
|
|
// This makes it easier to detect failures from the CLI.
|
|
|
|
if schedFailure {
|
|
|
|
return 2
|
|
|
|
}
|
|
|
|
|
2015-09-16 20:58:33 +00:00
|
|
|
return 0
|
|
|
|
}
|