open-nomad/command/status.go

362 lines
8.6 KiB
Go
Raw Normal View History

2015-09-11 07:38:15 +00:00
package command
import (
"bytes"
"encoding/gob"
2015-09-11 07:38:15 +00:00
"fmt"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
2015-09-11 07:38:15 +00:00
)
2016-05-26 00:06:20 +00:00
// maxFailedTGs caps how many task groups get their placement-failure reasons
// printed by this command; beyond that the user is pointed at eval-status.
const maxFailedTGs = 5
2015-09-11 07:38:15 +00:00
type StatusCommand struct {
Meta
2016-05-26 00:06:20 +00:00
length int
showEvals, verbose bool
2015-09-11 07:38:15 +00:00
}
func (c *StatusCommand) Help() string {
helpText := `
2016-01-27 20:00:31 +00:00
Usage: nomad status [options] <job>
2015-09-11 07:38:15 +00:00
2015-09-13 18:39:49 +00:00
Display status information about jobs. If no job ID is given,
2015-09-11 17:00:55 +00:00
a list of all known jobs will be dumped.
2015-09-11 07:38:15 +00:00
General Options:
2015-09-11 07:38:15 +00:00
` + generalOptionsUsage() + `
Status Options:
-short
Display short output. Used only when a single job is being
2016-05-26 00:06:20 +00:00
queried, and drops verbose information about allocations.
-evals
Display the evaluations associated with the job.
-verbose
Display full information.
`
2015-09-11 07:38:15 +00:00
return strings.TrimSpace(helpText)
}
func (c *StatusCommand) Synopsis() string {
2015-09-13 18:39:49 +00:00
return "Display status information about jobs"
2015-09-11 07:38:15 +00:00
}
func (c *StatusCommand) Run(args []string) int {
2016-05-26 00:06:20 +00:00
var short bool
flags := c.Meta.FlagSet("status", FlagSetClient)
2015-09-11 07:38:15 +00:00
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&short, "short", false, "")
2016-05-26 00:06:20 +00:00
flags.BoolVar(&c.showEvals, "evals", false, "")
flags.BoolVar(&c.verbose, "verbose", false, "")
2015-09-11 07:38:15 +00:00
if err := flags.Parse(args); err != nil {
return 1
}
// Check that we either got no jobs or exactly one.
args = flags.Args()
if len(args) > 1 {
2015-09-11 07:38:15 +00:00
c.Ui.Error(c.Help())
return 1
}
// Truncate the id unless full length is requested
c.length = shortId
2016-05-26 00:06:20 +00:00
if c.verbose {
c.length = fullId
}
2015-09-11 07:38:15 +00:00
// Get the HTTP client
client, err := c.Meta.Client()
2015-09-11 07:38:15 +00:00
if err != nil {
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
2015-09-11 07:38:15 +00:00
return 1
}
// Invoke list mode if no job ID.
if len(args) == 0 {
2015-09-11 07:38:15 +00:00
jobs, _, err := client.Jobs().List(nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err))
2015-09-11 07:38:15 +00:00
return 1
}
2015-09-11 17:00:55 +00:00
// No output if we have no jobs
if len(jobs) == 0 {
c.Ui.Output("No running jobs")
2015-09-11 17:00:55 +00:00
return 0
}
2015-09-13 18:39:49 +00:00
out := make([]string, len(jobs)+1)
out[0] = "ID|Type|Priority|Status"
2015-09-13 18:39:49 +00:00
for i, job := range jobs {
out[i+1] = fmt.Sprintf("%s|%s|%d|%s",
2015-09-11 07:38:15 +00:00
job.ID,
job.Type,
job.Priority,
2015-09-13 18:39:49 +00:00
job.Status)
2015-09-11 07:38:15 +00:00
}
2015-09-15 23:44:38 +00:00
c.Ui.Output(formatList(out))
2015-09-11 07:38:15 +00:00
return 0
}
// Try querying the job
jobID := args[0]
2016-03-17 23:48:45 +00:00
jobs, _, err := client.Jobs().PrefixList(jobID)
2015-09-11 07:38:15 +00:00
if err != nil {
2016-03-17 23:48:45 +00:00
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
return 1
}
if len(jobs) == 0 {
c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
return 1
}
if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID {
2016-03-17 23:48:45 +00:00
out := make([]string, len(jobs)+1)
out[0] = "ID|Type|Priority|Status"
for i, job := range jobs {
out[i+1] = fmt.Sprintf("%s|%s|%d|%s",
job.ID,
job.Type,
job.Priority,
job.Status)
}
2016-03-17 23:48:45 +00:00
c.Ui.Output(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", formatList(out)))
return 0
}
// Prefix lookup matched a single job
job, _, err := client.Jobs().Info(jobs[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
return 1
2015-09-11 07:38:15 +00:00
}
// Check if it is periodic
sJob, err := convertApiJob(job)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
return 1
}
periodic := sJob.IsPeriodic()
2015-09-11 07:38:15 +00:00
// Format the job info
basic := []string{
2015-09-15 23:44:38 +00:00
fmt.Sprintf("ID|%s", job.ID),
fmt.Sprintf("Name|%s", job.Name),
fmt.Sprintf("Type|%s", job.Type),
fmt.Sprintf("Priority|%d", job.Priority),
fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")),
fmt.Sprintf("Status|%s", job.Status),
fmt.Sprintf("Periodic|%v", periodic),
2015-09-11 07:38:15 +00:00
}
if periodic {
basic = append(basic, fmt.Sprintf("Next Periodic Launch|%v",
sJob.Periodic.Next(time.Now().UTC())))
}
c.Ui.Output(formatKV(basic))
2016-01-07 22:24:25 +00:00
// Exit early
if short {
return 0
}
2016-01-07 22:24:25 +00:00
// Print periodic job information
if periodic {
if err := c.outputPeriodicInfo(client, job); err != nil {
c.Ui.Error(err.Error())
return 1
}
2016-01-07 22:24:25 +00:00
return 0
}
2016-01-07 22:24:25 +00:00
if err := c.outputJobInfo(client, job); err != nil {
c.Ui.Error(err.Error())
return 1
}
2016-01-07 22:24:25 +00:00
return 0
}
// outputPeriodicInfo prints information about the passed periodic job. If a
// request fails, an error is returned.
2016-01-07 22:24:25 +00:00
func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
// Generate the prefix that matches launched jobs from the periodic job.
prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix)
children, _, err := client.Jobs().PrefixList(prefix)
2016-01-07 22:24:25 +00:00
if err != nil {
return fmt.Errorf("Error querying job: %s", err)
}
if len(children) == 0 {
2016-01-08 18:32:08 +00:00
c.Ui.Output("\nNo instances of periodic job found")
return nil
}
out := make([]string, 1)
out[0] = "ID|Status"
for _, child := range children {
// Ensure that we are only showing jobs whose parent is the requested
// job.
if child.ParentID != job.ID {
continue
}
out = append(out, fmt.Sprintf("%s|%s",
child.ID,
child.Status))
}
c.Ui.Output(fmt.Sprintf("\nPreviously launched jobs:\n%s", formatList(out)))
2016-01-07 22:24:25 +00:00
return nil
}
// outputJobInfo prints information about the passed non-periodic job. If a
// request fails, an error is returned.
2016-01-07 22:24:25 +00:00
func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
var evals, allocs []string
2016-05-26 00:06:20 +00:00
// Query the allocations
jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil)
if err != nil {
return fmt.Errorf("Error querying job allocations: %s", err)
}
2016-01-07 22:24:25 +00:00
// Query the evaluations
jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
if err != nil {
return fmt.Errorf("Error querying job evaluations: %s", err)
}
2016-05-26 00:06:20 +00:00
// Determine latest evaluation with failures whose follow up hasn't
// completed.
evalsByID := make(map[string]*api.Evaluation, len(jobEvals))
for _, eval := range jobEvals {
evalsByID[eval.ID] = eval
}
var latestFailedPlacement *api.Evaluation
for _, eval := range evalsByID {
if len(eval.FailedTGAllocs) == 0 {
// Skip evals without failures
continue
}
// Check if created blocked eval is finished
if blocked, ok := evalsByID[eval.BlockedEval]; ok {
2016-05-26 01:06:47 +00:00
if blocked.Status != "blocked" {
2016-05-26 00:06:20 +00:00
continue
}
}
if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
latestFailedPlacement = eval
}
2016-01-07 22:24:25 +00:00
}
// Format the evals
evals = make([]string, len(jobEvals)+1)
2016-05-26 00:06:20 +00:00
evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
2016-01-07 22:24:25 +00:00
for i, eval := range jobEvals {
2016-05-26 00:06:20 +00:00
evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%t",
limit(eval.ID, c.length),
2016-01-07 22:24:25 +00:00
eval.Priority,
eval.TriggeredBy,
2016-05-26 00:06:20 +00:00
eval.Status,
len(eval.FailedTGAllocs) != 0,
)
}
if c.verbose || c.showEvals {
c.Ui.Output("\n==> Evaluations")
c.Ui.Output(formatList(evals))
}
if latestFailedPlacement != nil {
c.outputFailedPlacements(latestFailedPlacement)
2016-01-07 22:24:25 +00:00
}
// Format the allocs
c.Ui.Output("\n==> Allocations")
if len(jobAllocs) > 0 {
allocs = make([]string, len(jobAllocs)+1)
allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status"
for i, alloc := range jobAllocs {
allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
limit(alloc.ID, c.length),
limit(alloc.EvalID, c.length),
limit(alloc.NodeID, c.length),
alloc.TaskGroup,
alloc.DesiredStatus,
alloc.ClientStatus)
}
c.Ui.Output(formatList(allocs))
} else {
c.Ui.Output("No allocations placed")
}
2016-01-07 22:24:25 +00:00
return nil
2015-09-11 07:38:15 +00:00
}
2016-05-26 00:06:20 +00:00
// outputFailedPlacements prints, per task group, the placement-failure
// metrics recorded on failedEval. At most maxFailedTGs task groups are shown;
// when there are more, a hint to run eval-status for the rest is printed.
// Nothing is printed when failedEval is nil or carries no failures.
func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
	if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
		return
	}

	c.Ui.Output("\n==> Last Placement Failure")

	groups := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
	truncated := len(groups) > maxFailedTGs

	shown := groups
	if truncated {
		shown = groups[:maxFailedTGs]
	}

	for _, name := range shown {
		m := failedEval.FailedTGAllocs[name]

		// One failure is always counted; coalesced failures are added on top.
		failures := m.CoalescedFailures + 1
		noun := "allocation"
		if failures > 1 {
			noun = "allocations"
		}

		c.Ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", name, failures, noun))
		dumpAllocMetrics(c.Ui, m, false)
		c.Ui.Output("")
	}

	if truncated {
		c.Ui.Output(fmt.Sprintf("Placement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID))
	}
}
// convertApiJob turns an *api.Job into a *structs.Job by gob-encoding the
// former into a buffer and gob-decoding that buffer into the latter. Any
// encode or decode error is returned unchanged. This is a blunt conversion
// and probably needs to be revisited.
func convertApiJob(in *api.Job) (*structs.Job, error) {
	// Register the concrete types gob may meet behind interface values.
	gob.Register(map[string]interface{}{})
	gob.Register([]interface{}{})

	var encoded bytes.Buffer
	if err := gob.NewEncoder(&encoded).Encode(in); err != nil {
		return nil, err
	}

	var out *structs.Job
	if err := gob.NewDecoder(&encoded).Decode(&out); err != nil {
		return nil, err
	}
	return out, nil
}