open-nomad/command/job_status.go
Preetha Appan 10e7d6df6d
Remove compat code associated with many previous versions of nomad
This removes compat code for namespaces (0.7), Drain(0.8) and other
older features from releases older than Nomad 0.7
2019-06-25 19:05:25 -05:00

675 lines
19 KiB
Go

package command
import (
"fmt"
"sort"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/posener/complete"
)
const (
// maxFailedTGs caps how many task groups have their placement-failure
// details printed by outputFailedPlacements. When an evaluation reports
// failures for more task groups than this, the output is truncated and the
// user is pointed at eval-status for the remainder.
maxFailedTGs = 5
)
// JobStatusCommand implements the "status" command, which either lists all
// jobs or prints detailed information about a single job.
type JobStatusCommand struct {
Meta
// length is the number of characters IDs are truncated to when printed.
// Run sets it to shortId, or to fullId when -verbose is given.
length int
// evals is set by the -evals flag and makes outputJobInfo print the
// evaluations table.
evals bool
// allAllocs is set by the -all-allocs flag and is forwarded to the
// allocations query in outputJobInfo.
allAllocs bool
// verbose is set by the -verbose flag; it selects full-length IDs and the
// more detailed table layouts.
verbose bool
}
// Help returns the usage text for the status command: the general options
// produced by generalOptionsUsage followed by the command-specific flags that
// Run parses. Leading and trailing whitespace is trimmed from the result.
func (c *JobStatusCommand) Help() string {
helpText := `
Usage: nomad status [options] <job>
Display status information about a job. If no job ID is given, a list of all
known jobs will be displayed.
General Options:
` + generalOptionsUsage() + `
Status Options:
-short
Display short output. Used only when a single job is being
queried, and drops verbose information about allocations.
-evals
Display the evaluations associated with the job.
-all-allocs
Display all allocations matching the job ID, including those from an older
instance of the job.
-verbose
Display full information.
`
return strings.TrimSpace(helpText)
}
// Synopsis returns the one-line description of the command.
func (c *JobStatusCommand) Synopsis() string {
	const synopsis = "Display status information about a job"
	return synopsis
}
// AutocompleteFlags returns the flags offered for shell completion: the
// client flag set from Meta merged with the status-specific flags.
func (c *JobStatusCommand) AutocompleteFlags() complete.Flags {
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)

	// The status flags are all booleans, so nothing is predicted after them.
	statusFlags := complete.Flags{
		"-all-allocs": complete.PredictNothing,
		"-evals":      complete.PredictNothing,
		"-short":      complete.PredictNothing,
		"-verbose":    complete.PredictNothing,
	}

	return mergeAutocompleteFlags(clientFlags, statusFlags)
}
// AutocompleteArgs returns a predictor that completes the positional
// argument with the job IDs matching what has been typed so far. It yields
// nil when no API client can be built and an empty list when the prefix
// search fails.
func (c *JobStatusCommand) AutocompleteArgs() complete.Predictor {
	predictJobs := func(a complete.Args) []string {
		client, err := c.Meta.Client()
		if err != nil {
			return nil
		}

		resp, _, searchErr := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil)
		if searchErr != nil {
			return []string{}
		}

		return resp.Matches[contexts.Jobs]
	}

	return complete.PredictFunc(predictJobs)
}
// Name returns the name of the command; Run also uses it to label the flag
// set it builds.
func (c *JobStatusCommand) Name() string {
	const name = "status"
	return name
}
// Run parses the command-line arguments and prints job status information.
//
// With no positional argument every job is listed. With one argument, the
// argument is treated as a job ID or ID prefix: the matching job's basic
// details are printed, followed (unless -short is given) by the periodic,
// parameterized or regular job sections as appropriate.
//
// It returns 0 on success and 1 on any usage or query error.
func (c *JobStatusCommand) Run(args []string) int {
	var short bool

	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&short, "short", false, "")
	flags.BoolVar(&c.evals, "evals", false, "")
	flags.BoolVar(&c.allAllocs, "all-allocs", false, "")
	flags.BoolVar(&c.verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we either got no jobs or exactly one.
	args = flags.Args()
	if len(args) > 1 {
		c.Ui.Error("This command takes either no arguments or one: <job>")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	// Truncate the id unless full length is requested
	c.length = shortId
	if c.verbose {
		c.length = fullId
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Invoke list mode if no job ID.
	if len(args) == 0 {
		jobs, _, err := client.Jobs().List(nil)
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err))
			return 1
		}

		if len(jobs) == 0 {
			// No output if we have no jobs
			c.Ui.Output("No running jobs")
		} else {
			c.Ui.Output(createStatusListOutput(jobs))
		}
		return 0
	}

	// Try querying the job
	jobID := args[0]
	jobs, _, err := client.Jobs().PrefixList(jobID)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
		return 1
	}
	if len(jobs) == 0 {
		c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
		return 1
	}
	// Several matches are only acceptable when the first one is an exact
	// match for the requested ID.
	if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID {
		c.Ui.Error(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs)))
		return 1
	}

	// Prefix lookup matched a single job
	job, _, err := client.Jobs().Info(jobs[0].ID, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
		return 1
	}

	periodic := job.IsPeriodic()
	parameterized := job.IsParameterized()

	// Format the job info
	basic := []string{
		fmt.Sprintf("ID|%s", *job.ID),
		fmt.Sprintf("Name|%s", *job.Name),
		fmt.Sprintf("Submit Date|%s", formatTime(time.Unix(0, *job.SubmitTime))),
		fmt.Sprintf("Type|%s", *job.Type),
		fmt.Sprintf("Priority|%d", *job.Priority),
		fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")),
		fmt.Sprintf("Status|%s", getStatusString(*job.Status, job.Stop)),
		fmt.Sprintf("Periodic|%v", periodic),
		fmt.Sprintf("Parameterized|%v", parameterized),
	}

	if periodic && !parameterized {
		// job.Stop is a pointer; treat an unset value as "not stopped", the
		// same way getStatusString does, instead of dereferencing blindly.
		if job.Stop != nil && *job.Stop {
			basic = append(basic, "Next Periodic Launch|none (job stopped)")
		} else if location, err := job.Periodic.GetLocation(); err == nil {
			// The next launch time is best-effort: if the location or the
			// next launch cannot be computed the line is simply omitted.
			now := time.Now().In(location)
			if next, err := job.Periodic.Next(now); err == nil {
				basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s (%s from now)",
					formatTime(next), formatTimeDifference(now, next, time.Second)))
			}
		}
	}

	c.Ui.Output(formatKV(basic))

	// Exit early
	if short {
		return 0
	}

	// Print the detail sections. A parameterized job always gets the
	// parameterized output, even if it is also periodic.
	var infoErr error
	switch {
	case parameterized:
		infoErr = c.outputParameterizedInfo(client, job)
	case periodic:
		infoErr = c.outputPeriodicInfo(client, job)
	default:
		infoErr = c.outputJobInfo(client, job)
	}
	if infoErr != nil {
		c.Ui.Error(infoErr.Error())
		return 1
	}

	return 0
}
// outputPeriodicInfo prints information about the passed periodic job: the
// job summary followed by a table of the jobs it has launched. If a request
// fails, an error is returned.
func (c *JobStatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
	// Output the summary
	if err := c.outputJobSummary(client, job); err != nil {
		return err
	}

	// Generate the prefix that matches launched jobs from the periodic job.
	prefix := fmt.Sprintf("%s%s", *job.ID, structs.PeriodicLaunchSuffix)
	children, _, err := client.Jobs().PrefixList(prefix)
	if err != nil {
		return fmt.Errorf("Error querying job: %s", err)
	}

	out := make([]string, 1, len(children)+1)
	out[0] = "ID|Status"
	for _, child := range children {
		// Ensure that we are only showing jobs whose parent is the requested
		// job.
		if child.ParentID != *job.ID {
			continue
		}

		out = append(out, fmt.Sprintf("%s|%s", child.ID, child.Status))
	}

	// Decide emptiness after filtering: a prefix match whose parent is a
	// different job must not produce a table consisting only of the header.
	if len(out) == 1 {
		c.Ui.Output("\nNo instances of periodic job found")
		return nil
	}

	c.Ui.Output(c.Colorize().Color("\n[bold]Previously Launched Jobs[reset]"))
	c.Ui.Output(formatList(out))
	return nil
}
// outputParameterizedInfo prints information about a parameterized job: its
// payload and metadata requirements, the job summary, and a table of the jobs
// dispatched from it. If a request fails, an error is returned.
func (c *JobStatusCommand) outputParameterizedInfo(client *api.Client, job *api.Job) error {
	// Output parameterized job details
	c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job[reset]"))
	parameterizedJob := []string{
		fmt.Sprintf("Payload|%s", job.ParameterizedJob.Payload),
		fmt.Sprintf("Required Metadata|%v", strings.Join(job.ParameterizedJob.MetaRequired, ", ")),
		fmt.Sprintf("Optional Metadata|%v", strings.Join(job.ParameterizedJob.MetaOptional, ", ")),
	}
	c.Ui.Output(formatKV(parameterizedJob))

	// Output the summary
	if err := c.outputJobSummary(client, job); err != nil {
		return err
	}

	// Generate the prefix that matches launched jobs from the parameterized job.
	prefix := fmt.Sprintf("%s%s", *job.ID, structs.DispatchLaunchSuffix)
	children, _, err := client.Jobs().PrefixList(prefix)
	if err != nil {
		return fmt.Errorf("Error querying job: %s", err)
	}

	out := make([]string, 1, len(children)+1)
	out[0] = "ID|Status"
	for _, child := range children {
		// Ensure that we are only showing jobs whose parent is the requested
		// job.
		if child.ParentID != *job.ID {
			continue
		}

		out = append(out, fmt.Sprintf("%s|%s", child.ID, child.Status))
	}

	// Decide emptiness after filtering: a prefix match whose parent is a
	// different job must not produce a table consisting only of the header.
	if len(out) == 1 {
		c.Ui.Output("\nNo dispatched instances of parameterized job found")
		return nil
	}

	c.Ui.Output(c.Colorize().Color("\n[bold]Dispatched Jobs[reset]"))
	c.Ui.Output(formatList(out))
	return nil
}
// outputJobInfo prints information about the passed job when it is neither
// periodic nor parameterized: the summary, optionally the evaluations, any
// placement failures, pending rescheduling attempts, the latest deployment
// and the allocations. If a request fails, an error is returned.
func (c *JobStatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
	// Query the allocations
	jobAllocs, _, err := client.Jobs().Allocations(*job.ID, c.allAllocs, nil)
	if err != nil {
		return fmt.Errorf("Error querying job allocations: %s", err)
	}

	// Query the evaluations
	jobEvals, _, err := client.Jobs().Evaluations(*job.ID, nil)
	if err != nil {
		return fmt.Errorf("Error querying job evaluations: %s", err)
	}

	latestDeployment, _, err := client.Jobs().LatestDeployment(*job.ID, nil)
	if err != nil {
		return fmt.Errorf("Error querying latest job deployment: %s", err)
	}

	// Output the summary
	if err := c.outputJobSummary(client, job); err != nil {
		return err
	}

	// Determine latest evaluation with failures whose follow up hasn't
	// completed, this is done while formatting
	var latestFailedPlacement *api.Evaluation
	blockedEval := false

	// Format the evals
	evals := make([]string, len(jobEvals)+1)
	evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
	for i, eval := range jobEvals {
		// Only the first result of evalFailureStatus is used in the table.
		failures, _ := evalFailureStatus(eval)
		evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s",
			limit(eval.ID, c.length),
			eval.Priority,
			eval.TriggeredBy,
			eval.Status,
			failures,
		)

		if eval.Status == "blocked" {
			blockedEval = true
		}

		if len(eval.FailedTGAllocs) == 0 {
			// Skip evals without failures
			continue
		}

		if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
			latestFailedPlacement = eval
		}
	}

	if c.verbose || c.evals {
		c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]"))
		c.Ui.Output(formatList(evals))
	}

	// Placement failures are only reported while an evaluation is blocked.
	if blockedEval && latestFailedPlacement != nil {
		c.outputFailedPlacements(latestFailedPlacement)
	}

	// Propagate the result rather than silently dropping it, so any error the
	// helper reports reaches the caller.
	if err := c.outputReschedulingEvals(client, job, jobAllocs, c.length); err != nil {
		return err
	}

	if latestDeployment != nil {
		c.Ui.Output(c.Colorize().Color("\n[bold]Latest Deployment[reset]"))
		c.Ui.Output(c.Colorize().Color(c.formatDeployment(latestDeployment)))
	}

	// Format the allocs
	c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]"))
	c.Ui.Output(formatAllocListStubs(jobAllocs, c.verbose, c.length))
	return nil
}
// formatDeployment renders a deployment as key/value text containing its ID
// (truncated to c.length), status and description. When the deployment has
// task groups, a "Deployed" section produced by formatDeploymentGroups is
// appended. The result still contains color markup for the caller to resolve.
func (c *JobStatusCommand) formatDeployment(d *api.Deployment) string {
	header := formatKV([]string{
		fmt.Sprintf("ID|%s", limit(d.ID, c.length)),
		fmt.Sprintf("Status|%s", d.Status),
		fmt.Sprintf("Description|%s", d.StatusDescription),
	})

	// Nothing more to show without task groups.
	if len(d.TaskGroups) == 0 {
		return header
	}

	return header + "\n\n[bold]Deployed[reset]\n" + formatDeploymentGroups(d, c.length)
}
// formatAllocListStubs renders allocation list stubs as a table.
//
// In verbose mode the table includes the eval ID and node name and shows
// create/modify times via formatUnixNanoTime; otherwise those columns are
// omitted and times are shown relative to now via prettyTimeDiff. IDs are
// truncated to uuidLength. When stubs is empty a fixed "No allocations
// placed" message is returned instead of a table.
func formatAllocListStubs(stubs []*api.AllocationListStub, verbose bool, uuidLength int) string {
	if len(stubs) == 0 {
		return "No allocations placed"
	}

	allocs := make([]string, 0, len(stubs)+1)
	if verbose {
		allocs = append(allocs, "ID|Eval ID|Node ID|Node Name|Task Group|Version|Desired|Status|Created|Modified")
		for _, alloc := range stubs {
			allocs = append(allocs, fmt.Sprintf("%s|%s|%s|%s|%s|%d|%s|%s|%s|%s",
				limit(alloc.ID, uuidLength),
				limit(alloc.EvalID, uuidLength),
				limit(alloc.NodeID, uuidLength),
				alloc.NodeName,
				alloc.TaskGroup,
				alloc.JobVersion,
				alloc.DesiredStatus,
				alloc.ClientStatus,
				formatUnixNanoTime(alloc.CreateTime),
				formatUnixNanoTime(alloc.ModifyTime)))
		}
		return formatList(allocs)
	}

	// Take the reference time once so every row is measured against the same
	// instant.
	now := time.Now()
	allocs = append(allocs, "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified")
	for _, alloc := range stubs {
		allocs = append(allocs, fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s",
			limit(alloc.ID, uuidLength),
			limit(alloc.NodeID, uuidLength),
			alloc.TaskGroup,
			alloc.JobVersion,
			alloc.DesiredStatus,
			alloc.ClientStatus,
			prettyTimeDiff(time.Unix(0, alloc.CreateTime), now),
			prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now)))
	}
	return formatList(allocs)
}
// formatAllocList renders full allocations as a table.
//
// In verbose mode the table includes the eval ID and shows create/modify
// times via formatUnixNanoTime; otherwise the eval ID is omitted and times
// are shown relative to now via prettyTimeDiff. IDs are truncated to
// uuidLength. When allocations is empty a fixed "No allocations placed"
// message is returned instead of a table.
func formatAllocList(allocations []*api.Allocation, verbose bool, uuidLength int) string {
	if len(allocations) == 0 {
		return "No allocations placed"
	}

	// The job attached to an allocation and its Version are both pointers in
	// the api package. Dereference them explicitly: passing the pointer to %d
	// would print an address, and a missing job would panic. An unknown
	// version is shown as 0.
	jobVersion := func(alloc *api.Allocation) uint64 {
		if alloc.Job == nil || alloc.Job.Version == nil {
			return 0
		}
		return *alloc.Job.Version
	}

	allocs := make([]string, 0, len(allocations)+1)
	if verbose {
		allocs = append(allocs, "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified")
		for _, alloc := range allocations {
			allocs = append(allocs, fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s",
				limit(alloc.ID, uuidLength),
				limit(alloc.EvalID, uuidLength),
				limit(alloc.NodeID, uuidLength),
				alloc.TaskGroup,
				jobVersion(alloc),
				alloc.DesiredStatus,
				alloc.ClientStatus,
				formatUnixNanoTime(alloc.CreateTime),
				formatUnixNanoTime(alloc.ModifyTime)))
		}
		return formatList(allocs)
	}

	// Take the reference time once so every row is measured against the same
	// instant.
	now := time.Now()
	allocs = append(allocs, "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified")
	for _, alloc := range allocations {
		allocs = append(allocs, fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s",
			limit(alloc.ID, uuidLength),
			limit(alloc.NodeID, uuidLength),
			alloc.TaskGroup,
			jobVersion(alloc),
			alloc.DesiredStatus,
			alloc.ClientStatus,
			prettyTimeDiff(time.Unix(0, alloc.CreateTime), now),
			prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now)))
	}
	return formatList(allocs)
}
// outputJobSummary queries and prints the summary for the given job. Jobs
// that are neither periodic nor parameterized get a per-task-group table,
// sorted by task group name. A children summary is printed for periodic and
// parameterized jobs whenever one is available, and for other jobs only when
// its counts are non-zero. An error is returned if the summary query fails.
func (c *JobStatusCommand) outputJobSummary(client *api.Client, job *api.Job) error {
	summary, _, err := client.Jobs().Summary(*job.ID, nil)
	if err != nil {
		return fmt.Errorf("Error querying job summary: %s", err)
	}
	if summary == nil {
		return nil
	}

	isPeriodic := job.IsPeriodic()
	isParameterized := job.IsParameterized()

	// Per-task-group table for plain jobs.
	if !(isPeriodic || isParameterized) {
		c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]"))

		// Map iteration order is random; sort the names for stable output.
		names := make([]string, 0, len(summary.Summary))
		for name := range summary.Summary {
			names = append(names, name)
		}
		sort.Strings(names)

		rows := make([]string, 1, len(names)+1)
		rows[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost"
		for _, name := range names {
			counts := summary.Summary[name]
			rows = append(rows, fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d",
				name, counts.Queued, counts.Starting,
				counts.Running, counts.Failed,
				counts.Complete, counts.Lost,
			))
		}
		c.Ui.Output(formatList(rows))
	}

	children := summary.Children
	if children == nil {
		return nil
	}
	// Plain jobs only show the children summary when it has something in it.
	if !isParameterized && !isPeriodic && children.Sum() <= 0 {
		return nil
	}

	title := "\n[bold]Children Job Summary[reset]"
	if isParameterized {
		title = "\n[bold]Parameterized Job Summary[reset]"
	}
	c.Ui.Output(c.Colorize().Color(title))

	c.Ui.Output(formatList([]string{
		"Pending|Running|Dead",
		fmt.Sprintf("%d|%d|%d", children.Pending, children.Running, children.Dead),
	}))
	return nil
}
// outputReschedulingEvals displays, per task group, the ID and time of any
// delayed follow-up evaluation attached to that group's most recently
// modified allocation. In verbose mode the group's reschedule policy is
// shown as well. Nothing is printed when no follow-up evaluation is still
// pending. Evaluation lookups are best-effort, so the returned error is
// currently always nil.
func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.Job, allocListStubs []*api.AllocationListStub, uuidLength int) error {
	// Get the most recent alloc ID by task group
	mostRecentAllocs := make(map[string]*api.AllocationListStub)
	for _, alloc := range allocListStubs {
		a, ok := mostRecentAllocs[alloc.TaskGroup]
		if !ok || alloc.ModifyTime > a.ModifyTime {
			mostRecentAllocs[alloc.TaskGroup] = alloc
		}
	}

	// Collect the follow-up eval of each group's most recent alloc, if any.
	followUpEvalIds := make(map[string]string)
	taskGroups := make([]string, 0, len(mostRecentAllocs))
	for name, alloc := range mostRecentAllocs {
		if alloc.FollowupEvalID != "" {
			followUpEvalIds[name] = alloc.FollowupEvalID
			taskGroups = append(taskGroups, name)
		}
	}
	if len(followUpEvalIds) == 0 {
		return nil
	}
	sort.Strings(taskGroups)

	header := "Task Group|Eval ID|Eval Time"
	if c.verbose {
		header = "Task Group|Reschedule Policy|Eval ID|Eval Time"
	}
	delayedEvalInfos := []string{header}

	for _, taskGroup := range taskGroups {
		evalID := followUpEvalIds[taskGroup]
		evaluation, _, err := client.Evaluations().Info(evalID, nil)

		// Eval time is not critical output, so skip the row on errors, if the
		// wait time is not set, or if it is already in the past.
		if err != nil || evaluation == nil {
			continue
		}
		now := time.Now()
		if evaluation.WaitUntil.IsZero() || now.After(evaluation.WaitUntil) {
			continue
		}
		evalTime := prettyTimeDiff(evaluation.WaitUntil, now)

		if !c.verbose {
			delayedEvalInfos = append(delayedEvalInfos,
				fmt.Sprintf("%s|%s|%s", taskGroup, limit(evalID, uuidLength), evalTime))
			continue
		}

		// The allocation may belong to a task group that is no longer part of
		// the job (for example an older job version shown with -all-allocs),
		// in which case the lookup yields nil and no policy can be shown.
		var policy string
		if group := job.LookupTaskGroup(taskGroup); group != nil {
			policy = group.ReschedulePolicy.String()
		}
		delayedEvalInfos = append(delayedEvalInfos,
			fmt.Sprintf("%s|%s|%s|%s", taskGroup, policy, limit(evalID, uuidLength), evalTime))
	}

	// Only show this section if there are pending evals, i.e. something
	// besides the header row.
	if len(delayedEvalInfos) == 1 {
		return nil
	}

	c.Ui.Output(c.Colorize().Color("\n[bold]Future Rescheduling Attempts[reset]"))
	c.Ui.Output(formatList(delayedEvalInfos))
	return nil
}
// outputFailedPlacements prints the placement failure metrics recorded on
// the given evaluation, one section per task group in the order returned by
// sortedTaskGroupFromMetrics. At most maxFailedTGs groups are shown; when
// there are more, a hint to run eval-status is printed. It does nothing when
// the evaluation is nil or has no failed task group allocations.
func (c *JobStatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
	if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
		return
	}

	c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]"))

	names := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
	total := len(names)

	shown := total
	if shown > maxFailedTGs {
		shown = maxFailedTGs
	}

	for i := 0; i < shown; i++ {
		name := names[i]
		c.Ui.Output(fmt.Sprintf("Task Group %q:", name))
		c.Ui.Output(formatAllocMetrics(failedEval.FailedTGAllocs[name], false, " "))

		// Blank separator after every group except the very last one.
		if i != total-1 {
			c.Ui.Output("")
		}
	}

	if total > maxFailedTGs {
		c.Ui.Output(fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID))
	}
}
// createStatusListOutput renders a table with one row per job stub showing
// its ID, type, priority, status and submit date.
func createStatusListOutput(jobs []*api.JobListStub) string {
	rows := make([]string, 0, len(jobs)+1)
	rows = append(rows, "ID|Type|Priority|Status|Submit Date")

	for _, stub := range jobs {
		status := getStatusString(stub.Status, &stub.Stop)
		submitted := formatTime(time.Unix(0, stub.SubmitTime))
		rows = append(rows, fmt.Sprintf("%s|%s|%d|%s|%s",
			stub.ID, getTypeString(stub), stub.Priority, status, submitted))
	}

	return formatList(rows)
}
// getTypeString returns the job's type, suffixed with "/periodic" and/or
// "/parameterized" when the stub is flagged as such.
func getTypeString(job *api.JobListStub) string {
	suffix := ""
	if job.Periodic {
		suffix += "/periodic"
	}
	if job.ParameterizedJob {
		suffix += "/parameterized"
	}
	return job.Type + suffix
}
// getStatusString returns status unchanged, or status followed by
// " (stopped)" when stop is non-nil and true.
func getStatusString(status string, stop *bool) string {
	switch {
	case stop == nil, !*stop:
		// Cases are evaluated left to right, so *stop is only read when
		// stop is non-nil.
		return status
	default:
		return fmt.Sprintf("%s (stopped)", status)
	}
}