Merge pull request #659 from hashicorp/f-periodic-status
Improve nomad status on periodic jobs
This commit is contained in:
commit
1ab849049c
|
@ -144,6 +144,7 @@ type Job struct {
|
||||||
// jobs during list operations.
|
// jobs during list operations.
|
||||||
type JobListStub struct {
|
type JobListStub struct {
|
||||||
ID string
|
ID string
|
||||||
|
ParentID string
|
||||||
Name string
|
Name string
|
||||||
Type string
|
Type string
|
||||||
Priority int
|
Priority int
|
||||||
|
|
|
@ -61,5 +61,5 @@ func (c *EvalMonitorCommand) Run(args []string) int {
|
||||||
|
|
||||||
// Start monitoring
|
// Start monitoring
|
||||||
mon := newMonitor(c.Ui, client)
|
mon := newMonitor(c.Ui, client)
|
||||||
return mon.monitor(evalID)
|
return mon.monitor(evalID, true)
|
||||||
}
|
}
|
||||||
|
|
|
@ -161,14 +161,15 @@ func (m *monitor) update(update *evalState) {
|
||||||
|
|
||||||
// monitor is used to start monitoring the given evaluation ID. It
|
// monitor is used to start monitoring the given evaluation ID. It
|
||||||
// writes output directly to the monitor's ui, and returns the
|
// writes output directly to the monitor's ui, and returns the
|
||||||
// exit code for the command.
|
// exit code for the command. If allowPrefix is false, monitor will only accept
|
||||||
|
// exact matching evalIDs.
|
||||||
//
|
//
|
||||||
// The return code will be 0 on successful evaluation. If there are
|
// The return code will be 0 on successful evaluation. If there are
|
||||||
// problems scheduling the job (impossible constraints, resources
|
// problems scheduling the job (impossible constraints, resources
|
||||||
// exhausted, etc), then the return code will be 2. For any other
|
// exhausted, etc), then the return code will be 2. For any other
|
||||||
// failures (API connectivity, internal errors, etc), the return code
|
// failures (API connectivity, internal errors, etc), the return code
|
||||||
// will be 1.
|
// will be 1.
|
||||||
func (m *monitor) monitor(evalID string) int {
|
func (m *monitor) monitor(evalID string, allowPrefix bool) int {
|
||||||
// Track if we encounter a scheduling failure. This can only be
|
// Track if we encounter a scheduling failure. This can only be
|
||||||
// detected while querying allocations, so we use this bool to
|
// detected while querying allocations, so we use this bool to
|
||||||
// carry that status into the return code.
|
// carry that status into the return code.
|
||||||
|
@ -182,6 +183,11 @@ func (m *monitor) monitor(evalID string) int {
|
||||||
// Query the evaluation
|
// Query the evaluation
|
||||||
eval, _, err := m.client.Evaluations().Info(evalID, nil)
|
eval, _, err := m.client.Evaluations().Info(evalID, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if !allowPrefix {
|
||||||
|
m.ui.Error(fmt.Sprintf("No evaluation with id %q found", evalID))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
evals, _, err := m.client.Evaluations().PrefixList(evalID)
|
evals, _, err := m.client.Evaluations().PrefixList(evalID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.ui.Error(fmt.Sprintf("Error reading evaluation: %s", err))
|
m.ui.Error(fmt.Sprintf("Error reading evaluation: %s", err))
|
||||||
|
@ -279,7 +285,7 @@ func (m *monitor) monitor(evalID string) int {
|
||||||
|
|
||||||
// Reset the state and monitor the new eval
|
// Reset the state and monitor the new eval
|
||||||
m.state = newEvalState()
|
m.state = newEvalState()
|
||||||
return m.monitor(eval.NextEval)
|
return m.monitor(eval.NextEval, allowPrefix)
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
|
@ -250,7 +250,7 @@ func TestMonitor_Monitor(t *testing.T) {
|
||||||
doneCh := make(chan struct{})
|
doneCh := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
defer close(doneCh)
|
defer close(doneCh)
|
||||||
code = mon.monitor(evalID)
|
code = mon.monitor(evalID, false)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait for completion
|
// Wait for completion
|
||||||
|
@ -296,7 +296,7 @@ func TestMonitor_MonitorWithPrefix(t *testing.T) {
|
||||||
doneCh := make(chan struct{})
|
doneCh := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
defer close(doneCh)
|
defer close(doneCh)
|
||||||
code = mon.monitor(evalID[:4])
|
code = mon.monitor(evalID[:4], true)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait for completion
|
// Wait for completion
|
||||||
|
|
|
@ -128,7 +128,7 @@ func (c *RunCommand) Run(args []string) int {
|
||||||
|
|
||||||
// Detach was not specified, so start monitoring
|
// Detach was not specified, so start monitoring
|
||||||
mon := newMonitor(c.Ui, client)
|
mon := newMonitor(c.Ui, client)
|
||||||
return mon.monitor(evalID)
|
return mon.monitor(evalID, false)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -151,56 +151,110 @@ func (c *StatusCommand) Run(args []string) int {
|
||||||
|
|
||||||
c.Ui.Output(formatKV(basic))
|
c.Ui.Output(formatKV(basic))
|
||||||
|
|
||||||
if !periodic && !short {
|
// Exit early
|
||||||
var evals, allocs []string
|
if short {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
// Query the evaluations
|
// Print periodic job information
|
||||||
jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
|
if periodic {
|
||||||
if err != nil {
|
if err := c.outputPeriodicInfo(client, job); err != nil {
|
||||||
c.Ui.Error(fmt.Sprintf("Error querying job evaluations: %s", err))
|
c.Ui.Error(err.Error())
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query the allocations
|
return 0
|
||||||
jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil)
|
}
|
||||||
if err != nil {
|
|
||||||
c.Ui.Error(fmt.Sprintf("Error querying job allocations: %s", err))
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format the evals
|
if err := c.outputJobInfo(client, job); err != nil {
|
||||||
evals = make([]string, len(jobEvals)+1)
|
c.Ui.Error(err.Error())
|
||||||
evals[0] = "ID|Priority|TriggeredBy|Status"
|
return 1
|
||||||
for i, eval := range jobEvals {
|
|
||||||
evals[i+1] = fmt.Sprintf("%s|%d|%s|%s",
|
|
||||||
eval.ID,
|
|
||||||
eval.Priority,
|
|
||||||
eval.TriggeredBy,
|
|
||||||
eval.Status)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format the allocs
|
|
||||||
allocs = make([]string, len(jobAllocs)+1)
|
|
||||||
allocs[0] = "ID|EvalID|NodeID|TaskGroup|Desired|Status"
|
|
||||||
for i, alloc := range jobAllocs {
|
|
||||||
allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
|
|
||||||
alloc.ID,
|
|
||||||
alloc.EvalID,
|
|
||||||
alloc.NodeID,
|
|
||||||
alloc.TaskGroup,
|
|
||||||
alloc.DesiredStatus,
|
|
||||||
alloc.ClientStatus)
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Ui.Output("\n==> Evaluations")
|
|
||||||
c.Ui.Output(formatList(evals))
|
|
||||||
c.Ui.Output("\n==> Allocations")
|
|
||||||
c.Ui.Output(formatList(allocs))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// outputPeriodicInfo prints information about the passed periodic job. If a
|
||||||
|
// request fails, an error is returned.
|
||||||
|
func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
|
||||||
|
// Generate the prefix that matches launched jobs from the periodic job.
|
||||||
|
prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix)
|
||||||
|
children, _, err := client.Jobs().PrefixList(prefix)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error querying job: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(children) == 0 {
|
||||||
|
c.Ui.Output("\nNo previously launched jobs")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]string, 1)
|
||||||
|
out[0] = "ID|Status"
|
||||||
|
for _, child := range children {
|
||||||
|
// Ensure that we are only showing jobs whose parent is the requested
|
||||||
|
// job.
|
||||||
|
if child.ParentID != job.ID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, fmt.Sprintf("%s|%s",
|
||||||
|
child.ID,
|
||||||
|
child.Status))
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Ui.Output(fmt.Sprintf("\nPreviously launched jobs:\n%s", formatList(out)))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// outputJobInfo prints information about the passed non-periodic job. If a
|
||||||
|
// request fails, an error is returned.
|
||||||
|
func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
|
||||||
|
var evals, allocs []string
|
||||||
|
|
||||||
|
// Query the evaluations
|
||||||
|
jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error querying job evaluations: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query the allocations
|
||||||
|
jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error querying job allocations: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format the evals
|
||||||
|
evals = make([]string, len(jobEvals)+1)
|
||||||
|
evals[0] = "ID|Priority|TriggeredBy|Status"
|
||||||
|
for i, eval := range jobEvals {
|
||||||
|
evals[i+1] = fmt.Sprintf("%s|%d|%s|%s",
|
||||||
|
eval.ID,
|
||||||
|
eval.Priority,
|
||||||
|
eval.TriggeredBy,
|
||||||
|
eval.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format the allocs
|
||||||
|
allocs = make([]string, len(jobAllocs)+1)
|
||||||
|
allocs[0] = "ID|EvalID|NodeID|TaskGroup|Desired|Status"
|
||||||
|
for i, alloc := range jobAllocs {
|
||||||
|
allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
|
||||||
|
alloc.ID,
|
||||||
|
alloc.EvalID,
|
||||||
|
alloc.NodeID,
|
||||||
|
alloc.TaskGroup,
|
||||||
|
alloc.DesiredStatus,
|
||||||
|
alloc.ClientStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Ui.Output("\n==> Evaluations")
|
||||||
|
c.Ui.Output(formatList(evals))
|
||||||
|
c.Ui.Output("\n==> Allocations")
|
||||||
|
c.Ui.Output(formatList(allocs))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// convertApiJob is used to take a *api.Job and convert it to a *structs.Job.
|
// convertApiJob is used to take a *api.Job and convert it to a *structs.Job.
|
||||||
// This function is just a hammer and probably needs to be revisited.
|
// This function is just a hammer and probably needs to be revisited.
|
||||||
func convertApiJob(in *api.Job) (*structs.Job, error) {
|
func convertApiJob(in *api.Job) (*structs.Job, error) {
|
||||||
|
|
|
@ -104,6 +104,11 @@ func (c *StopCommand) Run(args []string) int {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are stopping a periodic job there won't be an evalID.
|
||||||
|
if evalID == "" {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
if detach {
|
if detach {
|
||||||
c.Ui.Output(evalID)
|
c.Ui.Output(evalID)
|
||||||
return 0
|
return 0
|
||||||
|
@ -111,5 +116,5 @@ func (c *StopCommand) Run(args []string) int {
|
||||||
|
|
||||||
// Start monitoring the stop eval
|
// Start monitoring the stop eval
|
||||||
mon := newMonitor(c.Ui, client)
|
mon := newMonitor(c.Ui, client)
|
||||||
return mon.monitor(evalID)
|
return mon.monitor(evalID, false)
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,12 +12,6 @@ import (
|
||||||
"github.com/hashicorp/nomad/nomad/structs"
|
"github.com/hashicorp/nomad/nomad/structs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
// The string appended to the periodic jobs ID when launching derived
|
|
||||||
// instances of it.
|
|
||||||
JobLaunchSuffix = "/periodic-"
|
|
||||||
)
|
|
||||||
|
|
||||||
// PeriodicDispatch is used to track and launch periodic jobs. It maintains the
|
// PeriodicDispatch is used to track and launch periodic jobs. It maintains the
|
||||||
// set of periodic jobs and creates derived jobs and evaluations per
|
// set of periodic jobs and creates derived jobs and evaluations per
|
||||||
// instantiation which is determined by the periodic spec.
|
// instantiation which is determined by the periodic spec.
|
||||||
|
@ -85,7 +79,7 @@ func (s *Server) DispatchJob(job *structs.Job) error {
|
||||||
// RunningChildren checks whether the passed job has any running children.
|
// RunningChildren checks whether the passed job has any running children.
|
||||||
func (s *Server) RunningChildren(job *structs.Job) (bool, error) {
|
func (s *Server) RunningChildren(job *structs.Job) (bool, error) {
|
||||||
state := s.fsm.State()
|
state := s.fsm.State()
|
||||||
prefix := fmt.Sprintf("%s%s", job.ID, JobLaunchSuffix)
|
prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix)
|
||||||
iter, err := state.JobsByIDPrefix(prefix)
|
iter, err := state.JobsByIDPrefix(prefix)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
|
@ -418,18 +412,18 @@ func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) (
|
||||||
// derivedJobID returns a job ID based on the parent periodic job and the launch
|
// derivedJobID returns a job ID based on the parent periodic job and the launch
|
||||||
// time.
|
// time.
|
||||||
func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string {
|
func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string {
|
||||||
return fmt.Sprintf("%s%s%d", periodicJob.ID, JobLaunchSuffix, time.Unix())
|
return fmt.Sprintf("%s%s%d", periodicJob.ID, structs.PeriodicLaunchSuffix, time.Unix())
|
||||||
}
|
}
|
||||||
|
|
||||||
// LaunchTime returns the launch time of the job. This is only valid for
|
// LaunchTime returns the launch time of the job. This is only valid for
|
||||||
// jobs created by PeriodicDispatch and will otherwise return an error.
|
// jobs created by PeriodicDispatch and will otherwise return an error.
|
||||||
func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) {
|
func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) {
|
||||||
index := strings.LastIndex(jobID, JobLaunchSuffix)
|
index := strings.LastIndex(jobID, structs.PeriodicLaunchSuffix)
|
||||||
if index == -1 {
|
if index == -1 {
|
||||||
return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID)
|
return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID)
|
||||||
}
|
}
|
||||||
|
|
||||||
launch, err := strconv.Atoi(jobID[index+len(JobLaunchSuffix):])
|
launch, err := strconv.Atoi(jobID[index+len(structs.PeriodicLaunchSuffix):])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID)
|
return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID)
|
||||||
}
|
}
|
||||||
|
|
|
@ -517,7 +517,7 @@ func TestPeriodicHeap_Order(t *testing.T) {
|
||||||
func deriveChildJob(parent *structs.Job) *structs.Job {
|
func deriveChildJob(parent *structs.Job) *structs.Job {
|
||||||
childjob := mock.Job()
|
childjob := mock.Job()
|
||||||
childjob.ParentID = parent.ID
|
childjob.ParentID = parent.ID
|
||||||
childjob.ID = fmt.Sprintf("%s%s%v", parent.ID, JobLaunchSuffix, time.Now().Unix())
|
childjob.ID = fmt.Sprintf("%s%s%v", parent.ID, structs.PeriodicLaunchSuffix, time.Now().Unix())
|
||||||
return childjob
|
return childjob
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -894,6 +894,7 @@ func (j *Job) LookupTaskGroup(name string) *TaskGroup {
|
||||||
func (j *Job) Stub() *JobListStub {
|
func (j *Job) Stub() *JobListStub {
|
||||||
return &JobListStub{
|
return &JobListStub{
|
||||||
ID: j.ID,
|
ID: j.ID,
|
||||||
|
ParentID: j.ParentID,
|
||||||
Name: j.Name,
|
Name: j.Name,
|
||||||
Type: j.Type,
|
Type: j.Type,
|
||||||
Priority: j.Priority,
|
Priority: j.Priority,
|
||||||
|
@ -913,6 +914,7 @@ func (j *Job) IsPeriodic() bool {
|
||||||
// for the job list
|
// for the job list
|
||||||
type JobListStub struct {
|
type JobListStub struct {
|
||||||
ID string
|
ID string
|
||||||
|
ParentID string
|
||||||
Name string
|
Name string
|
||||||
Type string
|
Type string
|
||||||
Priority int
|
Priority int
|
||||||
|
@ -1023,6 +1025,12 @@ func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
|
||||||
return time.Time{}
|
return time.Time{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// PeriodicLaunchSuffix is the string appended to the periodic job's ID
|
||||||
|
// when launching derived instances of it.
|
||||||
|
PeriodicLaunchSuffix = "/periodic-"
|
||||||
|
)
|
||||||
|
|
||||||
// PeriodicLaunch tracks the last launch time of a periodic job.
|
// PeriodicLaunch tracks the last launch time of a periodic job.
|
||||||
type PeriodicLaunch struct {
|
type PeriodicLaunch struct {
|
||||||
ID string // ID of the periodic job.
|
ID string // ID of the periodic job.
|
||||||
|
|
Loading…
Reference in a new issue