Improve DeadlineTime helper

parent b7c993f0e5
commit a37329189a
@@ -176,6 +176,9 @@ type Node struct {
 type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec
+
+	// DeadlineTime is the deadline time for the drain.
+	DeadlineTime time.Time
 }

 // DrainSpec describes a Node's drain behavior.
@@ -21,6 +21,10 @@ func TestClient(t testing.T, cb func(c *config.Config)) *Client {
		},
	}

+	// Loosen GC threshold
+	conf.GCDiskUsageThreshold = 98.0
+	conf.GCInodeUsageThreshold = 98.0
+
	// Tighten the fingerprinter timeouts
	if conf.Options == nil {
		conf.Options = make(map[string]string)
@@ -157,24 +157,41 @@ func (n *NodeDrainer) Run() {
	}
 }

+// getNextDeadline is a helper that takes a set of draining nodes and returns the
+// next deadline. It also returns a boolean if there is a deadline.
+func getNextDeadline(nodes map[string]*structs.Node) (time.Time, bool) {
+	var nextDeadline time.Time
+	found := false
+	for _, node := range nodes {
+		inf, d := node.DrainStrategy.DeadlineTime()
+		if !inf && (nextDeadline.IsZero() || d.Before(nextDeadline)) {
+			nextDeadline = d
+			found = true
+		}
+	}
+
+	return nextDeadline, found
+}
+
 // nodeDrainer is the core node draining main loop and should be started in a
 // goroutine when a server establishes leadership.
 func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) {
	nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state)

	// Wait for a node's drain deadline to expire
-	var nextDeadline time.Time
-	for _, node := range nodes {
-		if nextDeadline.IsZero() {
-			nextDeadline = node.DrainStrategy.DeadlineTime()
-			continue
-		}
-		if deadline := node.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) {
-			nextDeadline = deadline
-		}
-
-	}
+	nextDeadline, ok := getNextDeadline(nodes)
	deadlineTimer := time.NewTimer(time.Until(nextDeadline))
+	stopDeadlineTimer := func() {
+		if !deadlineTimer.Stop() {
+			select {
+			case <-deadlineTimer.C:
+			default:
+			}
+		}
+	}
+	if !ok {
+		stopDeadlineTimer()
+	}

	// Watch for nodes to start or stop draining
	nodeWatcher := newNodeWatcher(n.logger, nodes, nodesIndex, state)
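Note: the stopDeadlineTimer closure added above is the standard Go idiom for disarming a time.Timer whose tick may already be sitting in its channel; draining with a non-blocking receive keeps a later Reset from seeing a stale tick. A minimal standalone sketch of the same pattern (illustrative only, not part of this commit):

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		timer := time.NewTimer(10 * time.Millisecond)
		time.Sleep(20 * time.Millisecond) // let the timer fire

		// Stop reports false when the timer already fired; drain the
		// channel without blocking so the pending tick can't be
		// mistaken for the next deadline after Reset.
		if !timer.Stop() {
			select {
			case <-timer.C:
			default:
			}
		}

		timer.Reset(30 * time.Millisecond) // safe to rearm now
		fmt.Println("next tick:", <-timer.C)
	}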
@@ -197,33 +214,14 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)
			// update draining nodes
			n.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes))

-			// update deadline timer
-			changed := false
-			for _, n := range nodes {
-				if nextDeadline.IsZero() {
-					nextDeadline = n.DrainStrategy.DeadlineTime()
-					changed = true
-					continue
-				}
-
-				if deadline := n.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) {
-					nextDeadline = deadline
-					changed = true
-				}
-			}
-
-			// if changed reset the timer
-			if changed {
+			d, ok := getNextDeadline(nodes)
+			if ok && !nextDeadline.Equal(d) {
+				nextDeadline = d
				n.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline)
-				if !deadlineTimer.Stop() {
-					// timer may have been recv'd in a
-					// previous loop, so don't block
-					select {
-					case <-deadlineTimer.C:
-					default:
-					}
-				}
+				stopDeadlineTimer()
				deadlineTimer.Reset(time.Until(nextDeadline))
+			} else if !ok {
+				stopDeadlineTimer()
			}

		case jobs := <-jobWatcher.WaitCh():
@@ -275,7 +273,8 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

		// track number of allocs left on this node to be drained
		allocsLeft := false
-		deadlineReached := node.DrainStrategy.DeadlineTime().Before(now)
+		inf, deadline := node.DrainStrategy.DeadlineTime()
+		deadlineReached := !inf && deadline.Before(now)
		for _, alloc := range allocs {
			jobkey := jobKey{alloc.Namespace, alloc.JobID}

@@ -307,8 +306,13 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

			// Don't bother collecting system/batch jobs for nodes that haven't hit their deadline
			if job.Type != structs.JobTypeService && !deadlineReached {
-				n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s",
-					job.Type, job.Name, node.DrainStrategy.DeadlineTime().Sub(now))
+				if inf, d := node.DrainStrategy.DeadlineTime(); inf {
+					n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because node has an infinite deadline",
+						job.Type, job.Name)
+				} else {
+					n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s",
+						job.Type, job.Name, d.Sub(now))
+				}
				skipJob[jobkey] = struct{}{}
				continue
			}
@@ -370,7 +374,7 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

				tgKey := makeTaskGroupKey(alloc)

-				if node.DrainStrategy.DeadlineTime().Before(now) {
+				if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) {
					n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6])
					// Alloc's Node has reached its deadline
					stoplist.add(drainingJob.job, alloc)
@@ -494,7 +498,7 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc
		nodes[node.ID] = node

		// No point in tracking draining allocs as the deadline has been reached
-		if node.DrainStrategy.DeadlineTime().Before(now) {
+		if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) {
			continue
		}

@@ -59,6 +59,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
	serviceJob := mock.Job()
	serviceJob.Name = "service-job"
	serviceJob.Type = structs.JobTypeService
+	serviceJob.Constraints = nil
	serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{
		MaxParallel: 1,
		HealthCheck: structs.MigrateStrategyHealthStates,
@@ -76,6 +77,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
	systemJob := mock.SystemJob()
	systemJob.Name = "system-job"
	systemJob.Type = structs.JobTypeSystem
+	systemJob.Constraints = nil
	//FIXME hack until system job reschedule policy validation is fixed
	systemJob.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: time.Minute}
	systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver"
@@ -90,6 +92,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
	batchJob := mock.Job()
	batchJob.Name = "batch-job"
	batchJob.Type = structs.JobTypeBatch
+	batchJob.Constraints = nil
	batchJob.TaskGroups[0].Name = "batch-group"
	batchJob.TaskGroups[0].Migrate = nil
	batchJob.TaskGroups[0].Tasks[0].Name = "batch-task"
@@ -159,6 +162,11 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
				t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus)
			}
		}
+		if resp, err := rpc.EvalList(); err == nil {
+			for _, eval := range resp.Evaluations {
+				t.Logf("% #v\n", pretty.Formatter(eval))
+			}
+		}
		t.Fatalf("failed waiting for all allocs to start: %v", err)
	})

@@ -57,18 +57,18 @@ func (n *nodeWatcher) run(ctx context.Context) {
		for _, newNode := range newNodes {
			if existingNode, ok := n.nodes[newNode.ID]; ok {
				// Node was draining, see if it has changed
-				if !newNode.Drain {
+				if newNode.DrainStrategy == nil {
					// Node stopped draining
					delete(n.nodes, newNode.ID)
					changed = true
-				} else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) {
+				} else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) {
					// Update deadline
					n.nodes[newNode.ID] = newNode
					changed = true
				}
			} else {
				// Node was not draining
-				if newNode.Drain {
+				if newNode.DrainStrategy != nil {
					// Node started draining
					n.nodes[newNode.ID] = newNode
					changed = true
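Note: with this change a nil DrainStrategy pointer is the sole signal that a node is not draining, replacing checks of the old boolean Drain field. A hedged sketch of the three transitions the loop above distinguishes, using simplified stand-in types rather than the real Nomad structs:

	package watcher

	import "time"

	// Simplified stand-ins; the real types live in nomad/structs.
	type strategy struct{ deadline time.Time }

	type node struct {
		ID    string
		Drain *strategy // nil means the node is not draining
	}

	// diffNodes reports whether the tracked set changed: a node can stop
	// draining, change its drain strategy, or start draining.
	func diffNodes(tracked map[string]*node, updated []*node) (changed bool) {
		for _, n := range updated {
			prev, ok := tracked[n.ID]
			switch {
			case ok && n.Drain == nil: // stopped draining
				delete(tracked, n.ID)
				changed = true
			case ok && !n.Drain.deadline.Equal(prev.Drain.deadline): // spec changed
				tracked[n.ID] = n
				changed = true
			case !ok && n.Drain != nil: // started draining
				tracked[n.ID] = n
				changed = true
			}
		}
		return changed
	}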
@@ -443,6 +443,11 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
		}
	}

+	// Mark the deadline time
+	if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 {
+		args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline)
+	}
+
	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
	if err != nil {
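Note: stamping ForceDeadline here converts the user-supplied relative Deadline duration into an absolute wall-clock time exactly once, before the Raft apply, so every server replaying the log sees the same timestamp. A small sketch of that conversion (hypothetical helper name, simplified struct):

	package drain

	import "time"

	type DrainStrategy struct {
		Deadline      time.Duration // user-supplied, relative
		ForceDeadline time.Time     // stamped once when the drain is applied
	}

	// markDeadline converts a positive relative deadline into an absolute
	// one; zero and negative durations are left untouched since they encode
	// "infinite" and "force" respectively.
	func markDeadline(s *DrainStrategy, now time.Time) {
		if s != nil && s.Deadline.Nanoseconds() > 0 {
			s.ForceDeadline = now.Add(s.Deadline)
		}
	}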
@@ -1214,9 +1214,9 @@ type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

-	// StartTime as nanoseconds since Unix epoch indicating when a drain
-	// began for deadline calcuations.
-	StartTime int64
+	// ForceDeadline is the deadline time for the drain after which drains will
+	// be forced
+	ForceDeadline time.Time
 }

 func (d *DrainStrategy) Copy() *DrainStrategy {
@@ -1229,16 +1229,47 @@ func (d *DrainStrategy) Copy() *DrainStrategy {
	return nd
 }

-// DeadlineTime returns the Time this drain's deadline will be reached or the
-// zero value for Time if DrainStrategy is nil or Duration is <= 0.
-func (d *DrainStrategy) DeadlineTime() time.Time {
+// DeadlineTime returns a boolean whether the drain strategy allows an infinite
+// duration or otherwise the deadline time. The force drain is captured by the
+// deadline time being in the past.
+func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
+	// Treat the nil case as a force drain so during an upgrade where a node may
+	// not have a drain strategy but has Drain set to true, it is treated as a
+	// force to mimick old behavior.
	if d == nil {
-		return time.Time{}
+		return false, time.Time{}
	}
-	if d.Deadline <= 0 {
-		return time.Time{}
+
+	ns := d.Deadline.Nanoseconds()
+	switch {
+	case ns < 0: // Force
+		return false, time.Time{}
+	case ns == 0: // Infinite
+		return true, time.Time{}
+	default:
+		return false, d.ForceDeadline
	}
-	return time.Unix(0, d.StartTime).Add(d.Deadline)
+}
+
+func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
+	if d == nil && o == nil {
+		return true
+	} else if o != nil && d == nil {
+		return false
+	} else if d != nil && o == nil {
+		return false
+	}
+
+	// Compare values
+	if d.ForceDeadline != o.ForceDeadline {
+		return false
+	} else if d.Deadline != o.Deadline {
+		return false
+	} else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
+		return false
+	}
+
+	return true
 }

 // Node is a representation of a schedulable client node
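Note: the new DeadlineTime contract encodes three cases in the sign of Deadline: negative forces an immediate drain (deadline in the past), zero means no deadline at all, and positive defers to the pre-computed ForceDeadline. A runnable illustration of that mapping against a simplified copy of the struct (an assumption for demonstration, not the real nomad/structs package):

	package main

	import (
		"fmt"
		"time"
	)

	type DrainStrategy struct {
		Deadline      time.Duration
		ForceDeadline time.Time
	}

	// deadlineTime mirrors the semantics introduced by this commit,
	// including the nil case behaving like a force drain for upgrades.
	func deadlineTime(d *DrainStrategy) (infinite bool, deadline time.Time) {
		if d == nil {
			return false, time.Time{}
		}
		switch ns := d.Deadline.Nanoseconds(); {
		case ns < 0: // force
			return false, time.Time{}
		case ns == 0: // infinite
			return true, time.Time{}
		default:
			return false, d.ForceDeadline
		}
	}

	func main() {
		forced := time.Now().Add(time.Hour)
		for _, s := range []*DrainStrategy{
			nil,
			{Deadline: -1 * time.Second},
			{Deadline: 0},
			{Deadline: time.Hour, ForceDeadline: forced},
		} {
			inf, t := deadlineTime(s)
			fmt.Printf("infinite=%-5v deadline=%v\n", inf, t)
		}
	}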