Improve DeadlineTime helper
commit a37329189a
parent b7c993f0e5
@@ -176,6 +176,9 @@ type Node struct {
 type DrainStrategy struct {
     // DrainSpec is the user declared drain specification
     DrainSpec
+
+    // DeadlineTime is the deadline time for the drain.
+    DeadlineTime time.Time
 }

 // DrainSpec describes a Node's drain behavior.
@@ -21,6 +21,10 @@ func TestClient(t testing.T, cb func(c *config.Config)) *Client {
         },
     }

+    // Loosen GC threshold
+    conf.GCDiskUsageThreshold = 98.0
+    conf.GCInodeUsageThreshold = 98.0
+
     // Tighten the fingerprinter timeouts
     if conf.Options == nil {
         conf.Options = make(map[string]string)
@@ -157,24 +157,41 @@ func (n *NodeDrainer) Run() {
     }
 }

+// getNextDeadline is a helper that takes a set of draining nodes and returns the
+// next deadline. It also returns a boolean indicating whether a deadline was found.
+func getNextDeadline(nodes map[string]*structs.Node) (time.Time, bool) {
+    var nextDeadline time.Time
+    found := false
+    for _, node := range nodes {
+        inf, d := node.DrainStrategy.DeadlineTime()
+        if !inf && (nextDeadline.IsZero() || d.Before(nextDeadline)) {
+            nextDeadline = d
+            found = true
+        }
+    }
+
+    return nextDeadline, found
+}
+
 // nodeDrainer is the core node draining main loop and should be started in a
 // goroutine when a server establishes leadership.
 func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) {
     nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state)

     // Wait for a node's drain deadline to expire
-    var nextDeadline time.Time
-    for _, node := range nodes {
-        if nextDeadline.IsZero() {
-            nextDeadline = node.DrainStrategy.DeadlineTime()
-            continue
-        }
-        if deadline := node.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) {
-            nextDeadline = deadline
-        }
-
-    }
+    nextDeadline, ok := getNextDeadline(nodes)
     deadlineTimer := time.NewTimer(time.Until(nextDeadline))
+    stopDeadlineTimer := func() {
+        if !deadlineTimer.Stop() {
+            select {
+            case <-deadlineTimer.C:
+            default:
+            }
+        }
+    }
+    if !ok {
+        stopDeadlineTimer()
+    }

     // Watch for nodes to start or stop draining
     nodeWatcher := newNodeWatcher(n.logger, nodes, nodesIndex, state)
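Note: the following is a minimal, self-contained sketch of the earliest-finite-deadline selection that getNextDeadline performs. The drain type and nextDeadline helper below are illustrative stand-ins, not the Nomad structs package.

package main

import (
	"fmt"
	"time"
)

// drain is a stand-in for *structs.DrainStrategy: inf marks an infinite
// deadline, otherwise deadline holds the absolute cutoff.
type drain struct {
	inf      bool
	deadline time.Time
}

// nextDeadline returns the earliest finite deadline across the given drains
// and whether any finite deadline was found.
func nextDeadline(drains map[string]drain) (time.Time, bool) {
	var next time.Time
	found := false
	for _, d := range drains {
		if !d.inf && (next.IsZero() || d.deadline.Before(next)) {
			next = d.deadline
			found = true
		}
	}
	return next, found
}

func main() {
	now := time.Now()
	drains := map[string]drain{
		"node-a": {deadline: now.Add(2 * time.Hour)},
		"node-b": {deadline: now.Add(30 * time.Minute)},
		"node-c": {inf: true}, // never force-drained
	}
	d, ok := nextDeadline(drains)
	fmt.Println(ok, d.Sub(now)) // true 30m0s
}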
@@ -197,33 +214,14 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)
             // update draining nodes
             n.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes))

-            // update deadline timer
-            changed := false
-            for _, n := range nodes {
-                if nextDeadline.IsZero() {
-                    nextDeadline = n.DrainStrategy.DeadlineTime()
-                    changed = true
-                    continue
-                }
-
-                if deadline := n.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) {
-                    nextDeadline = deadline
-                    changed = true
-                }
-            }
-
-            // if changed reset the timer
-            if changed {
+            d, ok := getNextDeadline(nodes)
+            if ok && !nextDeadline.Equal(d) {
+                nextDeadline = d
                 n.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline)
-                if !deadlineTimer.Stop() {
-                    // timer may have been recv'd in a
-                    // previous loop, so don't block
-                    select {
-                    case <-deadlineTimer.C:
-                    default:
-                    }
-                }
+                stopDeadlineTimer()
                 deadlineTimer.Reset(time.Until(nextDeadline))
+            } else if !ok {
+                stopDeadlineTimer()
             }

         case jobs := <-jobWatcher.WaitCh():
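Note: the stopDeadlineTimer closure introduced above follows Go's standard stop-then-drain idiom for reusing a time.Timer: Stop the timer and, if it already fired, drain its channel so a later Reset starts from a clean state. A runnable, standalone sketch of that pattern (the names below are illustrative, not taken from the drainer):

package main

import (
	"fmt"
	"time"
)

func main() {
	// An initial deadline, roughly playing the role of nextDeadline.
	deadline := time.Now().Add(50 * time.Millisecond)
	timer := time.NewTimer(time.Until(deadline))

	// stopTimer: stop the timer and drain the channel if it already fired,
	// so a subsequent Reset cannot observe a stale tick.
	stopTimer := func() {
		if !timer.Stop() {
			select {
			case <-timer.C:
			default:
			}
		}
	}

	// A new, earlier deadline arrives: stop, drain, then reset.
	deadline = time.Now().Add(10 * time.Millisecond)
	stopTimer()
	timer.Reset(time.Until(deadline))

	<-timer.C
	fmt.Println("deadline reached")
}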
@@ -275,7 +273,8 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

             // track number of allocs left on this node to be drained
             allocsLeft := false
-            deadlineReached := node.DrainStrategy.DeadlineTime().Before(now)
+            inf, deadline := node.DrainStrategy.DeadlineTime()
+            deadlineReached := !inf && deadline.Before(now)
             for _, alloc := range allocs {
                 jobkey := jobKey{alloc.Namespace, alloc.JobID}

@@ -307,8 +306,13 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

                 // Don't bother collecting system/batch jobs for nodes that haven't hit their deadline
                 if job.Type != structs.JobTypeService && !deadlineReached {
-                    n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s",
-                        job.Type, job.Name, node.DrainStrategy.DeadlineTime().Sub(now))
+                    if inf, d := node.DrainStrategy.DeadlineTime(); inf {
+                        n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because node has an infinite deadline",
+                            job.Type, job.Name)
+                    } else {
+                        n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s",
+                            job.Type, job.Name, d.Sub(now))
+                    }
                     skipJob[jobkey] = struct{}{}
                     continue
                 }
@@ -370,7 +374,7 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore)

                 tgKey := makeTaskGroupKey(alloc)

-                if node.DrainStrategy.DeadlineTime().Before(now) {
+                if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) {
                     n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6])
                     // Alloc's Node has reached its deadline
                     stoplist.add(drainingJob.job, alloc)
@@ -494,7 +498,7 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc
         nodes[node.ID] = node

         // No point in tracking draining allocs as the deadline has been reached
-        if node.DrainStrategy.DeadlineTime().Before(now) {
+        if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) {
             continue
         }

@@ -59,6 +59,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
     serviceJob := mock.Job()
+    serviceJob.Name = "service-job"
     serviceJob.Type = structs.JobTypeService
     serviceJob.Constraints = nil
     serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{
         MaxParallel: 1,
         HealthCheck: structs.MigrateStrategyHealthStates,
@@ -76,6 +77,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
     systemJob := mock.SystemJob()
+    systemJob.Name = "system-job"
     systemJob.Type = structs.JobTypeSystem
     systemJob.Constraints = nil
     //FIXME hack until system job reschedule policy validation is fixed
     systemJob.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: time.Minute}
     systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver"
@@ -90,6 +92,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
     batchJob := mock.Job()
+    batchJob.Name = "batch-job"
     batchJob.Type = structs.JobTypeBatch
     batchJob.Constraints = nil
     batchJob.TaskGroups[0].Name = "batch-group"
     batchJob.TaskGroups[0].Migrate = nil
     batchJob.TaskGroups[0].Tasks[0].Name = "batch-task"
@@ -159,6 +162,11 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) {
                 t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus)
             }
         }
+        if resp, err := rpc.EvalList(); err == nil {
+            for _, eval := range resp.Evaluations {
+                t.Logf("% #v\n", pretty.Formatter(eval))
+            }
+        }
         t.Fatalf("failed waiting for all allocs to start: %v", err)
     })

@@ -57,18 +57,18 @@ func (n *nodeWatcher) run(ctx context.Context) {
         for _, newNode := range newNodes {
             if existingNode, ok := n.nodes[newNode.ID]; ok {
                 // Node was draining, see if it has changed
-                if !newNode.Drain {
+                if newNode.DrainStrategy == nil {
                     // Node stopped draining
                     delete(n.nodes, newNode.ID)
                     changed = true
-                } else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) {
+                } else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) {
                     // Update deadline
                     n.nodes[newNode.ID] = newNode
                     changed = true
                 }
             } else {
                 // Node was not draining
-                if newNode.Drain {
+                if newNode.DrainStrategy != nil {
                     // Node started draining
                     n.nodes[newNode.ID] = newNode
                     changed = true
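Note: with this change the watcher keys drain state off whether DrainStrategy is nil and compares strategies with the nil-safe Equal added further down, rather than the Drain bool and deadline times. A self-contained sketch of that bookkeeping, using stand-in types rather than structs.Node:

package main

import "fmt"

// strategy stands in for *structs.DrainStrategy; only its presence (nil vs
// non-nil) and a nil-safe Equal matter for this watcher-style bookkeeping.
type strategy struct {
	deadlineMinutes int
}

func (s *strategy) Equal(o *strategy) bool {
	if s == nil || o == nil {
		return s == o
	}
	return s.deadlineMinutes == o.deadlineMinutes
}

func main() {
	// Nodes currently tracked as draining.
	tracked := map[string]*strategy{
		"node-a": {deadlineMinutes: 60},
		"node-b": {deadlineMinutes: 60},
	}
	// Latest state: node-a changed, node-b stopped, node-c started.
	updates := map[string]*strategy{
		"node-a": {deadlineMinutes: 30},
		"node-b": nil,
		"node-c": {deadlineMinutes: 15},
	}

	for id, newStrategy := range updates {
		old, wasDraining := tracked[id]
		switch {
		case wasDraining && newStrategy == nil:
			delete(tracked, id) // node stopped draining
			fmt.Println(id, "stopped draining")
		case wasDraining && !newStrategy.Equal(old):
			tracked[id] = newStrategy // drain spec or deadline changed
			fmt.Println(id, "drain updated")
		case !wasDraining && newStrategy != nil:
			tracked[id] = newStrategy // node started draining
			fmt.Println(id, "started draining")
		}
	}
}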
@@ -443,6 +443,11 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
         }
     }

+    // Mark the deadline time
+    if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 {
+        args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline)
+    }
+
     // Commit this update via Raft
     _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
     if err != nil {
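Note: UpdateDrain now converts the caller's relative Deadline into an absolute ForceDeadline before the change is committed. A small standalone sketch of that conversion, with a hypothetical request type standing in for the RPC args:

package main

import (
	"fmt"
	"time"
)

// drainRequest stands in for the RPC args: the caller supplies a relative
// Deadline and the server stamps an absolute ForceDeadline before committing.
type drainRequest struct {
	Deadline      time.Duration
	ForceDeadline time.Time
}

// markDeadline stamps an absolute deadline only for positive durations; zero
// (infinite) and negative (force) durations leave ForceDeadline unset.
func markDeadline(req *drainRequest, now time.Time) {
	if req.Deadline.Nanoseconds() > 0 {
		req.ForceDeadline = now.Add(req.Deadline)
	}
}

func main() {
	req := &drainRequest{Deadline: 2 * time.Hour}
	markDeadline(req, time.Now())
	fmt.Println(req.ForceDeadline.After(time.Now())) // true
}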
@@ -1214,9 +1214,9 @@ type DrainStrategy struct {
     // DrainSpec is the user declared drain specification
     DrainSpec

-    // StartTime as nanoseconds since Unix epoch indicating when a drain
-    // began for deadline calcuations.
-    StartTime int64
+    // ForceDeadline is the deadline time for the drain after which drains will
+    // be forced
+    ForceDeadline time.Time
 }

 func (d *DrainStrategy) Copy() *DrainStrategy {
@@ -1229,16 +1229,47 @@ func (d *DrainStrategy) Copy() *DrainStrategy {
     return nd
 }

-// DeadlineTime returns the Time this drain's deadline will be reached or the
-// zero value for Time if DrainStrategy is nil or Duration is <= 0.
-func (d *DrainStrategy) DeadlineTime() time.Time {
+// DeadlineTime returns whether the drain strategy allows an infinite duration,
+// or otherwise the deadline time. A force drain is captured by the deadline
+// time being in the past.
+func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
+    // Treat the nil case as a force drain so during an upgrade where a node may
+    // not have a drain strategy but has Drain set to true, it is treated as a
+    // force to mimic old behavior.
     if d == nil {
-        return time.Time{}
+        return false, time.Time{}
     }
-    if d.Deadline <= 0 {
-        return time.Time{}
+
+    ns := d.Deadline.Nanoseconds()
+    switch {
+    case ns < 0: // Force
+        return false, time.Time{}
+    case ns == 0: // Infinite
+        return true, time.Time{}
+    default:
+        return false, d.ForceDeadline
     }
-    return time.Unix(0, d.StartTime).Add(d.Deadline)
+}
+
+func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
+    if d == nil && o == nil {
+        return true
+    } else if o != nil && d == nil {
+        return false
+    } else if d != nil && o == nil {
+        return false
+    }
+
+    // Compare values
+    if d.ForceDeadline != o.ForceDeadline {
+        return false
+    } else if d.Deadline != o.Deadline {
+        return false
+    } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
+        return false
+    }
+
+    return true
 }

 // Node is a representation of a schedulable client node
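Note: the new DeadlineTime helper encodes three cases in one return pair: a negative Deadline means force (treated as already expired), zero means drain with no deadline, and a positive value resolves to the stored ForceDeadline. A standalone sketch of the same switch, assuming only the standard library (the deadlineTime function below is an illustration, not the structs method):

package main

import (
	"fmt"
	"time"
)

// deadlineTime reproduces the three-way semantics described above.
func deadlineTime(deadline time.Duration, forceDeadline time.Time) (infinite bool, t time.Time) {
	switch ns := deadline.Nanoseconds(); {
	case ns < 0: // Force: deadline is effectively in the past
		return false, time.Time{}
	case ns == 0: // Infinite: drain with no deadline
		return true, time.Time{}
	default: // Finite: use the absolute deadline stamped at drain time
		return false, forceDeadline
	}
}

func main() {
	force := time.Now().Add(time.Hour)
	for _, d := range []time.Duration{-1, 0, time.Hour} {
		inf, t := deadlineTime(d, force)
		fmt.Printf("deadline=%v infinite=%v at=%v\n", d, inf, t)
	}
}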