package drainer

import (
	"context"
	"log"
	"sync"
	"time"

	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"golang.org/x/time/rate"
)

var (
	// stateReadErrorDelay is the delay to apply before retrying reading state
	// when there is an error
	stateReadErrorDelay = 1 * time.Second
)

const (
	// LimitStateQueriesPerSecond is the number of state queries allowed per
	// second
	LimitStateQueriesPerSecond = 100.0

	// BatchUpdateInterval is how long we wait to batch updates
	BatchUpdateInterval = 1 * time.Second

	// NodeDeadlineCoalesceWindow is the duration in which deadlining nodes will
	// be coalesced together
	NodeDeadlineCoalesceWindow = 5 * time.Second
)

// RaftApplier contains methods for applying the raft requests required by the
// NodeDrainer.
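// The drainer depends only on this narrow interface; the concrete
// implementation (assumed to be a thin shim over the server's Raft apply
// path, which is not shown in this file) is supplied via NodeDrainerConfig.Raft.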
type RaftApplier interface {
	AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) (uint64, error)
	NodesDrainComplete(nodes []string) (uint64, error)
}

// NodeTracker is the interface for notifying an object that is tracking
// draining nodes of changes.
type NodeTracker interface {
	// TrackedNodes returns all the nodes that are currently tracked as
	// draining.
	TrackedNodes() map[string]*structs.Node

	// Remove removes a node from the draining set.
	Remove(nodeID string)

	// Update either updates the specification of a draining node or tracks the
	// node as draining.
	Update(node *structs.Node)
}

// DrainingJobWatcherFactory returns a new DrainingJobWatcher
type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger) DrainingJobWatcher

// DrainingNodeWatcherFactory returns a new DrainingNodeWatcher
type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, NodeTracker) DrainingNodeWatcher

// DrainDeadlineNotifierFactory returns a new DrainDeadlineNotifier
type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier

// GetDrainingJobWatcher returns a draining job watcher
func GetDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger) DrainingJobWatcher {
	return NewDrainingJobWatcher(ctx, limiter, state, logger)
}

// GetDeadlineNotifier returns a node deadline notifier with default coalescing.
func GetDeadlineNotifier(ctx context.Context) DrainDeadlineNotifier {
	return NewDeadlineHeap(ctx, NodeDeadlineCoalesceWindow)
}

// GetNodeWatcherFactory returns a DrainingNodeWatcherFactory
func GetNodeWatcherFactory() DrainingNodeWatcherFactory {
	return func(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger, tracker NodeTracker) DrainingNodeWatcher {
		return NewNodeDrainWatcher(ctx, limiter, state, logger, tracker)
	}
}

// allocMigrateBatcher is used to batch allocation updates.
type allocMigrateBatcher struct {
	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *structs.BatchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// batchWindow is how long updates are accumulated before the pending
	// batch is applied.
	batchWindow time.Duration

	// synchronizes access to the updates list, the future and the timer.
	sync.Mutex
}

// NodeDrainerConfig is used to configure a new node drainer.
type NodeDrainerConfig struct {
	Logger               *log.Logger
	Raft                 RaftApplier
	JobFactory           DrainingJobWatcherFactory
	NodeFactory          DrainingNodeWatcherFactory
	DrainDeadlineFactory DrainDeadlineNotifierFactory

	// StateQueriesPerSecond configures the rate of queries against the state
	// store that the node drainer is allowed to make.
	StateQueriesPerSecond float64

	// BatchUpdateInterval is the interval in which allocation updates are
	// batched.
	BatchUpdateInterval time.Duration
}
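
// A minimal wiring sketch (illustrative only; the logger, Raft shim and state
// store are assumed to come from the enclosing server and are not defined in
// this file):
//
//	drainer := NewNodeDrainer(&NodeDrainerConfig{
//		Logger:                logger,
//		Raft:                  raftShim,
//		JobFactory:            GetDrainingJobWatcher,
//		NodeFactory:           GetNodeWatcherFactory(),
//		DrainDeadlineFactory:  GetDeadlineNotifier,
//		StateQueriesPerSecond: LimitStateQueriesPerSecond,
//		BatchUpdateInterval:   BatchUpdateInterval,
//	})
//	drainer.SetEnabled(true, stateStore)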

// NodeDrainer is used to orchestrate migrating allocations off of draining
// nodes.
type NodeDrainer struct {
	enabled bool
	logger  *log.Logger

	// nodes is the set of draining nodes
	nodes map[string]*drainingNode

	// nodeWatcher watches for nodes to transition in and out of drain state.
	nodeWatcher DrainingNodeWatcher
	nodeFactory DrainingNodeWatcherFactory

	// jobWatcher watches draining jobs and emits desired drains and notifies
	// when migrations take place.
	jobWatcher DrainingJobWatcher
	jobFactory DrainingJobWatcherFactory

	// deadlineNotifier notifies when nodes reach their drain deadline.
	deadlineNotifier        DrainDeadlineNotifier
	deadlineNotifierFactory DrainDeadlineNotifierFactory

	// state is the state that is watched for state changes.
	state *state.StateStore

	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// raft is a shim around the raft messages necessary for draining
	raft RaftApplier

	// batcher is used to batch alloc migrations.
	batcher allocMigrateBatcher

	// ctx and exitFn are used to cancel the watcher
	ctx    context.Context
	exitFn context.CancelFunc

	l sync.RWMutex
}

// NewNodeDrainer returns a new node drainer. The node drainer is responsible
// for marking allocations on draining nodes with a desired migration
// transition, updating the drain strategy on nodes when they are complete and
// creating evaluations for the system to react to these changes.
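// The returned drainer is idle until SetEnabled(true, state) is called, which
// initializes its watchers and starts the run loop.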
func NewNodeDrainer(c *NodeDrainerConfig) *NodeDrainer {
	return &NodeDrainer{
		raft:                    c.Raft,
		logger:                  c.Logger,
		jobFactory:              c.JobFactory,
		nodeFactory:             c.NodeFactory,
		deadlineNotifierFactory: c.DrainDeadlineFactory,
		queryLimiter:            rate.NewLimiter(rate.Limit(c.StateQueriesPerSecond), 100),
		batcher: allocMigrateBatcher{
			batchWindow: c.BatchUpdateInterval,
		},
	}
}

// SetEnabled will start or stop the node draining goroutine depending on the
// enabled boolean.
func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) {
	n.l.Lock()
	defer n.l.Unlock()

	// If we are starting now or have a new state, init state and start the
	// run loop
	n.enabled = enabled
	if enabled {
		n.flush(state)
		go n.run(n.ctx)
	} else if !enabled && n.exitFn != nil {
		n.exitFn()
	}
}

// flush is used to clear the state of the watcher
func (n *NodeDrainer) flush(state *state.StateStore) {
	// Cancel anything that may be running.
	if n.exitFn != nil {
		n.exitFn()
	}

	// Store the new state
	if state != nil {
		n.state = state
	}

	n.ctx, n.exitFn = context.WithCancel(context.Background())
	n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n.state, n.logger)
	n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n.state, n.logger, n)
	n.deadlineNotifier = n.deadlineNotifierFactory(n.ctx)
	n.nodes = make(map[string]*drainingNode, 32)
}

// run is a long-lived event handler that receives changes from the relevant
// watchers and takes action based on them.
func (n *NodeDrainer) run(ctx context.Context) {
	for {
		select {
		case <-n.ctx.Done():
			return
		case nodes := <-n.deadlineNotifier.NextBatch():
			n.handleDeadlinedNodes(nodes)
		case req := <-n.jobWatcher.Drain():
			n.handleJobAllocDrain(req)
		case allocs := <-n.jobWatcher.Migrated():
			n.handleMigratedAllocs(allocs)
		}
	}
}

// handleDeadlinedNodes handles a set of nodes reaching their drain deadline.
// The handler detects the remaining allocations on the nodes and immediately
// marks them for migration.
func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) {
	// Retrieve the set of allocations that will be force stopped.
	var forceStop []*structs.Allocation
	n.l.RLock()
	for _, node := range nodes {
		draining, ok := n.nodes[node]
		if !ok {
			n.logger.Printf("[DEBUG] nomad.drain: skipping untracked deadlined node %q", node)
			continue
		}

		allocs, err := draining.RemainingAllocs()
		if err != nil {
			n.logger.Printf("[ERR] nomad.drain: failed to retrieve allocs on deadlined node %q: %v", node, err)
			continue
		}

		n.logger.Printf("[DEBUG] nomad.drain: node %q deadlined causing %d allocs to be force stopped", node, len(allocs))
		forceStop = append(forceStop, allocs...)
	}
	n.l.RUnlock()
	n.batchDrainAllocs(forceStop)

	// Submit the node transitions in a sharded form to ensure a reasonable
	// Raft transaction size.
	for _, nodes := range partitionIds(defaultMaxIdsPerTxn, nodes) {
		if _, err := n.raft.NodesDrainComplete(nodes); err != nil {
			n.logger.Printf("[ERR] nomad.drain: failed to unset drain for nodes: %v", err)
		}
	}
}

// handleJobAllocDrain handles marking a set of allocations as having a desired
// transition to drain. The handler blocks until the changes to the allocations
// have been applied.
func (n *NodeDrainer) handleJobAllocDrain(req *DrainRequest) {
	index, err := n.batchDrainAllocs(req.Allocs)
	req.Resp.Respond(index, err)
}

// handleMigratedAllocs checks to see if any nodes can be considered done
// draining based on the set of allocations that have migrated because of an
// ongoing drain for a job.
func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) {
	// Determine the set of nodes that were affected
	nodes := make(map[string]struct{})
	for _, alloc := range allocs {
		nodes[alloc.NodeID] = struct{}{}
	}

	var done []string
	var remainingAllocs []*structs.Allocation

	// For each node, check if it is now done
	n.l.RLock()
	for node := range nodes {
		draining, ok := n.nodes[node]
		if !ok {
			continue
		}

		isDone, err := draining.IsDone()
		if err != nil {
			n.logger.Printf("[ERR] nomad.drain: error checking if node %q is done draining: %v", node, err)
			continue
		}

		if !isDone {
			continue
		}

		done = append(done, node)

		remaining, err := draining.RemainingAllocs()
		if err != nil {
			n.logger.Printf("[ERR] nomad.drain: node %q is done draining but encountered an error getting remaining allocs: %v", node, err)
			continue
		}

		remainingAllocs = append(remainingAllocs, remaining...)
	}
	n.l.RUnlock()

	// Stop any running system jobs on otherwise done nodes
	if len(remainingAllocs) > 0 {
		future := structs.NewBatchFuture()
		n.drainAllocs(future, remainingAllocs)
		if err := future.Wait(); err != nil {
			n.logger.Printf("[ERR] nomad.drain: failed to drain %d remaining allocs from done nodes: %v",
				len(remainingAllocs), err)
		}
	}

	// Submit the node transitions in a sharded form to ensure a reasonable
	// Raft transaction size.
	for _, nodes := range partitionIds(defaultMaxIdsPerTxn, done) {
		if _, err := n.raft.NodesDrainComplete(nodes); err != nil {
			n.logger.Printf("[ERR] nomad.drain: failed to unset drain for nodes: %v", err)
		}
	}
}

// batchDrainAllocs is used to batch the draining of allocations. It will block
// until the batch is complete.
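// Because updates are coalesced into a single window, a caller may wait up to
// roughly batchWindow (configured via NodeDrainerConfig.BatchUpdateInterval)
// before its allocations are submitted and the shared future is responded to.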
func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, error) {
	// Add this to the batch
	n.batcher.Lock()
	n.batcher.updates = append(n.batcher.updates, allocs...)

	// Start a new batch if none
	future := n.batcher.updateFuture
	if future == nil {
		future = structs.NewBatchFuture()
		n.batcher.updateFuture = future
		n.batcher.updateTimer = time.AfterFunc(n.batcher.batchWindow, func() {
			// Get the pending updates
			n.batcher.Lock()
			updates := n.batcher.updates
			future := n.batcher.updateFuture
			n.batcher.updates = nil
			n.batcher.updateFuture = nil
			n.batcher.updateTimer = nil
			n.batcher.Unlock()

			// Perform the batch update
			n.drainAllocs(future, updates)
		})
	}
	n.batcher.Unlock()

	if err := future.Wait(); err != nil {
		return 0, err
	}

	return future.Index(), nil
}

// drainAllocs is a non-batch marking of the desired transition to migrate for
// the set of allocations. It will also create the necessary evaluations for
// the affected jobs.
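// The transition and evaluation updates are submitted in shards (via
// partitionAllocDrain, assumed to be defined elsewhere in this package) so
// that each Raft transaction stays a reasonable size.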
func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs.Allocation) {
	// Compute the affected jobs and make the transition map
	jobs := make(map[string]*structs.Allocation, 4)
	transitions := make(map[string]*structs.DesiredTransition, len(allocs))
	for _, alloc := range allocs {
		transitions[alloc.ID] = &structs.DesiredTransition{
			Migrate: helper.BoolToPtr(true),
		}
		jobs[alloc.JobID] = alloc
	}

	evals := make([]*structs.Evaluation, 0, len(jobs))
	for job, alloc := range jobs {
		evals = append(evals, &structs.Evaluation{
			ID:          uuid.Generate(),
			Namespace:   alloc.Namespace,
			Priority:    alloc.Job.Priority,
			Type:        alloc.Job.Type,
			TriggeredBy: structs.EvalTriggerNodeDrain,
			JobID:       job,
			Status:      structs.EvalStatusPending,
		})
	}

	// Commit this update via Raft
	var finalIndex uint64
	var mErr multierror.Error
	for _, u := range partitionAllocDrain(defaultMaxIdsPerTxn, transitions, evals) {
		index, err := n.raft.AllocUpdateDesiredTransition(u.Transitions, u.Evals)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
		finalIndex = index
	}

	future.Respond(finalIndex, mErr.ErrorOrNil())
}