// Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: MPL-2.0 package nomad import ( "container/heap" "fmt" "sync" "time" metrics "github.com/armon/go-metrics" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" ) var ( // planQueueFlushed is the error used for all pending plans // when the queue is flushed or disabled planQueueFlushed = fmt.Errorf("plan queue flushed") ) // PlanFuture is used to return a future for an enqueue type PlanFuture interface { Wait() (*structs.PlanResult, error) } // PlanQueue is used to submit commit plans for task allocations // to the current leader. The leader verifies that resources are not // over-committed and commits to Raft. This allows sub-schedulers to // be optimistically concurrent. In the case of an overcommit, the plan // may be partially applied if allowed, or completely rejected (gang commit). type PlanQueue struct { enabled bool stats *QueueStats ready PendingPlans waitCh chan struct{} l sync.RWMutex } // NewPlanQueue is used to construct and return a new plan queue func NewPlanQueue() (*PlanQueue, error) { q := &PlanQueue{ enabled: false, stats: new(QueueStats), ready: make([]*pendingPlan, 0, 16), waitCh: make(chan struct{}, 1), } return q, nil } // pendingPlan is used to wrap a plan that is enqueued // so that we can re-use it as a future. type pendingPlan struct { plan *structs.Plan enqueueTime time.Time result *structs.PlanResult errCh chan error } // Wait is used to block for the plan result or potential error func (p *pendingPlan) Wait() (*structs.PlanResult, error) { err := <-p.errCh return p.result, err } // respond is used to set the response and error for the future func (p *pendingPlan) respond(result *structs.PlanResult, err error) { p.result = result p.errCh <- err } // PendingPlans is a list of waiting plans. // We implement the container/heap interface so that this is a // priority queue type PendingPlans []*pendingPlan // Enabled is used to check if the queue is enabled. func (q *PlanQueue) Enabled() bool { q.l.RLock() defer q.l.RUnlock() return q.enabled } // SetEnabled is used to control if the queue is enabled. The queue // should only be enabled on the active leader. func (q *PlanQueue) SetEnabled(enabled bool) { q.l.Lock() q.enabled = enabled q.l.Unlock() if !enabled { q.Flush() } } // Enqueue is used to enqueue a plan func (q *PlanQueue) Enqueue(plan *structs.Plan) (PlanFuture, error) { q.l.Lock() defer q.l.Unlock() // Do nothing if not enabled if !q.enabled { return nil, fmt.Errorf("plan queue is disabled") } // Wrap the pending plan pending := &pendingPlan{ plan: plan, enqueueTime: time.Now(), errCh: make(chan error, 1), } // Push onto the heap heap.Push(&q.ready, pending) // Update the stats q.stats.Depth += 1 // Unblock any blocked reader select { case q.waitCh <- struct{}{}: default: } return pending, nil } // Dequeue is used to perform a blocking dequeue func (q *PlanQueue) Dequeue(timeout time.Duration) (*pendingPlan, error) { SCAN: q.l.Lock() // Do nothing if not enabled if !q.enabled { q.l.Unlock() return nil, fmt.Errorf("plan queue is disabled") } // Look for available work if len(q.ready) > 0 { raw := heap.Pop(&q.ready) pending := raw.(*pendingPlan) q.stats.Depth -= 1 q.l.Unlock() return pending, nil } q.l.Unlock() // Setup the timeout timer var timerCh <-chan time.Time if timerCh == nil && timeout > 0 { timer := time.NewTimer(timeout) defer timer.Stop() timerCh = timer.C } // Wait for timeout or new work select { case <-q.waitCh: goto SCAN case <-timerCh: return nil, nil } } // Flush is used to reset the state of the plan queue func (q *PlanQueue) Flush() { q.l.Lock() defer q.l.Unlock() // Error out all the futures for _, pending := range q.ready { pending.respond(nil, planQueueFlushed) } // Reset the broker q.stats.Depth = 0 q.ready = make([]*pendingPlan, 0, 16) // Unblock any waiters select { case q.waitCh <- struct{}{}: default: } } // Stats is used to query the state of the queue func (q *PlanQueue) Stats() *QueueStats { // Allocate a new stats struct stats := new(QueueStats) q.l.RLock() defer q.l.RUnlock() // Copy all the stats *stats = *q.stats return stats } // EmitStats is used to export metrics about the broker while enabled func (q *PlanQueue) EmitStats(period time.Duration, stopCh <-chan struct{}) { timer, stop := helper.NewSafeTimer(period) defer stop() for { timer.Reset(period) select { case <-timer.C: stats := q.Stats() metrics.SetGauge([]string{"nomad", "plan", "queue_depth"}, float32(stats.Depth)) case <-stopCh: return } } } // QueueStats returns all the stats about the plan queue type QueueStats struct { Depth int } // Len is for the sorting interface func (p PendingPlans) Len() int { return len(p) } // Less is for the sorting interface. We flip the check // so that the "min" in the min-heap is the element with the // highest priority. For the same priority, we use the enqueue // time of the evaluation to give a FIFO ordering. func (p PendingPlans) Less(i, j int) bool { if p[i].plan.Priority != p[j].plan.Priority { return !(p[i].plan.Priority < p[j].plan.Priority) } return p[i].enqueueTime.Before(p[j].enqueueTime) } // Swap is for the sorting interface func (p PendingPlans) Swap(i, j int) { p[i], p[j] = p[j], p[i] } // Push is used to add a new evaluation to the slice func (p *PendingPlans) Push(e interface{}) { *p = append(*p, e.(*pendingPlan)) } // Pop is used to remove an evaluation from the slice func (p *PendingPlans) Pop() interface{} { n := len(*p) e := (*p)[n-1] (*p)[n-1] = nil *p = (*p)[:n-1] return e } // Peek is used to peek at the next element that would be popped func (p PendingPlans) Peek() *pendingPlan { n := len(p) if n == 0 { return nil } return p[n-1] }