open-nomad/scheduler/scheduler.go
Tim Gross d9d4da1e9f
scheduler: seed random shuffle nodes with eval ID (#12008)
Processing an evaluation is nearly a pure function over the state
snapshot, but we randomly shuffle the nodes. This means that
developers can't take a given state snapshot and pass an evaluation
through it and be guaranteed the same plan results.

But the evaluation ID is already random, so if we use this as the seed
for shuffling the nodes we can greatly reduce the sources of
non-determinism. Unfortunately golang map iteration uses a global
source of randomness and not a goroutine-local one, but arguably
if the scheduler behavior is impacted by this, that's a bug in the
iteration.
2022-02-08 12:16:33 -05:00

136 lines
5.5 KiB
Go

package scheduler
import (
"fmt"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
// SchedulerVersion is the version of the scheduler. Changes to the
// scheduler that are incompatible with prior schedulers will increment this
// version. It is used to disallow dequeueing when the versions do not match
// across the leader and the dequeueing scheduler.
SchedulerVersion uint16 = 1
)
// BuiltinSchedulers contains the built in registered schedulers
// which are available
var BuiltinSchedulers = map[string]Factory{
"service": NewServiceScheduler,
"batch": NewBatchScheduler,
"system": NewSystemScheduler,
"sysbatch": NewSysBatchScheduler,
}
// NewScheduler is used to instantiate and return a new scheduler
// given the scheduler name, initial state, and planner.
func NewScheduler(name string, logger log.Logger, eventsCh chan<- interface{}, state State, planner Planner) (Scheduler, error) {
// Lookup the factory function
factory, ok := BuiltinSchedulers[name]
if !ok {
return nil, fmt.Errorf("unknown scheduler '%s'", name)
}
// Instantiate the scheduler
sched := factory(logger, eventsCh, state, planner)
return sched, nil
}
// Factory is used to instantiate a new Scheduler
type Factory func(log.Logger, chan<- interface{}, State, Planner) Scheduler
// Scheduler is the top level instance for a scheduler. A scheduler is
// meant to only encapsulate business logic, pushing the various plumbing
// into Nomad itself. They are invoked to process a single evaluation at
// a time. The evaluation may result in task allocations which are computed
// optimistically, as there are many concurrent evaluations being processed.
// The task allocations are submitted as a plan, and the current leader will
// coordinate the commits to prevent oversubscription or improper allocations
// based on stale state.
type Scheduler interface {
// Process is used to handle a new evaluation. The scheduler is free to
// apply any logic necessary to make the task placements. The state and
// planner will be provided prior to any invocations of process.
Process(*structs.Evaluation) error
}
// State is an immutable view of the global state. This allows schedulers
// to make intelligent decisions based on allocations of other schedulers
// and to enforce complex constraints that require more information than
// is available to a local state scheduler.
type State interface {
// Config returns the configuration of the state store
Config() *state.StateStoreConfig
// Nodes returns an iterator over all the nodes.
// The type of each result is *structs.Node
Nodes(ws memdb.WatchSet) (memdb.ResultIterator, error)
// AllocsByJob returns the allocations by JobID
AllocsByJob(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Allocation, error)
// AllocsByNode returns all the allocations by node
AllocsByNode(ws memdb.WatchSet, node string) ([]*structs.Allocation, error)
// AllocByID returns the allocation
AllocByID(ws memdb.WatchSet, allocID string) (*structs.Allocation, error)
// AllocsByNodeTerminal returns all the allocations by node filtering by terminal status
AllocsByNodeTerminal(ws memdb.WatchSet, node string, terminal bool) ([]*structs.Allocation, error)
// GetNodeByID is used to lookup a node by ID
NodeByID(ws memdb.WatchSet, nodeID string) (*structs.Node, error)
// GetJobByID is used to lookup a job by ID
JobByID(ws memdb.WatchSet, namespace, id string) (*structs.Job, error)
// DeploymentsByJobID returns the deployments associated with the job
DeploymentsByJobID(ws memdb.WatchSet, namespace, jobID string, all bool) ([]*structs.Deployment, error)
// JobByIDAndVersion returns the job associated with id and specific version
JobByIDAndVersion(ws memdb.WatchSet, namespace, id string, version uint64) (*structs.Job, error)
// LatestDeploymentByJobID returns the latest deployment matching the given
// job ID
LatestDeploymentByJobID(ws memdb.WatchSet, namespace, jobID string) (*structs.Deployment, error)
// SchedulerConfig returns config options for the scheduler
SchedulerConfig() (uint64, *structs.SchedulerConfiguration, error)
// CSIVolumeByID fetch CSI volumes, containing controller jobs
CSIVolumeByID(memdb.WatchSet, string, string) (*structs.CSIVolume, error)
// CSIVolumeByID fetch CSI volumes, containing controller jobs
CSIVolumesByNodeID(memdb.WatchSet, string, string) (memdb.ResultIterator, error)
// LatestIndex returns the greatest index value for all indexes.
LatestIndex() (uint64, error)
}
// Planner interface is used to submit a task allocation plan.
type Planner interface {
// SubmitPlan is used to submit a plan for consideration.
// This will return a PlanResult or an error. It is possible
// that this will result in a state refresh as well.
SubmitPlan(*structs.Plan) (*structs.PlanResult, State, error)
// UpdateEval is used to update an evaluation. This should update
// a copy of the input evaluation since that should be immutable.
UpdateEval(*structs.Evaluation) error
// CreateEval is used to create an evaluation. This should set the
// PreviousEval to that of the current evaluation.
CreateEval(*structs.Evaluation) error
// ReblockEval takes a blocked evaluation and re-inserts it into the blocked
// evaluation tracker. This update occurs only in-memory on the leader. The
// evaluation must exist in a blocked state prior to this being called such
// that on leader changes, the evaluation will be reblocked properly.
ReblockEval(*structs.Evaluation) error
}