06972fae0c
The allocrunner sends several updates to the server during the early lifecycle of an allocation and its tasks. Clients batch up allocation updates every 200ms, but experiments like the C2M challenge have shown that even with this batching, servers can be overwhelmed with client updates during high volume deployments. Benchmarking done in #9451 has shown that client updates can easily represent ~70% of all Nomad Raft traffic. Each allocation sends many updates during its lifetime, but only those that change the `ClientStatus` field are critical for progressing a deployment or kicking off a reschedule to recover from failures. Add a priority to the client allocation sync and update the `syncTicker` receiver so that we only send an update if there's a high priority update waiting, or on every 5th tick. This means when there are no high priority updates, the client will send updates at most every 1s instead of 200ms. Benchmarks have shown this can reduce overall Raft traffic by 10%, as well as reduce client-to-server RPC traffic. This changeset also switches from a channel-based collection of updates to a shared buffer, so as to split batching from sending and prevent backpressure onto the allocrunner when the RPC is slow. This doesn't have a major performance benefit in the benchmarks but makes the implementation of the prioritized update simpler. Fixes: #9451
72 lines
2.2 KiB
Go
72 lines
2.2 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package interfaces
|
|
|
|
import (
|
|
"github.com/hashicorp/nomad/client/allocdir"
|
|
"github.com/hashicorp/nomad/client/allocrunner/state"
|
|
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
|
|
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
"github.com/hashicorp/nomad/plugins/drivers"
|
|
)
|
|
|
|
// AllocRunner is the interface to the allocRunner struct used by client.Client
|
|
type AllocRunner interface {
|
|
Alloc() *structs.Allocation
|
|
|
|
Run()
|
|
Restore() error
|
|
Update(*structs.Allocation)
|
|
Reconnect(update *structs.Allocation) error
|
|
Shutdown()
|
|
Destroy()
|
|
|
|
IsDestroyed() bool
|
|
IsMigrating() bool
|
|
IsWaiting() bool
|
|
|
|
WaitCh() <-chan struct{}
|
|
DestroyCh() <-chan struct{}
|
|
ShutdownCh() <-chan struct{}
|
|
|
|
AllocState() *state.State
|
|
PersistState() error
|
|
AcknowledgeState(*state.State)
|
|
GetUpdatePriority(*structs.Allocation) cstructs.AllocUpdatePriority
|
|
SetClientStatus(string)
|
|
|
|
Signal(taskName, signal string) error
|
|
RestartTask(taskName string, taskEvent *structs.TaskEvent) error
|
|
RestartRunning(taskEvent *structs.TaskEvent) error
|
|
RestartAll(taskEvent *structs.TaskEvent) error
|
|
|
|
GetTaskEventHandler(taskName string) drivermanager.EventHandler
|
|
GetTaskExecHandler(taskName string) drivermanager.TaskExecHandler
|
|
GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error)
|
|
StatsReporter() AllocStatsReporter
|
|
Listener() *cstructs.AllocListener
|
|
GetAllocDir() *allocdir.AllocDir
|
|
}
|
|
|
|
// TaskStateHandler exposes a handler to be called when a task's state changes
type TaskStateHandler interface {
	// TaskStateUpdated is used to notify the alloc runner about task state
	// changes. It takes no arguments and returns nothing, so the notification
	// itself carries no payload.
	TaskStateUpdated()
}
|
|
|
|
// AllocStatsReporter gives access to the latest resource usage from the
|
|
// allocation
|
|
type AllocStatsReporter interface {
|
|
LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
|
|
}
|
|
|
|
// HookResourceSetter is used to communicate between alloc hooks and task hooks
|
|
type HookResourceSetter interface {
|
|
SetCSIMounts(map[string]*csimanager.MountInfo)
|
|
GetCSIMounts(map[string]*csimanager.MountInfo)
|
|
}
|