2018-10-04 23:22:01 +00:00
|
|
|
package allocrunner
|
2018-06-22 00:35:07 +00:00
|
|
|
|
|
|
|
import (
|
2018-07-17 20:57:57 +00:00
|
|
|
"context"
|
2018-06-22 00:35:07 +00:00
|
|
|
"fmt"
|
2018-06-29 00:01:05 +00:00
|
|
|
"path/filepath"
|
2018-06-22 00:35:07 +00:00
|
|
|
"sync"
|
2018-07-19 00:06:44 +00:00
|
|
|
"time"
|
2018-06-22 00:35:07 +00:00
|
|
|
|
|
|
|
log "github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/nomad/client/allocdir"
|
2018-10-04 23:22:01 +00:00
|
|
|
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
|
|
|
|
"github.com/hashicorp/nomad/client/allocrunner/state"
|
|
|
|
"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
|
2018-08-23 19:03:17 +00:00
|
|
|
"github.com/hashicorp/nomad/client/allocwatcher"
|
2018-07-13 16:45:29 +00:00
|
|
|
"github.com/hashicorp/nomad/client/config"
|
2018-07-20 00:40:25 +00:00
|
|
|
"github.com/hashicorp/nomad/client/consul"
|
2018-11-16 23:29:59 +00:00
|
|
|
"github.com/hashicorp/nomad/client/devicemanager"
|
2018-07-19 00:06:44 +00:00
|
|
|
cinterfaces "github.com/hashicorp/nomad/client/interfaces"
|
2018-08-08 00:46:37 +00:00
|
|
|
cstate "github.com/hashicorp/nomad/client/state"
|
2018-06-29 00:01:05 +00:00
|
|
|
cstructs "github.com/hashicorp/nomad/client/structs"
|
2018-07-13 16:45:29 +00:00
|
|
|
"github.com/hashicorp/nomad/client/vaultclient"
|
2018-07-19 00:06:44 +00:00
|
|
|
"github.com/hashicorp/nomad/helper"
|
2018-06-22 00:35:07 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2018-11-15 15:13:14 +00:00
|
|
|
"github.com/hashicorp/nomad/plugins/device"
|
2018-10-06 01:42:15 +00:00
|
|
|
"github.com/hashicorp/nomad/plugins/shared/loader"
|
2018-06-22 00:35:07 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// allocRunner is used to run all the tasks in a given allocation
type allocRunner struct {
	// id is the ID of the allocation. Can be accessed without a lock
	id string

	// Logger is the logger for the alloc runner.
	logger log.Logger

	// clientConfig is the client's configuration; used for the alloc dir
	// root and passed through to each task runner.
	clientConfig *config.Config

	// stateUpdater is used to emit updated alloc state
	stateUpdater cinterfaces.AllocStateHandler

	// taskStateUpdatedCh is ticked whenever task state has changed. Must
	// have len==1 to allow nonblocking notification of state updates while
	// the goroutine is already processing a previous update.
	taskStateUpdatedCh chan struct{}

	// taskStateUpdateHandlerCh is closed when the task state handling
	// goroutine exits. It is unsafe to destroy the local allocation state
	// before this goroutine exits.
	taskStateUpdateHandlerCh chan struct{}

	// allocUpdatedCh is a channel that is used to stream allocation updates into
	// the allocUpdate handler. Must have len==1 to allow nonblocking notification
	// of new allocation updates while the goroutine is processing a previous
	// update.
	allocUpdatedCh chan *structs.Allocation

	// consulClient is the client used by the consul service hook for
	// registering services and checks
	consulClient consul.ConsulServiceAPI

	// vaultClient is used to manage Vault tokens
	vaultClient vaultclient.VaultClient

	// waitCh is closed when the Run() loop has exited
	waitCh chan struct{}

	// destroyed is true when the Run() loop has exited, postrun hooks have
	// run, and alloc runner has been destroyed. Must acquire destroyedLock
	// to access.
	destroyed bool

	// destroyCh is closed when the Run() loop has exited, postrun hooks have
	// run, and alloc runner has been destroyed.
	destroyCh chan struct{}

	// shutdown is true when the Run() loop has exited, and shutdown hooks have
	// run. Must acquire destroyedLock to access.
	shutdown bool

	// shutdownCh is closed when the Run() loop has exited, and shutdown hooks
	// have run.
	shutdownCh chan struct{}

	// runnersLaunched is true if TaskRunners were Run. Must acquire
	// destroyedLock to access.
	runnersLaunched bool

	// destroyLaunched is true if Destroy has been called. Must acquire
	// destroyedLock to access.
	destroyLaunched bool

	// shutdownLaunched is true if Shutdown has been called. Must acquire
	// destroyedLock to access.
	shutdownLaunched bool

	// destroyedLock guards destroyed, runnersLaunched, destroyLaunched,
	// shutdownLaunched, and serializes Shutdown/Destroy calls.
	destroyedLock sync.Mutex

	// alloc captures the allocation being run. Guarded by allocLock.
	alloc     *structs.Allocation
	allocLock sync.RWMutex

	// state is the alloc runner's state. Guarded by stateLock.
	state     *state.State
	stateLock sync.RWMutex

	// stateDB persists alloc and task state locally so it survives client
	// restarts; deleted when the runner is destroyed.
	stateDB cstate.StateDB

	// allocDir is used to build the allocations directory structure.
	allocDir *allocdir.AllocDir

	// runnerHooks are alloc runner lifecycle hooks that should be run on state
	// transitions.
	runnerHooks []interfaces.RunnerHook

	// tasks are the set of task runners, keyed by task name.
	tasks map[string]*taskrunner.TaskRunner

	// deviceStatsReporter is used to lookup resource usage for alloc devices
	deviceStatsReporter cinterfaces.DeviceStatsReporter

	// allocBroadcaster sends client allocation updates to all listeners
	allocBroadcaster *cstructs.AllocBroadcaster

	// prevAllocWatcher allows waiting for any previous or preempted allocations
	// to exit
	prevAllocWatcher allocwatcher.PrevAllocWatcher

	// prevAllocMigrator allows the migration of a previous allocations alloc dir.
	prevAllocMigrator allocwatcher.PrevAllocMigrator

	// pluginSingletonLoader is a plugin loader that will return singleton
	// instances of the plugins.
	pluginSingletonLoader loader.PluginCatalog

	// devicemanager is used to mount devices as well as lookup device
	// statistics
	devicemanager devicemanager.Manager
}
|
|
|
|
|
|
|
|
// NewAllocRunner returns a new allocation runner.
|
2018-07-13 00:56:52 +00:00
|
|
|
func NewAllocRunner(config *Config) (*allocRunner, error) {
|
|
|
|
alloc := config.Alloc
|
|
|
|
tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
|
|
|
|
if tg == nil {
|
|
|
|
return nil, fmt.Errorf("failed to lookup task group %q", alloc.TaskGroup)
|
|
|
|
}
|
|
|
|
|
2018-06-22 00:35:07 +00:00
|
|
|
ar := &allocRunner{
|
2018-10-12 01:03:48 +00:00
|
|
|
id: alloc.ID,
|
|
|
|
alloc: alloc,
|
|
|
|
clientConfig: config.ClientConfig,
|
|
|
|
consulClient: config.Consul,
|
|
|
|
vaultClient: config.Vault,
|
|
|
|
tasks: make(map[string]*taskrunner.TaskRunner, len(tg.Tasks)),
|
|
|
|
waitCh: make(chan struct{}),
|
2018-12-14 15:02:47 +00:00
|
|
|
destroyCh: make(chan struct{}),
|
|
|
|
shutdownCh: make(chan struct{}),
|
2018-10-12 01:03:48 +00:00
|
|
|
state: &state.State{},
|
|
|
|
stateDB: config.StateDB,
|
|
|
|
stateUpdater: config.StateUpdater,
|
|
|
|
taskStateUpdatedCh: make(chan struct{}, 1),
|
|
|
|
taskStateUpdateHandlerCh: make(chan struct{}),
|
2018-12-17 12:27:54 +00:00
|
|
|
allocUpdatedCh: make(chan *structs.Allocation, 1),
|
2018-11-15 15:13:14 +00:00
|
|
|
deviceStatsReporter: config.DeviceStatsReporter,
|
2018-10-12 01:03:48 +00:00
|
|
|
prevAllocWatcher: config.PrevAllocWatcher,
|
2018-12-06 11:15:59 +00:00
|
|
|
prevAllocMigrator: config.PrevAllocMigrator,
|
2018-10-12 01:03:48 +00:00
|
|
|
pluginSingletonLoader: config.PluginSingletonLoader,
|
2018-11-16 23:29:59 +00:00
|
|
|
devicemanager: config.DeviceManager,
|
2018-06-22 00:35:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create the logger based on the allocation ID
|
2018-08-30 21:33:50 +00:00
|
|
|
ar.logger = config.Logger.Named("alloc_runner").With("alloc_id", alloc.ID)
|
2018-06-22 00:35:07 +00:00
|
|
|
|
2018-11-17 01:29:25 +00:00
|
|
|
// Create alloc broadcaster
|
|
|
|
ar.allocBroadcaster = cstructs.NewAllocBroadcaster(ar.logger)
|
|
|
|
|
2018-08-29 22:05:03 +00:00
|
|
|
// Create alloc dir
|
|
|
|
ar.allocDir = allocdir.NewAllocDir(ar.logger, filepath.Join(config.ClientConfig.AllocDir, alloc.ID))
|
|
|
|
|
2018-06-22 00:35:07 +00:00
|
|
|
// Initialize the runners hooks.
|
|
|
|
ar.initRunnerHooks()
|
|
|
|
|
2018-07-13 00:56:52 +00:00
|
|
|
// Create the TaskRunners
|
|
|
|
if err := ar.initTaskRunners(tg.Tasks); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return ar, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// initTaskRunners creates task runners but does *not* run them.
|
|
|
|
func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error {
|
|
|
|
for _, task := range tasks {
|
|
|
|
config := &taskrunner.Config{
|
2018-10-06 01:42:15 +00:00
|
|
|
Alloc: ar.alloc,
|
|
|
|
ClientConfig: ar.clientConfig,
|
|
|
|
Task: task,
|
|
|
|
TaskDir: ar.allocDir.NewTaskDir(task.Name),
|
|
|
|
Logger: ar.logger,
|
|
|
|
StateDB: ar.stateDB,
|
|
|
|
StateUpdater: ar,
|
|
|
|
Consul: ar.consulClient,
|
2018-10-18 20:39:02 +00:00
|
|
|
Vault: ar.vaultClient,
|
2018-10-06 01:42:15 +00:00
|
|
|
PluginSingletonLoader: ar.pluginSingletonLoader,
|
2018-11-15 15:13:14 +00:00
|
|
|
DeviceStatsReporter: ar.deviceStatsReporter,
|
2018-11-16 23:29:59 +00:00
|
|
|
DeviceManager: ar.devicemanager,
|
2018-07-13 00:56:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create, but do not Run, the task runner
|
|
|
|
tr, err := taskrunner.NewTaskRunner(config)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed creating runner for task %q: %v", task.Name, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
ar.tasks[task.Name] = tr
|
|
|
|
}
|
|
|
|
return nil
|
2018-06-22 00:35:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (ar *allocRunner) WaitCh() <-chan struct{} {
|
|
|
|
return ar.waitCh
|
|
|
|
}
|
|
|
|
|
2018-11-26 20:50:35 +00:00
|
|
|
// Run the AllocRunner. Starts tasks if the alloc is non-terminal and closes
// WaitCh when it exits. Should be started in a goroutine.
func (ar *allocRunner) Run() {
	// Close the wait channel on return so Destroy/Shutdown and other
	// waiters unblock.
	defer close(ar.waitCh)

	// Start the task state update handler
	go ar.handleTaskStateUpdates()

	// Start the alloc update handler
	go ar.handleAllocUpdates()

	// If an alloc should not be run, ensure any restored task handles are
	// destroyed and exit to wait for the AR to be GC'd by the client.
	if !ar.shouldRun() {
		ar.logger.Debug("not running terminal alloc")

		// Ensure all tasks are cleaned up
		ar.killTasks()
		return
	}

	// Mark task runners as being run for Shutdown
	ar.destroyedLock.Lock()
	ar.runnersLaunched = true
	ar.destroyedLock.Unlock()

	// If task update chan has been closed, that means we've been shutdown.
	select {
	case <-ar.taskStateUpdateHandlerCh:
		return
	default:
	}

	// Run the prestart hooks. On failure, skip straight to the postrun
	// hooks so partial setup is still torn down.
	if err := ar.prerun(); err != nil {
		ar.logger.Error("prerun failed", "error", err)
		goto POST
	}

	// Run the runners (blocks until they exit)
	ar.runTasks()

POST:
	// Run the postrun hooks
	// XXX Equivalent to TR.Poststop hook
	if err := ar.postrun(); err != nil {
		ar.logger.Error("postrun failed", "error", err)
	}
}
|
|
|
|
|
|
|
|
// shouldRun returns true if the alloc is in a state that the alloc runner
|
|
|
|
// should run it.
|
|
|
|
func (ar *allocRunner) shouldRun() bool {
|
|
|
|
// Do not run allocs that are terminal
|
|
|
|
if ar.Alloc().TerminalStatus() {
|
|
|
|
ar.logger.Trace("alloc terminal; not running",
|
|
|
|
"desired_status", ar.Alloc().DesiredStatus,
|
|
|
|
"client_status", ar.Alloc().ClientStatus,
|
|
|
|
)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2018-11-01 00:41:37 +00:00
|
|
|
// It's possible that the alloc local state was marked terminal before
|
|
|
|
// the server copy of the alloc (checked above) was marked as terminal,
|
|
|
|
// so check the local state as well.
|
|
|
|
switch clientStatus := ar.AllocState().ClientStatus; clientStatus {
|
|
|
|
case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed, structs.AllocClientStatusLost:
|
|
|
|
ar.logger.Trace("alloc terminal; updating server and not running", "status", clientStatus)
|
2018-11-05 20:20:45 +00:00
|
|
|
return false
|
2018-11-01 00:41:37 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 20:20:45 +00:00
|
|
|
return true
|
2018-10-16 22:17:36 +00:00
|
|
|
}
|
|
|
|
|
2018-11-26 20:51:18 +00:00
|
|
|
// runTasks is used to run the task runners and block until they exit.
|
|
|
|
func (ar *allocRunner) runTasks() {
|
2018-07-13 00:56:52 +00:00
|
|
|
for _, task := range ar.tasks {
|
|
|
|
go task.Run()
|
2018-06-22 00:35:07 +00:00
|
|
|
}
|
|
|
|
|
2018-11-26 20:51:18 +00:00
|
|
|
for _, task := range ar.tasks {
|
|
|
|
<-task.WaitCh()
|
|
|
|
}
|
2018-06-22 00:35:07 +00:00
|
|
|
}
|
2018-06-28 00:27:03 +00:00
|
|
|
|
2018-09-20 00:32:50 +00:00
|
|
|
// Alloc returns the current allocation being run by this runner as sent by the
|
|
|
|
// server. This view of the allocation does not have updated task states.
|
2018-06-29 00:01:05 +00:00
|
|
|
func (ar *allocRunner) Alloc() *structs.Allocation {
|
|
|
|
ar.allocLock.RLock()
|
|
|
|
defer ar.allocLock.RUnlock()
|
|
|
|
return ar.alloc
|
|
|
|
}
|
|
|
|
|
2018-08-01 18:03:52 +00:00
|
|
|
func (ar *allocRunner) setAlloc(updated *structs.Allocation) {
|
|
|
|
ar.allocLock.Lock()
|
|
|
|
ar.alloc = updated
|
|
|
|
ar.allocLock.Unlock()
|
|
|
|
}
|
|
|
|
|
2018-08-23 19:03:17 +00:00
|
|
|
// GetAllocDir returns the alloc dir which is safe for concurrent use.
|
|
|
|
func (ar *allocRunner) GetAllocDir() *allocdir.AllocDir {
|
|
|
|
return ar.allocDir
|
|
|
|
}
|
|
|
|
|
2018-07-13 00:56:52 +00:00
|
|
|
// Restore state from database. Must be called after NewAllocRunner but before
|
|
|
|
// Run.
|
|
|
|
func (ar *allocRunner) Restore() error {
|
2018-08-08 00:46:37 +00:00
|
|
|
// Restore task runners
|
|
|
|
for _, tr := range ar.tasks {
|
|
|
|
if err := tr.Restore(); err != nil {
|
|
|
|
return err
|
2018-07-13 00:56:52 +00:00
|
|
|
}
|
2018-08-08 00:46:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2018-06-29 00:01:05 +00:00
|
|
|
}
|
|
|
|
|
2018-07-19 17:48:01 +00:00
|
|
|
// TaskStateUpdated is called by TaskRunner when a task's state has been
|
2018-10-12 01:03:48 +00:00
|
|
|
// updated. It does not process the update synchronously but instead notifies a
|
|
|
|
// goroutine the state has change. Since processing the state change may cause
|
|
|
|
// the task to be killed (thus change its state again) it cannot be done
|
|
|
|
// synchronously as it would cause a deadlock due to reentrancy.
|
|
|
|
//
|
|
|
|
// The goroutine is used to compute changes to the alloc's ClientStatus and to
|
|
|
|
// update the server with the new state.
|
2018-10-16 03:38:12 +00:00
|
|
|
func (ar *allocRunner) TaskStateUpdated() {
|
2018-10-12 01:03:48 +00:00
|
|
|
select {
|
|
|
|
case ar.taskStateUpdatedCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
// already pending updates
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleTaskStateUpdates must be run in goroutine as it monitors
// taskStateUpdatedCh for task state update notifications and processes task
// states.
//
// Processing task state updates must be done in a goroutine as it may have to
// kill tasks which causes further task state updates.
func (ar *allocRunner) handleTaskStateUpdates() {
	// Closing this channel signals destroyImpl that local state may be
	// safely deleted.
	defer close(ar.taskStateUpdateHandlerCh)

	for done := false; !done; {
		select {
		case <-ar.taskStateUpdatedCh:
		case <-ar.waitCh:
			// Run has exited, sync once more to ensure final
			// states are collected.
			done = true
		}

		ar.logger.Trace("handling task state update", "done", done)

		// Set with the appropriate event if task runners should be
		// killed.
		var killEvent *structs.TaskEvent

		// If task runners should be killed, this is set to the task
		// name whose fault it is.
		killTask := ""

		// True if task runners should be killed because a leader
		// failed (informational).
		leaderFailed := false

		// Task state has been updated; gather the state of the other tasks
		trNum := len(ar.tasks)
		liveRunners := make([]*taskrunner.TaskRunner, 0, trNum)
		states := make(map[string]*structs.TaskState, trNum)

		for name, tr := range ar.tasks {
			state := tr.TaskState()
			states[name] = state

			// Capture live task runners in case we need to kill them
			if state.State != structs.TaskStateDead {
				liveRunners = append(liveRunners, tr)
				continue
			}

			// Task is dead, determine if other tasks should be killed
			if state.Failed {
				// Only set failed event if no event has been
				// set yet to give dead leaders priority.
				if killEvent == nil {
					killTask = name
					killEvent = structs.NewTaskEvent(structs.TaskSiblingFailed).
						SetFailedSibling(name)
				}
			} else if tr.IsLeader() {
				// A leader exiting cleanly still stops the
				// whole group.
				killEvent = structs.NewTaskEvent(structs.TaskLeaderDead)
				leaderFailed = true
				killTask = name
			}
		}

		// If there's a kill event set and live runners, kill them
		if killEvent != nil && len(liveRunners) > 0 {

			// Log kill reason
			if leaderFailed {
				ar.logger.Debug("leader task dead, destroying all tasks", "leader_task", killTask)
			} else {
				ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask)
			}

			// killTasks returns the post-kill states, which
			// supersede the ones gathered above.
			states = ar.killTasks()
		}

		// Get the client allocation
		calloc := ar.clientAlloc(states)

		// Update the server
		ar.stateUpdater.AllocStateUpdated(calloc)

		// Broadcast client alloc to listeners
		ar.allocBroadcaster.Send(calloc)
	}
}
|
|
|
|
|
2018-10-16 03:38:12 +00:00
|
|
|
// killTasks kills all task runners, leader (if there is one) first. Errors are
// logged except taskrunner.ErrTaskNotRunning which is ignored. Task states
// after Kill has been called are returned.
func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
	// mu guards states, which is written concurrently by the goroutines
	// below.
	var mu sync.Mutex
	states := make(map[string]*structs.TaskState, len(ar.tasks))

	// Kill leader first, synchronously
	for name, tr := range ar.tasks {
		if !tr.IsLeader() {
			continue
		}

		err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilled))
		if err != nil && err != taskrunner.ErrTaskNotRunning {
			ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
		}

		state := tr.TaskState()
		states[name] = state
		// At most one leader per group, so stop looking.
		break
	}

	// Kill the rest concurrently
	wg := sync.WaitGroup{}
	for name, tr := range ar.tasks {
		if tr.IsLeader() {
			continue
		}

		wg.Add(1)
		// name/tr passed as arguments so each goroutine captures its
		// own values, not the shared loop variables.
		go func(name string, tr *taskrunner.TaskRunner) {
			defer wg.Done()
			err := tr.Kill(context.TODO(), structs.NewTaskEvent(structs.TaskKilled))
			if err != nil && err != taskrunner.ErrTaskNotRunning {
				ar.logger.Warn("error stopping task", "error", err, "task_name", name)
			}

			state := tr.TaskState()
			mu.Lock()
			states[name] = state
			mu.Unlock()
		}(name, tr)
	}
	wg.Wait()

	return states
}
|
|
|
|
|
2018-07-19 00:06:44 +00:00
|
|
|
// clientAlloc takes in the task states and returns an Allocation populated
// with Client specific fields
func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *structs.Allocation {
	ar.stateLock.Lock()
	defer ar.stateLock.Unlock()

	// store task states for AllocState to expose
	ar.state.TaskStates = taskStates

	a := &structs.Allocation{
		ID:         ar.id,
		TaskStates: taskStates,
	}

	if d := ar.state.DeploymentStatus; d != nil {
		a.DeploymentStatus = d.Copy()
	}

	// Compute the ClientStatus
	if ar.state.ClientStatus != "" {
		// The client status is being forced
		a.ClientStatus, a.ClientDescription = ar.state.ClientStatus, ar.state.ClientDescription
	} else {
		a.ClientStatus, a.ClientDescription = getClientStatus(taskStates)
	}

	// If the allocation is terminal, make sure all required fields are properly
	// set.
	if a.ClientTerminalStatus() {
		alloc := ar.Alloc()

		// If we are part of a deployment and the task has failed, mark the
		// alloc as unhealthy. This guards against the watcher not be started.
		if a.ClientStatus == structs.AllocClientStatusFailed &&
			alloc.DeploymentID != "" && !a.DeploymentStatus.IsUnhealthy() {
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: helper.BoolToPtr(false),
			}
		}

		// Make sure we have marked the finished at for every task. This is used
		// to calculate the reschedule time for failed allocations.
		now := time.Now()
		for _, task := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Tasks {
			ts, ok := a.TaskStates[task.Name]
			if !ok {
				// Task never produced a state (e.g. never
				// started); synthesize an empty one.
				ts = &structs.TaskState{}
				a.TaskStates[task.Name] = ts
			}
			if ts.FinishedAt.IsZero() {
				ts.FinishedAt = now
			}
		}
	}

	return a
}
|
|
|
|
|
|
|
|
// getClientStatus takes in the task states for a given allocation and computes
|
|
|
|
// the client status and description
|
|
|
|
func getClientStatus(taskStates map[string]*structs.TaskState) (status, description string) {
|
|
|
|
var pending, running, dead, failed bool
|
|
|
|
for _, state := range taskStates {
|
|
|
|
switch state.State {
|
|
|
|
case structs.TaskStateRunning:
|
|
|
|
running = true
|
|
|
|
case structs.TaskStatePending:
|
|
|
|
pending = true
|
|
|
|
case structs.TaskStateDead:
|
|
|
|
if state.Failed {
|
|
|
|
failed = true
|
|
|
|
} else {
|
|
|
|
dead = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determine the alloc status
|
|
|
|
if failed {
|
|
|
|
return structs.AllocClientStatusFailed, "Failed tasks"
|
|
|
|
} else if running {
|
|
|
|
return structs.AllocClientStatusRunning, "Tasks are running"
|
|
|
|
} else if pending {
|
|
|
|
return structs.AllocClientStatusPending, "No tasks have started"
|
|
|
|
} else if dead {
|
|
|
|
return structs.AllocClientStatusComplete, "All tasks have completed"
|
|
|
|
}
|
|
|
|
|
|
|
|
return "", ""
|
|
|
|
}
|
|
|
|
|
2018-09-27 00:08:43 +00:00
|
|
|
// AllocState returns a copy of allocation state including a snapshot of task
|
|
|
|
// states.
|
|
|
|
func (ar *allocRunner) AllocState() *state.State {
|
2018-09-28 00:30:10 +00:00
|
|
|
ar.stateLock.RLock()
|
|
|
|
state := ar.state.Copy()
|
|
|
|
ar.stateLock.RUnlock()
|
2018-09-27 00:08:43 +00:00
|
|
|
|
|
|
|
// If TaskStateUpdated has not been called yet, ar.state.TaskStates
|
|
|
|
// won't be set as it is not the canonical source of TaskStates.
|
2018-09-28 00:30:10 +00:00
|
|
|
if len(state.TaskStates) == 0 {
|
2018-09-27 00:08:43 +00:00
|
|
|
ar.state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks))
|
|
|
|
for k, tr := range ar.tasks {
|
2018-09-28 00:30:10 +00:00
|
|
|
state.TaskStates[k] = tr.TaskState()
|
2018-09-27 00:08:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-01 00:41:37 +00:00
|
|
|
// Generate alloc to get other state fields
|
|
|
|
alloc := ar.clientAlloc(state.TaskStates)
|
|
|
|
state.ClientStatus = alloc.ClientStatus
|
|
|
|
state.ClientDescription = alloc.ClientDescription
|
|
|
|
state.DeploymentStatus = alloc.DeploymentStatus
|
|
|
|
|
2018-09-28 00:30:10 +00:00
|
|
|
return state
|
2018-09-27 00:08:43 +00:00
|
|
|
}
|
|
|
|
|
2018-12-17 12:27:54 +00:00
|
|
|
// Update asynchronously updates the running allocation with a new version
// received from the server.
// When processing a new update, we will first attempt to drain stale updates
// from the queue, before appending the new one.
func (ar *allocRunner) Update(update *structs.Allocation) {
	select {
	// Drain queued update from the channel if possible, and check the modify
	// index
	case oldUpdate := <-ar.allocUpdatedCh:
		// If the old update is newer than the replacement, then skip the new one
		// and return. This case shouldn't happen, but may in the case of a bug
		// elsewhere inside the system.
		if oldUpdate.AllocModifyIndex > update.AllocModifyIndex {
			// Requeue the newer update; the channel has a buffer
			// of one and was just drained, so this cannot block.
			ar.allocUpdatedCh <- oldUpdate
			return
		}
	default:
	}

	// Queue the new update
	ar.allocUpdatedCh <- update
}
|
|
|
|
|
|
|
|
func (ar *allocRunner) handleAllocUpdates() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case update := <-ar.allocUpdatedCh:
|
|
|
|
ar.handleAllocUpdate(update)
|
|
|
|
case <-ar.waitCh:
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-01 18:03:52 +00:00
|
|
|
// handleAllocUpdate applies a single server-sent allocation update: it runs
// update hooks (when non-terminal), propagates the update to every task
// runner, and kills all tasks if the update transitions the alloc to a
// terminal state. Called serially by handleAllocUpdates; the queueing /
// stale-update replacement described here previously lives in Update.
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) {
	// Detect Stop updates: non-terminal before, terminal after.
	stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus()

	// Update ar.alloc
	ar.setAlloc(update)

	// Run update hooks if not stopping or dead
	if !update.TerminalStatus() {
		if err := ar.update(update); err != nil {
			ar.logger.Error("error running update hooks", "error", err)
		}
	}

	// Update task runners
	for _, tr := range ar.tasks {
		tr.Update(update)
	}

	// If alloc is being terminated, kill all tasks, leader first
	if stopping {
		ar.killTasks()
	}
}
|
2018-06-29 00:01:05 +00:00
|
|
|
|
2018-08-23 19:03:17 +00:00
|
|
|
func (ar *allocRunner) Listener() *cstructs.AllocListener {
|
|
|
|
return ar.allocBroadcaster.Listen()
|
|
|
|
}
|
|
|
|
|
2018-12-14 15:02:47 +00:00
|
|
|
// destroyImpl performs the actual teardown: kills tasks, waits for Run() and
// the task state handler to finish, runs destroy hooks, deletes local state,
// and marks the runner destroyed. Blocks until complete. Dispatched by
// Destroy(); not safe to call concurrently with itself.
func (ar *allocRunner) destroyImpl() {
	// Stop any running tasks and persist states in case the client is
	// shutdown before Destroy finishes.
	states := ar.killTasks()
	calloc := ar.clientAlloc(states)
	ar.stateUpdater.AllocStateUpdated(calloc)

	// Wait for tasks to exit and postrun hooks to finish
	<-ar.waitCh

	// Run destroy hooks
	if err := ar.destroy(); err != nil {
		ar.logger.Warn("error running destroy hooks", "error", err)
	}

	// Wait for task state update handler to exit before removing local
	// state if Run() ran at all.
	<-ar.taskStateUpdateHandlerCh

	// Cleanup state db
	if err := ar.stateDB.DeleteAllocationBucket(ar.id); err != nil {
		ar.logger.Warn("failed to delete allocation state", "error", err)
	}

	// Mark alloc as destroyed
	ar.destroyedLock.Lock()

	// Destroy implies shutdown has completed; close shutdownCh here if
	// Shutdown never ran so ShutdownCh waiters unblock.
	if !ar.shutdown {
		ar.shutdown = true
		close(ar.shutdownCh)
	}

	ar.destroyed = true
	close(ar.destroyCh)

	ar.destroyedLock.Unlock()
}
|
|
|
|
|
|
|
|
// Destroy the alloc runner by stopping it if it is still running and cleaning
// up all of its resources.
//
// This method is safe for calling concurrently with Run() and will cause it to
// exit (thus closing WaitCh).
func (ar *allocRunner) Destroy() {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()

	if ar.destroyed {
		// Only destroy once
		return
	}

	if ar.destroyLaunched {
		// Only dispatch a destroy once
		return
	}

	ar.destroyLaunched = true

	// Synchronize calls to shutdown/destroy: if a Shutdown is in flight,
	// wait for it to finish before destroying.
	if ar.shutdownLaunched {
		go func() {
			ar.logger.Debug("Waiting for shutdown before destroying runner")
			<-ar.shutdownCh
			ar.destroyImpl()
		}()

		return
	}

	// No shutdown in flight; destroy asynchronously.
	go ar.destroyImpl()
}
|
|
|
|
|
|
|
|
// IsDestroyed returns true if the alloc runner has been destroyed (stopped and
|
|
|
|
// garbage collected).
|
|
|
|
//
|
|
|
|
// This method is safe for calling concurrently with Run(). Callers must
|
|
|
|
// receive on WaitCh() to block until alloc runner has stopped and been
|
|
|
|
// destroyed.
|
|
|
|
func (ar *allocRunner) IsDestroyed() bool {
|
2018-08-23 19:03:17 +00:00
|
|
|
ar.destroyedLock.Lock()
|
|
|
|
defer ar.destroyedLock.Unlock()
|
|
|
|
return ar.destroyed
|
2018-06-29 00:01:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// IsWaiting returns true if the alloc runner is waiting for its previous
// allocation to terminate.
//
// This method is safe for calling concurrently with Run().
func (ar *allocRunner) IsWaiting() bool {
	// Delegates entirely to the previous-allocation watcher.
	return ar.prevAllocWatcher.IsWaiting()
}
|
|
|
|
|
2018-12-14 15:02:47 +00:00
|
|
|
// DestroyCh returns a channel that is closed when the alloc runner has been
// fully destroyed (tasks killed, destroy hooks run, and local state removed).
func (ar *allocRunner) DestroyCh() <-chan struct{} {
	return ar.destroyCh
}
|
|
|
|
|
|
|
|
// ShutdownCh returns a channel that is closed once the alloc runner has shut
// down, either via Shutdown() or as part of being destroyed.
func (ar *allocRunner) ShutdownCh() <-chan struct{} {
	return ar.shutdownCh
}
|
|
|
|
|
2018-11-14 18:29:07 +00:00
|
|
|
// Shutdown the AllocRunner gracefully. The method returns immediately after
// dispatching a goroutine that shuts down all TaskRunners; callers may receive
// on ShutdownCh() to wait for completion. Tasks are unaffected and may be
// restored.
func (ar *allocRunner) Shutdown() {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()

	// Destroy is a superset of Shutdown so there's nothing to do if this
	// has already been destroyed.
	if ar.destroyed {
		return
	}

	// Destroy is a superset of Shutdown so if it's been marked for destruction,
	// don't try and shutdown in parallel. If shutdown has been launched, don't
	// try again.
	if ar.destroyLaunched || ar.shutdownLaunched {
		return
	}

	ar.shutdownLaunched = true

	go func() {
		ar.logger.Trace("shutting down")

		// Shutdown tasks gracefully if they were run
		if ar.runnersLaunched {
			// Fan out one goroutine per task and wait for all of them.
			wg := sync.WaitGroup{}
			for _, tr := range ar.tasks {
				wg.Add(1)
				go func(tr *taskrunner.TaskRunner) {
					tr.Shutdown()
					wg.Done()
				}(tr)
			}
			wg.Wait()
		}

		// Wait for Run to exit
		<-ar.waitCh

		// Run shutdown hooks
		ar.shutdownHooks()

		// Wait for updater to finish its final run
		<-ar.taskStateUpdateHandlerCh

		// Mark the runner as shut down and signal waiters.
		ar.destroyedLock.Lock()
		ar.shutdown = true
		close(ar.shutdownCh)
		ar.destroyedLock.Unlock()
	}()
}
|
|
|
|
|
2018-06-29 00:01:05 +00:00
|
|
|
// IsMigrating returns true if the alloc runner is migrating data from its
// previous allocation.
//
// This method is safe for calling concurrently with Run().
func (ar *allocRunner) IsMigrating() bool {
	// Delegates entirely to the previous-allocation migrator.
	return ar.prevAllocMigrator.IsMigrating()
}
|
|
|
|
|
2018-10-04 22:45:46 +00:00
|
|
|
// StatsReporter returns the alloc runner itself as its stats reporter;
// allocRunner satisfies interfaces.AllocStatsReporter via LatestAllocStats.
func (ar *allocRunner) StatsReporter() interfaces.AllocStatsReporter {
	return ar
}
|
|
|
|
|
2018-09-15 00:08:26 +00:00
|
|
|
// LatestAllocStats returns the latest stats for an allocation. If taskFilter
|
|
|
|
// is set, only stats for that task -- if it exists -- are returned.
|
|
|
|
func (ar *allocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
|
|
|
|
astat := &cstructs.AllocResourceUsage{
|
|
|
|
Tasks: make(map[string]*cstructs.TaskResourceUsage, len(ar.tasks)),
|
|
|
|
ResourceUsage: &cstructs.ResourceUsage{
|
|
|
|
MemoryStats: &cstructs.MemoryStats{},
|
|
|
|
CpuStats: &cstructs.CpuStats{},
|
2018-11-15 15:13:14 +00:00
|
|
|
DeviceStats: []*device.DeviceGroupStats{},
|
2018-09-15 00:08:26 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for name, tr := range ar.tasks {
|
|
|
|
if taskFilter != "" && taskFilter != name {
|
|
|
|
// Getting stats for a particular task and its not this one!
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if usage := tr.LatestResourceUsage(); usage != nil {
|
|
|
|
astat.Tasks[name] = usage
|
|
|
|
astat.ResourceUsage.Add(usage.ResourceUsage)
|
|
|
|
if usage.Timestamp > astat.Timestamp {
|
|
|
|
astat.Timestamp = usage.Timestamp
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-06-29 00:01:05 +00:00
|
|
|
|
2018-09-15 00:08:26 +00:00
|
|
|
return astat, nil
|
2018-06-29 00:01:05 +00:00
|
|
|
}
|