Merge branch 'master' into f-artifact-location
commit c85dfdf9a0
@@ -20,7 +20,7 @@ import (
 	"github.com/hashicorp/nomad/client/allocdir"
 	"github.com/hashicorp/nomad/client/config"
-	"github.com/hashicorp/nomad/client/driver/logging"
+	"github.com/hashicorp/nomad/client/driver/executor"
 	cstructs "github.com/hashicorp/nomad/client/driver/structs"
 	"github.com/hashicorp/nomad/helper/discover"
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -101,7 +101,7 @@ type dockerPID struct {
 type DockerHandle struct {
 	pluginClient     *plugin.Client
-	logCollector     logging.LogCollector
+	executor         executor.Executor
 	client           *docker.Client
 	logger           *log.Logger
 	cleanupContainer bool
@@ -533,23 +533,23 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
 	if err != nil {
 		return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
 	}
-	pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-syslog-collector.out", task.Name))
+	pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
 	pluginConfig := &plugin.ClientConfig{
-		Cmd: exec.Command(bin, "syslog", pluginLogFile),
+		Cmd: exec.Command(bin, "executor", pluginLogFile),
 	}

-	logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
+	exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
 	if err != nil {
 		return nil, err
 	}
-	logCollectorCtx := &logging.LogCollectorContext{
-		TaskName:       task.Name,
+	executorCtx := &executor.ExecutorContext{
+		TaskEnv:        d.taskEnv,
+		Task:           task,
 		AllocDir:       ctx.AllocDir,
-		LogConfig:      task.LogConfig,
 		PortLowerBound: d.config.ClientMinPort,
 		PortUpperBound: d.config.ClientMaxPort,
 	}
-	ss, err := logCollector.LaunchCollector(logCollectorCtx)
+	ss, err := exec.LaunchSyslogServer(executorCtx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to start syslog collector: %v", err)
 	}
@@ -629,7 +629,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
 	maxKill := d.DriverContext.config.MaxKillTimeout
 	h := &DockerHandle{
 		client:           client,
-		logCollector:     logCollector,
+		executor:         exec,
 		pluginClient:     pluginClient,
 		cleanupContainer: cleanupContainer,
 		cleanupImage:     cleanupImage,
@@ -686,7 +686,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
 	if !found {
 		return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
 	}
-	logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
+	exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
 	if err != nil {
 		d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err)
 		if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout*time.Second)); e != nil {
@@ -698,7 +698,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
 	// Return a driver handle
 	h := &DockerHandle{
 		client:           client,
-		logCollector:     logCollector,
+		executor:         exec,
 		pluginClient:     pluginClient,
 		cleanupContainer: cleanupContainer,
 		cleanupImage:     cleanupImage,
@@ -743,7 +743,7 @@ func (h *DockerHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *DockerHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	if err := h.logCollector.UpdateLogConfig(task.LogConfig); err != nil {
+	if err := h.executor.UpdateTask(task); err != nil {
 		h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err)
 	}

@@ -759,9 +759,13 @@ func (h *DockerHandle) Kill() error {
 		// Container has already been removed.
 		if strings.Contains(err.Error(), NoSuchContainerError) {
 			h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID)
+			h.executor.Exit()
+			h.pluginClient.Kill()
 			return nil
 		}
 		h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
+		h.executor.Exit()
+		h.pluginClient.Kill()
 		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
 	}
 	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
@@ -824,7 +828,7 @@ func (h *DockerHandle) run() {
 	close(h.waitCh)

 	// Shutdown the syslog collector
-	if err := h.logCollector.Exit(); err != nil {
+	if err := h.executor.Exit(); err != nil {
 		h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)
 	}
 	h.pluginClient.Kill()
@@ -145,7 +145,7 @@ func TestDockerDriver_Handle(t *testing.T) {
 	pluginConfig := &plugin.ClientConfig{
 		Cmd: exec.Command(bin, "syslog", f.Name()),
 	}
-	logCollector, pluginClient, err := createLogCollector(pluginConfig, os.Stdout, &config.Config{})
+	exec, pluginClient, err := createExecutor(pluginConfig, os.Stdout, &config.Config{})
 	if err != nil {
 		t.Fatalf("got an err: %v", err)
 	}
@@ -154,7 +154,7 @@ func TestDockerDriver_Handle(t *testing.T) {
 	h := &DockerHandle{
 		version:      "version",
 		imageID:      "imageid",
-		logCollector: logCollector,
+		executor:     exec,
 		pluginClient: pluginClient,
 		containerID:  "containerid",
 		killTimeout:  5 * time.Nanosecond,
@@ -100,16 +100,17 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
 		return nil, err
 	}
 	executorCtx := &executor.ExecutorContext{
 		TaskEnv:  d.taskEnv,
 		AllocDir: ctx.AllocDir,
-		TaskName:         task.Name,
-		TaskResources:    task.Resources,
-		LogConfig:        task.LogConfig,
-		ResourceLimits:   true,
-		FSIsolation:      true,
-		UnprivilegedUser: true,
+		Task:     task,
 	}
-	ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
+	ps, err := exec.LaunchCmd(&executor.ExecCommand{
+		Cmd:            command,
+		Args:           driverConfig.Args,
+		FSIsolation:    true,
+		ResourceLimits: true,
+		User:           cstructs.DefaultUnpriviledgedUser,
+	}, executorCtx)
 	if err != nil {
 		pluginClient.Kill()
 		return nil, err
@@ -217,7 +218,7 @@ func (h *execHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *execHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	h.executor.UpdateLogConfig(task.LogConfig)
+	h.executor.UpdateTask(task)

 	// Update is not possible
 	return nil
@@ -267,5 +268,8 @@ func (h *execHandle) run() {
 	}
 	h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
 	close(h.waitCh)
+	if err := h.executor.Exit(); err != nil {
+		h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
+	}
 	h.pluginClient.Kill()
 }
@@ -2,7 +2,9 @@ package executor

 import (
 	"fmt"
+	"io/ioutil"
 	"log"
+	"net"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -22,6 +24,18 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )

+// Executor is the interface which allows a driver to launch and supervise
+// a process
+type Executor interface {
+	LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
+	LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)
+	Wait() (*ProcessState, error)
+	ShutDown() error
+	Exit() error
+	UpdateLogConfig(logConfig *structs.LogConfig) error
+	UpdateTask(task *structs.Task) error
+}
+
 // ExecutorContext holds context to configure the command user
 // wants to run and isolate it
 type ExecutorContext struct {
@@ -32,33 +46,36 @@ type ExecutorContext struct {
 	// the task
 	AllocDir *allocdir.AllocDir

-	// TaskName is the name of the Task
-	TaskName string
+	// Task is the task whose executor is being launched
+	Task *structs.Task

-	// TaskResources are the resource constraints for the Task
-	TaskResources *structs.Resources
+	// PortUpperBound is the upper bound of the ports that we can use to start
+	// the syslog server
+	PortUpperBound uint

-	// FSIsolation is a flag for drivers to impose file system
-	// isolation on certain platforms
-	FSIsolation bool
-
-	// ResourceLimits is a flag for drivers to impose resource
-	// contraints on a Task on certain platforms
-	ResourceLimits bool
-
-	// UnprivilegedUser is a flag for drivers to make the process
-	// run as nobody
-	UnprivilegedUser bool
-
-	// LogConfig provides the configuration related to log rotation
-	LogConfig *structs.LogConfig
+	// PortLowerBound is the lower bound of the ports that we can use to start
+	// the syslog server
+	PortLowerBound uint
 }

-// ExecCommand holds the user command and args. It's a lightweight replacement
-// of exec.Cmd for serialization purposes.
+// ExecCommand holds the user command, args, and other isolation related
+// settings.
 type ExecCommand struct {
-	Cmd string
+	// Cmd is the command that the user wants to run.
+	Cmd string
+
+	// Args is the args of the command that the user wants to run.
 	Args []string
+
+	// FSIsolation determines whether the command would be run in a chroot.
+	FSIsolation bool
+
+	// User is the user which the executor uses to run the command.
+	User string
+
+	// ResourceLimits determines whether resource limits are enforced by the
+	// executor.
+	ResourceLimits bool
 }

 // ProcessState holds information about the state of a user process.
@@ -70,37 +87,44 @@ type ProcessState struct {
 	Time time.Time
 }

-// Executor is the interface which allows a driver to launch and supervise
-// a process
-type Executor interface {
-	LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
-	Wait() (*ProcessState, error)
-	ShutDown() error
-	Exit() error
-	UpdateLogConfig(logConfig *structs.LogConfig) error
+// SyslogServerState holds the address and islation information of a launched
+// syslog server
+type SyslogServerState struct {
+	IsolationConfig *cstructs.IsolationConfig
+	Addr            string
 }

 // UniversalExecutor is an implementation of the Executor which launches and
 // supervises processes. In addition to process supervision it provides resource
 // and file system isolation
 type UniversalExecutor struct {
 	cmd exec.Cmd
 	ctx *ExecutorContext
+	command *ExecCommand
+
 	taskDir string
-	groups *cgroupConfig.Cgroup
 	exitState *ProcessState
 	processExited chan interface{}
+
 	lre *logging.FileRotator
 	lro *logging.FileRotator
+	rotatorLock sync.Mutex
+
+	syslogServer *logging.SyslogServer
+	syslogChan chan *logging.SyslogMessage
+
+	groups *cgroupConfig.Cgroup
+	cgLock sync.Mutex

 	logger *log.Logger
-	lock sync.Mutex
 }

 // NewExecutor returns an Executor
 func NewExecutor(logger *log.Logger) Executor {
-	return &UniversalExecutor{logger: logger, processExited: make(chan interface{})}
+	return &UniversalExecutor{
+		logger:        logger,
+		processExited: make(chan interface{}),
+	}
 }

 // LaunchCmd launches a process and returns it's state. It also configures an
@@ -109,6 +133,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
 	e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))

 	e.ctx = ctx
+	e.command = command

 	// configuring the task dir
 	if err := e.configureTaskDir(); err != nil {
@@ -122,29 +147,18 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
 	}

 	// setting the user of the process
-	if e.ctx.UnprivilegedUser {
-		if err := e.runAs("nobody"); err != nil {
+	if command.User != "" {
+		if err := e.runAs(command.User); err != nil {
 			return nil, err
 		}
 	}

-	logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024)
-	lro, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", ctx.TaskName),
-		ctx.LogConfig.MaxFiles, logFileSize, e.logger)
-	if err != nil {
-		return nil, fmt.Errorf("error creating log rotator for stdout of task %v", err)
+	// Setup the loggers
+	if err := e.configureLoggers(); err != nil {
+		return nil, err
 	}
-	e.cmd.Stdout = lro
-	e.lro = lro
-
-	lre, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", ctx.TaskName),
-		ctx.LogConfig.MaxFiles, logFileSize, e.logger)
-	if err != nil {
-		return nil, fmt.Errorf("error creating log rotator for stderr of task %v", err)
-	}
-	e.cmd.Stderr = lre
-	e.lre = lre
+	e.cmd.Stdout = e.lro
+	e.cmd.Stderr = e.lre

 	e.ctx.TaskEnv.Build()

@@ -160,7 +174,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext

 	// Determine the path to run as it may have to be relative to the chroot.
 	path := absPath
-	if e.ctx.FSIsolation {
+	if e.command.FSIsolation {
 		rel, err := filepath.Rel(e.taskDir, absPath)
 		if err != nil {
 			return nil, err
@@ -182,15 +196,42 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
 	return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
 }

+// configureLoggers sets up the standard out/error file rotators
+func (e *UniversalExecutor) configureLoggers() error {
+	e.rotatorLock.Lock()
+	defer e.rotatorLock.Unlock()
+
+	logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
+	if e.lro == nil {
+		lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
+			e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
+		if err != nil {
+			return err
+		}
+		e.lro = lro
+	}
+
+	if e.lre == nil {
+		lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
+			e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
+		if err != nil {
+			return err
+		}
+		e.lre = lre
+	}
+	return nil
+}
+
 // Wait waits until a process has exited and returns it's exitcode and errors
 func (e *UniversalExecutor) Wait() (*ProcessState, error) {
 	<-e.processExited
 	return e.exitState, nil
 }

+// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
 // UpdateLogConfig updates the log configuration
 func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
-	e.ctx.LogConfig = logConfig
+	e.ctx.Task.LogConfig = logConfig
 	if e.lro == nil {
 		return fmt.Errorf("log rotator for stdout doesn't exist")
 	}
@@ -205,11 +246,21 @@ func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error
 	return nil
 }

+func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
+	e.ctx.Task = task
+
+	// Updating Log Config
+	fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
+	e.lro.MaxFiles = task.LogConfig.MaxFiles
+	e.lro.FileSize = fileSize
+	e.lre.MaxFiles = task.LogConfig.MaxFiles
+	e.lre.FileSize = fileSize
+	return nil
+}
+
 func (e *UniversalExecutor) wait() {
 	defer close(e.processExited)
 	err := e.cmd.Wait()
-	e.lre.Close()
-	e.lro.Close()
 	if err == nil {
 		e.exitState = &ProcessState{Pid: 0, ExitCode: 0, Time: time.Now()}
 		return
@@ -220,14 +271,6 @@ func (e *UniversalExecutor) wait() {
 			exitCode = status.ExitStatus()
 		}
 	}
-	if e.ctx.FSIsolation {
-		e.removeChrootMounts()
-	}
-	if e.ctx.ResourceLimits {
-		e.lock.Lock()
-		DestroyCgroup(e.groups)
-		e.lock.Unlock()
-	}
 	e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Time: time.Now()}
 }

@@ -241,7 +284,13 @@ var (
 // process
 func (e *UniversalExecutor) Exit() error {
 	var merr multierror.Error
-	if e.cmd.Process != nil {
+	if e.syslogServer != nil {
+		e.syslogServer.Shutdown()
+	}
+	e.lre.Close()
+	e.lro.Close()
+
+	if e.command != nil && e.cmd.Process != nil {
 		proc, err := os.FindProcess(e.cmd.Process.Pid)
 		if err != nil {
 			e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
@@ -252,17 +301,17 @@ func (e *UniversalExecutor) Exit() error {
 		}
 	}

-	if e.ctx.FSIsolation {
+	if e.command != nil && e.command.FSIsolation {
 		if err := e.removeChrootMounts(); err != nil {
 			merr.Errors = append(merr.Errors, err)
 		}
 	}
-	if e.ctx.ResourceLimits {
-		e.lock.Lock()
+	if e.command != nil && e.command.ResourceLimits {
+		e.cgLock.Lock()
 		if err := DestroyCgroup(e.groups); err != nil {
 			merr.Errors = append(merr.Errors, err)
 		}
-		e.lock.Unlock()
+		e.cgLock.Unlock()
 	}
 	return merr.ErrorOrNil()
 }
@@ -287,10 +336,10 @@ func (e *UniversalExecutor) ShutDown() error {

 // configureTaskDir sets the task dir in the executor
 func (e *UniversalExecutor) configureTaskDir() error {
-	taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.TaskName]
+	taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]
 	e.taskDir = taskDir
 	if !ok {
-		return fmt.Errorf("couldn't find task directory for task %v", e.ctx.TaskName)
+		return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name)
 	}
 	e.cmd.Dir = taskDir
 	return nil
@@ -344,3 +393,48 @@ func (e *UniversalExecutor) makeExecutable(binPath string) error {
 	}
 	return nil
 }
+
+// getFreePort returns a free port ready to be listened on between upper and
+// lower bounds
+func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
+	if runtime.GOOS == "windows" {
+		return e.listenerTCP(lowerBound, upperBound)
+	}
+
+	return e.listenerUnix()
+}
+
+// listenerTCP creates a TCP listener using an unused port between an upper and
+// lower bound
+func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
+	for i := lowerBound; i <= upperBound; i++ {
+		addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
+		if err != nil {
+			return nil, err
+		}
+		l, err := net.ListenTCP("tcp", addr)
+		if err != nil {
+			continue
+		}
+		return l, nil
+	}
+	return nil, fmt.Errorf("No free port found")
+}
+
+// listenerUnix creates a Unix domain socket
+func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
+	f, err := ioutil.TempFile("", "plugin")
+	if err != nil {
+		return nil, err
+	}
+	path := f.Name()
+
+	if err := f.Close(); err != nil {
+		return nil, err
+	}
+	if err := os.Remove(path); err != nil {
+		return nil, err
+	}
+
+	return net.Listen("unix", path)
+}
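The hunks above replace ExecutorContext's TaskName/TaskResources/LogConfig fields with the whole *structs.Task and move FSIsolation, ResourceLimits and the run-as user onto ExecCommand. A rough, hedged sketch of driving the reworked Executor interface directly, in the spirit of the executor tests later in this diff; it is not part of the commit, and the alloc-dir/task-environment setup as well as the literal command are placeholders:

	logger := log.New(os.Stdout, "", log.LstdFlags)
	exec := executor.NewExecutor(logger)

	ctx := &executor.ExecutorContext{
		TaskEnv:  taskEnv,  // *env.TaskEnvironment built for the task
		Task:     task,     // *structs.Task; carries Name, Resources and LogConfig
		AllocDir: allocDir, // *allocdir.AllocDir that has already been built
	}
	cmd := &executor.ExecCommand{
		Cmd:  "/bin/echo",
		Args: []string{"hello world"},
		// FSIsolation, ResourceLimits and User are left zero-valued, so the
		// command runs without chroot, cgroups or privilege dropping.
	}

	ps, err := exec.LaunchCmd(cmd, ctx)
	if err != nil {
		logger.Fatalf("launch failed: %v", err)
	}
	logger.Printf("started pid %d", ps.Pid)

	if _, err := exec.Wait(); err != nil {
		logger.Fatalf("wait failed: %v", err)
	}
	if err := exec.Exit(); err != nil {
		logger.Printf("cleanup error: %v", err)
	}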
@@ -38,14 +38,14 @@ var (

 // configureIsolation configures chroot and creates cgroups
 func (e *UniversalExecutor) configureIsolation() error {
-	if e.ctx.FSIsolation {
+	if e.command.FSIsolation {
 		if err := e.configureChroot(); err != nil {
 			return err
 		}
 	}

-	if e.ctx.ResourceLimits {
-		if err := e.configureCgroups(e.ctx.TaskResources); err != nil {
+	if e.command.ResourceLimits {
+		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
 			return fmt.Errorf("error creating cgroups: %v", err)
 		}
 		if err := e.applyLimits(os.Getpid()); err != nil {
@@ -63,7 +63,7 @@ func (e *UniversalExecutor) configureIsolation() error {

 // applyLimits puts a process in a pre-configured cgroup
 func (e *UniversalExecutor) applyLimits(pid int) error {
-	if !e.ctx.ResourceLimits {
+	if !e.command.ResourceLimits {
 		return nil
 	}

@@ -155,11 +155,11 @@ func (e *UniversalExecutor) runAs(userid string) error {
 // configureChroot configures a chroot
 func (e *UniversalExecutor) configureChroot() error {
 	allocDir := e.ctx.AllocDir
-	if err := allocDir.MountSharedDir(e.ctx.TaskName); err != nil {
+	if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
 		return err
 	}

-	if err := allocDir.Embed(e.ctx.TaskName, chrootEnv); err != nil {
+	if err := allocDir.Embed(e.ctx.Task.Name, chrootEnv); err != nil {
 		return err
 	}

@@ -183,8 +183,8 @@ func (e *UniversalExecutor) configureChroot() error {
 // should be called when tearing down the task.
 func (e *UniversalExecutor) removeChrootMounts() error {
 	// Prevent a race between Wait/ForceStop
-	e.lock.Lock()
-	defer e.lock.Unlock()
+	e.cgLock.Lock()
+	defer e.cgLock.Unlock()
 	return e.ctx.AllocDir.UnmountAll()
 }
client/driver/executor/executor_posix.go (new file, 44 lines)
@@ -0,0 +1,44 @@
+// +build !windows
+
+package executor
+
+import (
+	"fmt"
+	"io"
+	"log/syslog"
+
+	"github.com/hashicorp/nomad/client/driver/logging"
+)
+
+func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
+	e.ctx = ctx
+	e.syslogChan = make(chan *logging.SyslogMessage, 2048)
+	l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound)
+	if err != nil {
+		return nil, err
+	}
+	e.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
+	if err := e.configureLoggers(); err != nil {
+		return nil, err
+	}
+
+	e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger)
+	go e.syslogServer.Start()
+	go e.collectLogs(e.lre, e.lro)
+	syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
+	return &SyslogServerState{Addr: syslogAddr}, nil
+}
+
+func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) {
+	for logParts := range e.syslogChan {
+		// If the severity of the log line is err then we write to stderr
+		// otherwise all messages go to stdout
+		if logParts.Severity == syslog.LOG_ERR {
+			e.lre.Write(logParts.Message)
+			e.lre.Write([]byte{'\n'})
+		} else {
+			e.lro.Write(logParts.Message)
+			e.lro.Write([]byte{'\n'})
+		}
+	}
+}
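LaunchSyslogServer above reports its listener as a single "<network>://<address>" string in SyslogServerState.Addr (a Unix socket path on POSIX hosts, localhost:port on Windows, matching getListener). A small sketch of how a consumer could split and redial that string; this helper is illustrative only and is not part of the commit:

	func dialSyslog(addr string) (net.Conn, error) {
		parts := strings.SplitN(addr, "://", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("malformed syslog addr %q", addr)
		}
		// parts[0] matches l.Addr().Network() above ("unix" or "tcp").
		return net.Dial(parts[0], parts[1])
	}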
@@ -15,6 +15,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	tu "github.com/hashicorp/nomad/testutil"
+	cstructs "github.com/hashicorp/nomad/client/driver/structs"
 )

 var (
@@ -30,7 +31,7 @@ var (
 	}
 )

-func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
+func mockAllocDir(t *testing.T) (*structs.Task, *allocdir.AllocDir) {
 	alloc := mock.Alloc()
 	task := alloc.Job.TaskGroups[0].Tasks[0]

@@ -39,18 +40,16 @@ func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
 		log.Panicf("allocDir.Build() failed: %v", err)
 	}

-	return task.Name, allocDir
+	return task, allocDir
 }

 func testExecutorContext(t *testing.T) *ExecutorContext {
 	taskEnv := env.NewTaskEnvironment(mock.Node())
-	taskName, allocDir := mockAllocDir(t)
+	task, allocDir := mockAllocDir(t)
 	ctx := &ExecutorContext{
 		TaskEnv:  taskEnv,
-		TaskName: taskName,
+		Task:     task,
 		AllocDir: allocDir,
-		TaskResources: constraint,
-		LogConfig:     structs.DefaultLogConfig(),
 	}
 	return ctx
 }
@@ -80,6 +79,9 @@ func TestExecutor_Start_Wait_Failure_Code(t *testing.T) {
 	if ps.ExitCode < 1 {
 		t.Fatalf("expected exit code to be non zero, actual: %v", ps.ExitCode)
 	}
+	if err := executor.Exit(); err != nil {
+		t.Fatalf("error: %v", err)
+	}
 }

 func TestExecutor_Start_Wait(t *testing.T) {
@@ -98,6 +100,9 @@ func TestExecutor_Start_Wait(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error in waiting for command: %v", err)
 	}
+	if err := executor.Exit(); err != nil {
+		t.Fatalf("error: %v", err)
+	}

 	expected := "hello world"
 	file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@@ -119,9 +124,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
 	ctx := testExecutorContext(t)
 	defer ctx.AllocDir.Destroy()

-	ctx.FSIsolation = true
-	ctx.ResourceLimits = true
-	ctx.UnprivilegedUser = true
+	execCmd.FSIsolation = true
+	execCmd.ResourceLimits = true
+	execCmd.User = cstructs.DefaultUnpriviledgedUser

 	executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
 	ps, err := executor.LaunchCmd(&execCmd, ctx)
@@ -135,6 +140,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error in waiting for command: %v", err)
 	}
+	if err := executor.Exit(); err != nil {
+		t.Fatalf("error: %v", err)
+	}

 	expected := "hello world"
 	file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@@ -154,13 +162,13 @@ func TestExecutor_DestroyCgroup(t *testing.T) {

 	execCmd := ExecCommand{Cmd: "/bin/bash", Args: []string{"-c", "/usr/bin/yes"}}
 	ctx := testExecutorContext(t)
-	ctx.LogConfig.MaxFiles = 1
-	ctx.LogConfig.MaxFileSizeMB = 300
+	ctx.Task.LogConfig.MaxFiles = 1
+	ctx.Task.LogConfig.MaxFileSizeMB = 300
 	defer ctx.AllocDir.Destroy()

-	ctx.FSIsolation = true
-	ctx.ResourceLimits = true
-	ctx.UnprivilegedUser = true
+	execCmd.FSIsolation = true
+	execCmd.ResourceLimits = true
+	execCmd.User = "nobody"

 	executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
 	ps, err := executor.LaunchCmd(&execCmd, ctx)
@@ -171,7 +179,10 @@ func TestExecutor_DestroyCgroup(t *testing.T) {
 		t.Fatalf("expected process to start and have non zero pid")
 	}
 	time.Sleep(200 * time.Millisecond)
-	executor.Exit()
+	if err := executor.Exit(); err != nil {
+		t.Fatalf("err: %v", err)
+	}

 	file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
 	finfo, err := os.Stat(file)
 	if err != nil {
@@ -203,6 +214,9 @@ func TestExecutor_Start_Kill(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error in waiting for command: %v", err)
 	}
+	if err := executor.Exit(); err != nil {
+		t.Fatalf("error: %v", err)
+	}

 	file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
 	time.Sleep(time.Duration(tu.TestMultiplier()*2) * time.Second)
client/driver/executor/executor_windows.go (new file, 5 lines)
@@ -0,0 +1,5 @@
+package executor
+
+func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
+	return nil, nil
+}
@@ -1,6 +1,7 @@
 package driver

 import (
+	"encoding/gob"
 	"log"
 	"net/rpc"

@@ -9,6 +10,14 @@ import (
 	"github.com/hashicorp/nomad/nomad/structs"
 )

+// Registering these types since we have to serialize and de-serialize the Task
+// structs over the wire between drivers and the executor.
+func init() {
+	gob.Register([]interface{}{})
+	gob.Register(map[string]interface{}{})
+	gob.Register([]map[string]string{})
+}
+
 type ExecutorRPC struct {
 	client *rpc.Client
 }
@@ -19,12 +28,23 @@ type LaunchCmdArgs struct {
 	Ctx *executor.ExecutorContext
 }

+// LaunchSyslogServerArgs wraps the executor context for the purposes of RPC
+type LaunchSyslogServerArgs struct {
+	Ctx *executor.ExecutorContext
+}
+
 func (e *ExecutorRPC) LaunchCmd(cmd *executor.ExecCommand, ctx *executor.ExecutorContext) (*executor.ProcessState, error) {
 	var ps *executor.ProcessState
 	err := e.client.Call("Plugin.LaunchCmd", LaunchCmdArgs{Cmd: cmd, Ctx: ctx}, &ps)
 	return ps, err
 }

+func (e *ExecutorRPC) LaunchSyslogServer(ctx *executor.ExecutorContext) (*executor.SyslogServerState, error) {
+	var ss *executor.SyslogServerState
+	err := e.client.Call("Plugin.LaunchSyslogServer", LaunchSyslogServerArgs{Ctx: ctx}, &ss)
+	return ss, err
+}
+
 func (e *ExecutorRPC) Wait() (*executor.ProcessState, error) {
 	var ps executor.ProcessState
 	err := e.client.Call("Plugin.Wait", new(interface{}), &ps)
@@ -43,6 +63,10 @@ func (e *ExecutorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
 	return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
 }

+func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
+	return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
+}
+
 type ExecutorRPCServer struct {
 	Impl executor.Executor
 }
@@ -55,6 +79,14 @@ func (e *ExecutorRPCServer) LaunchCmd(args LaunchCmdArgs, ps *executor.ProcessSt
 	return err
 }

+func (e *ExecutorRPCServer) LaunchSyslogServer(args LaunchSyslogServerArgs, ss *executor.SyslogServerState) error {
+	state, err := e.Impl.LaunchSyslogServer(args.Ctx)
+	if state != nil {
+		*ss = *state
+	}
+	return err
+}
+
 func (e *ExecutorRPCServer) Wait(args interface{}, ps *executor.ProcessState) error {
 	state, err := e.Impl.Wait()
 	if state != nil {
@@ -75,6 +107,10 @@ func (e *ExecutorRPCServer) UpdateLogConfig(args *structs.LogConfig, resp *inter
 	return e.Impl.UpdateLogConfig(args)
 }

+func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) error {
+	return e.Impl.UpdateTask(args)
+}
+
 type ExecutorPlugin struct {
 	logger *log.Logger
 	Impl   *ExecutorRPCServer
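The init() block added above registers concrete container types with encoding/gob because the Task structs now sent over the plugin RPC carry interface-typed values (for example the driver Config map), and gob refuses to encode a concrete type inside an interface unless it has been registered. A standalone illustration of that failure mode and the fix; the payload type and values here are made up for the example and are not part of the commit:

	package main

	import (
		"bytes"
		"encoding/gob"
		"fmt"
	)

	type payload struct {
		Config map[string]interface{}
	}

	func main() {
		// Without this call, Encode fails with
		// "gob: type not registered for interface: []interface {}".
		gob.Register([]interface{}{})

		in := payload{Config: map[string]interface{}{"args": []interface{}{"a", "b"}}}

		var buf bytes.Buffer
		if err := gob.NewEncoder(&buf).Encode(in); err != nil {
			fmt.Println("encode error:", err)
			return
		}
		var out payload
		if err := gob.NewDecoder(&buf).Decode(&out); err != nil {
			fmt.Println("decode error:", err)
			return
		}
		fmt.Printf("%+v\n", out)
	}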
@@ -154,14 +154,9 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
 		return nil, err
 	}
 	executorCtx := &executor.ExecutorContext{
 		TaskEnv:  d.taskEnv,
 		AllocDir: ctx.AllocDir,
-		TaskName:         task.Name,
-		TaskResources:    task.Resources,
-		LogConfig:        task.LogConfig,
-		FSIsolation:      true,
-		UnprivilegedUser: true,
-		ResourceLimits:   true,
+		Task:     task,
 	}

 	absPath, err := GetAbsolutePath("java")
@@ -169,7 +164,13 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
 		return nil, err
 	}

-	ps, err := execIntf.LaunchCmd(&executor.ExecCommand{Cmd: absPath, Args: args}, executorCtx)
+	ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
+		Cmd:            absPath,
+		Args:           args,
+		FSIsolation:    true,
+		ResourceLimits: true,
+		User:           cstructs.DefaultUnpriviledgedUser,
+	}, executorCtx)
 	if err != nil {
 		pluginClient.Kill()
 		return nil, err
@@ -290,7 +291,7 @@ func (h *javaHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *javaHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	h.executor.UpdateLogConfig(task.LogConfig)
+	h.executor.UpdateTask(task)

 	// Update is not possible
 	return nil
@@ -338,5 +339,6 @@ func (h *javaHandle) run() {
 	}
 	h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
 	close(h.waitCh)
+	h.executor.Exit()
 	h.pluginClient.Kill()
 }
client/driver/logging/syslog_server_windows.go (new file, 10 lines)
@@ -0,0 +1,10 @@
+package logging
+
+type SyslogServer struct {
+}
+
+func (s *SyslogServer) Shutdown() {
+}
+
+type SyslogMessage struct {
+}
@@ -191,11 +191,9 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
 		return nil, err
 	}
 	executorCtx := &executor.ExecutorContext{
 		TaskEnv:  d.taskEnv,
 		AllocDir: ctx.AllocDir,
-		TaskName:      task.Name,
-		TaskResources: task.Resources,
-		LogConfig:     task.LogConfig,
+		Task:     task,
 	}
 	ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: args[0], Args: args[1:]}, executorCtx)
 	if err != nil {
@@ -292,7 +290,7 @@ func (h *qemuHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *qemuHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	h.executor.UpdateLogConfig(task.LogConfig)
+	h.executor.UpdateTask(task)

 	// Update is not possible
 	return nil
@@ -336,5 +334,6 @@ func (h *qemuHandle) run() {
 	close(h.doneCh)
 	h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
 	close(h.waitCh)
+	h.executor.Exit()
 	h.pluginClient.Kill()
 }
@@ -96,11 +96,9 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
 		return nil, err
 	}
 	executorCtx := &executor.ExecutorContext{
 		TaskEnv:  d.taskEnv,
 		AllocDir: ctx.AllocDir,
-		TaskName:      task.Name,
-		TaskResources: task.Resources,
-		LogConfig:     task.LogConfig,
+		Task:     task,
 	}
 	ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
 	if err != nil {
@@ -195,7 +193,7 @@ func (h *rawExecHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *rawExecHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	h.executor.UpdateLogConfig(task.LogConfig)
+	h.executor.UpdateTask(task)

 	// Update is not possible
 	return nil
@@ -237,5 +235,8 @@ func (h *rawExecHandle) run() {
 	}
 	h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
 	close(h.waitCh)
+	if err := h.executor.Exit(); err != nil {
+		h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
+	}
 	h.pluginClient.Kill()
 }
@@ -233,12 +233,9 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
 		return nil, err
 	}
 	executorCtx := &executor.ExecutorContext{
 		TaskEnv:  d.taskEnv,
 		AllocDir: ctx.AllocDir,
-		TaskName:         task.Name,
-		TaskResources:    task.Resources,
-		UnprivilegedUser: false,
-		LogConfig:        task.LogConfig,
+		Task:     task,
 	}

 	absPath, err := GetAbsolutePath("rkt")
@@ -329,7 +326,7 @@ func (h *rktHandle) WaitCh() chan *cstructs.WaitResult {
 func (h *rktHandle) Update(task *structs.Task) error {
 	// Store the updated kill timeout.
 	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
-	h.executor.UpdateLogConfig(task.LogConfig)
+	h.executor.UpdateTask(task)

 	// Update is not possible
 	return nil
@@ -360,5 +357,8 @@ func (h *rktHandle) run() {
 	}
 	h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
 	close(h.waitCh)
+	if err := h.executor.Exit(); err != nil {
+		h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
+	}
 	h.pluginClient.Kill()
 }
@@ -6,6 +6,11 @@ import (
 	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
 )

+const (
+	// The default user that the executor uses to run tasks
+	DefaultUnpriviledgedUser = "nobody"
+)
+
 // WaitResult stores the result of a Wait operation.
 type WaitResult struct {
 	ExitCode int
@@ -75,50 +75,47 @@ func (c *AllocStatusCommand) Run(args []string) int {
 	}

 	// Query the allocation info
-	alloc, _, err := client.Allocations().Info(allocID, nil)
-	if err != nil {
-		if len(allocID) == 1 {
-			c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
-			return 1
-		}
-		if len(allocID)%2 == 1 {
-			// Identifiers must be of even length, so we strip off the last byte
-			// to provide a consistent user experience.
-			allocID = allocID[:len(allocID)-1]
-		}
+	if len(allocID) == 1 {
+		c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
+		return 1
+	}
+	if len(allocID)%2 == 1 {
+		// Identifiers must be of even length, so we strip off the last byte
+		// to provide a consistent user experience.
+		allocID = allocID[:len(allocID)-1]
+	}

 	allocs, _, err := client.Allocations().PrefixList(allocID)
 	if err != nil {
 		c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
 		return 1
 	}
 	if len(allocs) == 0 {
 		c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
 		return 1
 	}
 	if len(allocs) > 1 {
 		// Format the allocs
 		out := make([]string, len(allocs)+1)
 		out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
 		for i, alloc := range allocs {
 			out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
 				limit(alloc.ID, length),
 				limit(alloc.EvalID, length),
 				alloc.JobID,
 				alloc.TaskGroup,
 				alloc.DesiredStatus,
 				alloc.ClientStatus,
 			)
-			}
-			c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
-			return 0
-		}
-		// Prefix lookup matched a single allocation
-		alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
-		if err != nil {
-			c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
-			return 1
 		}
+		c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
+		return 0
+	}
+	// Prefix lookup matched a single allocation
+	alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
+	if err != nil {
+		c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
+		return 1
 	}

 	// Format the allocation data
@@ -1,6 +1,12 @@
 package command

-import "github.com/mitchellh/cli"
+import (
+	"math/rand"
+	"time"
+
+	"github.com/hashicorp/nomad/api"
+	"github.com/mitchellh/cli"
+)

 type FSCommand struct {
 	Meta
@@ -17,3 +23,23 @@ func (f *FSCommand) Synopsis() string {
 func (f *FSCommand) Run(args []string) int {
 	return cli.RunResultHelp
 }
+
+// Get Random Allocation ID from a known jobID. Prefer to use a running allocation,
+// but use a dead allocation if no running allocations are found
+func getRandomJobAlloc(client *api.Client, jobID string) (string, error) {
+	var runningAllocs []*api.AllocationListStub
+	allocs, _, err := client.Jobs().Allocations(jobID, nil)
+	for _, v := range allocs {
+		if v.ClientStatus == "running" {
+			runningAllocs = append(runningAllocs, v)
+		}
+	}
+	// If we don't have any allocations running, use dead allocations
+	if len(runningAllocs) < 1 {
+		runningAllocs = allocs
+	}
+
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	allocID := runningAllocs[r.Intn(len(runningAllocs))].ID
+	return allocID, err
+}
@@ -26,6 +26,9 @@ Cat Options:

  -verbose
    Show full information.
+
+  -job <job-id>
+    Use a random allocation from a specified job-id.
`
    return strings.TrimSpace(helpText)
}
@@ -35,10 +38,11 @@ func (f *FSCatCommand) Synopsis() string {
}

func (f *FSCatCommand) Run(args []string) int {
-   var verbose bool
+   var verbose, job bool
    flags := f.Meta.FlagSet("fs-list", FlagSetClient)
    flags.Usage = func() { f.Ui.Output(f.Help()) }
    flags.BoolVar(&verbose, "verbose", false, "")
+   flags.BoolVar(&job, "job", false, "")

    if err := flags.Parse(args); err != nil {
        return 1
@@ -50,7 +54,6 @@ func (f *FSCatCommand) Run(args []string) int {
        return 1
    }

-   allocID := args[0]
    path := "/"
    if len(args) == 2 {
        path = args[1]
@@ -58,60 +61,66 @@ func (f *FSCatCommand) Run(args []string) int {

    client, err := f.Meta.Client()
    if err != nil {
-       f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
+       f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
        return 1
    }

+   // If -job is specified, use random allocation, otherwise use provided allocation
+   allocID := args[0]
+   if job {
+       allocID, err = getRandomJobAlloc(client, args[0])
+       if err != nil {
+           f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
+       }
+   }
+
    // Truncate the id unless full length is requested
    length := shortId
    if verbose {
        length = fullId
    }
    // Query the allocation info
-   alloc, _, err := client.Allocations().Info(allocID, nil)
-   if err != nil {
    if len(allocID) == 1 {
        f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
        return 1
    }
    if len(allocID)%2 == 1 {
        // Identifiers must be of even length, so we strip off the last byte
        // to provide a consistent user experience.
        allocID = allocID[:len(allocID)-1]
    }

    allocs, _, err := client.Allocations().PrefixList(allocID)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
        return 1
    }
    if len(allocs) == 0 {
        f.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
        return 1
    }
    if len(allocs) > 1 {
        // Format the allocs
        out := make([]string, len(allocs)+1)
        out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
        for i, alloc := range allocs {
            out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
                limit(alloc.ID, length),
                limit(alloc.EvalID, length),
                alloc.JobID,
                alloc.TaskGroup,
                alloc.DesiredStatus,
                alloc.ClientStatus,
            )
        }
        f.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single allocation
-   alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
+   alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
        return 1
    }
-   }

    if alloc.DesiredStatus == "failed" {
command/fs_ls.go (101 changed lines)
@@ -4,7 +4,7 @@ import (
    "fmt"
    "strings"

-   "github.com/dustin/go-humanize"
+   humanize "github.com/dustin/go-humanize"
)

type FSListCommand struct {
@@ -30,6 +30,9 @@ Ls Options:
  -verbose
    Show full information.

+  -job <job-id>
+    Use a random allocation from a specified job-id.
+
`
    return strings.TrimSpace(helpText)
}
@@ -41,10 +44,12 @@ func (f *FSListCommand) Synopsis() string {
func (f *FSListCommand) Run(args []string) int {
    var verbose bool
    var machine bool
+   var job bool
    flags := f.Meta.FlagSet("fs-list", FlagSetClient)
    flags.Usage = func() { f.Ui.Output(f.Help()) }
    flags.BoolVar(&verbose, "verbose", false, "")
    flags.BoolVar(&machine, "H", false, "")
+   flags.BoolVar(&job, "job", false, "")

    if err := flags.Parse(args); err != nil {
        return 1
@@ -56,7 +61,6 @@ func (f *FSListCommand) Run(args []string) int {
        return 1
    }

-   allocID := args[0]
    path := "/"
    if len(args) == 2 {
        path = args[1]
@@ -64,60 +68,67 @@ func (f *FSListCommand) Run(args []string) int {

    client, err := f.Meta.Client()
    if err != nil {
-       f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
+       f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
        return 1
    }

+   // If -job is specified, use random allocation, otherwise use provided allocation
+   allocID := args[0]
+   if job {
+       allocID, err = getRandomJobAlloc(client, args[0])
+       if err != nil {
+           f.Ui.Error(fmt.Sprintf("Error fetching allocations: %v", err))
+           return 1
+       }
+   }
+
    // Truncate the id unless full length is requested
    length := shortId
    if verbose {
        length = fullId
    }
    // Query the allocation info
-   alloc, _, err := client.Allocations().Info(allocID, nil)
-   if err != nil {
    if len(allocID) == 1 {
        f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
        return 1
    }
    if len(allocID)%2 == 1 {
        // Identifiers must be of even length, so we strip off the last byte
        // to provide a consistent user experience.
        allocID = allocID[:len(allocID)-1]
    }

    allocs, _, err := client.Allocations().PrefixList(allocID)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
        return 1
    }
    if len(allocs) == 0 {
        f.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
        return 1
    }
    if len(allocs) > 1 {
        // Format the allocs
        out := make([]string, len(allocs)+1)
        out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
        for i, alloc := range allocs {
            out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
                limit(alloc.ID, length),
                limit(alloc.EvalID, length),
                alloc.JobID,
                alloc.TaskGroup,
                alloc.DesiredStatus,
                alloc.ClientStatus,
            )
        }
        f.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single allocation
-   alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
+   alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
        return 1
    }
-   }

    if alloc.DesiredStatus == "failed" {
@@ -4,7 +4,7 @@ import (
    "fmt"
    "strings"

-   "github.com/dustin/go-humanize"
+   humanize "github.com/dustin/go-humanize"
)

type FSStatCommand struct {
@@ -29,6 +29,9 @@ Stat Options:

  -verbose
    Show full information.
+
+  -job <job-id>
+    Use a random allocation from a specified job-id.
`
    return strings.TrimSpace(helpText)
}
@@ -40,10 +43,12 @@ func (f *FSStatCommand) Synopsis() string {
func (f *FSStatCommand) Run(args []string) int {
    var verbose bool
    var machine bool
+   var job bool
    flags := f.Meta.FlagSet("fs-list", FlagSetClient)
    flags.Usage = func() { f.Ui.Output(f.Help()) }
    flags.BoolVar(&verbose, "verbose", false, "")
    flags.BoolVar(&machine, "H", false, "")
+   flags.BoolVar(&job, "job", false, "")

    if err := flags.Parse(args); err != nil {
        return 1
@@ -55,7 +60,6 @@ func (f *FSStatCommand) Run(args []string) int {
        return 1
    }

-   allocID := args[0]
    path := "/"
    if len(args) == 2 {
        path = args[1]
@@ -63,60 +67,65 @@ func (f *FSStatCommand) Run(args []string) int {

    client, err := f.Meta.Client()
    if err != nil {
-       f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
+       f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
        return 1
    }

+   allocID := args[0]
+   if job {
+       allocID, err = getRandomJobAlloc(client, args[0])
+       if err != nil {
+           f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
+       }
+   }
+
    // Truncate the id unless full length is requested
    length := shortId
    if verbose {
        length = fullId
    }
    // Query the allocation info
-   alloc, _, err := client.Allocations().Info(allocID, nil)
-   if err != nil {
    if len(allocID) == 1 {
        f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
        return 1
    }
    if len(allocID)%2 == 1 {
        // Identifiers must be of even length, so we strip off the last byte
        // to provide a consistent user experience.
        allocID = allocID[:len(allocID)-1]
    }

    allocs, _, err := client.Allocations().PrefixList(allocID)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
        return 1
    }
    if len(allocs) == 0 {
        f.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
        return 1
    }
    if len(allocs) > 1 {
        // Format the allocs
        out := make([]string, len(allocs)+1)
        out[0] = "ID|Eval ID|Job ID|Task Group|Desired Status|Client Status"
        for i, alloc := range allocs {
            out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s",
                limit(alloc.ID, length),
                limit(alloc.EvalID, length),
                alloc.JobID,
                alloc.TaskGroup,
                alloc.DesiredStatus,
                alloc.ClientStatus,
            )
        }
        f.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single allocation
-   alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
+   alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
    if err != nil {
        f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
        return 1
    }
-   }

    if alloc.DesiredStatus == "failed" {
@@ -69,53 +69,50 @@ func (c *NodeDrainCommand) Run(args []string) int {
    }

    // Check if node exists
-   node, _, err := client.Nodes().Info(nodeID, nil)
-   if err != nil {
    if len(nodeID) == 1 {
        c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
        return 1
    }
    if len(nodeID)%2 == 1 {
        // Identifiers must be of even length, so we strip off the last byte
        // to provide a consistent user experience.
        nodeID = nodeID[:len(nodeID)-1]
    }

    // Exact lookup failed, try with prefix based search
    nodes, _, err := client.Nodes().PrefixList(nodeID)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
        return 1
    }
    // Return error if no nodes are found
    if len(nodes) == 0 {
        c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID))
        return 1
    }
    if len(nodes) > 1 {
        // Format the nodes list that matches the prefix so that the user
        // can create a more specific request
        out := make([]string, len(nodes)+1)
        out[0] = "ID|Datacenter|Name|Class|Drain|Status"
        for i, node := range nodes {
            out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s",
                node.ID,
                node.Datacenter,
                node.Name,
                node.NodeClass,
                node.Drain,
                node.Status)
        }
        // Dump the output
        c.Ui.Output(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single node
-   node, _, err = client.Nodes().Info(nodes[0].ID, nil)
+   node, _, err := client.Nodes().Info(nodes[0].ID, nil)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
        return 1
    }
-   }

    // Toggle node draining
@@ -133,53 +133,50 @@ func (c *NodeStatusCommand) Run(args []string) int {

    // Query the specific node
    nodeID := args[0]
-   node, _, err := client.Nodes().Info(nodeID, nil)
-   if err != nil {
    if len(nodeID) == 1 {
        c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
        return 1
    }
    if len(nodeID)%2 == 1 {
        // Identifiers must be of even length, so we strip off the last byte
        // to provide a consistent user experience.
        nodeID = nodeID[:len(nodeID)-1]
    }

    // Exact lookup failed, try with prefix based search
    nodes, _, err := client.Nodes().PrefixList(nodeID)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err))
        return 1
    }
    // Return error if no nodes are found
    if len(nodes) == 0 {
        c.Ui.Error(fmt.Sprintf("No node(s) with prefix %q found", nodeID))
        return 1
    }
    if len(nodes) > 1 {
        // Format the nodes list that matches the prefix so that the user
        // can create a more specific request
        out := make([]string, len(nodes)+1)
        out[0] = "ID|DC|Name|Class|Drain|Status"
        for i, node := range nodes {
            out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s",
                limit(node.ID, length),
                node.Datacenter,
                node.Name,
                node.NodeClass,
                node.Drain,
                node.Status)
        }
        // Dump the output
        c.Ui.Output(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single node
-   node, _, err = client.Nodes().Info(nodes[0].ID, nil)
+   node, _, err := client.Nodes().Info(nodes[0].ID, nil)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err))
        return 1
    }
-   }

    m := node.Attributes
@@ -105,36 +105,33 @@ func (c *StatusCommand) Run(args []string) int {

    // Try querying the job
    jobID := args[0]
-   job, _, err := client.Jobs().Info(jobID, nil)
-   if err != nil {
    jobs, _, err := client.Jobs().PrefixList(jobID)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
        return 1
    }
    if len(jobs) == 0 {
        c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
        return 1
    }
    if len(jobs) > 1 {
        out := make([]string, len(jobs)+1)
        out[0] = "ID|Type|Priority|Status"
        for i, job := range jobs {
            out[i+1] = fmt.Sprintf("%s|%s|%d|%s",
                job.ID,
                job.Type,
                job.Priority,
                job.Status)
        }
        c.Ui.Output(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", formatList(out)))
        return 0
    }
    // Prefix lookup matched a single job
-   job, _, err = client.Jobs().Info(jobs[0].ID, nil)
+   job, _, err := client.Jobs().Info(jobs[0].ID, nil)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
        return 1
    }
-   }

    // Check if it is periodic
@@ -2,7 +2,7 @@

set -ex

-RKT_VERSION="v1.0.0"
+RKT_VERSION="v1.2.0"
DEST_DIR="/usr/local/bin"

sudo mkdir -p /etc/rkt/net.d
@@ -2,7 +2,7 @@

set -ex

-DOCKER_VERSION="1.10.2"
+DOCKER_VERSION="1.10.3"

sudo stop docker
sudo rm -rf /var/lib/docker
@@ -23,7 +23,7 @@ nomad fs stat <alloc-id> <path>
nomad fs cat <alloc-id> <path>
```

-A valid allocation id is necessary and the path is relative to the root of the allocation directory.
+A valid allocation id is necessary unless `-job` is specified and the path is relative to the root of the allocation directory.
The path is optional and it defaults to `/` of the allocation directory

## Examples

@@ -50,3 +50,14 @@ $ nomad fs cat redis/local/redis.stdout
6710:C 27 Jan 22:04:03.794 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
6710:M 27 Jan 22:04:03.795 * Increased maximum number of open files to 10032 (it was originally set to 256).

+## Using Job-ID instead of Alloc-ID
+
+Passing `-job` into one of the `fs` commands will allow the `fs` command to randomly select an allocation ID from the specified job.
+
+```
+nomad fs ls -job <job-id> <path>
+```
+
+Nomad will prefer to select a running allocation ID for the job, but if no running allocations for the job are found, Nomad will use a dead allocation.
+
+This can be useful for debugging a job that has multiple allocations, and it's not really required to use a specific allocation ID.
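For example, with the `redis` job used in the examples earlier on this page, the new flag could be exercised roughly like this (a sketch only: it assumes the job and its task are both named `redis`, and `-job` picks one of the job's allocations at random):

```
$ nomad fs ls -job redis redis/local
$ nomad fs cat -job redis redis/local/redis.stdout
```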
@@ -28,8 +28,8 @@ spelunking through the source code.

There are four primary "nouns" in Nomad; jobs, nodes, allocations, and evaluations.
Jobs are submitted by users and represent a _desired state_. A job is a declarative description
-of tasks to run which are bounded by constraints and require resources. Nodes are the servers
-in the clusters that tasks can be scheduled on. The mapping of tasks in a job to nodes is done
+of tasks to run which are bounded by constraints and require resources. Tasks can be scheduled on
+nodes in the cluster running the Nomad client. The mapping of tasks in a job to clients is done
using allocations. An allocation is used to declare that a set of tasks in a job should be run
on a particular node. Scheduling is the process of determining the appropriate allocations and
is done as part of an evaluation.
@@ -65,30 +65,37 @@ driver.
<tr>
  <th>Variable</th>
  <th>Description</th>
+  <th>Example</th>
</tr>
<tr>
  <td>${node.unique.id}</td>
-  <td>The client node identifier</td>
+  <td>The 36 character unique client node identifier</td>
+  <td>9afa5da1-8f39-25a2-48dc-ba31fd7c0023</td>
</tr>
<tr>
  <td>${node.datacenter}</td>
-  <td>The client node datacenter</td>
+  <td>The client node's datacenter</td>
+  <td>dc1</td>
</tr>
<tr>
  <td>${node.unique.name}</td>
-  <td>The client node name</td>
+  <td>The client node's name</td>
+  <td>nomad-client-10-1-2-4</td>
</tr>
<tr>
  <td>${node.class}</td>
-  <td>The client node class</td>
+  <td>The client node's class</td>
+  <td>linux-64bit</td>
</tr>
<tr>
  <td>${attr.\<key\>}</td>
  <td>The attribute given by `key` on the client node.</td>
+  <td>platform.aws.instance-type:r3.large</td>
</tr>
<tr>
  <td>${meta.\<key\>}</td>
  <td>The metadata value given by `key` on the client node.</td>
+  <td></td>
</tr>
</table>
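One way to check what these variables resolve to on a given client is to look the node up directly; `nomad node-status` prints the node's attributes when given a single node (a sketch using the example identifier from the table above, shortened to a prefix):

```
$ nomad node-status 9afa5da1
```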
@@ -177,19 +184,19 @@ a particular node and as such can not be used in constraints.
  <td>The CPU limit in MHz for the task</td>
</tr>
<tr>
-  <td>NOMAD_ALLOC_ID</td>
+  <td>${NOMAD_ALLOC_ID}</td>
  <td>The allocation ID of the task</td>
</tr>
<tr>
-  <td>NOMAD_ALLOC_NAME</td>
+  <td>${NOMAD_ALLOC_NAME}</td>
  <td>The allocation name of the task</td>
</tr>
<tr>
-  <td>NOMAD_ALLOC_INDEX</td>
+  <td>${NOMAD_ALLOC_INDEX}</td>
  <td>The allocation index; useful to distinguish instances of task groups</td>
</tr>
<tr>
-  <td>NOMAD_TASK_NAME</td>
+  <td>${NOMAD_TASK_NAME}</td>
  <td>The task's name</td>
</tr>
<tr>
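Inside a running task these are plain environment variables, so a task's command or script can read them directly. An illustrative sketch, using only the variable names from the table above:

```
#!/bin/sh
# Runs inside a Nomad task; the Nomad client injects these variables.
echo "alloc ${NOMAD_ALLOC_ID} (${NOMAD_ALLOC_NAME}), index ${NOMAD_ALLOC_INDEX}, task ${NOMAD_TASK_NAME}"
```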
@@ -1,6 +1,6 @@
---
layout: "docs"
-page_title: "Upgrade Nomad"
+page_title: "Upgrading Nomad"
sidebar_current: "docs-upgrade-upgrading"
description: |-
  Learn how to upgrade Nomad.
@@ -8,29 +8,109 @@ description: |-

# Upgrading Nomad

-Both Nomad Clients and Servers are meant to be long-running processes that
-maintain communication with each other. Nomad Servers maintain quorum with other
-Servers and Clients are in constant communication with Servers. As such, care
-should be taken to properly upgrade Nomad to ensure minimal service disruption.
-
This page documents how to upgrade Nomad when a new version is released.

-## Standard Upgrades
+~> **Upgrade Warning!** Both Nomad Clients and Servers are meant to be
+   long-running processes that maintain communication with each other. Nomad
+   Servers maintain quorum with other Servers and Clients are in constant
+   communication with Servers. As such, care should be taken to properly
+   upgrade Nomad to ensure minimal service disruption. Unsafe upgrades can
+   cause a service outage.
+
+## Upgrade Process

For upgrades we strive to ensure backwards compatibility. For most upgrades, the
-process is simple. Assuming the current version of Nomad is A, and version B is
-released.
+process is as simple as upgrading the binary and restarting the service.

-1. On each server, install version B of Nomad.
-
-2. Shut down version A, restart with version B on one server at a time.
-
-3. You can run `nomad server-members` to ensure that all servers are
-clustered and running the version B.
-
-4. Once all the servers are upgraded, begin a rollout of clients following
-the same process.
-
-5. Done! You are now running the latest Nomad version. You can verify all
-Clients joined by running `nomad node-status` and checking all the clients
-are in a `ready` state.
+Prior to starting the upgrade please check the
+[specific version details](/docs/upgrade/upgrade-specific.html) page as some
+version differences may require specific steps.
+
+At a high level we complete the following steps to upgrade Nomad:
+
+* **Add the new version**
+* **Check cluster health**
+* **Remove the old version**
+* **Check cluster health**
+* **Upgrade clients**
+
+### 1. Add the new version to the existing cluster
+
+Whether you are replacing the software in place on existing systems or bringing
+up new hosts you should make changes incrementally, verifying cluster health at
+each step of the upgrade
+
+On a single server, install the new version of Nomad. You can do this by
+joining a new server to the cluster or by replacing or upgrading the binary
+locally and restarting the service.
+
+### 2. Check cluster health
+
+Monitor the Nomad logs on the remaining nodes to check the new node has entered
+the cluster correctly.
+
+Run `nomad agent-info` on the new server and check that the `last_log_index` is
+of a similar value to the other nodes. This step ensures that changes have been
+replicated to the new node.
+
+```
+ubuntu@nomad-server-10-1-1-4:~$ nomad agent-info
+nomad
+  bootstrap = false
+  known_regions = 1
+  leader = false
+  server = true
+raft
+  applied_index = 53460
+  commit_index = 53460
+  fsm_pending = 0
+  last_contact = 54.512216ms
+  last_log_index = 53460
+  last_log_term = 1
+  last_snapshot_index = 49511
+  last_snapshot_term = 1
+  num_peers = 2
+...
+```
+
+Continue with the upgrades across the Server fleet making sure to do a single Nomad
+server at a time. You can check state of the servers and clients with the
+`nomad server-members` and `nomad node-status` commands which indicate state of the
+nodes.
+
+### 3. Remove the old versions from servers
+
+If you are doing an in place upgrade on existing servers this step is not
+necessary as the version was changed in place.
+
+If you are doing an upgrade by adding new servers and removing old servers
+from the fleet you need to ensure that the server has left the fleet safely.
+
+1. Stop the service on the existing host
+2. On another server issue a `nomad server-members` and check the status, if
+   the server is now in a left state you are safe to continue.
+3. If the server is not in a left state, issue a `nomad server-force-leave <server id>`
+   to remove the server from the cluster.
+
+Monitor the logs of the other hosts in the Nomad cluster over this period.
+
+### 4. Check cluster health
+
+Use the same actions in step #2 above to confirm cluster health.
+
+### 5. Upgrade clients
+
+Following the successful upgrade of the servers you can now update your
+clients using a similar process as the servers. If you wish to gracefully
+move tasks on a client use the `nomad node-drain <node-id>` command to
+gracefully migrate jobs to another client in the cluster. The `node-drain`
+command prevents new tasks from being allocated to the client and begins
+migrating existing allocations to another client.
+
+## Done!
+
+You are now running the latest Nomad version. You can verify all
+Clients joined by running `nomad node-status` and checking all the clients
+are in a `ready` state.
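As a rough illustration of that final check, the commands named above can be run from any server once the client rollout is finished (the output here is illustrative only; IDs, names, and classes are placeholders, and the columns follow the `nomad node-status` listing):

```
$ nomad server-members
$ nomad node-status
ID        DC   Name                   Class   Drain  Status
f7476465  dc1  nomad-client-10-1-2-4  <none>  false  ready
```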