Merge branch 'master' into f-artifact-location

This commit is contained in:
Alex Dadgar 2016-03-19 12:39:15 -07:00
commit c85dfdf9a0
29 changed files with 839 additions and 480 deletions

View file

@ -20,7 +20,7 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/logging"
"github.com/hashicorp/nomad/client/driver/executor"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/nomad/structs"
@ -101,7 +101,7 @@ type dockerPID struct {
type DockerHandle struct {
pluginClient *plugin.Client
logCollector logging.LogCollector
executor executor.Executor
client *docker.Client
logger *log.Logger
cleanupContainer bool
@ -533,23 +533,23 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-syslog-collector.out", task.Name))
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "syslog", pluginLogFile),
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
logCollectorCtx := &logging.LogCollectorContext{
TaskName: task.Name,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Task: task,
AllocDir: ctx.AllocDir,
LogConfig: task.LogConfig,
PortLowerBound: d.config.ClientMinPort,
PortUpperBound: d.config.ClientMaxPort,
}
ss, err := logCollector.LaunchCollector(logCollectorCtx)
ss, err := exec.LaunchSyslogServer(executorCtx)
if err != nil {
return nil, fmt.Errorf("failed to start syslog collector: %v", err)
}
@ -629,7 +629,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &DockerHandle{
client: client,
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
cleanupContainer: cleanupContainer,
cleanupImage: cleanupImage,
@ -686,7 +686,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
if !found {
return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err)
if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout*time.Second)); e != nil {
@ -698,7 +698,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
// Return a driver handle
h := &DockerHandle{
client: client,
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
cleanupContainer: cleanupContainer,
cleanupImage: cleanupImage,
@ -743,7 +743,7 @@ func (h *DockerHandle) WaitCh() chan *cstructs.WaitResult {
func (h *DockerHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
if err := h.logCollector.UpdateLogConfig(task.LogConfig); err != nil {
if err := h.executor.UpdateTask(task); err != nil {
h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err)
}
@ -759,9 +759,13 @@ func (h *DockerHandle) Kill() error {
// Container has already been removed.
if strings.Contains(err.Error(), NoSuchContainerError) {
h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID)
h.executor.Exit()
h.pluginClient.Kill()
return nil
}
h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
h.executor.Exit()
h.pluginClient.Kill()
return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
}
h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
@ -824,7 +828,7 @@ func (h *DockerHandle) run() {
close(h.waitCh)
// Shutdown the syslog collector
if err := h.logCollector.Exit(); err != nil {
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)
}
h.pluginClient.Kill()

View file

@ -145,7 +145,7 @@ func TestDockerDriver_Handle(t *testing.T) {
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "syslog", f.Name()),
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, os.Stdout, &config.Config{})
exec, pluginClient, err := createExecutor(pluginConfig, os.Stdout, &config.Config{})
if err != nil {
t.Fatalf("got an err: %v", err)
}
@ -154,7 +154,7 @@ func TestDockerDriver_Handle(t *testing.T) {
h := &DockerHandle{
version: "version",
imageID: "imageid",
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
containerID: "containerid",
killTimeout: 5 * time.Nanosecond,

View file

@ -102,14 +102,15 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
ResourceLimits: true,
FSIsolation: true,
UnprivilegedUser: true,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
FSIsolation: true,
ResourceLimits: true,
User: cstructs.DefaultUnpriviledgedUser,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
@ -217,7 +218,7 @@ func (h *execHandle) WaitCh() chan *cstructs.WaitResult {
func (h *execHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -267,5 +268,8 @@ func (h *execHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -2,7 +2,9 @@ package executor
import (
"fmt"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"path/filepath"
@ -22,6 +24,18 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
UpdateTask(task *structs.Task) error
}
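A rough, hypothetical driver-side sketch using only the signatures shown above (LaunchCmd, Wait, Exit and the types they reference); it is illustrative, not part of this commit:
```go
// Sketch only: launch a command through the Executor interface, wait for it,
// then let the executor tear down whatever isolation it created.
package sketch

import (
	"log"

	"github.com/hashicorp/nomad/client/driver/executor"
)

func runAndWait(exec executor.Executor, ctx *executor.ExecutorContext, cmd *executor.ExecCommand) error {
	ps, err := exec.LaunchCmd(cmd, ctx)
	if err != nil {
		return err
	}
	log.Printf("launched pid %d", ps.Pid)

	// Block until the supervised process exits, then clean up cgroups,
	// chroot mounts and any syslog server the executor started.
	ps, err = exec.Wait()
	if err != nil {
		return err
	}
	log.Printf("process exited with code %d", ps.ExitCode)
	return exec.Exit()
}
```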
// ExecutorContext holds context to configure the command the user
// wants to run and isolate it
type ExecutorContext struct {
@ -32,33 +46,36 @@ type ExecutorContext struct {
// the task
AllocDir *allocdir.AllocDir
// TaskName is the name of the Task
TaskName string
// Task is the task whose executor is being launched
Task *structs.Task
// TaskResources are the resource constraints for the Task
TaskResources *structs.Resources
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
// FSIsolation is a flag for drivers to impose file system
// isolation on certain platforms
FSIsolation bool
// ResourceLimits is a flag for drivers to impose resource
// constraints on a Task on certain platforms
ResourceLimits bool
// UnprivilegedUser is a flag for drivers to make the process
// run as nobody
UnprivilegedUser bool
// LogConfig provides the configuration related to log rotation
LogConfig *structs.LogConfig
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
}
// ExecCommand holds the user command and args. It's a lightweight replacement
// of exec.Cmd for serialization purposes.
// ExecCommand holds the user command, args, and other isolation related
// settings.
type ExecCommand struct {
// Cmd is the command that the user wants to run.
Cmd string
// Args is the args of the command that the user wants to run.
Args []string
// FSIsolation determines whether the command would be run in a chroot.
FSIsolation bool
// User is the user which the executor uses to run the command.
User string
// ResourceLimits determines whether resource limits are enforced by the
// executor.
ResourceLimits bool
}
// ProcessState holds information about the state of a user process.
@ -70,14 +87,11 @@ type ProcessState struct {
Time time.Time
}
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
// SyslogServerState holds the address and isolation information of a launched
// syslog server
type SyslogServerState struct {
IsolationConfig *cstructs.IsolationConfig
Addr string
}
// UniversalExecutor is an implementation of the Executor which launches and
@ -86,21 +100,31 @@ type Executor interface {
type UniversalExecutor struct {
cmd exec.Cmd
ctx *ExecutorContext
command *ExecCommand
taskDir string
groups *cgroupConfig.Cgroup
exitState *ProcessState
processExited chan interface{}
lre *logging.FileRotator
lro *logging.FileRotator
rotatorLock sync.Mutex
syslogServer *logging.SyslogServer
syslogChan chan *logging.SyslogMessage
groups *cgroupConfig.Cgroup
cgLock sync.Mutex
logger *log.Logger
lock sync.Mutex
}
// NewExecutor returns an Executor
func NewExecutor(logger *log.Logger) Executor {
return &UniversalExecutor{logger: logger, processExited: make(chan interface{})}
return &UniversalExecutor{
logger: logger,
processExited: make(chan interface{}),
}
}
// LaunchCmd launches a process and returns its state. It also configures an
@ -109,6 +133,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
e.ctx = ctx
e.command = command
// configuring the task dir
if err := e.configureTaskDir(); err != nil {
@ -122,29 +147,18 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
}
// setting the user of the process
if e.ctx.UnprivilegedUser {
if err := e.runAs("nobody"); err != nil {
if command.User != "" {
if err := e.runAs(command.User); err != nil {
return nil, err
}
}
logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024)
lro, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return nil, fmt.Errorf("error creating log rotator for stdout of task %v", err)
// Setup the loggers
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.cmd.Stdout = lro
e.lro = lro
lre, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return nil, fmt.Errorf("error creating log rotator for stderr of task %v", err)
}
e.cmd.Stderr = lre
e.lre = lre
e.cmd.Stdout = e.lro
e.cmd.Stderr = e.lre
e.ctx.TaskEnv.Build()
@ -160,7 +174,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
// Determine the path to run as it may have to be relative to the chroot.
path := absPath
if e.ctx.FSIsolation {
if e.command.FSIsolation {
rel, err := filepath.Rel(e.taskDir, absPath)
if err != nil {
return nil, err
@ -182,15 +196,42 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
}
// configureLoggers sets up the standard out/error file rotators
func (e *UniversalExecutor) configureLoggers() error {
e.rotatorLock.Lock()
defer e.rotatorLock.Unlock()
logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
if e.lro == nil {
lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lro = lro
}
if e.lre == nil {
lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lre = lre
}
return nil
}
// Wait waits until a process has exited and returns its exit code and errors
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
<-e.processExited
return e.exitState, nil
}
// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
// UpdateLogConfig updates the log configuration
func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
e.ctx.LogConfig = logConfig
e.ctx.Task.LogConfig = logConfig
if e.lro == nil {
return fmt.Errorf("log rotator for stdout doesn't exist")
}
@ -205,11 +246,21 @@ func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error
return nil
}
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
e.ctx.Task = task
// Updating Log Config
fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
e.lro.MaxFiles = task.LogConfig.MaxFiles
e.lro.FileSize = fileSize
e.lre.MaxFiles = task.LogConfig.MaxFiles
e.lre.FileSize = fileSize
return nil
}
func (e *UniversalExecutor) wait() {
defer close(e.processExited)
err := e.cmd.Wait()
e.lre.Close()
e.lro.Close()
if err == nil {
e.exitState = &ProcessState{Pid: 0, ExitCode: 0, Time: time.Now()}
return
@ -220,14 +271,6 @@ func (e *UniversalExecutor) wait() {
exitCode = status.ExitStatus()
}
}
if e.ctx.FSIsolation {
e.removeChrootMounts()
}
if e.ctx.ResourceLimits {
e.lock.Lock()
DestroyCgroup(e.groups)
e.lock.Unlock()
}
e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Time: time.Now()}
}
@ -241,7 +284,13 @@ var (
// process
func (e *UniversalExecutor) Exit() error {
var merr multierror.Error
if e.cmd.Process != nil {
if e.syslogServer != nil {
e.syslogServer.Shutdown()
}
e.lre.Close()
e.lro.Close()
if e.command != nil && e.cmd.Process != nil {
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
@ -252,17 +301,17 @@ func (e *UniversalExecutor) Exit() error {
}
}
if e.ctx.FSIsolation {
if e.command != nil && e.command.FSIsolation {
if err := e.removeChrootMounts(); err != nil {
merr.Errors = append(merr.Errors, err)
}
}
if e.ctx.ResourceLimits {
e.lock.Lock()
if e.command != nil && e.command.ResourceLimits {
e.cgLock.Lock()
if err := DestroyCgroup(e.groups); err != nil {
merr.Errors = append(merr.Errors, err)
}
e.lock.Unlock()
e.cgLock.Unlock()
}
return merr.ErrorOrNil()
}
@ -287,10 +336,10 @@ func (e *UniversalExecutor) ShutDown() error {
// configureTaskDir sets the task dir in the executor
func (e *UniversalExecutor) configureTaskDir() error {
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.TaskName]
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]
e.taskDir = taskDir
if !ok {
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.TaskName)
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name)
}
e.cmd.Dir = taskDir
return nil
@ -344,3 +393,48 @@ func (e *UniversalExecutor) makeExecutable(binPath string) error {
}
return nil
}
// getListener returns a listener on a free port between the lower and upper
// bounds on Windows, or on a Unix domain socket on other platforms
func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
if runtime.GOOS == "windows" {
return e.listenerTCP(lowerBound, upperBound)
}
return e.listenerUnix()
}
// listenerTCP creates a TCP listener using an unused port between an upper and
// lower bound
func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
for i := lowerBound; i <= upperBound; i++ {
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
if err != nil {
return nil, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
continue
}
return l, nil
}
return nil, fmt.Errorf("No free port found")
}
// listenerUnix creates a Unix domain socket
func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
f, err := ioutil.TempFile("", "plugin")
if err != nil {
return nil, err
}
path := f.Name()
if err := f.Close(); err != nil {
return nil, err
}
if err := os.Remove(path); err != nil {
return nil, err
}
return net.Listen("unix", path)
}

View file

@ -38,14 +38,14 @@ var (
// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
if e.ctx.FSIsolation {
if e.command.FSIsolation {
if err := e.configureChroot(); err != nil {
return err
}
}
if e.ctx.ResourceLimits {
if err := e.configureCgroups(e.ctx.TaskResources); err != nil {
if e.command.ResourceLimits {
if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
return fmt.Errorf("error creating cgroups: %v", err)
}
if err := e.applyLimits(os.Getpid()); err != nil {
@ -63,7 +63,7 @@ func (e *UniversalExecutor) configureIsolation() error {
// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
if !e.ctx.ResourceLimits {
if !e.command.ResourceLimits {
return nil
}
@ -155,11 +155,11 @@ func (e *UniversalExecutor) runAs(userid string) error {
// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
allocDir := e.ctx.AllocDir
if err := allocDir.MountSharedDir(e.ctx.TaskName); err != nil {
if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
return err
}
if err := allocDir.Embed(e.ctx.TaskName, chrootEnv); err != nil {
if err := allocDir.Embed(e.ctx.Task.Name, chrootEnv); err != nil {
return err
}
@ -183,8 +183,8 @@ func (e *UniversalExecutor) configureChroot() error {
// should be called when tearing down the task.
func (e *UniversalExecutor) removeChrootMounts() error {
// Prevent a race between Wait/ForceStop
e.lock.Lock()
defer e.lock.Unlock()
e.cgLock.Lock()
defer e.cgLock.Unlock()
return e.ctx.AllocDir.UnmountAll()
}

View file

@ -0,0 +1,44 @@
// +build !windows
package executor
import (
"fmt"
"io"
"log/syslog"
"github.com/hashicorp/nomad/client/driver/logging"
)
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
e.ctx = ctx
e.syslogChan = make(chan *logging.SyslogMessage, 2048)
l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound)
if err != nil {
return nil, err
}
e.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger)
go e.syslogServer.Start()
go e.collectLogs(e.lre, e.lro)
syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
return &SyslogServerState{Addr: syslogAddr}, nil
}
func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) {
for logParts := range e.syslogChan {
// If the severity of the log line is err then we write to stderr
// otherwise all messages go to stdout
if logParts.Severity == syslog.LOG_ERR {
e.lre.Write(logParts.Message)
e.lre.Write([]byte{'\n'})
} else {
e.lro.Write(logParts.Message)
e.lro.Write([]byte{'\n'})
}
}
}
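For orientation, a hypothetical client-side sketch: the Docker driver presumably points the task's syslog output at the returned `SyslogServerState.Addr`, and anything written there is routed by `collectLogs` into the task's rotated stdout/stderr files. Assuming an address in the `network://raddr` form built above:
```go
// Sketch only: write a line to a syslog server launched by LaunchSyslogServer.
// The address format mirrors the value built there
// ("unix:///tmp/pluginXXX" or "tcp://127.0.0.1:PORT").
package sketch

import (
	"fmt"
	"log/syslog"
	"strings"
)

func writeToCollector(addr, msg string) error {
	parts := strings.SplitN(addr, "://", 2)
	if len(parts) != 2 {
		return fmt.Errorf("unexpected address %q", addr)
	}
	// Severity LOG_ERR lines are routed to <task>.stderr.N by collectLogs;
	// everything else lands in <task>.stdout.N.
	w, err := syslog.Dial(parts[0], parts[1], syslog.LOG_INFO, "task")
	if err != nil {
		return err
	}
	defer w.Close()
	return w.Info(msg)
}
```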

View file

@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
tu "github.com/hashicorp/nomad/testutil"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
)
var (
@ -30,7 +31,7 @@ var (
}
)
func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
func mockAllocDir(t *testing.T) (*structs.Task, *allocdir.AllocDir) {
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
@ -39,18 +40,16 @@ func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
log.Panicf("allocDir.Build() failed: %v", err)
}
return task.Name, allocDir
return task, allocDir
}
func testExecutorContext(t *testing.T) *ExecutorContext {
taskEnv := env.NewTaskEnvironment(mock.Node())
taskName, allocDir := mockAllocDir(t)
task, allocDir := mockAllocDir(t)
ctx := &ExecutorContext{
TaskEnv: taskEnv,
TaskName: taskName,
Task: task,
AllocDir: allocDir,
TaskResources: constraint,
LogConfig: structs.DefaultLogConfig(),
}
return ctx
}
@ -80,6 +79,9 @@ func TestExecutor_Start_Wait_Failure_Code(t *testing.T) {
if ps.ExitCode < 1 {
t.Fatalf("expected exit code to be non zero, actual: %v", ps.ExitCode)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
}
func TestExecutor_Start_Wait(t *testing.T) {
@ -98,6 +100,9 @@ func TestExecutor_Start_Wait(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
expected := "hello world"
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@ -119,9 +124,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
ctx := testExecutorContext(t)
defer ctx.AllocDir.Destroy()
ctx.FSIsolation = true
ctx.ResourceLimits = true
ctx.UnprivilegedUser = true
execCmd.FSIsolation = true
execCmd.ResourceLimits = true
execCmd.User = cstructs.DefaultUnpriviledgedUser
executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
ps, err := executor.LaunchCmd(&execCmd, ctx)
@ -135,6 +140,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
expected := "hello world"
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@ -154,13 +162,13 @@ func TestExecutor_DestroyCgroup(t *testing.T) {
execCmd := ExecCommand{Cmd: "/bin/bash", Args: []string{"-c", "/usr/bin/yes"}}
ctx := testExecutorContext(t)
ctx.LogConfig.MaxFiles = 1
ctx.LogConfig.MaxFileSizeMB = 300
ctx.Task.LogConfig.MaxFiles = 1
ctx.Task.LogConfig.MaxFileSizeMB = 300
defer ctx.AllocDir.Destroy()
ctx.FSIsolation = true
ctx.ResourceLimits = true
ctx.UnprivilegedUser = true
execCmd.FSIsolation = true
execCmd.ResourceLimits = true
execCmd.User = "nobody"
executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
ps, err := executor.LaunchCmd(&execCmd, ctx)
@ -171,7 +179,10 @@ func TestExecutor_DestroyCgroup(t *testing.T) {
t.Fatalf("expected process to start and have non zero pid")
}
time.Sleep(200 * time.Millisecond)
executor.Exit()
if err := executor.Exit(); err != nil {
t.Fatalf("err: %v", err)
}
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
finfo, err := os.Stat(file)
if err != nil {
@ -203,6 +214,9 @@ func TestExecutor_Start_Kill(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
time.Sleep(time.Duration(tu.TestMultiplier()*2) * time.Second)

View file

@ -0,0 +1,5 @@
package executor
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
return nil, nil
}

View file

@ -1,6 +1,7 @@
package driver
import (
"encoding/gob"
"log"
"net/rpc"
@ -9,6 +10,14 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// Registering these types since we have to serialize and de-serialize the Task
// structs over the wire between drivers and the executor.
func init() {
gob.Register([]interface{}{})
gob.Register(map[string]interface{}{})
gob.Register([]map[string]string{})
}
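A standalone sketch of why these registrations are needed, under the assumption that `Task` fields such as `Config` carry `map[string]interface{}` and `[]interface{}` values across the gob-backed RPC:
```go
// Sketch only: encoding/gob refuses to encode concrete values held in
// interface{} slots unless their types were registered first.
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
	"log"
)

type payload struct {
	// Stand-in for fields like Task.Config that hold arbitrary values.
	Config map[string]interface{}
}

func main() {
	// Without these two calls, Encode fails with
	// "gob: type not registered for interface: []interface {}".
	gob.Register([]interface{}{})
	gob.Register(map[string]interface{}{})

	p := payload{Config: map[string]interface{}{
		"args": []interface{}{"-c", "echo hi"},
	}}

	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(p); err != nil {
		log.Fatal(err)
	}
	var out payload
	if err := gob.NewDecoder(&buf).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.Config["args"])
}
```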
type ExecutorRPC struct {
client *rpc.Client
}
@ -19,12 +28,23 @@ type LaunchCmdArgs struct {
Ctx *executor.ExecutorContext
}
// LaunchSyslogServerArgs wraps the executor context for the purposes of RPC
type LaunchSyslogServerArgs struct {
Ctx *executor.ExecutorContext
}
func (e *ExecutorRPC) LaunchCmd(cmd *executor.ExecCommand, ctx *executor.ExecutorContext) (*executor.ProcessState, error) {
var ps *executor.ProcessState
err := e.client.Call("Plugin.LaunchCmd", LaunchCmdArgs{Cmd: cmd, Ctx: ctx}, &ps)
return ps, err
}
func (e *ExecutorRPC) LaunchSyslogServer(ctx *executor.ExecutorContext) (*executor.SyslogServerState, error) {
var ss *executor.SyslogServerState
err := e.client.Call("Plugin.LaunchSyslogServer", LaunchSyslogServerArgs{Ctx: ctx}, &ss)
return ss, err
}
func (e *ExecutorRPC) Wait() (*executor.ProcessState, error) {
var ps executor.ProcessState
err := e.client.Call("Plugin.Wait", new(interface{}), &ps)
@ -43,6 +63,10 @@ func (e *ExecutorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
}
func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
}
type ExecutorRPCServer struct {
Impl executor.Executor
}
@ -55,6 +79,14 @@ func (e *ExecutorRPCServer) LaunchCmd(args LaunchCmdArgs, ps *executor.ProcessSt
return err
}
func (e *ExecutorRPCServer) LaunchSyslogServer(args LaunchSyslogServerArgs, ss *executor.SyslogServerState) error {
state, err := e.Impl.LaunchSyslogServer(args.Ctx)
if state != nil {
*ss = *state
}
return err
}
func (e *ExecutorRPCServer) Wait(args interface{}, ps *executor.ProcessState) error {
state, err := e.Impl.Wait()
if state != nil {
@ -75,6 +107,10 @@ func (e *ExecutorRPCServer) UpdateLogConfig(args *structs.LogConfig, resp *inter
return e.Impl.UpdateLogConfig(args)
}
func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) error {
return e.Impl.UpdateTask(args)
}
type ExecutorPlugin struct {
logger *log.Logger
Impl *ExecutorRPCServer

View file

@ -156,12 +156,7 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
FSIsolation: true,
UnprivilegedUser: true,
ResourceLimits: true,
Task: task,
}
absPath, err := GetAbsolutePath("java")
@ -169,7 +164,13 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, err
}
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{Cmd: absPath, Args: args}, executorCtx)
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
Cmd: absPath,
Args: args,
FSIsolation: true,
ResourceLimits: true,
User: cstructs.DefaultUnpriviledgedUser,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
@ -290,7 +291,7 @@ func (h *javaHandle) WaitCh() chan *cstructs.WaitResult {
func (h *javaHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -338,5 +339,6 @@ func (h *javaHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
h.executor.Exit()
h.pluginClient.Kill()
}

View file

@ -0,0 +1,10 @@
package logging
type SyslogServer struct {
}
func (s *SyslogServer) Shutdown() {
}
type SyslogMessage struct {
}

View file

@ -193,9 +193,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: args[0], Args: args[1:]}, executorCtx)
if err != nil {
@ -292,7 +290,7 @@ func (h *qemuHandle) WaitCh() chan *cstructs.WaitResult {
func (h *qemuHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -336,5 +334,6 @@ func (h *qemuHandle) run() {
close(h.doneCh)
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
h.executor.Exit()
h.pluginClient.Kill()
}

View file

@ -98,9 +98,7 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
if err != nil {
@ -195,7 +193,7 @@ func (h *rawExecHandle) WaitCh() chan *cstructs.WaitResult {
func (h *rawExecHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -237,5 +235,8 @@ func (h *rawExecHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -235,10 +235,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
UnprivilegedUser: false,
LogConfig: task.LogConfig,
Task: task,
}
absPath, err := GetAbsolutePath("rkt")
@ -329,7 +326,7 @@ func (h *rktHandle) WaitCh() chan *cstructs.WaitResult {
func (h *rktHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -360,5 +357,8 @@ func (h *rktHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -6,6 +6,11 @@ import (
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
const (
// The default user that the executor uses to run tasks
DefaultUnpriviledgedUser = "nobody"
)
// WaitResult stores the result of a Wait operation.
type WaitResult struct {
ExitCode int

View file

@ -75,8 +75,6 @@ func (c *AllocStatusCommand) Run(args []string) int {
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -114,12 +112,11 @@ func (c *AllocStatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
// Format the allocation data
basic := []string{

View file

@ -1,6 +1,12 @@
package command
import "github.com/mitchellh/cli"
import (
"math/rand"
"time"
"github.com/hashicorp/nomad/api"
"github.com/mitchellh/cli"
)
type FSCommand struct {
Meta
@ -17,3 +23,23 @@ func (f *FSCommand) Synopsis() string {
func (f *FSCommand) Run(args []string) int {
return cli.RunResultHelp
}
// getRandomJobAlloc returns a random allocation ID for the given jobID,
// preferring a running allocation but using a dead allocation if none are running
func getRandomJobAlloc(client *api.Client, jobID string) (string, error) {
var runningAllocs []*api.AllocationListStub
allocs, _, err := client.Jobs().Allocations(jobID, nil)
for _, v := range allocs {
if v.ClientStatus == "running" {
runningAllocs = append(runningAllocs, v)
}
}
// If we don't have any allocations running, use dead allocations
if len(runningAllocs) < 1 {
runningAllocs = allocs
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
allocID := runningAllocs[r.Intn(len(runningAllocs))].ID
return allocID, err
}

View file

@ -26,6 +26,9 @@ Cat Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -35,10 +38,11 @@ func (f *FSCatCommand) Synopsis() string {
}
func (f *FSCatCommand) Run(args []string) int {
var verbose bool
var verbose, job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -50,7 +54,6 @@ func (f *FSCatCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -58,18 +61,25 @@ func (f *FSCatCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
// If -job is specified, use random allocation, otherwise use provided allocation
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -107,12 +117,11 @@ func (f *FSCatCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/dustin/go-humanize"
humanize "github.com/dustin/go-humanize"
)
type FSListCommand struct {
@ -30,6 +30,9 @@ Ls Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -41,10 +44,12 @@ func (f *FSListCommand) Synopsis() string {
func (f *FSListCommand) Run(args []string) int {
var verbose bool
var machine bool
var job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&machine, "H", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -56,7 +61,6 @@ func (f *FSListCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -64,18 +68,26 @@ func (f *FSListCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
// If -job is specified, use random allocation, otherwise use provided allocation
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error fetching allocations: %v", err))
return 1
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -113,12 +125,11 @@ func (f *FSListCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/dustin/go-humanize"
humanize "github.com/dustin/go-humanize"
)
type FSStatCommand struct {
@ -29,6 +29,9 @@ Stat Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -40,10 +43,12 @@ func (f *FSStatCommand) Synopsis() string {
func (f *FSStatCommand) Run(args []string) int {
var verbose bool
var machine bool
var job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&machine, "H", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -55,7 +60,6 @@ func (f *FSStatCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -63,18 +67,24 @@ func (f *FSStatCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -112,12 +122,11 @@ func (f *FSStatCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -69,8 +69,6 @@ func (c *NodeDrainCommand) Run(args []string) int {
}
// Check if node exists
node, _, err := client.Nodes().Info(nodeID, nil)
if err != nil {
if len(nodeID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -111,12 +109,11 @@ func (c *NodeDrainCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single node
node, _, err = client.Nodes().Info(nodes[0].ID, nil)
node, _, err := client.Nodes().Info(nodes[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
return 1
}
}
// Toggle node draining
if _, err := client.Nodes().ToggleDrain(node.ID, enable, nil); err != nil {

View file

@ -133,8 +133,6 @@ func (c *NodeStatusCommand) Run(args []string) int {
// Query the specific node
nodeID := args[0]
node, _, err := client.Nodes().Info(nodeID, nil)
if err != nil {
if len(nodeID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -175,12 +173,11 @@ func (c *NodeStatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single node
node, _, err = client.Nodes().Info(nodes[0].ID, nil)
node, _, err := client.Nodes().Info(nodes[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err))
return 1
}
}
m := node.Attributes
keys := make([]string, len(m))

View file

@ -105,8 +105,6 @@ func (c *StatusCommand) Run(args []string) int {
// Try querying the job
jobID := args[0]
job, _, err := client.Jobs().Info(jobID, nil)
if err != nil {
jobs, _, err := client.Jobs().PrefixList(jobID)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
@ -130,12 +128,11 @@ func (c *StatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single job
job, _, err = client.Jobs().Info(jobs[0].ID, nil)
job, _, err := client.Jobs().Info(jobs[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
return 1
}
}
// Check if it is periodic
sJob, err := convertApiJob(job)

View file

@ -2,7 +2,7 @@
set -ex
RKT_VERSION="v1.0.0"
RKT_VERSION="v1.2.0"
DEST_DIR="/usr/local/bin"
sudo mkdir -p /etc/rkt/net.d

View file

@ -2,7 +2,7 @@
set -ex
DOCKER_VERSION="1.10.2"
DOCKER_VERSION="1.10.3"
sudo stop docker
sudo rm -rf /var/lib/docker

View file

@ -23,7 +23,7 @@ nomad fs stat <alloc-id> <path>
nomad fs cat <alloc-id> <path>
```
A valid allocation id is necessary and the path is relative to the root of the allocation directory.
A valid allocation id is necessary unless `-job` is specified, and the path is relative to the root of the allocation directory.
The path is optional and defaults to `/` of the allocation directory.
## Examples
@ -50,3 +50,14 @@ $ nomad fs cat redis/local/redis.stdout
6710:C 27 Jan 22:04:03.794 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
6710:M 27 Jan 22:04:03.795 * Increased maximum number of open files to 10032 (it was originally set to 256).
## Using Job-ID instead of Alloc-ID
Passing `-job` to one of the `fs` commands makes the command select a random allocation ID from the specified job.
```
nomad fs ls -job <job-id> <path>
```
Nomad will prefer to select a running allocation ID for the job, but if no running allocations for the job are found, Nomad will use a dead allocation.
This is useful for debugging a job that has multiple allocations when a specific allocation ID is not required.

View file

@ -28,8 +28,8 @@ spelunking through the source code.
There are four primary "nouns" in Nomad; jobs, nodes, allocations, and evaluations.
Jobs are submitted by users and represent a _desired state_. A job is a declarative description
of tasks to run which are bounded by constraints and require resources. Nodes are the servers
in the clusters that tasks can be scheduled on. The mapping of tasks in a job to nodes is done
of tasks to run which are bounded by constraints and require resources. Tasks can be scheduled on
nodes in the cluster running the Nomad client. The mapping of tasks in a job to clients is done
using allocations. An allocation is used to declare that a set of tasks in a job should be run
on a particular node. Scheduling is the process of determining the appropriate allocations and
is done as part of an evaluation.

View file

@ -65,30 +65,37 @@ driver.
<tr>
<th>Variable</th>
<th>Description</th>
<th>Example</th>
</tr>
<tr>
<td>${node.unique.id}</td>
<td>The client node identifier</td>
<td>The 36 character unique client node identifier</td>
<td>9afa5da1-8f39-25a2-48dc-ba31fd7c0023</td>
</tr>
<tr>
<td>${node.datacenter}</td>
<td>The client node datacenter</td>
<td>The client node's datacenter</td>
<td>dc1</td>
</tr>
<tr>
<td>${node.unique.name}</td>
<td>The client node name</td>
<td>The client node's name</td>
<td>nomad-client-10-1-2-4</td>
</tr>
<tr>
<td>${node.class}</td>
<td>The client node class</td>
<td>The client node's class</td>
<td>linux-64bit</td>
</tr>
<tr>
<td>${attr.\<key\>}</td>
<td>The attribute given by `key` on the client node.</td>
<td>platform.aws.instance-type:r3.large</td>
</tr>
<tr>
<td>${meta.\<key\>}</td>
<td>The metadata value given by `key` on the client node.</td>
<td></td>
</tr>
</table>
@ -177,19 +184,19 @@ a particular node and as such can not be used in constraints.
<td>The CPU limit in MHz for the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_ID</td>
<td>${NOMAD_ALLOC_ID}</td>
<td>The allocation ID of the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_NAME</td>
<td>${NOMAD_ALLOC_NAME}</td>
<td>The allocation name of the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_INDEX</td>
<td>${NOMAD_ALLOC_INDEX}</td>
<td>The allocation index; useful to distinguish instances of task groups</td>
</tr>
<tr>
<td>NOMAD_TASK_NAME</td>
<td>${NOMAD_TASK_NAME}</td>
<td>The task's name</td>
</tr>
<tr>

View file

@ -1,6 +1,6 @@
---
layout: "docs"
page_title: "Upgrade Nomad"
page_title: "Upgrading Nomad"
sidebar_current: "docs-upgrade-upgrading"
description: |-
Learn how to upgrade Nomad.
@ -8,29 +8,109 @@ description: |-
# Upgrading Nomad
Both Nomad Clients and Servers are meant to be long-running processes that
maintain communication with each other. Nomad Servers maintain quorum with other
Servers and Clients are in constant communication with Servers. As such, care
should be taken to properly upgrade Nomad to ensure minimal service disruption.
This page documents how to upgrade Nomad when a new version is released.
## Standard Upgrades
~> **Upgrade Warning!** Both Nomad Clients and Servers are meant to be
long-running processes that maintain communication with each other. Nomad
Servers maintain quorum with other Servers, and Clients are in constant
communication with Servers. As such, care should be taken to properly
upgrade Nomad to ensure minimal service disruption. Unsafe upgrades can
cause a service outage.
## Upgrade Process
For upgrades we strive to ensure backwards compatibility. For most upgrades, the
process is simple. Assuming the current version of Nomad is A, and version B is
released.
process is as simple as upgrading the binary and restarting the service.
1. On each server, install version B of Nomad.
Prior to starting the upgrade, please check the
[specific version details](/docs/upgrade/upgrade-specific.html) page, as some
version differences may require specific steps.
2. Shut down version A, restart with version B on one server at a time.
At a high level we complete the following steps to upgrade Nomad:
3. You can run `nomad server-members` to ensure that all servers are
clustered and running the version B.
* **Add the new version**
* **Check cluster health**
* **Remove the old version**
* **Check cluster health**
* **Upgrade clients**
### 1. Add the new version to the existing cluster
Whether you are replacing the software in place on existing systems or bringing
up new hosts, you should make changes incrementally, verifying cluster health at
each step of the upgrade.
On a single server, install the new version of Nomad. You can do this by
joining a new server to the cluster or by replacing or upgrading the binary
locally and restarting the service.
### 2. Check cluster health
Monitor the Nomad logs on the remaining nodes to check that the new node has
entered the cluster correctly.
Run `nomad agent-info` on the new server and check that the `last_log_index` is
of a similar value to the other nodes. This step ensures that changes have been
replicated to the new node.
```
ubuntu@nomad-server-10-1-1-4:~$ nomad agent-info
nomad
bootstrap = false
known_regions = 1
leader = false
server = true
raft
applied_index = 53460
commit_index = 53460
fsm_pending = 0
last_contact = 54.512216ms
last_log_index = 53460
last_log_term = 1
last_snapshot_index = 49511
last_snapshot_term = 1
num_peers = 2
...
```
Continue the upgrade across the server fleet, making sure to upgrade a single
Nomad server at a time. You can check the state of the servers and clients with
the `nomad server-members` and `nomad node-status` commands, which indicate the
state of the nodes.
### 3. Remove the old versions from servers
If you are doing an in-place upgrade on existing servers, this step is not
necessary, as the version was changed in place.
If you are doing an upgrade by adding new servers and removing old servers
from the fleet, you need to ensure that each old server has left the fleet safely.
1. Stop the service on the existing host.
2. On another server, issue `nomad server-members` and check the status; if
the server is now in a left state, you are safe to continue.
3. If the server is not in a left state, issue a `nomad server-force-leave <server id>`
to remove the server from the cluster.
Monitor the logs of the other hosts in the Nomad cluster over this period.
### 4. Check cluster health
Use the same actions in step #2 above to confirm cluster health.
### 5. Upgrade clients
Following the successful upgrade of the servers, you can now update your
clients using a similar process. If you wish to gracefully move tasks off a
client, use the `nomad node-drain <node-id>` command to migrate jobs to another
client in the cluster. The `node-drain` command prevents new tasks from being
allocated to the client and begins migrating existing allocations to another
client.
## Done!
You are now running the latest Nomad version. You can verify that all clients
have joined by running `nomad node-status` and checking that all clients are
in a `ready` state.
4. Once all the servers are upgraded, begin a rollout of clients following
the same process.
5. Done! You are now running the latest Nomad version. You can verify all
Clients joined by running `nomad node-status` and checking all the clients
are in a `ready` state.