Merge branch 'master' into f-artifact-location

This commit is contained in:
Alex Dadgar 2016-03-19 12:39:15 -07:00
commit c85dfdf9a0
29 changed files with 839 additions and 480 deletions

View file

@ -20,7 +20,7 @@ import (
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver/logging"
"github.com/hashicorp/nomad/client/driver/executor"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
"github.com/hashicorp/nomad/helper/discover"
"github.com/hashicorp/nomad/nomad/structs"
@ -101,7 +101,7 @@ type dockerPID struct {
type DockerHandle struct {
pluginClient *plugin.Client
logCollector logging.LogCollector
executor executor.Executor
client *docker.Client
logger *log.Logger
cleanupContainer bool
@ -533,23 +533,23 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
if err != nil {
return nil, fmt.Errorf("unable to find the nomad binary: %v", err)
}
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-syslog-collector.out", task.Name))
pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name))
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "syslog", pluginLogFile),
Cmd: exec.Command(bin, "executor", pluginLogFile),
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
return nil, err
}
logCollectorCtx := &logging.LogCollectorContext{
TaskName: task.Name,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
Task: task,
AllocDir: ctx.AllocDir,
LogConfig: task.LogConfig,
PortLowerBound: d.config.ClientMinPort,
PortUpperBound: d.config.ClientMaxPort,
}
ss, err := logCollector.LaunchCollector(logCollectorCtx)
ss, err := exec.LaunchSyslogServer(executorCtx)
if err != nil {
return nil, fmt.Errorf("failed to start syslog collector: %v", err)
}
@ -629,7 +629,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
maxKill := d.DriverContext.config.MaxKillTimeout
h := &DockerHandle{
client: client,
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
cleanupContainer: cleanupContainer,
cleanupImage: cleanupImage,
@ -686,7 +686,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
if !found {
return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config)
exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config)
if err != nil {
d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err)
if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout*time.Second)); e != nil {
@ -698,7 +698,7 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er
// Return a driver handle
h := &DockerHandle{
client: client,
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
cleanupContainer: cleanupContainer,
cleanupImage: cleanupImage,
@ -743,7 +743,7 @@ func (h *DockerHandle) WaitCh() chan *cstructs.WaitResult {
func (h *DockerHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
if err := h.logCollector.UpdateLogConfig(task.LogConfig); err != nil {
if err := h.executor.UpdateTask(task); err != nil {
h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err)
}
@ -759,9 +759,13 @@ func (h *DockerHandle) Kill() error {
// Container has already been removed.
if strings.Contains(err.Error(), NoSuchContainerError) {
h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID)
h.executor.Exit()
h.pluginClient.Kill()
return nil
}
h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
h.executor.Exit()
h.pluginClient.Kill()
return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
}
h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
@ -824,7 +828,7 @@ func (h *DockerHandle) run() {
close(h.waitCh)
// Shutdown the syslog collector
if err := h.logCollector.Exit(); err != nil {
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)
}
h.pluginClient.Kill()

View file

@ -145,7 +145,7 @@ func TestDockerDriver_Handle(t *testing.T) {
pluginConfig := &plugin.ClientConfig{
Cmd: exec.Command(bin, "syslog", f.Name()),
}
logCollector, pluginClient, err := createLogCollector(pluginConfig, os.Stdout, &config.Config{})
exec, pluginClient, err := createExecutor(pluginConfig, os.Stdout, &config.Config{})
if err != nil {
t.Fatalf("got an err: %v", err)
}
@ -154,7 +154,7 @@ func TestDockerDriver_Handle(t *testing.T) {
h := &DockerHandle{
version: "version",
imageID: "imageid",
logCollector: logCollector,
executor: exec,
pluginClient: pluginClient,
containerID: "containerid",
killTimeout: 5 * time.Nanosecond,

View file

@ -102,14 +102,15 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
ResourceLimits: true,
FSIsolation: true,
UnprivilegedUser: true,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: command,
Args: driverConfig.Args,
FSIsolation: true,
ResourceLimits: true,
User: cstructs.DefaultUnpriviledgedUser,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
@ -217,7 +218,7 @@ func (h *execHandle) WaitCh() chan *cstructs.WaitResult {
func (h *execHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -267,5 +268,8 @@ func (h *execHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -2,7 +2,9 @@ package executor
import (
"fmt"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"path/filepath"
@ -22,6 +24,18 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
UpdateTask(task *structs.Task) error
}
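A rough, hypothetical driver-side sketch using only the signatures shown above (LaunchCmd, Wait, Exit and the types they reference); it is illustrative, not part of this commit:
```go
// Sketch only: launch a command through the Executor interface, wait for it,
// then let the executor tear down whatever isolation it created.
package sketch

import (
	"log"

	"github.com/hashicorp/nomad/client/driver/executor"
)

func runAndWait(exec executor.Executor, ctx *executor.ExecutorContext, cmd *executor.ExecCommand) error {
	ps, err := exec.LaunchCmd(cmd, ctx)
	if err != nil {
		return err
	}
	log.Printf("launched pid %d", ps.Pid)

	// Block until the supervised process exits, then clean up cgroups,
	// chroot mounts and any syslog server the executor started.
	ps, err = exec.Wait()
	if err != nil {
		return err
	}
	log.Printf("process exited with code %d", ps.ExitCode)
	return exec.Exit()
}
```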
// ExecutorContext holds context to configure the command the user
// wants to run and isolate it
type ExecutorContext struct {
@ -32,33 +46,36 @@ type ExecutorContext struct {
// the task
AllocDir *allocdir.AllocDir
// TaskName is the name of the Task
TaskName string
// Task is the task whose executor is being launched
Task *structs.Task
// TaskResources are the resource constraints for the Task
TaskResources *structs.Resources
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
// FSIsolation is a flag for drivers to impose file system
// isolation on certain platforms
FSIsolation bool
// ResourceLimits is a flag for drivers to impose resource
// constraints on a Task on certain platforms
ResourceLimits bool
// UnprivilegedUser is a flag for drivers to make the process
// run as nobody
UnprivilegedUser bool
// LogConfig provides the configuration related to log rotation
LogConfig *structs.LogConfig
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
}
// ExecCommand holds the user command and args. It's a lightweight replacement
// of exec.Cmd for serialization purposes.
// ExecCommand holds the user command, args, and other isolation related
// settings.
type ExecCommand struct {
// Cmd is the command that the user wants to run.
Cmd string
// Args is the args of the command that the user wants to run.
Args []string
// FSIsolation determines whether the command would be run in a chroot.
FSIsolation bool
// User is the user which the executor uses to run the command.
User string
// ResourceLimits determines whether resource limits are enforced by the
// executor.
ResourceLimits bool
}
// ProcessState holds information about the state of a user process.
@ -70,14 +87,11 @@ type ProcessState struct {
Time time.Time
}
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)
Wait() (*ProcessState, error)
ShutDown() error
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
// SyslogServerState holds the address and isolation information of a launched
// syslog server
type SyslogServerState struct {
IsolationConfig *cstructs.IsolationConfig
Addr string
}
// UniversalExecutor is an implementation of the Executor which launches and
@ -86,21 +100,31 @@ type Executor interface {
type UniversalExecutor struct {
cmd exec.Cmd
ctx *ExecutorContext
command *ExecCommand
taskDir string
groups *cgroupConfig.Cgroup
exitState *ProcessState
processExited chan interface{}
lre *logging.FileRotator
lro *logging.FileRotator
rotatorLock sync.Mutex
syslogServer *logging.SyslogServer
syslogChan chan *logging.SyslogMessage
groups *cgroupConfig.Cgroup
cgLock sync.Mutex
logger *log.Logger
lock sync.Mutex
}
// NewExecutor returns an Executor
func NewExecutor(logger *log.Logger) Executor {
return &UniversalExecutor{logger: logger, processExited: make(chan interface{})}
return &UniversalExecutor{
logger: logger,
processExited: make(chan interface{}),
}
}
// LaunchCmd launches a process and returns its state. It also configures an
@ -109,6 +133,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
e.ctx = ctx
e.command = command
// configuring the task dir
if err := e.configureTaskDir(); err != nil {
@ -122,29 +147,18 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
}
// setting the user of the process
if e.ctx.UnprivilegedUser {
if err := e.runAs("nobody"); err != nil {
if command.User != "" {
if err := e.runAs(command.User); err != nil {
return nil, err
}
}
logFileSize := int64(ctx.LogConfig.MaxFileSizeMB * 1024 * 1024)
lro, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return nil, fmt.Errorf("error creating log rotator for stdout of task %v", err)
// Setup the loggers
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.cmd.Stdout = lro
e.lro = lro
lre, err := logging.NewFileRotator(ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", ctx.TaskName),
ctx.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return nil, fmt.Errorf("error creating log rotator for stderr of task %v", err)
}
e.cmd.Stderr = lre
e.lre = lre
e.cmd.Stdout = e.lro
e.cmd.Stderr = e.lre
e.ctx.TaskEnv.Build()
@ -160,7 +174,7 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
// Determine the path to run as it may have to be relative to the chroot.
path := absPath
if e.ctx.FSIsolation {
if e.command.FSIsolation {
rel, err := filepath.Rel(e.taskDir, absPath)
if err != nil {
return nil, err
@ -182,15 +196,42 @@ func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext
return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
}
// configureLoggers sets up the standard out/error file rotators
func (e *UniversalExecutor) configureLoggers() error {
e.rotatorLock.Lock()
defer e.rotatorLock.Unlock()
logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
if e.lro == nil {
lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lro = lro
}
if e.lre == nil {
lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
if err != nil {
return err
}
e.lre = lre
}
return nil
}
// Wait waits until a process has exited and returns its exit code and errors
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
<-e.processExited
return e.exitState, nil
}
// COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
// UpdateLogConfig updates the log configuration
func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
e.ctx.LogConfig = logConfig
e.ctx.Task.LogConfig = logConfig
if e.lro == nil {
return fmt.Errorf("log rotator for stdout doesn't exist")
}
@ -205,11 +246,21 @@ func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error
return nil
}
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
e.ctx.Task = task
// Updating Log Config
fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
e.lro.MaxFiles = task.LogConfig.MaxFiles
e.lro.FileSize = fileSize
e.lre.MaxFiles = task.LogConfig.MaxFiles
e.lre.FileSize = fileSize
return nil
}
func (e *UniversalExecutor) wait() {
defer close(e.processExited)
err := e.cmd.Wait()
e.lre.Close()
e.lro.Close()
if err == nil {
e.exitState = &ProcessState{Pid: 0, ExitCode: 0, Time: time.Now()}
return
@ -220,14 +271,6 @@ func (e *UniversalExecutor) wait() {
exitCode = status.ExitStatus()
}
}
if e.ctx.FSIsolation {
e.removeChrootMounts()
}
if e.ctx.ResourceLimits {
e.lock.Lock()
DestroyCgroup(e.groups)
e.lock.Unlock()
}
e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Time: time.Now()}
}
@ -241,7 +284,13 @@ var (
// process
func (e *UniversalExecutor) Exit() error {
var merr multierror.Error
if e.cmd.Process != nil {
if e.syslogServer != nil {
e.syslogServer.Shutdown()
}
e.lre.Close()
e.lro.Close()
if e.command != nil && e.cmd.Process != nil {
proc, err := os.FindProcess(e.cmd.Process.Pid)
if err != nil {
e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
@ -252,17 +301,17 @@ func (e *UniversalExecutor) Exit() error {
}
}
if e.ctx.FSIsolation {
if e.command != nil && e.command.FSIsolation {
if err := e.removeChrootMounts(); err != nil {
merr.Errors = append(merr.Errors, err)
}
}
if e.ctx.ResourceLimits {
e.lock.Lock()
if e.command != nil && e.command.ResourceLimits {
e.cgLock.Lock()
if err := DestroyCgroup(e.groups); err != nil {
merr.Errors = append(merr.Errors, err)
}
e.lock.Unlock()
e.cgLock.Unlock()
}
return merr.ErrorOrNil()
}
@ -287,10 +336,10 @@ func (e *UniversalExecutor) ShutDown() error {
// configureTaskDir sets the task dir in the executor
func (e *UniversalExecutor) configureTaskDir() error {
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.TaskName]
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]
e.taskDir = taskDir
if !ok {
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.TaskName)
return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name)
}
e.cmd.Dir = taskDir
return nil
@ -344,3 +393,48 @@ func (e *UniversalExecutor) makeExecutable(binPath string) error {
}
return nil
}
// getListener returns a listener on a free port between the lower and upper
// bounds on Windows, or on a Unix domain socket on other platforms
func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
if runtime.GOOS == "windows" {
return e.listenerTCP(lowerBound, upperBound)
}
return e.listenerUnix()
}
// listenerTCP creates a TCP listener using an unused port between an upper and
// lower bound
func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
for i := lowerBound; i <= upperBound; i++ {
addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
if err != nil {
return nil, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
continue
}
return l, nil
}
return nil, fmt.Errorf("No free port found")
}
// listenerUnix creates a Unix domain socket
func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
f, err := ioutil.TempFile("", "plugin")
if err != nil {
return nil, err
}
path := f.Name()
if err := f.Close(); err != nil {
return nil, err
}
if err := os.Remove(path); err != nil {
return nil, err
}
return net.Listen("unix", path)
}

View file

@ -38,14 +38,14 @@ var (
// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
if e.ctx.FSIsolation {
if e.command.FSIsolation {
if err := e.configureChroot(); err != nil {
return err
}
}
if e.ctx.ResourceLimits {
if err := e.configureCgroups(e.ctx.TaskResources); err != nil {
if e.command.ResourceLimits {
if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
return fmt.Errorf("error creating cgroups: %v", err)
}
if err := e.applyLimits(os.Getpid()); err != nil {
@ -63,7 +63,7 @@ func (e *UniversalExecutor) configureIsolation() error {
// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
if !e.ctx.ResourceLimits {
if !e.command.ResourceLimits {
return nil
}
@ -155,11 +155,11 @@ func (e *UniversalExecutor) runAs(userid string) error {
// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
allocDir := e.ctx.AllocDir
if err := allocDir.MountSharedDir(e.ctx.TaskName); err != nil {
if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
return err
}
if err := allocDir.Embed(e.ctx.TaskName, chrootEnv); err != nil {
if err := allocDir.Embed(e.ctx.Task.Name, chrootEnv); err != nil {
return err
}
@ -183,8 +183,8 @@ func (e *UniversalExecutor) configureChroot() error {
// should be called when tearing down the task.
func (e *UniversalExecutor) removeChrootMounts() error {
// Prevent a race between Wait/ForceStop
e.lock.Lock()
defer e.lock.Unlock()
e.cgLock.Lock()
defer e.cgLock.Unlock()
return e.ctx.AllocDir.UnmountAll()
}

View file

@ -0,0 +1,44 @@
// +build !windows
package executor
import (
"fmt"
"io"
"log/syslog"
"github.com/hashicorp/nomad/client/driver/logging"
)
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
e.ctx = ctx
e.syslogChan = make(chan *logging.SyslogMessage, 2048)
l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound)
if err != nil {
return nil, err
}
e.logger.Printf("[DEBUG] sylog-server: launching syslog server on addr: %v", l.Addr().String())
if err := e.configureLoggers(); err != nil {
return nil, err
}
e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger)
go e.syslogServer.Start()
go e.collectLogs(e.lre, e.lro)
syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String())
return &SyslogServerState{Addr: syslogAddr}, nil
}
func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) {
for logParts := range e.syslogChan {
// If the severity of the log line is err then we write to stderr
// otherwise all messages go to stdout
if logParts.Severity == syslog.LOG_ERR {
e.lre.Write(logParts.Message)
e.lre.Write([]byte{'\n'})
} else {
e.lro.Write(logParts.Message)
e.lro.Write([]byte{'\n'})
}
}
}
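For orientation, a hypothetical client-side sketch: the Docker driver presumably points the task's syslog output at the returned `SyslogServerState.Addr`, and anything written there is routed by `collectLogs` into the task's rotated stdout/stderr files. Assuming an address in the `network://raddr` form built above:
```go
// Sketch only: write a line to a syslog server launched by LaunchSyslogServer.
// The address format mirrors the value built there
// ("unix:///tmp/pluginXXX" or "tcp://127.0.0.1:PORT").
package sketch

import (
	"fmt"
	"log/syslog"
	"strings"
)

func writeToCollector(addr, msg string) error {
	parts := strings.SplitN(addr, "://", 2)
	if len(parts) != 2 {
		return fmt.Errorf("unexpected address %q", addr)
	}
	// Severity LOG_ERR lines are routed to <task>.stderr.N by collectLogs;
	// everything else lands in <task>.stdout.N.
	w, err := syslog.Dial(parts[0], parts[1], syslog.LOG_INFO, "task")
	if err != nil {
		return err
	}
	defer w.Close()
	return w.Info(msg)
}
```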

View file

@ -15,6 +15,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
tu "github.com/hashicorp/nomad/testutil"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
)
var (
@ -30,7 +31,7 @@ var (
}
)
func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
func mockAllocDir(t *testing.T) (*structs.Task, *allocdir.AllocDir) {
alloc := mock.Alloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
@ -39,18 +40,16 @@ func mockAllocDir(t *testing.T) (string, *allocdir.AllocDir) {
log.Panicf("allocDir.Build() failed: %v", err)
}
return task.Name, allocDir
return task, allocDir
}
func testExecutorContext(t *testing.T) *ExecutorContext {
taskEnv := env.NewTaskEnvironment(mock.Node())
taskName, allocDir := mockAllocDir(t)
task, allocDir := mockAllocDir(t)
ctx := &ExecutorContext{
TaskEnv: taskEnv,
TaskName: taskName,
Task: task,
AllocDir: allocDir,
TaskResources: constraint,
LogConfig: structs.DefaultLogConfig(),
}
return ctx
}
@ -80,6 +79,9 @@ func TestExecutor_Start_Wait_Failure_Code(t *testing.T) {
if ps.ExitCode < 1 {
t.Fatalf("expected exit code to be non zero, actual: %v", ps.ExitCode)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
}
func TestExecutor_Start_Wait(t *testing.T) {
@ -98,6 +100,9 @@ func TestExecutor_Start_Wait(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
expected := "hello world"
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@ -119,9 +124,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
ctx := testExecutorContext(t)
defer ctx.AllocDir.Destroy()
ctx.FSIsolation = true
ctx.ResourceLimits = true
ctx.UnprivilegedUser = true
execCmd.FSIsolation = true
execCmd.ResourceLimits = true
execCmd.User = cstructs.DefaultUnpriviledgedUser
executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
ps, err := executor.LaunchCmd(&execCmd, ctx)
@ -135,6 +140,9 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
expected := "hello world"
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
@ -154,13 +162,13 @@ func TestExecutor_DestroyCgroup(t *testing.T) {
execCmd := ExecCommand{Cmd: "/bin/bash", Args: []string{"-c", "/usr/bin/yes"}}
ctx := testExecutorContext(t)
ctx.LogConfig.MaxFiles = 1
ctx.LogConfig.MaxFileSizeMB = 300
ctx.Task.LogConfig.MaxFiles = 1
ctx.Task.LogConfig.MaxFileSizeMB = 300
defer ctx.AllocDir.Destroy()
ctx.FSIsolation = true
ctx.ResourceLimits = true
ctx.UnprivilegedUser = true
execCmd.FSIsolation = true
execCmd.ResourceLimits = true
execCmd.User = "nobody"
executor := NewExecutor(log.New(os.Stdout, "", log.LstdFlags))
ps, err := executor.LaunchCmd(&execCmd, ctx)
@ -171,7 +179,10 @@ func TestExecutor_DestroyCgroup(t *testing.T) {
t.Fatalf("expected process to start and have non zero pid")
}
time.Sleep(200 * time.Millisecond)
executor.Exit()
if err := executor.Exit(); err != nil {
t.Fatalf("err: %v", err)
}
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
finfo, err := os.Stat(file)
if err != nil {
@ -203,6 +214,9 @@ func TestExecutor_Start_Kill(t *testing.T) {
if err != nil {
t.Fatalf("error in waiting for command: %v", err)
}
if err := executor.Exit(); err != nil {
t.Fatalf("error: %v", err)
}
file := filepath.Join(ctx.AllocDir.LogDir(), "web.stdout.0")
time.Sleep(time.Duration(tu.TestMultiplier()*2) * time.Second)

View file

@ -0,0 +1,5 @@
package executor
func (e *UniversalExecutor) LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error) {
return nil, nil
}

View file

@ -1,6 +1,7 @@
package driver
import (
"encoding/gob"
"log"
"net/rpc"
@ -9,6 +10,14 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
)
// Registering these types since we have to serialize and de-serialize the Task
// structs over the wire between drivers and the executor.
func init() {
gob.Register([]interface{}{})
gob.Register(map[string]interface{}{})
gob.Register([]map[string]string{})
}
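A standalone sketch of why these registrations are needed, under the assumption that `Task` fields such as `Config` carry `map[string]interface{}` and `[]interface{}` values across the gob-backed RPC:
```go
// Sketch only: encoding/gob refuses to encode concrete values held in
// interface{} slots unless their types were registered first.
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
	"log"
)

type payload struct {
	// Stand-in for fields like Task.Config that hold arbitrary values.
	Config map[string]interface{}
}

func main() {
	// Without these two calls, Encode fails with
	// "gob: type not registered for interface: []interface {}".
	gob.Register([]interface{}{})
	gob.Register(map[string]interface{}{})

	p := payload{Config: map[string]interface{}{
		"args": []interface{}{"-c", "echo hi"},
	}}

	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(p); err != nil {
		log.Fatal(err)
	}
	var out payload
	if err := gob.NewDecoder(&buf).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.Config["args"])
}
```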
type ExecutorRPC struct {
client *rpc.Client
}
@ -19,12 +28,23 @@ type LaunchCmdArgs struct {
Ctx *executor.ExecutorContext
}
// LaunchSyslogServerArgs wraps the executor context for the purposes of RPC
type LaunchSyslogServerArgs struct {
Ctx *executor.ExecutorContext
}
func (e *ExecutorRPC) LaunchCmd(cmd *executor.ExecCommand, ctx *executor.ExecutorContext) (*executor.ProcessState, error) {
var ps *executor.ProcessState
err := e.client.Call("Plugin.LaunchCmd", LaunchCmdArgs{Cmd: cmd, Ctx: ctx}, &ps)
return ps, err
}
func (e *ExecutorRPC) LaunchSyslogServer(ctx *executor.ExecutorContext) (*executor.SyslogServerState, error) {
var ss *executor.SyslogServerState
err := e.client.Call("Plugin.LaunchSyslogServer", LaunchSyslogServerArgs{Ctx: ctx}, &ss)
return ss, err
}
func (e *ExecutorRPC) Wait() (*executor.ProcessState, error) {
var ps executor.ProcessState
err := e.client.Call("Plugin.Wait", new(interface{}), &ps)
@ -43,6 +63,10 @@ func (e *ExecutorRPC) UpdateLogConfig(logConfig *structs.LogConfig) error {
return e.client.Call("Plugin.UpdateLogConfig", logConfig, new(interface{}))
}
func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
}
type ExecutorRPCServer struct {
Impl executor.Executor
}
@ -55,6 +79,14 @@ func (e *ExecutorRPCServer) LaunchCmd(args LaunchCmdArgs, ps *executor.ProcessSt
return err
}
func (e *ExecutorRPCServer) LaunchSyslogServer(args LaunchSyslogServerArgs, ss *executor.SyslogServerState) error {
state, err := e.Impl.LaunchSyslogServer(args.Ctx)
if state != nil {
*ss = *state
}
return err
}
func (e *ExecutorRPCServer) Wait(args interface{}, ps *executor.ProcessState) error {
state, err := e.Impl.Wait()
if state != nil {
@ -75,6 +107,10 @@ func (e *ExecutorRPCServer) UpdateLogConfig(args *structs.LogConfig, resp *inter
return e.Impl.UpdateLogConfig(args)
}
func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) error {
return e.Impl.UpdateTask(args)
}
type ExecutorPlugin struct {
logger *log.Logger
Impl *ExecutorRPCServer

View file

@ -156,12 +156,7 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
FSIsolation: true,
UnprivilegedUser: true,
ResourceLimits: true,
Task: task,
}
absPath, err := GetAbsolutePath("java")
@ -169,7 +164,13 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, err
}
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{Cmd: absPath, Args: args}, executorCtx)
ps, err := execIntf.LaunchCmd(&executor.ExecCommand{
Cmd: absPath,
Args: args,
FSIsolation: true,
ResourceLimits: true,
User: cstructs.DefaultUnpriviledgedUser,
}, executorCtx)
if err != nil {
pluginClient.Kill()
return nil, err
@ -290,7 +291,7 @@ func (h *javaHandle) WaitCh() chan *cstructs.WaitResult {
func (h *javaHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -338,5 +339,6 @@ func (h *javaHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
h.executor.Exit()
h.pluginClient.Kill()
}

View file

@ -0,0 +1,10 @@
package logging
type SyslogServer struct {
}
func (s *SyslogServer) Shutdown() {
}
type SyslogMessage struct {
}

View file

@ -193,9 +193,7 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: args[0], Args: args[1:]}, executorCtx)
if err != nil {
@ -292,7 +290,7 @@ func (h *qemuHandle) WaitCh() chan *cstructs.WaitResult {
func (h *qemuHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -336,5 +334,6 @@ func (h *qemuHandle) run() {
close(h.doneCh)
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
h.executor.Exit()
h.pluginClient.Kill()
}

View file

@ -98,9 +98,7 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
LogConfig: task.LogConfig,
Task: task,
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
if err != nil {
@ -195,7 +193,7 @@ func (h *rawExecHandle) WaitCh() chan *cstructs.WaitResult {
func (h *rawExecHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -237,5 +235,8 @@ func (h *rawExecHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -235,10 +235,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
TaskName: task.Name,
TaskResources: task.Resources,
UnprivilegedUser: false,
LogConfig: task.LogConfig,
Task: task,
}
absPath, err := GetAbsolutePath("rkt")
@ -329,7 +326,7 @@ func (h *rktHandle) WaitCh() chan *cstructs.WaitResult {
func (h *rktHandle) Update(task *structs.Task) error {
// Store the updated kill timeout.
h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
h.executor.UpdateLogConfig(task.LogConfig)
h.executor.UpdateTask(task)
// Update is not possible
return nil
@ -360,5 +357,8 @@ func (h *rktHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
}
h.pluginClient.Kill()
}

View file

@ -6,6 +6,11 @@ import (
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)
const (
// The default user that the executor uses to run tasks
DefaultUnpriviledgedUser = "nobody"
)
// WaitResult stores the result of a Wait operation.
type WaitResult struct {
ExitCode int

View file

@ -75,8 +75,6 @@ func (c *AllocStatusCommand) Run(args []string) int {
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -114,12 +112,11 @@ func (c *AllocStatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
// Format the allocation data
basic := []string{

View file

@ -1,6 +1,12 @@
package command
import "github.com/mitchellh/cli"
import (
"math/rand"
"time"
"github.com/hashicorp/nomad/api"
"github.com/mitchellh/cli"
)
type FSCommand struct {
Meta
@ -17,3 +23,23 @@ func (f *FSCommand) Synopsis() string {
func (f *FSCommand) Run(args []string) int {
return cli.RunResultHelp
}
// getRandomJobAlloc returns a random allocation ID for the given jobID,
// preferring a running allocation but using a dead allocation if none are running
func getRandomJobAlloc(client *api.Client, jobID string) (string, error) {
var runningAllocs []*api.AllocationListStub
allocs, _, err := client.Jobs().Allocations(jobID, nil)
for _, v := range allocs {
if v.ClientStatus == "running" {
runningAllocs = append(runningAllocs, v)
}
}
// If we don't have any allocations running, use dead allocations
if len(runningAllocs) < 1 {
runningAllocs = allocs
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
allocID := runningAllocs[r.Intn(len(runningAllocs))].ID
return allocID, err
}

View file

@ -26,6 +26,9 @@ Cat Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -35,10 +38,11 @@ func (f *FSCatCommand) Synopsis() string {
}
func (f *FSCatCommand) Run(args []string) int {
var verbose bool
var verbose, job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -50,7 +54,6 @@ func (f *FSCatCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -58,18 +61,25 @@ func (f *FSCatCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
// If -job is specified, use random allocation, otherwise use provided allocation
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -107,12 +117,11 @@ func (f *FSCatCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/dustin/go-humanize"
humanize "github.com/dustin/go-humanize"
)
type FSListCommand struct {
@ -30,6 +30,9 @@ Ls Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -41,10 +44,12 @@ func (f *FSListCommand) Synopsis() string {
func (f *FSListCommand) Run(args []string) int {
var verbose bool
var machine bool
var job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&machine, "H", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -56,7 +61,6 @@ func (f *FSListCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -64,18 +68,26 @@ func (f *FSListCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
// If -job is specified, use random allocation, otherwise use provided allocation
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error fetching allocations: %v", err))
return 1
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -113,12 +125,11 @@ func (f *FSListCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/dustin/go-humanize"
humanize "github.com/dustin/go-humanize"
)
type FSStatCommand struct {
@ -29,6 +29,9 @@ Stat Options:
-verbose
Show full information.
-job <job-id>
Use a random allocation from a specified job-id.
`
return strings.TrimSpace(helpText)
}
@ -40,10 +43,12 @@ func (f *FSStatCommand) Synopsis() string {
func (f *FSStatCommand) Run(args []string) int {
var verbose bool
var machine bool
var job bool
flags := f.Meta.FlagSet("fs-list", FlagSetClient)
flags.Usage = func() { f.Ui.Output(f.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&machine, "H", false, "")
flags.BoolVar(&job, "job", false, "")
if err := flags.Parse(args); err != nil {
return 1
@ -55,7 +60,6 @@ func (f *FSStatCommand) Run(args []string) int {
return 1
}
allocID := args[0]
path := "/"
if len(args) == 2 {
path = args[1]
@ -63,18 +67,24 @@ func (f *FSStatCommand) Run(args []string) int {
client, err := f.Meta.Client()
if err != nil {
f.Ui.Error(fmt.Sprintf("Error inititalizing client: %v", err))
f.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
return 1
}
allocID := args[0]
if job {
allocID, err = getRandomJobAlloc(client, args[0])
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying API: %v", err))
}
}
// Truncate the id unless full length is requested
length := shortId
if verbose {
length = fullId
}
// Query the allocation info
alloc, _, err := client.Allocations().Info(allocID, nil)
if err != nil {
if len(allocID) == 1 {
f.Ui.Error(fmt.Sprintf("Alloc ID must contain at least two characters."))
return 1
@ -112,12 +122,11 @@ func (f *FSStatCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single allocation
alloc, _, err = client.Allocations().Info(allocs[0].ID, nil)
alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
f.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
return 1
}
}
if alloc.DesiredStatus == "failed" {
allocID := limit(alloc.ID, length)

View file

@ -69,8 +69,6 @@ func (c *NodeDrainCommand) Run(args []string) int {
}
// Check if node exists
node, _, err := client.Nodes().Info(nodeID, nil)
if err != nil {
if len(nodeID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -111,12 +109,11 @@ func (c *NodeDrainCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single node
node, _, err = client.Nodes().Info(nodes[0].ID, nil)
node, _, err := client.Nodes().Info(nodes[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
return 1
}
}
// Toggle node draining
if _, err := client.Nodes().ToggleDrain(node.ID, enable, nil); err != nil {

View file

@ -133,8 +133,6 @@ func (c *NodeStatusCommand) Run(args []string) int {
// Query the specific node
nodeID := args[0]
node, _, err := client.Nodes().Info(nodeID, nil)
if err != nil {
if len(nodeID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
@ -175,12 +173,11 @@ func (c *NodeStatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single node
node, _, err = client.Nodes().Info(nodes[0].ID, nil)
node, _, err := client.Nodes().Info(nodes[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err))
return 1
}
}
m := node.Attributes
keys := make([]string, len(m))

View file

@ -105,8 +105,6 @@ func (c *StatusCommand) Run(args []string) int {
// Try querying the job
jobID := args[0]
job, _, err := client.Jobs().Info(jobID, nil)
if err != nil {
jobs, _, err := client.Jobs().PrefixList(jobID)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
@ -130,12 +128,11 @@ func (c *StatusCommand) Run(args []string) int {
return 0
}
// Prefix lookup matched a single job
job, _, err = client.Jobs().Info(jobs[0].ID, nil)
job, _, err := client.Jobs().Info(jobs[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
return 1
}
}
// Check if it is periodic
sJob, err := convertApiJob(job)

View file

@ -2,7 +2,7 @@
set -ex
RKT_VERSION="v1.0.0"
RKT_VERSION="v1.2.0"
DEST_DIR="/usr/local/bin"
sudo mkdir -p /etc/rkt/net.d

View file

@ -2,7 +2,7 @@
set -ex
DOCKER_VERSION="1.10.2"
DOCKER_VERSION="1.10.3"
sudo stop docker
sudo rm -rf /var/lib/docker

View file

@ -23,7 +23,7 @@ nomad fs stat <alloc-id> <path>
nomad fs cat <alloc-id> <path>
```
A valid allocation id is necessary and the path is relative to the root of the allocation directory.
A valid allocation id is necessary unless `-job` is specified, and the path is relative to the root of the allocation directory.
The path is optional and defaults to `/` of the allocation directory.
## Examples
@ -50,3 +50,14 @@ $ nomad fs cat redis/local/redis.stdout
6710:C 27 Jan 22:04:03.794 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
6710:M 27 Jan 22:04:03.795 * Increased maximum number of open files to 10032 (it was originally set to 256).
## Using Job-ID instead of Alloc-ID
Passing `-job` to one of the `fs` commands makes the command select a random allocation ID from the specified job.
```
nomad fs ls -job <job-id> <path>
```
Nomad will prefer to select a running allocation ID for the job, but if no running allocations for the job are found, Nomad will use a dead allocation.
This is useful for debugging a job that has multiple allocations when a specific allocation ID is not required.

View file

@ -28,8 +28,8 @@ spelunking through the source code.
There are four primary "nouns" in Nomad; jobs, nodes, allocations, and evaluations.
Jobs are submitted by users and represent a _desired state_. A job is a declarative description
of tasks to run which are bounded by constraints and require resources. Nodes are the servers
in the clusters that tasks can be scheduled on. The mapping of tasks in a job to nodes is done
of tasks to run which are bounded by constraints and require resources. Tasks can be scheduled on
nodes in the cluster running the Nomad client. The mapping of tasks in a job to clients is done
using allocations. An allocation is used to declare that a set of tasks in a job should be run
on a particular node. Scheduling is the process of determining the appropriate allocations and
is done as part of an evaluation.

View file

@ -65,30 +65,37 @@ driver.
<tr>
<th>Variable</th>
<th>Description</th>
<th>Example</th>
</tr>
<tr>
<td>${node.unique.id}</td>
<td>The client node identifier</td>
<td>The 36 character unique client node identifier</td>
<td>9afa5da1-8f39-25a2-48dc-ba31fd7c0023</td>
</tr>
<tr>
<td>${node.datacenter}</td>
<td>The client node datacenter</td>
<td>The client node's datacenter</td>
<td>dc1</td>
</tr>
<tr>
<td>${node.unique.name}</td>
<td>The client node name</td>
<td>The client node's name</td>
<td>nomad-client-10-1-2-4</td>
</tr>
<tr>
<td>${node.class}</td>
<td>The client node class</td>
<td>The client node's class</td>
<td>linux-64bit</td>
</tr>
<tr>
<td>${attr.\<key\>}</td>
<td>The attribute given by `key` on the client node.</td>
<td>platform.aws.instance-type:r3.large</td>
</tr>
<tr>
<td>${meta.\<key\>}</td>
<td>The metadata value given by `key` on the client node.</td>
<td></td>
</tr>
</table>
@ -177,19 +184,19 @@ a particular node and as such can not be used in constraints.
<td>The CPU limit in MHz for the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_ID</td>
<td>${NOMAD_ALLOC_ID}</td>
<td>The allocation ID of the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_NAME</td>
<td>${NOMAD_ALLOC_NAME}</td>
<td>The allocation name of the task</td>
</tr>
<tr>
<td>NOMAD_ALLOC_INDEX</td>
<td>${NOMAD_ALLOC_INDEX}</td>
<td>The allocation index; useful to distinguish instances of task groups</td>
</tr>
<tr>
<td>NOMAD_TASK_NAME</td>
<td>${NOMAD_TASK_NAME}</td>
<td>The task's name</td>
</tr>
<tr>

View file

@ -1,6 +1,6 @@
---
layout: "docs"
page_title: "Upgrade Nomad"
page_title: "Upgrading Nomad"
sidebar_current: "docs-upgrade-upgrading"
description: |-
Learn how to upgrade Nomad.
@ -8,29 +8,109 @@ description: |-
# Upgrading Nomad
Both Nomad Clients and Servers are meant to be long-running processes that
maintain communication with each other. Nomad Servers maintain quorum with other
Servers and Clients are in constant communication with Servers. As such, care
should be taken to properly upgrade Nomad to ensure minimal service disruption.
This page documents how to upgrade Nomad when a new version is released.
## Standard Upgrades
~> **Upgrade Warning!** Both Nomad Clients and Servers are meant to be
long-running processes that maintain communication with each other. Nomad
Servers maintain quorum with other Servers, and Clients are in constant
communication with Servers. As such, care should be taken to properly
upgrade Nomad to ensure minimal service disruption. Unsafe upgrades can
cause a service outage.
## Upgrade Process
For upgrades we strive to ensure backwards compatibility. For most upgrades, the
process is simple. Assuming the current version of Nomad is A, and version B is
released.
process is as simple as upgrading the binary and restarting the service.
1. On each server, install version B of Nomad.
Prior to starting the upgrade, please check the
[specific version details](/docs/upgrade/upgrade-specific.html) page, as some
version differences may require specific steps.
2. Shut down version A, restart with version B on one server at a time.
At a high level we complete the following steps to upgrade Nomad:
3. You can run `nomad server-members` to ensure that all servers are
clustered and running the version B.
* **Add the new version**
* **Check cluster health**
* **Remove the old version**
* **Check cluster health**
* **Upgrade clients**
### 1. Add the new version to the existing cluster
Whether you are replacing the software in place on existing systems or bringing
up new hosts, you should make changes incrementally, verifying cluster health at
each step of the upgrade.
On a single server, install the new version of Nomad. You can do this by
joining a new server to the cluster or by replacing or upgrading the binary
locally and restarting the service.
### 2. Check cluster health
Monitor the Nomad logs on the remaining nodes to check that the new node has
entered the cluster correctly.
Run `nomad agent-info` on the new server and check that the `last_log_index` is
of a similar value to the other nodes. This step ensures that changes have been
replicated to the new node.
```
ubuntu@nomad-server-10-1-1-4:~$ nomad agent-info
nomad
bootstrap = false
known_regions = 1
leader = false
server = true
raft
applied_index = 53460
commit_index = 53460
fsm_pending = 0
last_contact = 54.512216ms
last_log_index = 53460
last_log_term = 1
last_snapshot_index = 49511
last_snapshot_term = 1
num_peers = 2
...
```
Continue the upgrade across the server fleet, making sure to upgrade a single
Nomad server at a time. You can check the state of the servers and clients with
the `nomad server-members` and `nomad node-status` commands, which indicate the
state of the nodes.
### 3. Remove the old versions from servers
If you are doing an in-place upgrade on existing servers, this step is not
necessary, as the version was changed in place.
If you are doing an upgrade by adding new servers and removing old servers
from the fleet, you need to ensure that each old server has left the fleet safely.
1. Stop the service on the existing host.
2. On another server, issue `nomad server-members` and check the status; if
the server is now in a left state, you are safe to continue.
3. If the server is not in a left state, issue a `nomad server-force-leave <server id>`
to remove the server from the cluster.
Monitor the logs of the other hosts in the Nomad cluster over this period.
### 4. Check cluster health
Use the same actions in step #2 above to confirm cluster health.
### 5. Upgrade clients
Following the successful upgrade of the servers, you can now update your
clients using a similar process. If you wish to gracefully move tasks off a
client, use the `nomad node-drain <node-id>` command to migrate jobs to another
client in the cluster. The `node-drain` command prevents new tasks from being
allocated to the client and begins migrating existing allocations to another
client.
## Done!
You are now running the latest Nomad version. You can verify that all clients
have joined by running `nomad node-status` and checking that all clients are
in a `ready` state.
4. Once all the servers are upgraded, begin a rollout of clients following
the same process.
5. Done! You are now running the latest Nomad version. You can verify all
Clients joined by running `nomad node-status` and checking all the clients
are in a `ready` state.