open-nomad/drivers/rkt/driver.go

// +build linux

package rkt

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"math/rand"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	appcschema "github.com/appc/spec/schema"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	version "github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/drivers/shared/eventer"
	"github.com/hashicorp/nomad/drivers/shared/executor"
	"github.com/hashicorp/nomad/plugins/base"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/hashicorp/nomad/plugins/shared"
	"github.com/hashicorp/nomad/plugins/shared/hclspec"
	"github.com/hashicorp/nomad/plugins/shared/loader"
	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
	rktv1 "github.com/rkt/rkt/api/v1"
)

const (
	// pluginName is the name of the plugin
	pluginName = "rkt"

	// fingerprintPeriod is the interval at which the driver will send fingerprint responses
	fingerprintPeriod = 30 * time.Second

	// minRktVersion is the earliest supported version of rkt. rkt added support
	// for CPU and memory isolators in 0.14.0. We cannot support an earlier
	// version to maintain an uniform interface across all drivers
	minRktVersion = "1.27.0"

	// rktCmd is the command rkt is installed as.
	rktCmd = "rkt"

	// networkDeadline is how long to wait for container network
	// information to become available.
	networkDeadline = 1 * time.Minute
)

var (
	// PluginID is the rawexec plugin metadata registered in the plugin
	// catalog.
	PluginID = loader.PluginID{
		Name:       pluginName,
		PluginType: base.PluginTypeDriver,
	}

	// PluginConfig is the rawexec factory function registered in the
	// plugin catalog.
	PluginConfig = &loader.InternalPluginConfig{
		Config:  map[string]interface{}{},
		Factory: func(l hclog.Logger) interface{} { return NewRktDriver(l) },
	}
)

// PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options.
func PluginLoader(opts map[string]string) (map[string]interface{}, error) {
	conf := map[string]interface{}{}
	if v, err := strconv.ParseBool(opts["driver.rkt.volumes.enabled"]); err == nil {
		conf["volumes_enabled"] = v
	}
	return conf, nil
}

var (
	// pluginInfo is the response returned for the PluginInfo RPC
	pluginInfo = &base.PluginInfoResponse{
		Type:              base.PluginTypeDriver,
		PluginApiVersions: []string{drivers.ApiVersion010},
		PluginVersion:     "0.1.0",
		Name:              pluginName,
	}

	// configSpec is the hcl specification returned by the ConfigSchema RPC
	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
		"volumes_enabled": hclspec.NewDefault(
			hclspec.NewAttr("volumes_enabled", "bool", false),
			hclspec.NewLiteral("true"),
		),
	})

	// taskConfigSpec is the hcl specification for the driver config section of
	// a taskConfig within a job. It is returned in the TaskConfigSchema RPC
	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
		"image":              hclspec.NewAttr("image", "string", true),
		"command":            hclspec.NewAttr("command", "string", false),
		"args":               hclspec.NewAttr("args", "list(string)", false),
		"trust_prefix":       hclspec.NewAttr("trust_prefix", "string", false),
		"dns_servers":        hclspec.NewAttr("dns_servers", "list(string)", false),
		"dns_search_domains": hclspec.NewAttr("dns_search_domains", "list(string)", false),
		"net":                hclspec.NewAttr("net", "list(string)", false),
		"port_map":           hclspec.NewBlockAttrs("port_map", "string", false),
		"volumes":            hclspec.NewAttr("volumes", "list(string)", false),
		"insecure_options":   hclspec.NewAttr("insecure_options", "list(string)", false),
		"no_overlay":         hclspec.NewAttr("no_overlay", "bool", false),
		"debug":              hclspec.NewAttr("debug", "bool", false),
		"group":              hclspec.NewAttr("group", "string", false),
	})

	// capabilities is returned by the Capabilities RPC and indicates what
	// optional features this driver supports
	capabilities = &drivers.Capabilities{
		SendSignals: true,
		Exec:        true,
		FSIsolation: drivers.FSIsolationImage,
	}

	reRktVersion  = regexp.MustCompile(`rkt [vV]ersion[:]? (\d[.\d]+)`)
	reAppcVersion = regexp.MustCompile(`appc [vV]ersion[:]? (\d[.\d]+)`)
)

// Config is the client configuration for the driver
type Config struct {
	// VolumesEnabled allows tasks to bind host paths (volumes) inside their
	// container. Binding relative paths is always allowed and will be resolved
	// relative to the allocation's directory.
	VolumesEnabled bool `codec:"volumes_enabled"`
}

// TaskConfig is the driver configuration of a taskConfig within a job
type TaskConfig struct {
	ImageName        string            `codec:"image"`
	Command          string            `codec:"command"`
	Args             []string          `codec:"args"`
	TrustPrefix      string            `codec:"trust_prefix"`
	DNSServers       []string          `codec:"dns_servers"`        // DNS Server for containers
	DNSSearchDomains []string          `codec:"dns_search_domains"` // DNS Search domains for containers
	Net              []string          `codec:"net"`                // Networks for the containers
	PortMap          map[string]string `codec:"port_map"`           // A map of host port and the port name defined in the image manifest file
	Volumes          []string          `codec:"volumes"`            // Host-Volumes to mount in, syntax: /path/to/host/directory:/destination/path/in/container[:readOnly]
	InsecureOptions  []string          `codec:"insecure_options"`   // list of args for --insecure-options

	NoOverlay bool   `codec:"no_overlay"` // disable overlayfs for rkt run
	Debug     bool   `codec:"debug"`      // Enable debug option for rkt command
	Group     string `codec:"group"`      // Group override for the container
}

// TaskState is the state which is encoded in the handle returned in
// StartTask. This information is needed to rebuild the taskConfig state and handler
// during recovery.
type TaskState struct {
	ReattachConfig *shared.ReattachConfig
	TaskConfig     *drivers.TaskConfig
	Pid            int
	StartedAt      time.Time
	UUID           string
}

// Driver is a driver for running images via Rkt We attempt to chose sane
// defaults for now, with more configuration available planned in the future.
type Driver struct {
	// eventer is used to handle multiplexing of TaskEvents calls such that an
	// event can be broadcast to all callers
	eventer *eventer.Eventer

	// config is the driver configuration set by the SetConfig RPC
	config *Config

	// nomadConfig is the client config from nomad
	nomadConfig *base.ClientDriverConfig

	// tasks is the in memory datastore mapping taskIDs to rktTaskHandles
	tasks *taskStore

	// ctx is the context for the driver. It is passed to other subsystems to
	// coordinate shutdown
	ctx context.Context

	// signalShutdown is called when the driver is shutting down and cancels the
	// ctx passed to any subsystems
	signalShutdown context.CancelFunc

	// logger will log to the Nomad agent
	logger hclog.Logger

	// hasFingerprinted is used to store whether we have fingerprinted before
	hasFingerprinted bool
	fingerprintLock  sync.Mutex
}

func NewRktDriver(logger hclog.Logger) drivers.DriverPlugin {
	ctx, cancel := context.WithCancel(context.Background())
	logger = logger.Named(pluginName)
	return &Driver{
		eventer:        eventer.NewEventer(ctx, logger),
		config:         &Config{},
		tasks:          newTaskStore(),
		ctx:            ctx,
		signalShutdown: cancel,
		logger:         logger,
	}
}

func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
	return pluginInfo, nil
}

func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
	return configSpec, nil
}

func (d *Driver) SetConfig(cfg *base.Config) error {
	var config Config
	if len(cfg.PluginConfig) != 0 {
		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
			return err
		}
	}

	d.config = &config
	if cfg.AgentConfig != nil {
		d.nomadConfig = cfg.AgentConfig.Driver
	}
	return nil
}

func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
	return taskConfigSpec, nil
}

func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
	return capabilities, nil
}

func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
	ch := make(chan *drivers.Fingerprint)
	go d.handleFingerprint(ctx, ch)
	return ch, nil
}

func (d *Driver) handleFingerprint(ctx context.Context, ch chan *drivers.Fingerprint) {
	defer close(ch)
	ticker := time.NewTimer(0)
	for {
		select {
		case <-ctx.Done():
			return
		case <-d.ctx.Done():
			return
		case <-ticker.C:
			ticker.Reset(fingerprintPeriod)
			ch <- d.buildFingerprint()
		}
	}
}

// setFingerprinted marks the driver as having fingerprinted once before
func (d *Driver) setFingerprinted() {
	d.fingerprintLock.Lock()
	d.hasFingerprinted = true
	d.fingerprintLock.Unlock()
}

// fingerprinted returns whether the driver has fingerprinted before
func (d *Driver) fingerprinted() bool {
	d.fingerprintLock.Lock()
	defer d.fingerprintLock.Unlock()
	return d.hasFingerprinted
}

func (d *Driver) buildFingerprint() *drivers.Fingerprint {
	defer func() {
		d.setFingerprinted()
	}()

	fingerprint := &drivers.Fingerprint{
		Attributes:        map[string]*pstructs.Attribute{},
		Health:            drivers.HealthStateHealthy,
		HealthDescription: drivers.DriverHealthy,
	}

	// Only enable if we are root
	if syscall.Geteuid() != 0 {
		if !d.fingerprinted() {
			d.logger.Debug("must run as root user, disabling")
		}
		fingerprint.Health = drivers.HealthStateUndetected
		fingerprint.HealthDescription = drivers.DriverRequiresRootMessage
		return fingerprint
	}

	outBytes, err := exec.Command(rktCmd, "version").Output()
	if err != nil {
		fingerprint.Health = drivers.HealthStateUndetected
		fingerprint.HealthDescription = fmt.Sprintf("Failed to execute %s version: %v", rktCmd, err)
		return fingerprint
	}
	out := strings.TrimSpace(string(outBytes))

	rktMatches := reRktVersion.FindStringSubmatch(out)
	appcMatches := reAppcVersion.FindStringSubmatch(out)
	if len(rktMatches) != 2 || len(appcMatches) != 2 {
		fingerprint.Health = drivers.HealthStateUndetected
		fingerprint.HealthDescription = "Unable to parse rkt version string"
		return fingerprint
	}

	minVersion, _ := version.NewVersion(minRktVersion)
	currentVersion, _ := version.NewVersion(rktMatches[1])
	if currentVersion.LessThan(minVersion) {
		// Do not allow ancient rkt versions
		fingerprint.Health = drivers.HealthStateUndetected
		fingerprint.HealthDescription = fmt.Sprintf("Unsuported rkt version %s", currentVersion)
		if !d.fingerprinted() {
			d.logger.Warn("unsupported rkt version please upgrade to >= "+minVersion.String(),
				"rkt_version", currentVersion)
		}
		return fingerprint
	}

	fingerprint.Attributes["driver.rkt"] = pstructs.NewBoolAttribute(true)
	fingerprint.Attributes["driver.rkt.version"] = pstructs.NewStringAttribute(rktMatches[1])
	fingerprint.Attributes["driver.rkt.appc.version"] = pstructs.NewStringAttribute(appcMatches[1])
	if d.config.VolumesEnabled {
		fingerprint.Attributes["driver.rkt.volumes.enabled"] = pstructs.NewBoolAttribute(true)
	}

	return fingerprint

}

func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
	if handle == nil {
		return fmt.Errorf("error: handle cannot be nil")
	}

	// If already attached to handle there's nothing to recover.
	if _, ok := d.tasks.Get(handle.Config.ID); ok {
		d.logger.Trace("nothing to recover; task already exists",
			"task_id", handle.Config.ID,
			"task_name", handle.Config.Name,
		)
		return nil
	}

	var taskState TaskState
	if err := handle.GetDriverState(&taskState); err != nil {
		d.logger.Error("failed to decode taskConfig state from handle", "error", err, "task_id", handle.Config.ID)
		return fmt.Errorf("failed to decode taskConfig state from handle: %v", err)
	}

	plugRC, err := shared.ReattachConfigToGoPlugin(taskState.ReattachConfig)
	if err != nil {
		d.logger.Error("failed to build ReattachConfig from taskConfig state", "error", err, "task_id", handle.Config.ID)
		return fmt.Errorf("failed to build ReattachConfig from taskConfig state: %v", err)
	}

	execImpl, pluginClient, err := executor.CreateExecutorWithConfig(plugRC,
		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
	if err != nil {
		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
		return fmt.Errorf("failed to reattach to executor: %v", err)
	}

	// The taskConfig's environment is set via --set-env flags in Start, but the rkt
	// command itself needs an environment with PATH set to find iptables.
	// TODO (preetha) need to figure out how to read env.blacklist
	eb := taskenv.NewEmptyBuilder()
	filter := strings.Split(config.DefaultEnvBlacklist, ",")
	rktEnv := eb.SetHostEnvvars(filter).Build()

	h := &taskHandle{
		exec:         execImpl,
		env:          rktEnv,
		pid:          taskState.Pid,
		uuid:         taskState.UUID,
		pluginClient: pluginClient,
		taskConfig:   taskState.TaskConfig,
		procState:    drivers.TaskStateRunning,
		startedAt:    taskState.StartedAt,
		exitResult:   &drivers.ExitResult{},
	}

	d.tasks.Set(taskState.TaskConfig.ID, h)

	go h.run()
	return nil
}

func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
	if _, ok := d.tasks.Get(cfg.ID); ok {
		return nil, nil, fmt.Errorf("taskConfig with ID '%s' already started", cfg.ID)
	}

	var driverConfig TaskConfig

	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
	}

	handle := drivers.NewTaskHandle(pluginName)
	handle.Config = cfg

	// todo(preetha) - port map in client v1 is a slice of maps that get merged. figure out if the caller will do this
	//driverConfig.PortMap

	// ACI image
	img := driverConfig.ImageName

	// Global arguments given to both prepare and run-prepared
	globalArgs := make([]string, 0, 50)

	// Add debug option to rkt command.
	debug := driverConfig.Debug

	// Add the given trust prefix
	trustPrefix := driverConfig.TrustPrefix
	insecure := false
	if trustPrefix != "" {
		var outBuf, errBuf bytes.Buffer
		cmd := exec.Command(rktCmd, "trust", "--skip-fingerprint-review=true", fmt.Sprintf("--prefix=%s", trustPrefix), fmt.Sprintf("--debug=%t", debug))
		cmd.Stdout = &outBuf
		cmd.Stderr = &errBuf
		if err := cmd.Run(); err != nil {
			return nil, nil, fmt.Errorf("Error running rkt trust: %s\n\nOutput: %s\n\nError: %s",
				err, outBuf.String(), errBuf.String())
		}
		d.logger.Debug("added trust prefix", "trust_prefix", trustPrefix, "task_name", cfg.Name)
	} else {
		// Disable signature verification if the trust command was not run.
		insecure = true
	}

	// if we have a selective insecure_options, prefer them
	// insecure options are rkt's global argument, so we do this before the actual "run"
	if len(driverConfig.InsecureOptions) > 0 {
		globalArgs = append(globalArgs, fmt.Sprintf("--insecure-options=%s", strings.Join(driverConfig.InsecureOptions, ",")))
	} else if insecure {
		globalArgs = append(globalArgs, "--insecure-options=all")
	}

	// debug is rkt's global argument, so add it before the actual "run"
	globalArgs = append(globalArgs, fmt.Sprintf("--debug=%t", debug))

	prepareArgs := make([]string, 0, 50)
	runArgs := make([]string, 0, 50)

	prepareArgs = append(prepareArgs, globalArgs...)
	prepareArgs = append(prepareArgs, "prepare")
	runArgs = append(runArgs, globalArgs...)
	runArgs = append(runArgs, "run-prepared")

	// disable overlayfs
	if driverConfig.NoOverlay {
		prepareArgs = append(prepareArgs, "--no-overlay=true")
	}

	// Convert underscores to dashes in taskConfig names for use in volume names #2358
	sanitizedName := strings.Replace(cfg.Name, "_", "-", -1)

	// Mount /alloc
	allocVolName := fmt.Sprintf("%s-%s-alloc", cfg.AllocID, sanitizedName)
	prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s", allocVolName, cfg.TaskDir().SharedAllocDir))
	prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", allocVolName, "/alloc"))

	// Mount /local
	localVolName := fmt.Sprintf("%s-%s-local", cfg.AllocID, sanitizedName)
	prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s", localVolName, cfg.TaskDir().LocalDir))
	prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", localVolName, "/local"))

	// Mount /secrets
	secretsVolName := fmt.Sprintf("%s-%s-secrets", cfg.AllocID, sanitizedName)
	prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s", secretsVolName, cfg.TaskDir().SecretsDir))
	prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", secretsVolName, "/secrets"))

	// Mount arbitrary volumes if enabled
	if len(driverConfig.Volumes) > 0 {
		if !d.config.VolumesEnabled {
			return nil, nil, fmt.Errorf("volumes_enabled is false; cannot use rkt volumes: %+q", driverConfig.Volumes)
		}
		for i, rawvol := range driverConfig.Volumes {
			parts := strings.Split(rawvol, ":")
			readOnly := "false"
			// job spec:
			//   volumes = ["/host/path:/container/path[:readOnly]"]
			// the third parameter is optional, mount is read-write by default
			if len(parts) == 3 {
				if parts[2] == "readOnly" {
					d.logger.Debug("mounting volume as readOnly", "volume", strings.Join(parts[:2], parts[1]))
					readOnly = "true"
				} else {
					d.logger.Warn("unknown volume parameter ignored for mount", "parameter", parts[2], "mount", parts[0])
				}
			} else if len(parts) != 2 {
				return nil, nil, fmt.Errorf("invalid rkt volume: %q", rawvol)
			}
			volName := fmt.Sprintf("%s-%s-%d", cfg.AllocID, sanitizedName, i)
			prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s,readOnly=%s", volName, parts[0], readOnly))
			prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", volName, parts[1]))
		}
	}

	// Mount task volumes, always do
	for i, vol := range cfg.Mounts {
		volName := fmt.Sprintf("%s-%s-taskmounts-%d", cfg.AllocID, sanitizedName, i)
		prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s,readOnly=%v", volName, vol.HostPath, vol.Readonly))
		prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", volName, vol.TaskPath))
	}

	// Mount task devices, always do
	for i, vol := range cfg.Devices {
		volName := fmt.Sprintf("%s-%s-taskdevices-%d", cfg.AllocID, sanitizedName, i)
		readOnly := !strings.Contains(vol.Permissions, "w")
		prepareArgs = append(prepareArgs, fmt.Sprintf("--volume=%s,kind=host,source=%s,readOnly=%v", volName, vol.HostPath, readOnly))
		prepareArgs = append(prepareArgs, fmt.Sprintf("--mount=volume=%s,target=%s", volName, vol.TaskPath))
	}

	// Inject environment variables
	for k, v := range cfg.Env {
		prepareArgs = append(prepareArgs, fmt.Sprintf("--set-env=%s=%s", k, v))
	}

	// Image is set here, because the commands that follow apply to it
	prepareArgs = append(prepareArgs, img)

	// Check if the user has overridden the exec command.
	if driverConfig.Command != "" {
		prepareArgs = append(prepareArgs, fmt.Sprintf("--exec=%v", driverConfig.Command))
	}

	// Add memory isolator
	prepareArgs = append(prepareArgs, fmt.Sprintf("--memory=%v", cfg.Resources.LinuxResources.MemoryLimitBytes))

	// Add CPU isolator
	prepareArgs = append(prepareArgs, fmt.Sprintf("--cpu-shares=%v", cfg.Resources.LinuxResources.CPUShares))

	// Add DNS servers
	if len(driverConfig.DNSServers) == 1 && (driverConfig.DNSServers[0] == "host" || driverConfig.DNSServers[0] == "none") {
		// Special case single item lists with the special values "host" or "none"
		runArgs = append(runArgs, fmt.Sprintf("--dns=%s", driverConfig.DNSServers[0]))
	} else {
		for _, ip := range driverConfig.DNSServers {
			if err := net.ParseIP(ip); err == nil {
				wrappedErr := fmt.Errorf("invalid ip address for container dns server %q", ip)
				d.logger.Debug("error parsing DNS server", "error", wrappedErr)
				return nil, nil, wrappedErr
			}
			runArgs = append(runArgs, fmt.Sprintf("--dns=%s", ip))
		}
	}

	// set DNS search domains
	for _, domain := range driverConfig.DNSSearchDomains {
		runArgs = append(runArgs, fmt.Sprintf("--dns-search=%s", domain))
	}

	// set network
	network := strings.Join(driverConfig.Net, ",")
	if network != "" {
		runArgs = append(runArgs, fmt.Sprintf("--net=%s", network))
	}

	// Setup port mapping and exposed ports
	if len(cfg.Resources.NomadResources.Networks) == 0 {
		d.logger.Debug("no network interfaces are available")
		if len(driverConfig.PortMap) > 0 {
			return nil, nil, fmt.Errorf("Trying to map ports but no network interface is available")
		}
	} else if network == "host" {
		// Port mapping is skipped when host networking is used.
		d.logger.Debug("Ignoring port_map when using --net=host", "task_name", cfg.Name)
	} else {
		network := cfg.Resources.NomadResources.Networks[0]
		for _, port := range network.ReservedPorts {
			var containerPort string

			mapped, ok := driverConfig.PortMap[port.Label]
			if !ok {
				// If the user doesn't have a mapped port using port_map, driver stops running container.
				return nil, nil, fmt.Errorf("port_map is not set. When you defined port in the resources, you need to configure port_map.")
			}
			containerPort = mapped

			hostPortStr := strconv.Itoa(port.Value)

			d.logger.Debug("driver.rkt: exposed port", "containerPort", containerPort)
			// Add port option to rkt run arguments. rkt allows multiple port args
			prepareArgs = append(prepareArgs, fmt.Sprintf("--port=%s:%s", containerPort, hostPortStr))
		}

		for _, port := range network.DynamicPorts {
			// By default we will map the allocated port 1:1 to the container
			var containerPort string

			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
				containerPort = mapped
			} else {
				// If the user doesn't have mapped a port using port_map, driver stops running container.
				return nil, nil, fmt.Errorf("port_map is not set. When you defined port in the resources, you need to configure port_map.")
			}

			hostPortStr := strconv.Itoa(port.Value)

			d.logger.Debug("exposed port", "containerPort", containerPort, "task_name", cfg.Name)
			// Add port option to rkt run arguments. rkt allows multiple port args
			prepareArgs = append(prepareArgs, fmt.Sprintf("--port=%s:%s", containerPort, hostPortStr))
		}

	}

	// If a user has been specified for the taskConfig, pass it through to the user
	if cfg.User != "" {
		prepareArgs = append(prepareArgs, fmt.Sprintf("--user=%s", cfg.User))
	}

	// There's no taskConfig-level parameter for groups so check the driver
	// config for a custom group
	if driverConfig.Group != "" {
		prepareArgs = append(prepareArgs, fmt.Sprintf("--group=%s", driverConfig.Group))
	}

	// Add user passed arguments.
	if len(driverConfig.Args) != 0 {

		// Need to start arguments with "--"
		prepareArgs = append(prepareArgs, "--")

		for _, arg := range driverConfig.Args {
			prepareArgs = append(prepareArgs, fmt.Sprintf("%v", arg))
		}
	}

	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, fmt.Sprintf("%s-executor.out", cfg.Name))
	executorConfig := &executor.ExecutorConfig{
		LogFile:  pluginLogFile,
		LogLevel: "debug",
	}

	execImpl, pluginClient, err := executor.CreateExecutor(
		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
		d.nomadConfig, executorConfig)
	if err != nil {
		return nil, nil, err
	}

	absPath, err := GetAbsolutePath(rktCmd)
	if err != nil {
		return nil, nil, err
	}

	var outBuf, errBuf bytes.Buffer
	cmd := exec.Command(rktCmd, prepareArgs...)
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf
	d.logger.Debug("preparing taskConfig", "pod", img, "task_name", cfg.Name, "args", prepareArgs)
	if err := cmd.Run(); err != nil {
		return nil, nil, fmt.Errorf("Error preparing rkt pod: %s\n\nOutput: %s\n\nError: %s",
			err, outBuf.String(), errBuf.String())
	}
	uuid := strings.TrimSpace(outBuf.String())
	d.logger.Debug("taskConfig prepared", "pod", img, "task_name", cfg.Name, "uuid", uuid)
	runArgs = append(runArgs, uuid)

	// The taskConfig's environment is set via --set-env flags above, but the rkt
	// command itself needs an environment with PATH set to find iptables.

	// TODO (preetha) need to figure out how to pass env.blacklist from client config
	eb := taskenv.NewEmptyBuilder()
	filter := strings.Split(config.DefaultEnvBlacklist, ",")
	rktEnv := eb.SetHostEnvvars(filter).Build()

	// Enable ResourceLimits to place the executor in a parent cgroup of
	// the rkt container. This allows stats collection via the executor to
	// work just like it does for exec.
	execCmd := &executor.ExecCommand{
		Cmd:            absPath,
		Args:           runArgs,
		ResourceLimits: true,
		Resources:      cfg.Resources,

		// Use rktEnv, the environment needed for running rkt, not the task env
		Env: rktEnv.List(),

		TaskDir:    cfg.TaskDir().Dir,
		StdoutPath: cfg.StdoutPath,
		StderrPath: cfg.StderrPath,
	}
	ps, err := execImpl.Launch(execCmd)
	if err != nil {
		pluginClient.Kill()
		return nil, nil, err
	}

	d.logger.Debug("started taskConfig", "aci", img, "uuid", uuid, "task_name", cfg.Name, "args", runArgs)
	h := &taskHandle{
		exec:         execImpl,
		env:          rktEnv,
		pid:          ps.Pid,
		uuid:         uuid,
		pluginClient: pluginClient,
		taskConfig:   cfg,
		procState:    drivers.TaskStateRunning,
		startedAt:    time.Now().Round(time.Millisecond),
		logger:       d.logger,
	}

	rktDriverState := TaskState{
		ReattachConfig: shared.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
		Pid:            ps.Pid,
		TaskConfig:     cfg,
		StartedAt:      h.startedAt,
		UUID:           uuid,
	}

	if err := handle.SetDriverState(&rktDriverState); err != nil {
		d.logger.Error("failed to start task, error setting driver state", "error", err, "task_name", cfg.Name)
		execImpl.Shutdown("", 0)
		pluginClient.Kill()
		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
	}

	d.tasks.Set(cfg.ID, h)
	go h.run()

	// Do not attempt to retrieve driver network if one won't exist:
	//  - "host" means the container itself has no networking metadata
	//  - "none" means no network is configured
	// https://coreos.com/rkt/docs/latest/networking/overview.html#no-loopback-only-networking
	var driverNetwork *drivers.DriverNetwork
	if network != "host" && network != "none" {
		d.logger.Debug("retrieving network information for pod", "pod", img, "UUID", uuid, "task_name", cfg.Name)
		driverNetwork, err = rktGetDriverNetwork(uuid, driverConfig.PortMap, d.logger)
		if err != nil && !pluginClient.Exited() {
			d.logger.Warn("network status retrieval for pod failed", "pod", img, "UUID", uuid, "task_name", cfg.Name, "error", err)

			// If a portmap was given, this turns into a fatal error
			if len(driverConfig.PortMap) != 0 {
				pluginClient.Kill()
				return nil, nil, fmt.Errorf("Trying to map ports but driver could not determine network information")
			}
		}
	}

	return handle, driverNetwork, nil

}

func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	ch := make(chan *drivers.ExitResult)
	go d.handleWait(ctx, handle, ch)

	return ch, nil
}

func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	if err := handle.exec.Shutdown(signal, timeout); err != nil {
		if handle.pluginClient.Exited() {
			return nil
		}
		return fmt.Errorf("executor Shutdown failed: %v", err)
	}

	return nil
}

func (d *Driver) DestroyTask(taskID string, force bool) error {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	if handle.IsRunning() && !force {
		return fmt.Errorf("cannot destroy running task")
	}

	if !handle.pluginClient.Exited() {
		if handle.IsRunning() {
			if err := handle.exec.Shutdown("", 0); err != nil {
				handle.logger.Error("destroying executor failed", "err", err)
			}
		}

		handle.pluginClient.Kill()
	}

	d.tasks.Delete(taskID)
	return nil
}

func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	return handle.TaskStatus(), nil
}

func (d *Driver) TaskStats(taskID string) (*drivers.TaskResourceUsage, error) {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	return handle.exec.Stats()
}

func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
	return d.eventer.TaskEvents(ctx)
}

func (d *Driver) SignalTask(taskID string, signal string) error {
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	sig := os.Interrupt
	if s, ok := signals.SignalLookup[signal]; ok {
		d.logger.Warn("signal to send to task unknown, using SIGINT", "signal", signal, "task_id", handle.taskConfig.ID, "task_name", handle.taskConfig.Name)
		sig = s
	}
	return handle.exec.Signal(sig)
}

func (d *Driver) ExecTask(taskID string, cmdArgs []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
	if len(cmdArgs) == 0 {
		return nil, fmt.Errorf("error cmd must have atleast one value")
	}
	handle, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}
	// enter + UUID + cmd + args...
	cmd := cmdArgs[0]
	args := cmdArgs[1:]
	enterArgs := make([]string, 3+len(args))
	enterArgs[0] = "enter"
	enterArgs[1] = handle.uuid
	enterArgs[2] = handle.env.ReplaceEnv(cmd)
	copy(enterArgs[3:], handle.env.ParseAndReplace(args))
	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), rktCmd, enterArgs)
	if err != nil {
		return nil, err
	}

	return &drivers.ExecTaskResult{
		Stdout: out,
		ExitResult: &drivers.ExitResult{
			ExitCode: exitCode,
		},
	}, nil

}

// GetAbsolutePath returns the absolute path of the passed binary by resolving
// it in the path and following symlinks.
func GetAbsolutePath(bin string) (string, error) {
	lp, err := exec.LookPath(bin)
	if err != nil {
		return "", fmt.Errorf("failed to resolve path to %q executable: %v", bin, err)
	}

	return filepath.EvalSymlinks(lp)
}

func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, logger hclog.Logger) (*drivers.DriverNetwork, error) {
	deadline := time.Now().Add(networkDeadline)
	var lastErr error
	try := 0

	for time.Now().Before(deadline) {
		try++
		if status, err := rktGetStatus(uuid, logger); err == nil {
			for _, net := range status.Networks {
				if !net.IP.IsGlobalUnicast() {
					continue
				}

				// Get the pod manifest so we can figure out which ports are exposed
				var portmap map[string]int
				manifest, err := rktGetManifest(uuid)
				if err == nil {
					portmap, err = rktManifestMakePortMap(manifest, driverConfigPortMap)
					if err != nil {
						lastErr = fmt.Errorf("could not create manifest-based portmap: %v", err)
						return nil, lastErr
					}
				} else {
					lastErr = fmt.Errorf("could not get pod manifest: %v", err)
					return nil, lastErr
				}

				// This is a successful landing; log if its not the first attempt.
				if try > 1 {
					logger.Debug("retrieved network info for pod", "uuid", uuid, "attempt", try)
				}
				return &drivers.DriverNetwork{
					PortMap: portmap,
					IP:      status.Networks[0].IP.String(),
				}, nil
			}

			if len(status.Networks) == 0 {
				lastErr = fmt.Errorf("no networks found")
			} else {
				lastErr = fmt.Errorf("no good driver networks out of %d returned", len(status.Networks))
			}
		} else {
			lastErr = fmt.Errorf("getting status failed: %v", err)
		}

		waitTime := getJitteredNetworkRetryTime()
		logger.Debug("failed getting network info for pod, sleeping", "uuid", uuid, "attempt", try, "err", lastErr, "wait", waitTime)
		time.Sleep(waitTime)
	}
	return nil, fmt.Errorf("timed out, last error: %v", lastErr)
}

// Given a rkt/appc pod manifest and driver portmap configuration, create
// a driver portmap.
func rktManifestMakePortMap(manifest *appcschema.PodManifest, configPortMap map[string]string) (map[string]int, error) {
	if len(manifest.Apps) == 0 {
		return nil, fmt.Errorf("manifest has no apps")
	}
	if len(manifest.Apps) != 1 {
		return nil, fmt.Errorf("manifest has multiple apps!")
	}
	app := manifest.Apps[0]
	if app.App == nil {
		return nil, fmt.Errorf("specified app has no App object")
	}

	portMap := make(map[string]int)
	for svc, name := range configPortMap {
		for _, port := range app.App.Ports {
			if port.Name.String() == name {
				portMap[svc] = int(port.Port)
			}
		}
	}
	return portMap, nil
}

// Retrieve pod status for the pod with the given UUID.
func rktGetStatus(uuid string, logger hclog.Logger) (*rktv1.Pod, error) {
	statusArgs := []string{
		"status",
		"--format=json",
		uuid,
	}
	var outBuf, errBuf bytes.Buffer
	cmd := exec.Command(rktCmd, statusArgs...)
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf
	if err := cmd.Run(); err != nil {
		if outBuf.Len() > 0 {
			logger.Debug("status output for UUID", "uuid", uuid, "error", elide(outBuf))
		}
		if errBuf.Len() == 0 {
			return nil, err
		}
		logger.Debug("status error output", "uuid", uuid, "error", elide(errBuf))
		return nil, fmt.Errorf("%s. stderr: %q", err, elide(errBuf))
	}
	var status rktv1.Pod
	if err := json.Unmarshal(outBuf.Bytes(), &status); err != nil {
		return nil, err
	}
	return &status, nil
}

// Retrieves a pod manifest
func rktGetManifest(uuid string) (*appcschema.PodManifest, error) {
	statusArgs := []string{
		"cat-manifest",
		uuid,
	}
	var outBuf bytes.Buffer
	cmd := exec.Command(rktCmd, statusArgs...)
	cmd.Stdout = &outBuf
	cmd.Stderr = ioutil.Discard
	if err := cmd.Run(); err != nil {
		return nil, err
	}
	var manifest appcschema.PodManifest
	if err := json.Unmarshal(outBuf.Bytes(), &manifest); err != nil {
		return nil, err
	}
	return &manifest, nil
}

// Create a time with a 0 to 100ms jitter for rktGetDriverNetwork retries
func getJitteredNetworkRetryTime() time.Duration {
	return time.Duration(900+rand.Intn(100)) * time.Millisecond
}

// Conditionally elide a buffer to an arbitrary length
func elideToLen(inBuf bytes.Buffer, length int) bytes.Buffer {
	if inBuf.Len() > length {
		inBuf.Truncate(length)
		inBuf.WriteString("...")
	}
	return inBuf
}

// Conditionally elide a buffer to an 80 character string
func elide(inBuf bytes.Buffer) string {
	tempBuf := elideToLen(inBuf, 80)
	return tempBuf.String()
}

func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
	defer close(ch)
	var result *drivers.ExitResult
	ps, err := handle.exec.Wait(ctx)
	if err != nil {
		result = &drivers.ExitResult{
			Err: fmt.Errorf("executor: error waiting on process: %v", err),
		}
	} else {
		result = &drivers.ExitResult{
			ExitCode: ps.ExitCode,
			Signal:   ps.Signal,
		}
	}

	select {
	case <-ctx.Done():
	case <-d.ctx.Done():
	case ch <- result:
	}
}

func (d *Driver) Shutdown() {
	d.signalShutdown()
}