open-nomad/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go

// +build linux

package libcontainer

import (
	"encoding/json"
	"fmt"
	"io"
	"net"
	"os"
	"strings"
	"syscall" // only for Errno
	"unsafe"

	"golang.org/x/sys/unix"

	"github.com/containerd/console"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/user"
	"github.com/opencontainers/runc/libcontainer/utils"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"github.com/vishvananda/netlink"
)

// initType names the kind of init process that newContainerInit should build.
type initType string

// Recognised init types: initSetns makes newContainerInit return a
// linuxSetnsInit and initStandard a linuxStandardInit. Any other value is
// rejected by newContainerInit with an "unknown init type" error.
const (
	initSetns    initType = "setns"
	initStandard initType = "standard"
)

// pid is a JSON-serialisable pair of process IDs, encoded under the keys
// "pid" and "pid_first".
// NOTE(review): it is not referenced in the visible part of this file;
// presumably it is exchanged with the parent during bootstrap — confirm
// against the callers.
type pid struct {
	Pid           int `json:"pid"`
	PidFirstChild int `json:"pid_first"`
}

// network is an internal struct used to setup container networks. It embeds
// the user-facing configs.Network and adds the bookkeeping that only the init
// process needs; setupNetwork hands each value to the strategy selected by
// its Type.
type network struct {
	configs.Network

	// TempVethPeerName is a unique temporary veth peer name that was placed into
	// the container's namespace.
	TempVethPeerName string `json:"temp_veth_peer_name"`
}

// initConfig is used for transferring parameters from Exec() to Init(). It is
// decoded from JSON by newContainerInit, which reads it from the init pipe.
//
// How the fields are consumed in this file:
//   - Env is loaded into the process environment by populateProcessEnvironment.
//   - PassedFilesCount+3 is the first fd that finalizeNamespace marks
//     close-on-exec.
//   - Capabilities, when non-nil, takes precedence over Config.Capabilities in
//     finalizeNamespace.
//   - Cwd, when non-empty, is the directory finalizeNamespace changes into.
//   - User and AdditionalGroups are resolved by setupUser against the passwd
//     and group files.
//   - Rootless makes setupUser skip setgroups and reject AdditionalGroups.
//   - ConsoleWidth and ConsoleHeight resize the new pty in setupConsole, but
//     only when both are non-zero.
//   - Networks are initialised one by one by setupNetwork.
//
// The remaining fields are not read in the visible part of this file.
type initConfig struct {
	Args             []string              `json:"args"`
	Env              []string              `json:"env"`
	Cwd              string                `json:"cwd"`
	Capabilities     *configs.Capabilities `json:"capabilities"`
	ProcessLabel     string                `json:"process_label"`
	AppArmorProfile  string                `json:"apparmor_profile"`
	NoNewPrivileges  bool                  `json:"no_new_privileges"`
	User             string                `json:"user"`
	AdditionalGroups []string              `json:"additional_groups"`
	Config           *configs.Config       `json:"config"`
	Networks         []*network            `json:"network"`
	PassedFilesCount int                   `json:"passed_files_count"`
	ContainerId      string                `json:"containerid"`
	Rlimits          []configs.Rlimit      `json:"rlimits"`
	CreateConsole    bool                  `json:"create_console"`
	ConsoleWidth     uint16                `json:"console_width"`
	ConsoleHeight    uint16                `json:"console_height"`
	Rootless         bool                  `json:"rootless"`
}

// initer is implemented by every init flavour that newContainerInit can
// return (linuxSetnsInit and linuxStandardInit).
type initer interface {
	// Init runs the initialisation sequence and returns the first error hit.
	Init() error
}

// newContainerInit decodes the initConfig that was written to pipe as JSON,
// loads its environment variables into the current process and returns the
// initer matching t.
//
// consoleSocket and fifoFd are stored on the returned initer untouched. An
// error is returned when the payload cannot be decoded, when it is empty
// (a JSON null), when an environment entry is malformed, or when t is not a
// known init type.
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
	var config *initConfig
	if err := json.NewDecoder(pipe).Decode(&config); err != nil {
		return nil, err
	}
	// A literal JSON null decodes without error but leaves the pointer nil;
	// reject it here instead of panicking on the dereference below.
	if config == nil {
		return nil, fmt.Errorf("init config is empty")
	}
	if err := populateProcessEnvironment(config.Env); err != nil {
		return nil, err
	}
	switch t {
	case initSetns:
		return &linuxSetnsInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			config:        config,
		}, nil
	case initStandard:
		return &linuxStandardInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			parentPid:     unix.Getppid(),
			config:        config,
			fifoFd:        fifoFd,
		}, nil
	}
	return nil, fmt.Errorf("unknown init type %q", t)
}

// populateProcessEnvironment exports every "KEY=VALUE" entry of env into the
// environment of the current process. Entries are applied in order; the first
// entry that lacks an "=" separator, or that os.Setenv rejects, stops the
// loop and its error is returned. Only the first "=" separates key from
// value, so values may themselves contain "=".
func populateProcessEnvironment(env []string) error {
	for _, entry := range env {
		sep := strings.Index(entry, "=")
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", entry)
		}
		key, value := entry[:sep], entry[sep+1:]
		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}

// finalizeNamespace performs the last privilege-related steps before the
// command is executed inside the namespace. In order, it:
//
//  1. marks every fd from config.PassedFilesCount+3 upwards close-on-exec,
//  2. applies the capability bounding set,
//  3. switches to the configured user (setupUser) with keep-caps set around
//     the switch,
//  4. calls ApplyCaps on the capability list, and
//  5. changes into config.Cwd when one is configured.
//
// config.Capabilities takes precedence over config.Config.Capabilities; when
// neither is set an empty configs.Capabilities is used. The first failing
// step aborts the sequence and its (wrapped) error is returned.
func finalizeNamespace(config *initConfig) error {
	// Ensure that all unwanted fds we may have accidentally
	// inherited are marked close-on-exec so they stay out of the
	// container
	if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
		return errors.Wrap(err, "close exec fds")
	}

	// Pick the most specific capability set available, falling back to an
	// empty one.
	capabilities := &configs.Capabilities{}
	if config.Capabilities != nil {
		capabilities = config.Capabilities
	} else if config.Config.Capabilities != nil {
		capabilities = config.Config.Capabilities
	}
	w, err := newContainerCapList(capabilities)
	if err != nil {
		return err
	}
	// drop capabilities in bounding set before changing user
	if err := w.ApplyBoundingSet(); err != nil {
		return errors.Wrap(err, "apply bounding set")
	}
	// preserve existing capabilities while we change users
	if err := system.SetKeepCaps(); err != nil {
		return errors.Wrap(err, "set keep caps")
	}
	if err := setupUser(config); err != nil {
		return errors.Wrap(err, "setup user")
	}
	// The user switch is done, so the keep-caps flag is cleared again before
	// the capability list is applied.
	if err := system.ClearKeepCaps(); err != nil {
		return errors.Wrap(err, "clear keep caps")
	}
	if err := w.ApplyCaps(); err != nil {
		return errors.Wrap(err, "apply caps")
	}
	// An empty Cwd means "stay where we are".
	if config.Cwd != "" {
		if err := unix.Chdir(config.Cwd); err != nil {
			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
		}
	}
	return nil
}

// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
//
// socket is always closed before returning. The pty is resized only when both
// config.ConsoleHeight and config.ConsoleWidth are non-zero, and the slave is
// mounted into the rootfs only when mount is true. The local copy of the pty
// master is closed on every return path once it has been created.
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
	defer socket.Close()
	// At this point, /dev/ptmx points to something that we would expect. We
	// used to change the owner of the slave path, but since the /dev/pts mount
	// can have gid=X set (at the users' option). So touching the owner of the
	// slave PTY is not necessary, as the kernel will handle that for us. Note
	// however, that setupUser (specifically fixStdioPermissions) *will* change
	// the UID owner of the console to be the user the process will run as (so
	// they can actually control their console).

	pty, slavePath, err := console.NewPty()
	if err != nil {
		return err
	}

	// After we return from here, we don't need the console anymore. Register
	// the close right away so that a failed resize below does not leak the
	// master fd.
	defer pty.Close()

	if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
		err = pty.Resize(console.WinSize{
			Height: config.ConsoleHeight,
			Width:  config.ConsoleWidth,
		})

		if err != nil {
			return err
		}
	}

	// Mount the console inside our rootfs.
	if mount {
		if err := mountConsole(slavePath); err != nil {
			return err
		}
	}
	// While we can access console.master, using the API is a good idea.
	if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
		return err
	}
	// Now, dup over all the things.
	return dupStdio(slavePath)
}

// syncParentReady performs the "ready" handshake with the parent: it writes
// procReady to pipe to announce that init is about to Exec the child process,
// then blocks until procRun is read back, which is the parent's permission to
// go ahead. The first failing half of the handshake is returned as the error.
func syncParentReady(pipe io.ReadWriter) error {
	// Announce readiness to the parent.
	if err := writeSync(pipe, procReady); err != nil {
		return err
	}
	// Block until the parent gives the all-clear.
	return readSync(pipe, procRun)
}

// syncParentHooks performs the "hooks" handshake with the parent: it writes
// procHooks to pipe to ask the parent to execute the pre-start hooks, then
// blocks until procResume is read back, which signals that init may continue.
// The first failing half of the handshake is returned as the error.
func syncParentHooks(pipe io.ReadWriter) error {
	// Ask the parent to run the hooks.
	if err := writeSync(pipe, procHooks); err != nil {
		return err
	}
	// Block until the parent tells us to resume.
	return readSync(pipe, procResume)
}

// setupUser changes the groups, gid, and uid for the user inside the container.
//
// config.User is resolved against the passwd and group files, defaulting to
// uid 0, gid 0 and home "/". config.AdditionalGroups is resolved against the
// group file. Both the uid and the gid must be mapped according to
// config.Config, otherwise an error is returned before any id is changed.
//
// The statements below are order-sensitive: stdio ownership is fixed first,
// then supplementary groups, then gid, then uid. Finally HOME is set from the
// resolved user, but only when it is not already set to a non-empty value.
func setupUser(config *initConfig) error {
	// Set up defaults.
	defaultExecUser := user.ExecUser{
		Uid:  0,
		Gid:  0,
		Home: "/",
	}

	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}

	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}

	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
	if err != nil {
		return err
	}

	// Only consult the group file when additional groups were requested.
	var addGroups []int
	if len(config.AdditionalGroups) > 0 {
		addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
		if err != nil {
			return err
		}
	}

	// Rather than just erroring out later in setuid(2) and setgid(2), check
	// that the user is mapped here.
	if _, err := config.Config.HostUID(execUser.Uid); err != nil {
		return fmt.Errorf("cannot set uid to unmapped user in user namespace")
	}
	if _, err := config.Config.HostGID(execUser.Gid); err != nil {
		return fmt.Errorf("cannot set gid to unmapped user in user namespace")
	}

	if config.Rootless {
		// We cannot set any additional groups in a rootless container and thus
		// we bail if the user asked us to do so. TODO: We currently can't do
		// this check earlier, but if libcontainer.Process.User was typesafe
		// this might work.
		if len(addGroups) > 0 {
			return fmt.Errorf("cannot set any additional groups in a rootless container")
		}
	}

	// Before we change to the container's user make sure that the processes
	// STDIO is correctly owned by the user that we are switching to.
	if err := fixStdioPermissions(config, execUser); err != nil {
		return err
	}

	// This isn't allowed in an unprivileged user namespace since Linux 3.19.
	// There's nothing we can do about /etc/group entries, so we silently
	// ignore setting groups here (since the user didn't explicitly ask us to
	// set the group).
	if !config.Rootless {
		// Supplementary groups are those of the resolved user followed by the
		// explicitly requested additional groups.
		suppGroups := append(execUser.Sgids, addGroups...)
		if err := unix.Setgroups(suppGroups); err != nil {
			return err
		}
	}

	// The gid is changed before the uid.
	if err := system.Setgid(execUser.Gid); err != nil {
		return err
	}
	if err := system.Setuid(execUser.Uid); err != nil {
		return err
	}

	// if we didn't get HOME already, set it based on the user's HOME
	if envHome := os.Getenv("HOME"); envHome == "" {
		if err := os.Setenv("HOME", execUser.Home); err != nil {
			return err
		}
	}
	return nil
}

// fixStdioPermissions hands ownership of the process's stdin, stderr and
// stdout over to u.Uid, leaving each descriptor's group as it is. The stdio
// files are created outside of the container, so their owner has to be
// localized to the user the process will run as.
//
// Descriptors that refer to the same device as /dev/null are skipped, and a
// chown that fails with EINVAL or EPERM is tolerated. Any other error from
// stat, fstat or fchown is returned.
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
	var devNull unix.Stat_t
	if err := unix.Stat("/dev/null", &devNull); err != nil {
		return err
	}

	stdio := []uintptr{os.Stdin.Fd(), os.Stderr.Fd(), os.Stdout.Fd()}
	for _, raw := range stdio {
		fd := int(raw)

		var st unix.Stat_t
		if err := unix.Fstat(fd, &st); err != nil {
			return err
		}

		// /dev/null used as a stdio fd is never chowned.
		if st.Rdev == devNull.Rdev {
			continue
		}

		// Only the uid owner changes: the mount may prefer a different gid
		// and there is no reason to override it. The uid has to change
		// because users expect to be able to use their console when running
		// as a non-root user inside the container.
		err := unix.Fchown(fd, u.Uid, int(st.Gid))

		// EINVAL means st.Gid is not mapped in the user namespace; EPERM
		// means the inode's current owner is not mapped in our user
		// namespace (privileged_wrt_inode_uidgid() has failed). In both
		// configurations it is better to leave the stdio untouched than to
		// bail, so only other failures are reported.
		if err != nil && err != unix.EINVAL && err != unix.EPERM {
			return err
		}
	}
	return nil
}

// setupNetwork initializes every network listed in config.Networks, in order.
// For each one the strategy matching its Type is looked up and asked to
// initialize it; the first lookup or initialization failure is returned.
func setupNetwork(config *initConfig) error {
	for _, nw := range config.Networks {
		strategy, err := getStrategy(nw.Type)
		if err != nil {
			return err
		}
		if err = strategy.initialize(nw); err != nil {
			return err
		}
	}
	return nil
}

// setupRoute installs every route from config.Routes, in order. Each route
// needs a destination in CIDR notation, a parseable source and gateway IP,
// and the name of an existing interface. The first route that fails to
// parse, resolve or install aborts the loop and its error is returned.
func setupRoute(config *configs.Config) error {
	for _, r := range config.Routes {
		_, dstNet, err := net.ParseCIDR(r.Destination)
		if err != nil {
			return err
		}

		srcIP := net.ParseIP(r.Source)
		if srcIP == nil {
			return fmt.Errorf("Invalid source for route: %s", r.Source)
		}

		gwIP := net.ParseIP(r.Gateway)
		if gwIP == nil {
			return fmt.Errorf("Invalid gateway for route: %s", r.Gateway)
		}

		link, err := netlink.LinkByName(r.InterfaceName)
		if err != nil {
			return err
		}

		err = netlink.RouteAdd(&netlink.Route{
			Scope:     netlink.SCOPE_UNIVERSE,
			Dst:       dstNet,
			Src:       srcIP,
			Gw:        gwIP,
			LinkIndex: link.Attrs().Index,
		})
		if err != nil {
			return err
		}
	}
	return nil
}

// setupRlimits applies each entry of limits to the process pid, in order,
// using the entry's Soft value as the current limit and its Hard value as
// the maximum. The first limit that cannot be set aborts the loop with an
// error naming the rlimit type.
func setupRlimits(limits []configs.Rlimit, pid int) error {
	for i := range limits {
		lim := limits[i]
		value := unix.Rlimit{Cur: lim.Soft, Max: lim.Hard}
		if err := system.Prlimit(pid, lim.Type, value); err != nil {
			return fmt.Errorf("error setting rlimit type %v: %v", lim.Type, err)
		}
	}
	return nil
}

// _P_PID is the waitid(2) idtype that selects the child whose process ID
// matches the id argument.
const _P_PID = 1

const (
	// siginfoSize is the size in bytes of the kernel's siginfo_t.
	siginfoSize = 128

	// siginfoPtrSize is the size in bytes of a pointer on the target
	// platform: 4 on 32-bit and 8 on 64-bit.
	siginfoPtrSize = 4 << (^uintptr(0) >> 63)

	// siginfoUnionOffset is the offset of the union that follows the three
	// leading ints of siginfo_t. The union contains pointers, so it is
	// pointer-aligned: it starts at byte 12 on 32-bit platforms and at byte
	// 16 on 64-bit ones.
	siginfoUnionOffset = (12 + siginfoPtrSize - 1) &^ (siginfoPtrSize - 1)
)

// siginfo mirrors the part of the kernel's siginfo_t that isWaitable needs.
// The layout must match the kernel's exactly: three ints, alignment padding
// up to the union, and si_pid as the first member of that union. The struct
// is always siginfoSize bytes long so the kernel never writes past it.
type siginfo struct {
	si_signo int32
	si_errno int32
	si_code  int32
	// Alignment gap before the union: empty on 32-bit platforms, 4 bytes on
	// 64-bit ones. Without it si_pid would alias this padding and always
	// read as zero on 64-bit.
	_ [siginfoUnionOffset - 12]byte
	// below here is a union; si_pid is the only field we use
	si_pid int32
	// Pad to 128 bytes as detailed in blockUntilWaitable
	pad [siginfoSize - siginfoUnionOffset - 4]byte
}

// isWaitable returns true if the process has exited, false otherwise.
// It is based on blockUntilWaitable in src/os/wait_waitid.go.
//
// It issues a raw waitid(2) on pid with WEXITED|WNOWAIT|WNOHANG and decides
// from the si_pid field the kernel fills in: a non-zero si_pid yields true. A
// failing syscall is returned as an *os.SyscallError for "waitid", together
// with false.
func isWaitable(pid int) (bool, error) {
	si := &siginfo{}
	// WNOHANG makes the call return immediately instead of blocking, and
	// WNOWAIT leaves the child in a waitable state so that a later Wait can
	// still collect it. The unsafe.Pointer conversion is kept inside the call
	// expression, as the unsafe package requires for syscall arguments.
	_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
	if e != 0 {
		return false, os.NewSyscallError("waitid", e)
	}

	// si_pid is only non-zero when waitid reported a child.
	return si.si_pid != 0, nil
}

// isNoChildren reports whether err stands for unix.ECHILD (formerly
// syscall.ECHILD). Two shapes are recognised: a bare syscall.Errno, and an
// *os.SyscallError whose Err field is ECHILD. Every other error, including
// nil, yields false.
func isNoChildren(err error) bool {
	if errno, ok := err.(syscall.Errno); ok {
		return errno == unix.ECHILD
	}
	if scErr, ok := err.(*os.SyscallError); ok {
		return scErr.Err == unix.ECHILD
	}
	return false
}

// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
//
// Freeze, thaw, lookup, signal and wait failures are logged as warnings and do
// not abort the operation. The only error returned is a failure to list the
// pids, in which case the cgroup is thawed again before returning. No wait is
// performed at all when a child subreaper is configured.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
	var procs []*os.Process
	if err := m.Freeze(configs.Frozen); err != nil {
		logrus.Warn(err)
	}
	pids, err := m.GetAllPids()
	if err != nil {
		// Do not leave the cgroup frozen behind us, and report a failed thaw
		// the same way the success path below does instead of dropping it.
		if thawErr := m.Freeze(configs.Thawed); thawErr != nil {
			logrus.Warn(thawErr)
		}
		return err
	}
	for _, pid := range pids {
		p, err := os.FindProcess(pid)
		if err != nil {
			logrus.Warn(err)
			continue
		}
		// Remember the process even if signalling fails, so that it is still
		// considered for waiting below.
		procs = append(procs, p)
		if err := p.Signal(s); err != nil {
			logrus.Warn(err)
		}
	}
	if err := m.Freeze(configs.Thawed); err != nil {
		logrus.Warn(err)
	}

	subreaper, err := system.GetSubreaper()
	if err != nil {
		// The error here means that PR_GET_CHILD_SUBREAPER is not
		// supported because this code might run on a kernel older
		// than 3.4. We don't want to throw an error in that case,
		// and we simplify things, considering there is no subreaper
		// set.
		subreaper = 0
	}

	for _, p := range procs {
		if s != unix.SIGKILL {
			if ok, err := isWaitable(p.Pid); err != nil {
				// ECHILD only means the process is not our child; anything
				// else is worth a warning.
				if !isNoChildren(err) {
					logrus.Warn("signalAllProcesses: ", p.Pid, err)
				}
				continue
			} else if !ok {
				// Not ready to report so don't wait
				continue
			}
		}

		// In case a subreaper has been setup, this code must not
		// wait for the process. Otherwise, we cannot be sure the
		// current process will be reaped by the subreaper, while
		// the subreaper might be waiting for this process in order
		// to retrieve its exit code.
		if subreaper == 0 {
			if _, err := p.Wait(); err != nil {
				if !isNoChildren(err) {
					logrus.Warn("wait: ", err)
				}
			}
		}
	}
	return nil
}
Update runc/libcontainer and friends (#4655) * vendor: bump libcontainer and docker to remove Sirupsen imports * vendor: fix bad vendoring of archive package * vendor: fix api changes to cgroups in executor * vendor: fix docker api changes * vendor: update github.com/Azure/go-ansiterm to use non capitalized logrus import 2018-09-14 03:00:31 +00:00			`// +build linux`

			`package libcontainer`

			`import (`
			`"encoding/json"`
			`"fmt"`
			`"io"`
			`"net"`
			`"os"`
			`"strings"`
			`"syscall" // only for Errno`
			`"unsafe"`

			`"golang.org/x/sys/unix"`

			`"github.com/containerd/console"`
			`"github.com/opencontainers/runc/libcontainer/cgroups"`
			`"github.com/opencontainers/runc/libcontainer/configs"`
			`"github.com/opencontainers/runc/libcontainer/system"`
			`"github.com/opencontainers/runc/libcontainer/user"`
			`"github.com/opencontainers/runc/libcontainer/utils"`
			`"github.com/pkg/errors"`
			`"github.com/sirupsen/logrus"`
			`"github.com/vishvananda/netlink"`
			`)`

			`type initType string`

			`const (`
			`initSetns initType = "setns"`
			`initStandard initType = "standard"`
			`)`

			`type pid struct {`
			Pid int `json:"pid"`
			PidFirstChild int `json:"pid_first"`
			`}`

			`// network is an internal struct used to setup container networks.`
			`type network struct {`
			`configs.Network`

			`// TempVethPeerName is a unique temporary veth peer name that was placed into`
			`// the container's namespace.`
			TempVethPeerName string `json:"temp_veth_peer_name"`
			`}`

			`// initConfig is used for transferring parameters from Exec() to Init()`
			`type initConfig struct {`
			Args []string `json:"args"`
			Env []string `json:"env"`
			Cwd string `json:"cwd"`
			Capabilities *configs.Capabilities `json:"capabilities"`
			ProcessLabel string `json:"process_label"`
			AppArmorProfile string `json:"apparmor_profile"`
			NoNewPrivileges bool `json:"no_new_privileges"`
			User string `json:"user"`
			AdditionalGroups []string `json:"additional_groups"`
			Config *configs.Config `json:"config"`
			Networks []*network `json:"network"`
			PassedFilesCount int `json:"passed_files_count"`
			ContainerId string `json:"containerid"`
			Rlimits []configs.Rlimit `json:"rlimits"`
			CreateConsole bool `json:"create_console"`
			ConsoleWidth uint16 `json:"console_width"`
			ConsoleHeight uint16 `json:"console_height"`
			Rootless bool `json:"rootless"`
			`}`

			`type initer interface {`
			`Init() error`
			`}`

			`func newContainerInit(t initType, pipe os.File, consoleSocket os.File, fifoFd int) (initer, error) {`
			`var config *initConfig`
			`if err := json.NewDecoder(pipe).Decode(&config); err != nil {`
			`return nil, err`
			`}`
			`if err := populateProcessEnvironment(config.Env); err != nil {`
			`return nil, err`
			`}`
			`switch t {`
			`case initSetns:`
			`return &linuxSetnsInit{`
			`pipe: pipe,`
			`consoleSocket: consoleSocket,`
			`config: config,`
			`}, nil`
			`case initStandard:`
			`return &linuxStandardInit{`
			`pipe: pipe,`
			`consoleSocket: consoleSocket,`
			`parentPid: unix.Getppid(),`
			`config: config,`
			`fifoFd: fifoFd,`
			`}, nil`
			`}`
			`return nil, fmt.Errorf("unknown init type %q", t)`
			`}`

			`// populateProcessEnvironment loads the provided environment variables into the`
			`// current processes's environment.`
			`func populateProcessEnvironment(env []string) error {`
			`for _, pair := range env {`
			`p := strings.SplitN(pair, "=", 2)`
			`if len(p) < 2 {`
			`return fmt.Errorf("invalid environment '%v'", pair)`
			`}`
			`if err := os.Setenv(p[0], p[1]); err != nil {`
			`return err`
			`}`
			`}`
			`return nil`
			`}`

			`// finalizeNamespace drops the caps, sets the correct user`
			`// and working dir, and closes any leaked file descriptors`
			`// before executing the command inside the namespace`
			`func finalizeNamespace(config *initConfig) error {`
			`// Ensure that all unwanted fds we may have accidentally`
			`// inherited are marked close-on-exec so they stay out of the`
			`// container`
			`if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {`
			`return errors.Wrap(err, "close exec fds")`
			`}`

			`capabilities := &configs.Capabilities{}`
			`if config.Capabilities != nil {`
			`capabilities = config.Capabilities`
			`} else if config.Config.Capabilities != nil {`
			`capabilities = config.Config.Capabilities`
			`}`
			`w, err := newContainerCapList(capabilities)`
			`if err != nil {`
			`return err`
			`}`
			`// drop capabilities in bounding set before changing user`
			`if err := w.ApplyBoundingSet(); err != nil {`
			`return errors.Wrap(err, "apply bounding set")`
			`}`
			`// preserve existing capabilities while we change users`
			`if err := system.SetKeepCaps(); err != nil {`
			`return errors.Wrap(err, "set keep caps")`
			`}`
			`if err := setupUser(config); err != nil {`
			`return errors.Wrap(err, "setup user")`
			`}`
			`if err := system.ClearKeepCaps(); err != nil {`
			`return errors.Wrap(err, "clear keep caps")`
			`}`
			`if err := w.ApplyCaps(); err != nil {`
			`return errors.Wrap(err, "apply caps")`
			`}`
			`if config.Cwd != "" {`
			`if err := unix.Chdir(config.Cwd); err != nil {`
			`return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)`
			`}`
			`}`
			`return nil`
			`}`

			`// setupConsole sets up the console from inside the container, and sends the`
			`// master pty fd to the config.Pipe (using cmsg). This is done to ensure that`
			`// consoles are scoped to a container properly (see runc#814 and the many`
			`// issues related to that). This has to be run after we've pivoted to the new`
			`// rootfs (and the users' configuration is entirely set up).`
			`func setupConsole(socket os.File, config initConfig, mount bool) error {`
			`defer socket.Close()`
			`// At this point, /dev/ptmx points to something that we would expect. We`
			`// used to change the owner of the slave path, but since the /dev/pts mount`
			`// can have gid=X set (at the users' option). So touching the owner of the`
			`// slave PTY is not necessary, as the kernel will handle that for us. Note`
			`// however, that setupUser (specifically fixStdioPermissions) will change`
			`// the UID owner of the console to be the user the process will run as (so`
			`// they can actually control their console).`

			`pty, slavePath, err := console.NewPty()`
			`if err != nil {`
			`return err`
			`}`

			`if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {`
			`err = pty.Resize(console.WinSize{`
			`Height: config.ConsoleHeight,`
			`Width: config.ConsoleWidth,`
			`})`

			`if err != nil {`
			`return err`
			`}`
			`}`

			`// After we return from here, we don't need the console anymore.`
			`defer pty.Close()`

			`// Mount the console inside our rootfs.`
			`if mount {`
			`if err := mountConsole(slavePath); err != nil {`
			`return err`
			`}`
			`}`
			`// While we can access console.master, using the API is a good idea.`
			`if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {`
			`return err`
			`}`
			`// Now, dup over all the things.`
			`return dupStdio(slavePath)`
			`}`

			`// syncParentReady sends to the given pipe a JSON payload which indicates that`
			`// the init is ready to Exec the child process. It then waits for the parent to`
			`// indicate that it is cleared to Exec.`
			`func syncParentReady(pipe io.ReadWriter) error {`
			`// Tell parent.`
			`if err := writeSync(pipe, procReady); err != nil {`
			`return err`
			`}`

			`// Wait for parent to give the all-clear.`
			`if err := readSync(pipe, procRun); err != nil {`
			`return err`
			`}`

			`return nil`
			`}`

			`// syncParentHooks sends to the given pipe a JSON payload which indicates that`
			`// the parent should execute pre-start hooks. It then waits for the parent to`
			`// indicate that it is cleared to resume.`
			`func syncParentHooks(pipe io.ReadWriter) error {`
			`// Tell parent.`
			`if err := writeSync(pipe, procHooks); err != nil {`
			`return err`
			`}`

			`// Wait for parent to give the all-clear.`
			`if err := readSync(pipe, procResume); err != nil {`
			`return err`
			`}`

			`return nil`
			`}`

			`// setupUser changes the groups, gid, and uid for the user inside the container`
			`func setupUser(config *initConfig) error {`
			`// Set up defaults.`
			`defaultExecUser := user.ExecUser{`
			`Uid: 0,`
			`Gid: 0,`
			`Home: "/",`
			`}`

			`passwdPath, err := user.GetPasswdPath()`
			`if err != nil {`
			`return err`
			`}`

			`groupPath, err := user.GetGroupPath()`
			`if err != nil {`
			`return err`
			`}`

			`execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)`
			`if err != nil {`
			`return err`
			`}`

			`var addGroups []int`
			`if len(config.AdditionalGroups) > 0 {`
			`addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)`
			`if err != nil {`
			`return err`
			`}`
			`}`

			`// Rather than just erroring out later in setuid(2) and setgid(2), check`
			`// that the user is mapped here.`
			`if _, err := config.Config.HostUID(execUser.Uid); err != nil {`
			`return fmt.Errorf("cannot set uid to unmapped user in user namespace")`
			`}`
			`if _, err := config.Config.HostGID(execUser.Gid); err != nil {`
			`return fmt.Errorf("cannot set gid to unmapped user in user namespace")`
			`}`

			`if config.Rootless {`
			`// We cannot set any additional groups in a rootless container and thus`
			`// we bail if the user asked us to do so. TODO: We currently can't do`
			`// this check earlier, but if libcontainer.Process.User was typesafe`
			`// this might work.`
			`if len(addGroups) > 0 {`
			`return fmt.Errorf("cannot set any additional groups in a rootless container")`
			`}`
			`}`

			`// Before we change to the container's user make sure that the processes`
			`// STDIO is correctly owned by the user that we are switching to.`
			`if err := fixStdioPermissions(config, execUser); err != nil {`
			`return err`
			`}`

			`// This isn't allowed in an unprivileged user namespace since Linux 3.19.`
			`// There's nothing we can do about /etc/group entries, so we silently`
			`// ignore setting groups here (since the user didn't explicitly ask us to`
			`// set the group).`
			`if !config.Rootless {`
			`suppGroups := append(execUser.Sgids, addGroups...)`
			`if err := unix.Setgroups(suppGroups); err != nil {`
			`return err`
			`}`
			`}`

			`if err := system.Setgid(execUser.Gid); err != nil {`
			`return err`
			`}`
			`if err := system.Setuid(execUser.Uid); err != nil {`
			`return err`
			`}`

			`// if we didn't get HOME already, set it based on the user's HOME`
			`if envHome := os.Getenv("HOME"); envHome == "" {`
			`if err := os.Setenv("HOME", execUser.Home); err != nil {`
			`return err`
			`}`
			`}`
			`return nil`
			`}`

			`// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.`
			`// The ownership needs to match because it is created outside of the container and needs to be`
			`// localized.`
			`func fixStdioPermissions(config initConfig, u user.ExecUser) error {`
			`var null unix.Stat_t`
			`if err := unix.Stat("/dev/null", &null); err != nil {`
			`return err`
			`}`
			`for _, fd := range []uintptr{`
			`os.Stdin.Fd(),`
			`os.Stderr.Fd(),`
			`os.Stdout.Fd(),`
			`} {`
			`var s unix.Stat_t`
			`if err := unix.Fstat(int(fd), &s); err != nil {`
			`return err`
			`}`

			`// Skip chown of /dev/null if it was used as one of the STDIO fds.`
			`if s.Rdev == null.Rdev {`
			`continue`
			`}`

			`// We only change the uid owner (as it is possible for the mount to`
			`// prefer a different gid, and there's no reason for us to change it).`
			`// The reason why we don't just leave the default uid=X mount setup is`
			`// that users expect to be able to actually use their console. Without`
			`// this code, you couldn't effectively run as a non-root user inside a`
			`// container and also have a console set up.`
			`if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {`
			`// If we've hit an EINVAL then s.Gid isn't mapped in the user`
			`// namespace. If we've hit an EPERM then the inode's current owner`
			`// is not mapped in our user namespace (in particular,`
			`// privileged_wrt_inode_uidgid() has failed). In either case, we`
			`// are in a configuration where it's better for us to just not`
			`// touch the stdio rather than bail at this point.`
			`if err == unix.EINVAL \|\| err == unix.EPERM {`
			`continue`
			`}`
			`return err`
			`}`
			`}`
			`return nil`
			`}`

			`// setupNetwork sets up and initializes any network interface inside the container.`
			`func setupNetwork(config *initConfig) error {`
			`for _, config := range config.Networks {`
			`strategy, err := getStrategy(config.Type)`
			`if err != nil {`
			`return err`
			`}`
			`if err := strategy.initialize(config); err != nil {`
			`return err`
			`}`
			`}`
			`return nil`
			`}`

			`func setupRoute(config *configs.Config) error {`
			`for _, config := range config.Routes {`
			`_, dst, err := net.ParseCIDR(config.Destination)`
			`if err != nil {`
			`return err`
			`}`
			`src := net.ParseIP(config.Source)`
			`if src == nil {`
			`return fmt.Errorf("Invalid source for route: %s", config.Source)`
			`}`
			`gw := net.ParseIP(config.Gateway)`
			`if gw == nil {`
			`return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)`
			`}`
			`l, err := netlink.LinkByName(config.InterfaceName)`
			`if err != nil {`
			`return err`
			`}`
			`route := &netlink.Route{`
			`Scope: netlink.SCOPE_UNIVERSE,`
			`Dst: dst,`
			`Src: src,`
			`Gw: gw,`
			`LinkIndex: l.Attrs().Index,`
			`}`
			`if err := netlink.RouteAdd(route); err != nil {`
			`return err`
			`}`
			`}`
			`return nil`
			`}`

			`func setupRlimits(limits []configs.Rlimit, pid int) error {`
			`for _, rlimit := range limits {`
			`if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {`
			`return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)`
			`}`
			`}`
			`return nil`
			`}`

			`const _P_PID = 1`

			`type siginfo struct {`
			`si_signo int32`
			`si_errno int32`
			`si_code int32`
			`// below here is a union; si_pid is the only field we use`
			`si_pid int32`
			`// Pad to 128 bytes as detailed in blockUntilWaitable`
			`pad [96]byte`
			`}`

			`// isWaitable returns true if the process has exited false otherwise.`
			`// Its based off blockUntilWaitable in src/os/wait_waitid.go`
			`func isWaitable(pid int) (bool, error) {`
			`si := &siginfo{}`
			`_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED\|unix.WNOWAIT\|unix.WNOHANG, 0, 0)`
			`if e != 0 {`
			`return false, os.NewSyscallError("waitid", e)`
			`}`

			`return si.si_pid != 0, nil`
			`}`

			`// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise`
			`func isNoChildren(err error) bool {`
			`switch err := err.(type) {`
			`case syscall.Errno:`
			`if err == unix.ECHILD {`
			`return true`
			`}`
			`case *os.SyscallError:`
			`if err.Err == unix.ECHILD {`
			`return true`
			`}`
			`}`
			`return false`
			`}`

			`// signalAllProcesses freezes then iterates over all the processes inside the`
			`// manager's cgroups sending the signal s to them.`
			`// If s is SIGKILL then it will wait for each process to exit.`
			`// For all other signals it will check if the process is ready to report its`
			`// exit status and only if it is will a wait be performed.`
			`func signalAllProcesses(m cgroups.Manager, s os.Signal) error {`
			`var procs []*os.Process`
			`if err := m.Freeze(configs.Frozen); err != nil {`
			`logrus.Warn(err)`
			`}`
			`pids, err := m.GetAllPids()`
			`if err != nil {`
			`m.Freeze(configs.Thawed)`
			`return err`
			`}`
			`for _, pid := range pids {`
			`p, err := os.FindProcess(pid)`
			`if err != nil {`
			`logrus.Warn(err)`
			`continue`
			`}`
			`procs = append(procs, p)`
			`if err := p.Signal(s); err != nil {`
			`logrus.Warn(err)`
			`}`
			`}`
			`if err := m.Freeze(configs.Thawed); err != nil {`
			`logrus.Warn(err)`
			`}`

			`subreaper, err := system.GetSubreaper()`
			`if err != nil {`
			`// The error here means that PR_GET_CHILD_SUBREAPER is not`
			`// supported because this code might run on a kernel older`
			`// than 3.4. We don't want to throw an error in that case,`
			`// and we simplify things, considering there is no subreaper`
			`// set.`
			`subreaper = 0`
			`}`

			`for _, p := range procs {`
			`if s != unix.SIGKILL {`
			`if ok, err := isWaitable(p.Pid); err != nil {`
			`if !isNoChildren(err) {`
			`logrus.Warn("signalAllProcesses: ", p.Pid, err)`
			`}`
			`continue`
			`} else if !ok {`
			`// Not ready to report so don't wait`
			`continue`
			`}`
			`}`

			`// In case a subreaper has been setup, this code must not`
			`// wait for the process. Otherwise, we cannot be sure the`
			`// current process will be reaped by the subreaper, while`
			`// the subreaper might be waiting for this process in order`
			`// to retrieve its exit code.`
			`if subreaper == 0 {`
			`if _, err := p.Wait(); err != nil {`
			`if !isNoChildren(err) {`
			`logrus.Warn("wait: ", err)`
			`}`
			`}`
			`}`
			`}`
			`return nil`
			`}`