package docker

import (
	"fmt"
	"os"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/armon/circbuf"
	docker "github.com/fsouza/go-dockerclient"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	plugin "github.com/hashicorp/go-plugin"
	"github.com/hashicorp/nomad/drivers/docker/docklog"
	"github.com/hashicorp/nomad/plugins/drivers"
	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
	"golang.org/x/net/context"
)

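// taskHandle represents a single Docker container managed on behalf of a Nomad
// task. It wraps the go-dockerclient handles used to signal, exec into, stop,
// and wait on the container, along with the docker logger plugin that streams
// the container's output.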
type taskHandle struct {
	client                *docker.Client
	waitClient            *docker.Client
	logger                hclog.Logger
	dlogger               docklog.DockerLogger
	dloggerPluginClient   *plugin.Client
	task                  *drivers.TaskConfig
	containerID           string
	containerImage        string
	doneCh                chan bool
	waitCh                chan struct{}
	removeContainerOnExit bool
	net                   *drivers.DriverNetwork

	exitResult     *drivers.ExitResult
	exitResultLock sync.Mutex
}

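// ExitResult returns a copy of the container's exit result so callers can read
// it without racing run(), which updates the result under exitResultLock.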
func (h *taskHandle) ExitResult() *drivers.ExitResult {
	h.exitResultLock.Lock()
	defer h.exitResultLock.Unlock()
	return h.exitResult.Copy()
}

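// taskHandleState is the serializable subset of taskHandle that the driver
// persists so a handle can be rebuilt (and the docker logger plugin
// reattached) after a restart.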
type taskHandleState struct {
	// ReattachConfig for the docker logger plugin
	ReattachConfig *pstructs.ReattachConfig

	ContainerID   string
	DriverNetwork *drivers.DriverNetwork
}

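// buildState snapshots the handle's recoverable fields into a taskHandleState.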
func (h *taskHandle) buildState() *taskHandleState {
	s := &taskHandleState{
		ContainerID:   h.containerID,
		DriverNetwork: h.net,
	}
	if h.dloggerPluginClient != nil {
		s.ReattachConfig = pstructs.ReattachConfigFromGoPlugin(h.dloggerPluginClient.ReattachConfig())
	}
	return s
}

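// Exec runs cmd with args inside the running container via the Docker exec API
// and returns the captured stdout/stderr (each bounded by drivers.CheckBufSize)
// along with the command's exit code. An illustrative call might look like:
//
//	res, err := h.Exec(ctx, "/bin/sh", []string{"-c", "echo hello"})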
func (h *taskHandle) Exec(ctx context.Context, cmd string, args []string) (*drivers.ExecTaskResult, error) {
	fullCmd := make([]string, len(args)+1)
	fullCmd[0] = cmd
	copy(fullCmd[1:], args)
	createExecOpts := docker.CreateExecOptions{
		AttachStdin:  false,
		AttachStdout: true,
		AttachStderr: true,
		Tty:          false,
		Cmd:          fullCmd,
		Container:    h.containerID,
		Context:      ctx,
	}
	exec, err := h.client.CreateExec(createExecOpts)
	if err != nil {
		return nil, err
	}

	execResult := &drivers.ExecTaskResult{ExitResult: &drivers.ExitResult{}}
	stdout, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
	stderr, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
	startOpts := docker.StartExecOptions{
		Detach:       false,
		Tty:          false,
		OutputStream: stdout,
		ErrorStream:  stderr,
		Context:      ctx,
	}
	if err := h.client.StartExec(exec.ID, startOpts); err != nil {
		return nil, err
	}
	execResult.Stdout = stdout.Bytes()
	execResult.Stderr = stderr.Bytes()
	res, err := h.client.InspectExec(exec.ID)
	if err != nil {
		return execResult, err
	}

	execResult.ExitResult.ExitCode = res.ExitCode
	return execResult, nil
}

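// Signal sends s to the container. Only syscall.Signal values are accepted;
// anything else returns an error before the Docker API is called.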
func (h *taskHandle) Signal(ctx context.Context, s os.Signal) error {
	// Convert types
	sysSig, ok := s.(syscall.Signal)
	if !ok {
		return fmt.Errorf("Failed to determine signal number")
	}

	// TODO When we expose signals we will need a mapping layer that converts
	// MacOS signals to the correct signal number for docker. Or we change the
	// interface to take a signal string and leave it up to driver to map?

	dockerSignal := docker.Signal(sysSig)
	opts := docker.KillContainerOptions{
		ID:      h.containerID,
		Signal:  dockerSignal,
		Context: ctx,
	}
	return h.client.KillContainer(opts)
}

// parseSignal interprets the signal name into an os.Signal. If no name is
// provided, the docker driver defaults to SIGTERM. If the OS is Windows and
// SIGINT is provided, the signal is converted to SIGTERM.
func parseSignal(os, signal string) (os.Signal, error) {
	// Unlike other drivers, docker defaults to SIGTERM, aiming for consistency
	// with the 'docker stop' command.
	// https://docs.docker.com/engine/reference/commandline/stop/#extended-description
	if signal == "" {
		signal = "SIGTERM"
	}

	// Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that
	// allows for graceful shutdown before being followed up by a SIGKILL.
	// Supported signals:
	// https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26
	if os == "windows" && signal == "SIGINT" {
		signal = "SIGTERM"
	}

	return signals.Parse(signal)
}

// Kill is used to terminate the task.
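// When no signal is configured it defers to Docker's own stop handling;
// otherwise the container is signalled first and force-stopped once
// killTimeout elapses. An illustrative call from a StopTask implementation
// (names are hypothetical):
//
//	if err := handle.Kill(timeout, killSignal); err != nil { /* handle error */ }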
func (h *taskHandle) Kill(killTimeout time.Duration, signal string) error {
	var err error
	// Calling StopContainer lets docker handle the stop signal (specified
	// in the Dockerfile or defaulting to SIGTERM). If kill_signal is specified,
	// Signal is used to kill the container with the desired signal before
	// calling StopContainer
	if signal == "" {
		err = h.client.StopContainer(h.containerID, uint(killTimeout.Seconds()))
	} else {
		ctx, cancel := context.WithTimeout(context.Background(), killTimeout)
		defer cancel()

		sig, parseErr := parseSignal(runtime.GOOS, signal)
		if parseErr != nil {
			return fmt.Errorf("failed to parse signal: %v", parseErr)
		}

		if err := h.Signal(ctx, sig); err != nil {
			// Container has already been removed.
			if strings.Contains(err.Error(), NoSuchContainerError) {
				h.logger.Debug("attempted to signal nonexistent container")
				return nil
			}
			// Container has already been stopped.
			if strings.Contains(err.Error(), ContainerNotRunningError) {
				h.logger.Debug("attempted to signal a not-running container")
				return nil
			}

			h.logger.Error("failed to signal container while killing", "error", err)
			return fmt.Errorf("Failed to signal container %q while killing: %v", h.containerID, err)
		}

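		// Wait for the container to exit on its own after the signal; if the
		// kill timeout elapses first, fall through and force-stop it below.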
		select {
		case <-h.waitCh:
			return nil
		case <-ctx.Done():
		}

		// Stop the container
		err = h.client.StopContainer(h.containerID, 0)
	}

	if err != nil {
		// Container has already been removed.
		if strings.Contains(err.Error(), NoSuchContainerError) {
			h.logger.Debug("attempted to stop nonexistent container")
			return nil
		}
		// Container has already been stopped.
		if strings.Contains(err.Error(), ContainerNotRunningError) {
			h.logger.Debug("attempted to stop a not-running container")
			return nil
		}

		h.logger.Error("failed to stop container", "error", err)
		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
	}

	h.logger.Info("stopped container")
	return nil
}

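// shutdownLogger stops the docker logger plugin, if one was started, and kills
// its plugin process.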
func (h *taskHandle) shutdownLogger() {
	if h.dlogger == nil {
		return
	}

	if err := h.dlogger.Stop(); err != nil {
		h.logger.Error("failed to stop docker logger process during StopTask",
			"error", err, "logger_pid", h.dloggerPluginClient.ReattachConfig().Pid)
	}
	h.dloggerPluginClient.Kill()
}

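// run blocks until the container exits, records the exit result, and then
// closes doneCh and waitCh to notify watchers. It is expected to be started in
// its own goroutine by the driver.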
func (h *taskHandle) run() {
	defer h.shutdownLogger()

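	// Block until the container exits. A dedicated waitClient is used here so
	// that this potentially very long call stays isolated from the driver's
	// other Docker API calls.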
	exitCode, werr := h.waitClient.WaitContainer(h.containerID)
	if werr != nil {
		h.logger.Error("failed to wait for container; already terminated")
	}

	if exitCode != 0 {
		werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
	}

	container, ierr := h.waitClient.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: h.containerID,
	})
	oom := false
	if ierr != nil {
		h.logger.Error("failed to inspect container", "error", ierr)
	} else if container.State.OOMKilled {
		oom = true
		werr = fmt.Errorf("OOM Killed")
	} else if container.State.ExitCode == 137 {
		// With cgroups.v2 the cgroup OOM killer is not always reflected in the
		// docker container status, so for now treat exit code 137 as an OOM
		// kill, e.g.:
		// [Mon Mar 21 19:48:21 2022] Memory cgroup out of memory: Killed process 92768 (sh) [...]
		oom = true
		werr = fmt.Errorf("OOM Killed (137)")
	}

	// Shutdown stats collection
	close(h.doneCh)

	// Stop the container just in case the docker daemon's wait returned
	// incorrectly.
	if err := h.client.StopContainer(h.containerID, 0); err != nil {
		_, noSuchContainer := err.(*docker.NoSuchContainer)
		_, containerNotRunning := err.(*docker.ContainerNotRunning)
		if !containerNotRunning && !noSuchContainer {
			h.logger.Error("error stopping container", "error", err)
		}
	}

	// Set the result
	h.exitResultLock.Lock()
	h.exitResult = &drivers.ExitResult{
		ExitCode:  exitCode,
		Signal:    0,
		OOMKilled: oom,
		Err:       werr,
	}
	h.exitResultLock.Unlock()
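	// Unblock anyone waiting on the task (e.g. Kill above) now that the exit
	// result has been recorded.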
	close(h.waitCh)
}