open-nomad/drivers/docker/config.go
Seth Hoenig 2e5c6de820 client: enable support for cgroups v2
This PR introduces support for using Nomad on systems with cgroups v2 [1]
enabled as the cgroups controller mounted on /sys/fs/cgroup. Newer Linux
distros like Ubuntu 21.10 are shipping with cgroups v2 only, causing problems
for Nomad users.

Nomad mostly "just works" with cgroups v2 due to the indirection via libcontainer,
but not so for managing cpuset cgroups. Before, Nomad has been making use of
a feature in v1 where a PID could be a member of more than one cgroup. In v2
this is no longer possible, and so the logic around computing cpuset values
must be modified. When Nomad detects v2, it manages cpuset values in-process,
rather than making use of cgroup hierarchy inheritance via shared/reserved
parents.

Nomad will only activate the v2 logic when it detects cgroups2 is mounted at
/sys/fs/cgroup. This means on systems running in hybrid mode with cgroups2
mounted at /sys/fs/cgroup/unified (as is typical) Nomad will continue to
use the v1 logic, and should operate as before. Systems that do not support
cgroups v2 are also not affected.

When v2 is activated, Nomad will create a parent called nomad.slice (unless
otherwise configured in Client config), and create cgroups for tasks using
naming convention <allocID>-<task>.scope. These follow the naming convention
set by systemd and also used by Docker when cgroups v2 is detected.

Client nodes now export a new fingerprint attribute, unique.cgroups.version
which will be set to 'v1' or 'v2' to indicate the cgroups regime in use by
Nomad.

The new cpuset management strategy fixes #11705, where docker tasks that
spawned processes on startup would "leak". In cgroups v2, the PIDs are
started in the cgroup they will always live in, and thus the cause of
the leak is eliminated.

[1] https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html

Closes #11289
Fixes #11705 #11773 #11933
2022-03-23 11:35:27 -05:00

781 lines
28 KiB
Go

package docker
import (
"context"
"fmt"
"runtime"
"strconv"
"strings"
"time"
docker "github.com/fsouza/go-dockerclient"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/drivers/shared/capabilities"
"github.com/hashicorp/nomad/helper/pluginutils/hclutils"
"github.com/hashicorp/nomad/helper/pluginutils/loader"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
)
const (
// NoSuchContainerError is returned by the docker daemon if the container
// does not exist.
NoSuchContainerError = "No such container"
// ContainerNotRunningError is returned by the docker daemon if the container
// is not running, yet we requested it to stop.
ContainerNotRunningError = "Container not running"
// pluginName is the name of the plugin. It is used as the Name of both
// PluginID and pluginInfo.
pluginName = "docker"
// fingerprintPeriod is the interval at which the driver will send
// fingerprint responses.
fingerprintPeriod = 30 * time.Second
// dockerTimeout is the length of time a request can be outstanding before
// it is timed out.
dockerTimeout = 5 * time.Minute
// dockerAuthHelperPrefix is the prefix to attach to the credential helper
// and should be found in the $PATH. Example: ${prefix-}${helper-name}
dockerAuthHelperPrefix = "docker-credential-"
)
// PluginLoader converts the flat "docker.*" option map into the nested
// configuration map consumed by the docker plugin.
//
// String options are copied only when their key is present. Boolean options
// are copied only when their value parses with strconv.ParseBool, so a missing
// or malformed boolean is silently left unset. The "auth", "gc" and "volumes"
// sub-maps are always present in the result, even when empty. The returned
// error is always nil.
func PluginLoader(opts map[string]string) (map[string]interface{}, error) {
	// copyString stores opts[optKey] under dst[confKey] when the option is set.
	copyString := func(dst map[string]interface{}, confKey, optKey string) {
		if val, found := opts[optKey]; found {
			dst[confKey] = val
		}
	}
	// copyBool stores the parsed boolean under dst[confKey] when the option
	// holds a valid boolean; a missing key parses as "" and is skipped.
	copyBool := func(dst map[string]interface{}, confKey, optKey string) {
		if flag, err := strconv.ParseBool(opts[optKey]); err == nil {
			dst[confKey] = flag
		}
	}

	auth := map[string]interface{}{}
	gc := map[string]interface{}{}
	volumes := map[string]interface{}{}
	result := map[string]interface{}{
		"auth":    auth,
		"gc":      gc,
		"volumes": volumes,
	}

	copyString(result, "endpoint", "docker.endpoint")

	// dockerd auth
	copyString(auth, "config", "docker.auth.config")
	copyString(auth, "helper", "docker.auth.helper")

	// dockerd tls: the whole block is keyed off the presence of the cert;
	// key and ca are copied as-is and may therefore be empty strings.
	if _, haveCert := opts["docker.tls.cert"]; haveCert {
		tls := map[string]interface{}{}
		tls["cert"] = opts["docker.tls.cert"]
		tls["key"] = opts["docker.tls.key"]
		tls["ca"] = opts["docker.tls.ca"]
		result["tls"] = tls
	}

	// garbage collection
	copyBool(gc, "image", "docker.cleanup.image")
	copyString(gc, "image_delay", "docker.cleanup.image.delay")
	copyBool(gc, "container", "docker.cleanup.container")

	// volume options
	copyBool(volumes, "enabled", "docker.volumes.enabled")
	copyString(volumes, "selinuxlabel", "docker.volumes.selinuxlabel")

	// capabilities
	// COMPAT(1.0) uses inclusive language. whitelist is used for backward compatibility.
	rawCaps, haveCaps := opts["docker.caps.allowlist"]
	if !haveCaps {
		rawCaps, haveCaps = opts["docker.caps.whitelist"]
	}
	if haveCaps {
		result["allow_caps"] = strings.Split(rawCaps, ",")
	}

	// privileged containers
	copyBool(result, "allow_privileged", "docker.privileged.enabled")

	// nvidia_runtime
	copyString(result, "nvidia_runtime", "docker.nvidia_runtime")

	return result, nil
}
var (
// PluginID is the docker plugin metadata registered in the plugin
// catalog.
PluginID = loader.PluginID{
Name: pluginName,
PluginType: base.PluginTypeDriver,
}
// PluginConfig is the docker factory function registered in the
// plugin catalog. Its Factory builds the driver with NewDockerDriver.
PluginConfig = &loader.InternalPluginConfig{
Config: map[string]interface{}{},
Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewDockerDriver(ctx, l) },
}
// pluginInfo is the response returned for the PluginInfo RPC. It reports a
// driver-type plugin named pluginName at plugin version "0.1.0".
pluginInfo = &base.PluginInfoResponse{
Type: base.PluginTypeDriver,
PluginApiVersions: []string{drivers.ApiVersion010},
PluginVersion: "0.1.0",
Name: pluginName,
}
// danglingContainersBlock is the hcl specification for the
// `dangling_containers` block nested inside the `gc` block of configSpec.
// Every attribute is optional and carries a default: enabled = true,
// period = "5m", creation_grace = "5m" and dry_run = false.
danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_grace": hclspec.NewDefault(
hclspec.NewAttr("creation_grace", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"dry_run": hclspec.NewDefault(
hclspec.NewAttr("dry_run", "bool", false),
hclspec.NewLiteral(`false`),
),
})
// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// The example below shows only a subset of the supported options; the
// spec itself is the authoritative list.
// Example:
// plugin "docker" {
// config {
// endpoint = "unix:///var/run/docker.sock"
// auth {
// config = "/etc/docker-auth.json"
// helper = "docker-credential-aws"
// }
// tls {
// cert = "/etc/nomad/nomad.pub"
// key = "/etc/nomad/nomad.pem"
// ca = "/etc/nomad/nomad.cert"
// }
// gc {
// image = true
// image_delay = "5m"
// container = false
// }
// volumes {
// enabled = true
// selinuxlabel = "z"
// }
// allow_privileged = false
// allow_caps = ["CHOWN", "NET_RAW" ... ]
// nvidia_runtime = "nvidia"
// }
// }
configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
"endpoint": hclspec.NewAttr("endpoint", "string", false),
// docker daemon auth option for image registry
"auth": hclspec.NewBlock("auth", false, hclspec.NewObject(map[string]*hclspec.Spec{
"config": hclspec.NewAttr("config", "string", false),
"helper": hclspec.NewAttr("helper", "string", false),
})),
// client tls options
"tls": hclspec.NewBlock("tls", false, hclspec.NewObject(map[string]*hclspec.Spec{
"cert": hclspec.NewAttr("cert", "string", false),
"key": hclspec.NewAttr("key", "string", false),
"ca": hclspec.NewAttr("ca", "string", false),
})),
// extra docker labels, globs supported
"extra_labels": hclspec.NewAttr("extra_labels", "list(string)", false),
// logging options; defaults to the json-file type with max-file = "2"
// and max-size = "2m" when the logging {...} block is not set
"logging": hclspec.NewDefault(hclspec.NewBlock("logging", false, hclspec.NewObject(map[string]*hclspec.Spec{
"type": hclspec.NewAttr("type", "string", false),
"config": hclspec.NewBlockAttrs("config", "string", false),
})), hclspec.NewLiteral(`{
type = "json-file"
config = {
max-file = "2"
max-size = "2m"
}
}`)),
// garbage collection options
// default needed for both if the gc {...} block is not set and
// if the default fields are missing
"gc": hclspec.NewDefault(hclspec.NewBlock("gc", false, hclspec.NewObject(map[string]*hclspec.Spec{
"image": hclspec.NewDefault(
hclspec.NewAttr("image", "bool", false),
hclspec.NewLiteral("true"),
),
"image_delay": hclspec.NewDefault(
hclspec.NewAttr("image_delay", "string", false),
hclspec.NewLiteral("\"3m\""),
),
"container": hclspec.NewDefault(
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral(`{
enabled = true
period = "5m"
creation_grace = "5m"
}`),
),
})), hclspec.NewLiteral(`{
image = true
image_delay = "3m"
container = true
dangling_containers = {
enabled = true
period = "5m"
creation_grace = "5m"
}
}`)),
// docker volume options
// default needed for both if the volumes {...} block is not set and
// if the default fields are missing
"volumes": hclspec.NewDefault(hclspec.NewBlock("volumes", false, hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewAttr("enabled", "bool", false),
"selinuxlabel": hclspec.NewAttr("selinuxlabel", "string", false),
})), hclspec.NewLiteral("{ enabled = false }")),
"allow_privileged": hclspec.NewAttr("allow_privileged", "bool", false),
// allow_caps defaults to capabilities.HCLSpecLiteral when not set
"allow_caps": hclspec.NewDefault(
hclspec.NewAttr("allow_caps", "list(string)", false),
hclspec.NewLiteral(capabilities.HCLSpecLiteral),
),
"nvidia_runtime": hclspec.NewDefault(
hclspec.NewAttr("nvidia_runtime", "string", false),
hclspec.NewLiteral(`"nvidia"`),
),
// list of docker runtimes allowed to be used
"allow_runtimes": hclspec.NewDefault(
hclspec.NewAttr("allow_runtimes", "list(string)", false),
hclspec.NewLiteral(`["runc", "nvidia"]`),
),
// image to use when creating a network namespace parent container;
// the default image name embeds runtime.GOARCH
"infra_image": hclspec.NewDefault(
hclspec.NewAttr("infra_image", "string", false),
hclspec.NewLiteral(fmt.Sprintf(
`"gcr.io/google_containers/pause-%s:3.1"`,
runtime.GOARCH,
)),
),
// timeout to use when pulling the infra image.
"infra_image_pull_timeout": hclspec.NewDefault(
hclspec.NewAttr("infra_image_pull_timeout", "string", false),
hclspec.NewLiteral(`"5m"`),
),
// the duration that the driver will wait for activity from the Docker engine during an image pull
// before canceling the request
"pull_activity_timeout": hclspec.NewDefault(
hclspec.NewAttr("pull_activity_timeout", "string", false),
hclspec.NewLiteral(`"2m"`),
),
"pids_limit": hclspec.NewAttr("pids_limit", "number", false),
// disable_log_collection indicates whether docker driver should collect logs of docker
// task containers. If true, nomad doesn't start docker_logger/logmon processes
"disable_log_collection": hclspec.NewAttr("disable_log_collection", "bool", false),
})
// mountBodySpec is the hcl specification for the `mount` block. It is shared
// by the `mount` and `mounts` entries of taskConfigSpec. `type` defaults to
// "volume"; all other attributes and nested blocks are optional.
mountBodySpec = hclspec.NewObject(map[string]*hclspec.Spec{
"type": hclspec.NewDefault(
hclspec.NewAttr("type", "string", false),
hclspec.NewLiteral("\"volume\""),
),
"target": hclspec.NewAttr("target", "string", false),
"source": hclspec.NewAttr("source", "string", false),
"readonly": hclspec.NewAttr("readonly", "bool", false),
"bind_options": hclspec.NewBlock("bind_options", false, hclspec.NewObject(map[string]*hclspec.Spec{
"propagation": hclspec.NewAttr("propagation", "string", false),
})),
"tmpfs_options": hclspec.NewBlock("tmpfs_options", false, hclspec.NewObject(map[string]*hclspec.Spec{
"size": hclspec.NewAttr("size", "number", false),
"mode": hclspec.NewAttr("mode", "number", false),
})),
"volume_options": hclspec.NewBlock("volume_options", false, hclspec.NewObject(map[string]*hclspec.Spec{
"no_copy": hclspec.NewAttr("no_copy", "bool", false),
"labels": hclspec.NewAttr("labels", "list(map(string))", false),
"driver_config": hclspec.NewBlock("driver_config", false, hclspec.NewObject(map[string]*hclspec.Spec{
"name": hclspec.NewAttr("name", "string", false),
"options": hclspec.NewAttr("options", "list(map(string))", false),
})),
})),
})
// taskConfigSpec is the hcl specification for the driver config section of
// a task within a job. It is returned in the TaskConfigSchema RPC.
// `image` is the only attribute marked as required; `cpu_cfs_period` and
// `image_pull_timeout` carry defaults (100000 and "5m" respectively).
taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
"image": hclspec.NewAttr("image", "string", true),
"advertise_ipv6_address": hclspec.NewAttr("advertise_ipv6_address", "bool", false),
"args": hclspec.NewAttr("args", "list(string)", false),
"auth": hclspec.NewBlock("auth", false, hclspec.NewObject(map[string]*hclspec.Spec{
"username": hclspec.NewAttr("username", "string", false),
"password": hclspec.NewAttr("password", "string", false),
"email": hclspec.NewAttr("email", "string", false),
"server_address": hclspec.NewAttr("server_address", "string", false),
})),
"auth_soft_fail": hclspec.NewAttr("auth_soft_fail", "bool", false),
"cap_add": hclspec.NewAttr("cap_add", "list(string)", false),
"cap_drop": hclspec.NewAttr("cap_drop", "list(string)", false),
"command": hclspec.NewAttr("command", "string", false),
"cpuset_cpus": hclspec.NewAttr("cpuset_cpus", "string", false),
"cpu_hard_limit": hclspec.NewAttr("cpu_hard_limit", "bool", false),
"cpu_cfs_period": hclspec.NewDefault(
hclspec.NewAttr("cpu_cfs_period", "number", false),
hclspec.NewLiteral(`100000`),
),
"devices": hclspec.NewBlockList("devices", hclspec.NewObject(map[string]*hclspec.Spec{
"host_path": hclspec.NewAttr("host_path", "string", false),
"container_path": hclspec.NewAttr("container_path", "string", false),
"cgroup_permissions": hclspec.NewAttr("cgroup_permissions", "string", false),
})),
"dns_search_domains": hclspec.NewAttr("dns_search_domains", "list(string)", false),
"dns_options": hclspec.NewAttr("dns_options", "list(string)", false),
"dns_servers": hclspec.NewAttr("dns_servers", "list(string)", false),
"entrypoint": hclspec.NewAttr("entrypoint", "list(string)", false),
"extra_hosts": hclspec.NewAttr("extra_hosts", "list(string)", false),
"force_pull": hclspec.NewAttr("force_pull", "bool", false),
"hostname": hclspec.NewAttr("hostname", "string", false),
"init": hclspec.NewAttr("init", "bool", false),
"interactive": hclspec.NewAttr("interactive", "bool", false),
"ipc_mode": hclspec.NewAttr("ipc_mode", "string", false),
"ipv4_address": hclspec.NewAttr("ipv4_address", "string", false),
"ipv6_address": hclspec.NewAttr("ipv6_address", "string", false),
"labels": hclspec.NewAttr("labels", "list(map(string))", false),
"load": hclspec.NewAttr("load", "string", false),
"logging": hclspec.NewBlock("logging", false, hclspec.NewObject(map[string]*hclspec.Spec{
"type": hclspec.NewAttr("type", "string", false),
"driver": hclspec.NewAttr("driver", "string", false),
"config": hclspec.NewAttr("config", "list(map(string))", false),
})),
"mac_address": hclspec.NewAttr("mac_address", "string", false),
"memory_hard_limit": hclspec.NewAttr("memory_hard_limit", "number", false),
// mount and mounts are effectively aliases, but `mounts` is meant for pre-1.0
// assignment syntax `mounts = [{type="..." ..."}]` while
// `mount` is 1.0 repeated block syntax `mount { type = "..." }`
"mount": hclspec.NewBlockList("mount", mountBodySpec),
"mounts": hclspec.NewBlockList("mounts", mountBodySpec),
"network_aliases": hclspec.NewAttr("network_aliases", "list(string)", false),
"network_mode": hclspec.NewAttr("network_mode", "string", false),
"runtime": hclspec.NewAttr("runtime", "string", false),
"pids_limit": hclspec.NewAttr("pids_limit", "number", false),
"pid_mode": hclspec.NewAttr("pid_mode", "string", false),
"ports": hclspec.NewAttr("ports", "list(string)", false),
"port_map": hclspec.NewAttr("port_map", "list(map(number))", false),
"privileged": hclspec.NewAttr("privileged", "bool", false),
"image_pull_timeout": hclspec.NewDefault(
hclspec.NewAttr("image_pull_timeout", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"readonly_rootfs": hclspec.NewAttr("readonly_rootfs", "bool", false),
"security_opt": hclspec.NewAttr("security_opt", "list(string)", false),
"shm_size": hclspec.NewAttr("shm_size", "number", false),
"storage_opt": hclspec.NewBlockAttrs("storage_opt", "string", false),
"sysctl": hclspec.NewAttr("sysctl", "list(map(string))", false),
"tty": hclspec.NewAttr("tty", "bool", false),
"ulimit": hclspec.NewAttr("ulimit", "list(map(string))", false),
"uts_mode": hclspec.NewAttr("uts_mode", "string", false),
"userns_mode": hclspec.NewAttr("userns_mode", "string", false),
"volumes": hclspec.NewAttr("volumes", "list(string)", false),
"volume_driver": hclspec.NewAttr("volume_driver", "string", false),
"work_dir": hclspec.NewAttr("work_dir", "string", false),
})
// driverCapabilities represents the RPC response for what features are
// implemented by the docker task driver. It is the value returned by the
// Capabilities method.
driverCapabilities = &drivers.Capabilities{
SendSignals: true,
Exec: true,
FSIsolation: drivers.FSIsolationImage,
NetIsolationModes: []drivers.NetIsolationMode{
drivers.NetIsolationModeHost,
drivers.NetIsolationModeGroup,
drivers.NetIsolationModeTask,
},
MustInitiateNetwork: true,
MountConfigs: drivers.MountConfigSupportAll,
}
)
// TaskConfig is the decoded form of the driver config section of a task.
// Each codec tag matches an attribute or block name declared in
// taskConfigSpec.
type TaskConfig struct {
Image string `codec:"image"`
AdvertiseIPv6Addr bool `codec:"advertise_ipv6_address"`
Args []string `codec:"args"`
Auth DockerAuth `codec:"auth"`
AuthSoftFail bool `codec:"auth_soft_fail"`
CapAdd []string `codec:"cap_add"`
CapDrop []string `codec:"cap_drop"`
Command string `codec:"command"`
CPUCFSPeriod int64 `codec:"cpu_cfs_period"`
CPUHardLimit bool `codec:"cpu_hard_limit"`
CPUSetCPUs string `codec:"cpuset_cpus"`
Devices []DockerDevice `codec:"devices"`
DNSSearchDomains []string `codec:"dns_search_domains"`
DNSOptions []string `codec:"dns_options"`
DNSServers []string `codec:"dns_servers"`
Entrypoint []string `codec:"entrypoint"`
ExtraHosts []string `codec:"extra_hosts"`
ForcePull bool `codec:"force_pull"`
Hostname string `codec:"hostname"`
Init bool `codec:"init"`
Interactive bool `codec:"interactive"`
IPCMode string `codec:"ipc_mode"`
IPv4Address string `codec:"ipv4_address"`
IPv6Address string `codec:"ipv6_address"`
Labels hclutils.MapStrStr `codec:"labels"`
LoadImage string `codec:"load"`
Logging DockerLogging `codec:"logging"`
MacAddress string `codec:"mac_address"`
MemoryHardLimit int64 `codec:"memory_hard_limit"`
// Mounts holds the entries given with the repeated `mount` block syntax.
Mounts []DockerMount `codec:"mount"`
NetworkAliases []string `codec:"network_aliases"`
NetworkMode string `codec:"network_mode"`
Runtime string `codec:"runtime"`
PidsLimit int64 `codec:"pids_limit"`
PidMode string `codec:"pid_mode"`
Ports []string `codec:"ports"`
PortMap hclutils.MapStrInt `codec:"port_map"`
Privileged bool `codec:"privileged"`
ImagePullTimeout string `codec:"image_pull_timeout"`
ReadonlyRootfs bool `codec:"readonly_rootfs"`
SecurityOpt []string `codec:"security_opt"`
ShmSize int64 `codec:"shm_size"`
StorageOpt map[string]string `codec:"storage_opt"`
Sysctl hclutils.MapStrStr `codec:"sysctl"`
TTY bool `codec:"tty"`
Ulimit hclutils.MapStrStr `codec:"ulimit"`
UTSMode string `codec:"uts_mode"`
UsernsMode string `codec:"userns_mode"`
Volumes []string `codec:"volumes"`
VolumeDriver string `codec:"volume_driver"`
WorkDir string `codec:"work_dir"`
// MountsList supports the pre-1.0 mounts array syntax
MountsList []DockerMount `codec:"mounts"`
}
// DockerAuth holds the values of a task's `auth` block as declared in
// taskConfigSpec.
type DockerAuth struct {
Username string `codec:"username"`
Password string `codec:"password"`
Email string `codec:"email"`
ServerAddr string `codec:"server_address"`
}
// DockerDevice is one entry of a task's `devices` block list. It is
// converted to a docker.Device by toDockerDevice.
type DockerDevice struct {
HostPath string `codec:"host_path"`
ContainerPath string `codec:"container_path"`
CgroupPermissions string `codec:"cgroup_permissions"`
}
// toDockerDevice converts the task-level device configuration into the
// docker client's Device representation.
//
// An error is returned when the host path is empty or when the cgroup
// permission string is rejected by validateCgroupPermission. An empty
// permission string is replaced by "rwm" before validation. The device
// built so far is returned alongside any error.
func (d DockerDevice) toDockerDevice() (docker.Device, error) {
	var device docker.Device
	device.PathOnHost = d.HostPath
	device.PathInContainer = d.ContainerPath
	device.CgroupPermissions = d.CgroupPermissions

	switch {
	case d.HostPath == "":
		return device, fmt.Errorf("host path must be set in configuration for devices")
	case device.CgroupPermissions == "":
		// No permissions configured: fall back to read/write/mknod.
		device.CgroupPermissions = "rwm"
	}

	if valid := validateCgroupPermission(device.CgroupPermissions); !valid {
		return device, fmt.Errorf("invalid cgroup permission string: %q", device.CgroupPermissions)
	}

	return device, nil
}
// DockerLogging holds the values of a task's `logging` block as declared in
// taskConfigSpec.
type DockerLogging struct {
Type string `codec:"type"`
Driver string `codec:"driver"`
Config hclutils.MapStrStr `codec:"config"`
}
// DockerMount is one `mount`/`mounts` entry of a task config, with the shape
// declared by mountBodySpec. It is converted to a docker.HostMount by
// toDockerHostMount, which only consults the options matching Type.
type DockerMount struct {
Type string `codec:"type"`
Target string `codec:"target"`
Source string `codec:"source"`
ReadOnly bool `codec:"readonly"`
BindOptions DockerBindOptions `codec:"bind_options"`
VolumeOptions DockerVolumeOptions `codec:"volume_options"`
TmpfsOptions DockerTmpfsOptions `codec:"tmpfs_options"`
}
// toDockerHostMount converts the task-level mount configuration into the
// docker client's HostMount representation.
//
// An empty Type is treated as "volume" for backward compatibility. Only the
// options matching the mount type are copied: volume_options for "volume",
// bind_options for "bind" and tmpfs_options for "tmpfs". An error is
// returned for a "tmpfs" mount with a non-empty source and for any other
// mount type; the partially built HostMount accompanies the error.
func (m DockerMount) toDockerHostMount() (docker.HostMount, error) {
	mountType := m.Type
	if mountType == "" {
		// for backward compatibility, as type is optional
		mountType = "volume"
	}

	hostMount := docker.HostMount{
		Type:     mountType,
		Source:   m.Source,
		Target:   m.Target,
		ReadOnly: m.ReadOnly,
	}

	if mountType == "volume" {
		hostMount.VolumeOptions = &docker.VolumeOptions{
			NoCopy: m.VolumeOptions.NoCopy,
			Labels: m.VolumeOptions.Labels,
			DriverConfig: docker.VolumeDriverConfig{
				Name:    m.VolumeOptions.DriverConfig.Name,
				Options: m.VolumeOptions.DriverConfig.Options,
			},
		}
		return hostMount, nil
	}

	if mountType == "bind" {
		hostMount.BindOptions = &docker.BindOptions{
			Propagation: m.BindOptions.Propagation,
		}
		return hostMount, nil
	}

	if mountType == "tmpfs" {
		if m.Source != "" {
			return hostMount, fmt.Errorf(`invalid source, must be "" for tmpfs`)
		}
		hostMount.TempfsOptions = &docker.TempfsOptions{
			SizeBytes: m.TmpfsOptions.SizeBytes,
			Mode:      m.TmpfsOptions.Mode,
		}
		return hostMount, nil
	}

	return hostMount, fmt.Errorf(`invalid mount type, must be "bind", "volume", "tmpfs": %q`, mountType)
}
// DockerVolumeOptions holds the `volume_options` block of a mount. It is
// only used by toDockerHostMount for mounts of type "volume".
type DockerVolumeOptions struct {
NoCopy bool `codec:"no_copy"`
Labels hclutils.MapStrStr `codec:"labels"`
DriverConfig DockerVolumeDriverConfig `codec:"driver_config"`
}
// DockerBindOptions holds the `bind_options` block of a mount. It is only
// used by toDockerHostMount for mounts of type "bind".
type DockerBindOptions struct {
Propagation string `codec:"propagation"`
}
// DockerTmpfsOptions holds the `tmpfs_options` block of a mount. It is only
// used by toDockerHostMount for mounts of type "tmpfs".
type DockerTmpfsOptions struct {
// SizeBytes is decoded from the `size` attribute.
SizeBytes int64 `codec:"size"`
Mode int `codec:"mode"`
}
// DockerVolumeDriverConfig holds the name of a volume driver and a map of
// volume driver specific options, as set in a mount's `driver_config` block.
type DockerVolumeDriverConfig struct {
Name string `codec:"name"`
Options hclutils.MapStrStr `codec:"options"`
}
// ContainerGCConfig controls the behavior of the GC reconciler that detects
// dangling nomad containers that aren't tracked due to docker/nomad bugs.
type ContainerGCConfig struct {
// Enabled controls whether container reconciler is enabled
Enabled bool `codec:"enabled"`
// DryRun indicates that reconciler should log unexpectedly running containers
// if found without actually killing them
DryRun bool `codec:"dry_run"`
// PeriodStr controls the frequency of scanning containers.
// SetConfig parses it into period.
PeriodStr string `codec:"period"`
period time.Duration `codec:"-"`
// CreationGraceStr is the duration allowed for a newly created container
// to live without being registered as a running task in nomad.
// A container is treated as leaked if it has lived longer than the grace
// duration and hasn't been registered in tasks.
// SetConfig parses it into CreationGrace.
CreationGraceStr string `codec:"creation_grace"`
CreationGrace time.Duration `codec:"-"`
}
// DriverConfig is the decoded form of the plugin configuration. Each codec
// tag matches an attribute or block name declared in configSpec. Fields
// tagged `codec:"-"` are not decoded; SetConfig derives them from their
// string or list counterparts.
type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
TLS TLSConfig `codec:"tls"`
GC GCConfig `codec:"gc"`
Volumes VolumeConfig `codec:"volumes"`
AllowPrivileged bool `codec:"allow_privileged"`
AllowCaps []string `codec:"allow_caps"`
GPURuntimeName string `codec:"nvidia_runtime"`
InfraImage string `codec:"infra_image"`
InfraImagePullTimeout string `codec:"infra_image_pull_timeout"`
// infraImagePullTimeoutDuration is InfraImagePullTimeout parsed by SetConfig.
infraImagePullTimeoutDuration time.Duration `codec:"-"`
DisableLogCollection bool `codec:"disable_log_collection"`
PullActivityTimeout string `codec:"pull_activity_timeout"`
PidsLimit int64 `codec:"pids_limit"`
// pullActivityTimeoutDuration is PullActivityTimeout parsed by SetConfig.
pullActivityTimeoutDuration time.Duration `codec:"-"`
ExtraLabels []string `codec:"extra_labels"`
Logging LoggingConfig `codec:"logging"`
AllowRuntimesList []string `codec:"allow_runtimes"`
// allowRuntimes is the set built by SetConfig from AllowRuntimesList.
allowRuntimes map[string]struct{} `codec:"-"`
}
// AuthConfig holds the plugin-level `auth` block (docker daemon auth option
// for image registry, per configSpec).
type AuthConfig struct {
Config string `codec:"config"`
Helper string `codec:"helper"`
}
// TLSConfig holds the plugin-level `tls` block (client tls options, per
// configSpec).
type TLSConfig struct {
Cert string `codec:"cert"`
Key string `codec:"key"`
CA string `codec:"ca"`
}
// GCConfig holds the plugin-level `gc` block (garbage collection options,
// per configSpec).
type GCConfig struct {
Image bool `codec:"image"`
ImageDelay string `codec:"image_delay"`
// imageDelayDuration is ImageDelay parsed by SetConfig.
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`
DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}
// VolumeConfig holds the plugin-level `volumes` block (docker volume
// options, per configSpec).
type VolumeConfig struct {
Enabled bool `codec:"enabled"`
SelinuxLabel string `codec:"selinuxlabel"`
}
// LoggingConfig holds the plugin-level `logging` block (logging options,
// per configSpec).
type LoggingConfig struct {
Type string `codec:"type"`
Config map[string]string `codec:"config"`
}
// PluginInfo returns the package-level pluginInfo response describing this
// plugin. The error is always nil.
func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
return pluginInfo, nil
}
// ConfigSchema returns configSpec, the hcl specification of the plugin
// configuration block. The error is always nil.
func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
return configSpec, nil
}
// danglingContainersCreationGraceMinimum is the smallest `creation_grace`
// value SetConfig accepts.
const danglingContainersCreationGraceMinimum = 1 * time.Minute
// pullActivityTimeoutMinimum is the smallest `pull_activity_timeout` value
// SetConfig accepts.
const pullActivityTimeoutMinimum = 1 * time.Minute
// SetConfig decodes the msgpack-encoded plugin configuration carried by c,
// parses and validates its duration settings, and (re)creates the components
// that depend on the configuration: the image coordinator, the dangling
// container reconciler and the cpuset fixer.
//
// An empty PluginConfig leaves the configuration at its zero value. An error
// is returned when the configuration cannot be decoded, when a duration
// string cannot be parsed, when creation_grace or pull_activity_timeout is
// below its minimum, or when no docker client can be obtained. Parse errors
// wrap the underlying error so callers can inspect it.
func (d *Driver) SetConfig(c *base.Config) error {
	var config DriverConfig
	if len(c.PluginConfig) != 0 {
		if err := base.MsgPackDecode(c.PluginConfig, &config); err != nil {
			return err
		}
	}

	// The decoded configuration is installed before validation, so d.config
	// already points at the new settings if a later step returns an error.
	d.config = &config
	d.config.InfraImage = strings.TrimPrefix(d.config.InfraImage, "https://")

	if d.config.GC.ImageDelay != "" {
		dur, err := time.ParseDuration(d.config.GC.ImageDelay)
		if err != nil {
			return fmt.Errorf("failed to parse 'image_delay' duration: %w", err)
		}
		d.config.GC.imageDelayDuration = dur
	}

	if d.config.GC.DanglingContainers.PeriodStr != "" {
		dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
		if err != nil {
			return fmt.Errorf("failed to parse 'period' duration: %w", err)
		}
		d.config.GC.DanglingContainers.period = dur
	}

	if d.config.GC.DanglingContainers.CreationGraceStr != "" {
		dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
		if err != nil {
			return fmt.Errorf("failed to parse 'creation_grace' duration: %w", err)
		}
		if dur < danglingContainersCreationGraceMinimum {
			return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
		}
		d.config.GC.DanglingContainers.CreationGrace = dur
	}

	if d.config.PullActivityTimeout != "" {
		dur, err := time.ParseDuration(d.config.PullActivityTimeout)
		if err != nil {
			return fmt.Errorf("failed to parse 'pull_activity_timeout' duration: %w", err)
		}
		if dur < pullActivityTimeoutMinimum {
			return fmt.Errorf("pull_activity_timeout is less than minimum, %v", pullActivityTimeoutMinimum)
		}
		d.config.pullActivityTimeoutDuration = dur
	}

	if d.config.InfraImagePullTimeout != "" {
		dur, err := time.ParseDuration(d.config.InfraImagePullTimeout)
		if err != nil {
			return fmt.Errorf("failed to parse 'infra_image_pull_timeout' duration: %w", err)
		}
		d.config.infraImagePullTimeoutDuration = dur
	}

	// Index the allowed runtimes as a set for constant-time membership checks.
	d.config.allowRuntimes = make(map[string]struct{}, len(d.config.AllowRuntimesList))
	for _, r := range d.config.AllowRuntimesList {
		d.config.allowRuntimes[r] = struct{}{}
	}

	if c.AgentConfig != nil {
		d.clientConfig = c.AgentConfig.Driver
	}

	dockerClient, _, err := d.dockerClients()
	if err != nil {
		return fmt.Errorf("failed to get docker client: %w", err)
	}

	coordinatorConfig := &dockerCoordinatorConfig{
		ctx:         d.ctx,
		client:      dockerClient,
		cleanup:     d.config.GC.Image,
		logger:      d.logger,
		removeDelay: d.config.GC.imageDelayDuration,
	}
	d.coordinator = newDockerCoordinator(coordinatorConfig)

	d.danglingReconciler = newReconciler(d)
	d.cpusetFixer = newCpusetFixer(d)

	return nil
}
// TaskConfigSchema returns taskConfigSpec, the hcl specification of the
// driver config section of a task. The error is always nil.
func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
return taskConfigSpec, nil
}
// Capabilities is returned by the Capabilities RPC and indicates what optional
// features this driver supports. It returns the package-level
// driverCapabilities value; the error is always nil.
func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
return driverCapabilities, nil
}
// Compile-time assertion that *Driver implements
// drivers.InternalCapabilitiesDriver.
var _ drivers.InternalCapabilitiesDriver = (*Driver)(nil)

// InternalCapabilities reports the driver's internal capabilities.
// DisableLogCollection mirrors the plugin configuration and is false while
// no configuration has been set.
func (d *Driver) InternalCapabilities() drivers.InternalCapabilities {
	var caps drivers.InternalCapabilities
	if d.config != nil {
		caps.DisableLogCollection = d.config.DisableLogCollection
	}
	return caps
}