diff --git a/devices/gpu/nvidia/README.md b/devices/gpu/nvidia/README.md
index d057d11f3..a9df9c9e5 100644
--- a/devices/gpu/nvidia/README.md
+++ b/devices/gpu/nvidia/README.md
@@ -18,4 +18,4 @@ config {
 The valid configuration options are:
 
 * `ignored_gpu_ids` (`list(string)`: `[]`): list of GPU UUIDs strings that should not be exposed to nomad
-* `fingerprint_period` (`string`: `"5s"`): The interval to repeat fingerprint process to identify possible changes.
+* `fingerprint_period` (`string`: `"1m"`): The interval at which to repeat the fingerprint process to identify possible changes.
diff --git a/devices/gpu/nvidia/device.go b/devices/gpu/nvidia/device.go
index b288181c0..9e541fb5a 100644
--- a/devices/gpu/nvidia/device.go
+++ b/devices/gpu/nvidia/device.go
@@ -12,6 +12,7 @@ import (
 	"github.com/hashicorp/nomad/plugins/base"
 	"github.com/hashicorp/nomad/plugins/device"
 	"github.com/hashicorp/nomad/plugins/shared/hclspec"
+	"github.com/hashicorp/nomad/plugins/shared/loader"
 )
 
 const (
@@ -35,6 +36,19 @@ const (
 )
 
 var (
+	// PluginID is the nvidia plugin metadata registered in the plugin
+	// catalog.
+	PluginID = loader.PluginID{
+		Name:       pluginName,
+		PluginType: base.PluginTypeDevice,
+	}
+
+	// PluginConfig is the nvidia factory function registered in the
+	// plugin catalog.
+	PluginConfig = &loader.InternalPluginConfig{
+		Factory: func(l log.Logger) interface{} { return NewNvidiaDevice(l) },
+	}
+
 	// pluginInfo describes the plugin
 	pluginInfo = &base.PluginInfoResponse{
 		Type: base.PluginTypeDevice,
diff --git a/helper/pluginutils/catalog/register_linux.go b/helper/pluginutils/catalog/register_linux.go
index 91a45b33e..bb5175d11 100644
--- a/helper/pluginutils/catalog/register_linux.go
+++ b/helper/pluginutils/catalog/register_linux.go
@@ -1,10 +1,14 @@
 package catalog
 
-import "github.com/hashicorp/nomad/drivers/rkt"
+import (
+	"github.com/hashicorp/nomad/devices/gpu/nvidia"
+	"github.com/hashicorp/nomad/drivers/rkt"
+)
 
 // This file is where all builtin plugins should be registered in the catalog.
 // Plugins with build restrictions should be placed in the appropriate
 // register_XXX.go file.
 func init() {
 	RegisterDeferredConfig(rkt.PluginID, rkt.PluginConfig, rkt.PluginLoader)
+	Register(nvidia.PluginID, nvidia.PluginConfig)
 }
diff --git a/website/source/docs/devices/community.html.md b/website/source/docs/devices/community.html.md
index 514cc2878..cfed010eb 100644
--- a/website/source/docs/devices/community.html.md
+++ b/website/source/docs/devices/community.html.md
@@ -1,16 +1,19 @@
 ---
 layout: "docs"
-page_title: "Drivers: Custom"
+page_title: "Device Plugins: Community Supported"
 sidebar_current: "docs-devices-community"
 description: |-
-  Create custom task drivers for Nomad.
+  A list of community-supported device plugins.
 ---
 
-# Custom Drivers
+# Community Supported
 
-Nomad does not currently support pluggable task drivers, however the
-interface that a task driver must implement is minimal. In the short term,
-custom drivers can be implemented in Go and compiled into the binary,
-however in the long term we plan to expose a plugin interface such that
-task drivers can be dynamically registered without recompiling the Nomad binary.
+If you have authored a device plugin that you believe will be useful to the
+broader Nomad community and you are committed to maintaining the plugin, please
+file a PR to add your plugin to this page.
 
+## Authoring Device Plugins
+
+Nomad has a plugin system for defining device drivers. External device plugins
+have the same user experience as built-in plugins. For details on authoring a
+device plugin, please refer to the plugin authoring guide.
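+
+As a rough sketch (not official API documentation), an external device
+plugin's entrypoint serves the implementation over Nomad's plugin machinery.
+The `mydevice` package and its `NewMyDevicePlugin` constructor below are
+hypothetical placeholders for your own implementation:
+
+```go
+package main
+
+import (
+	log "github.com/hashicorp/go-hclog"
+
+	"github.com/hashicorp/nomad/plugins"
+
+	// Hypothetical package implementing Nomad's device plugin interface.
+	"example.com/you/mydevice"
+)
+
+func main() {
+	// Serve launches the plugin so a Nomad client can discover and manage it.
+	plugins.Serve(factory)
+}
+
+// factory returns a new instance of the hypothetical device plugin.
+func factory(l log.Logger) interface{} {
+	return mydevice.NewMyDevicePlugin(l)
+}
+```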
diff --git a/website/source/docs/devices/index.html.md b/website/source/docs/devices/index.html.md
index 9d2831c33..538c5a67f 100644
--- a/website/source/docs/devices/index.html.md
+++ b/website/source/docs/devices/index.html.md
@@ -8,17 +8,13 @@ description: |-
 
 # Device Plugins
 
-Task drivers are used by Nomad clients to execute a task and provide resource
-isolation. By having extensible task drivers, Nomad has the flexibility to
-support a broad set of workloads across all major operating systems.
+Device plugins are used to detect and make devices available to tasks in Nomad.
+Devices are physical hardware that exists on a node, such as a GPU or an FPGA.
+By having extensible device plugins, Nomad has the flexibility to support a
+broad set of devices and allows the community to build additional device
+plugins as needed.
 
-The list of supported task drivers is provided on the left of this page.
-Each task driver documents the configuration available in a
-[job specification](/docs/job-specification/index.html), the environments it
-can be used in, and the resource isolation mechanisms available.
-
-Nomad strives to mask the details of running a task from users and instead
-provides a clean abstraction. It is possible for the same task to be executed
-with different isolation levels depending on the client running the task.
-The goal is to use the strictest isolation available and gracefully degrade
-protections where necessary.
+The list of supported device plugins is provided on the left of this page.
+Each device plugin documents its configuration and installation requirements,
+the attributes it fingerprints, and the environment variables it exposes to
+tasks.
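+
+Jobs request fingerprinted devices through the `device` stanza of a task's
+`resources`. A minimal sketch (the `nvidia/gpu` name assumes a node running
+the Nvidia plugin):
+
+```hcl
+resources {
+  # Request one GPU from the Nvidia device plugin.
+  device "nvidia/gpu" {
+    count = 1
+  }
+}
+```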
diff --git a/website/source/docs/devices/nvidia.html.md b/website/source/docs/devices/nvidia.html.md
index b8187c9eb..83a49b125 100644
--- a/website/source/docs/devices/nvidia.html.md
+++ b/website/source/docs/devices/nvidia.html.md
@@ -1,118 +1,300 @@
 ---
 layout: "docs"
-page_title: "Drivers: Raw Exec"
+page_title: "Device Plugins: Nvidia"
 sidebar_current: "docs-devices-nvidia"
 description: |-
-  The Raw Exec task driver simply fork/execs and provides no isolation.
+  The Nvidia Device Plugin detects and makes Nvidia devices available to tasks.
 ---
 
-# Raw Fork/Exec Driver
+# Nvidia GPU Device Plugin
 
-Name: `raw_exec`
+Name: `nvidia-gpu`
 
-The `raw_exec` driver is used to execute a command for a task without any
-isolation. Further, the task is started as the same user as the Nomad process.
-As such, it should be used with extreme care and is disabled by default.
+The Nvidia device plugin is used to expose Nvidia GPUs to Nomad. The Nvidia
+plugin is built into Nomad and does not need to be downloaded separately.
 
-## Task Configuration
+## Fingerprinted Attributes
+
+| Attribute          | Unit     |
+|--------------------|----------|
+| `memory`           | MiB      |
+| `power`            | W (Watt) |
+| `bar1`             | MiB      |
+| `driver_version`   | string   |
+| `cores_clock`      | MHz      |
+| `memory_clock`     | MHz      |
+| `pci_bandwidth`    | MB/s     |
+| `display_state`    | string   |
+| `persistence_mode` | string   |
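+
+These attributes can be referenced from `constraint` and `affinity` stanzas
+inside a job's `device` block to steer placement. A sketch, assuming the
+standard device constraint syntax; the thresholds are illustrative:
+
+```hcl
+resources {
+  device "nvidia/gpu" {
+    # Require a GPU with at least 4 GiB of memory...
+    constraint {
+      attribute = "${device.attr.memory}"
+      operator  = ">="
+      value     = "4 GiB"
+    }
+
+    # ...and prefer higher core clocks when multiple GPUs qualify.
+    affinity {
+      attribute = "${device.attr.cores_clock}"
+      operator  = ">="
+      value     = "800MHz"
+      weight    = 50
+    }
+  }
+}
+```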
+
+## Runtime Environment
+
+The `nvidia-gpu` device plugin exposes the following environment variables:
+
+* `NVIDIA_VISIBLE_DEVICES` - List of Nvidia GPU IDs available to the task.
+
+### Additional Task Configurations
+
+Additional environment variables can be set by the task to influence the
+runtime environment. See [Nvidia's
+documentation](https://github.com/NVIDIA/nvidia-container-runtime#environment-variables-oci-spec).
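+
+For example, a task might narrow the driver capabilities exposed to it via its
+`env` stanza (`NVIDIA_DRIVER_CAPABILITIES` is defined by the Nvidia container
+runtime, not by Nomad; the value shown is illustrative):
+
+```hcl
+task "cuda-job" {
+  driver = "docker"
+
+  env {
+    # Expose only the compute (CUDA) and utility (nvidia-smi) capabilities.
+    NVIDIA_DRIVER_CAPABILITIES = "compute,utility"
+  }
+}
+```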
+
+## Installation Requirements
+
+In order to use the `nvidia-gpu` device plugin, the following prerequisites
+must be met:
+
+1. GNU/Linux x86_64 with kernel version > 3.10
+2. NVIDIA GPU with Architecture > Fermi (2.1)
+3. NVIDIA drivers >= 340.29 with binary `nvidia-smi`
+
+### Docker Driver Requirements
+
+In order to use the Nvidia device plugin with the Docker driver, please follow
+the installation instructions for
+[`nvidia-docker`](https://github.com/NVIDIA/nvidia-docker/wiki/Installation-\(version-1.0\)).
+
+## Plugin Configuration
+
+```hcl
+plugin "nvidia-gpu" {
+  config {
+    ignored_gpu_ids    = ["GPU-fef8089b", "GPU-ac81e44d"]
+    fingerprint_period = "1m"
+  }
+}
+```
+
+The `nvidia-gpu` device plugin supports the following configuration options in
+the agent config:
+
+* `ignored_gpu_ids` `(array: [])` - Specifies the set of GPU UUIDs that
+  should be ignored when fingerprinting.
+
+* `fingerprint_period` `(string: "1m")` - The period at which to fingerprint
+  for device changes.
+
+## Restrictions
+
+The Nvidia integration only works with drivers that natively integrate with
+Nvidia's [container runtime
+library](https://github.com/NVIDIA/libnvidia-container).
+
+Nomad has tested support with the [`docker` driver][docker-driver] and plans to
+bring support to the built-in [`exec`][exec-driver] and [`java`][java-driver]
+drivers. Support for [`lxc`][lxc-driver] should be possible by installing the
+[Nvidia hook](https://github.com/lxc/lxc/blob/master/hooks/nvidia) but is not
+tested or documented by Nomad.
 
 ## Examples
 
-To run a binary present on the Node:
+Inspect a node with a GPU:
 
-```
-task "example" {
-  driver = "raw_exec"
+```sh
+$ nomad node status 4d46e59f
+ID            = 4d46e59f
+Name          = nomad
+Class         =
+DC            = dc1
+Drain         = false
+Eligibility   = eligible
+Status        = ready
+Uptime        = 19m43s
+Driver Status = docker,mock_driver,raw_exec
 
-  config {
-    # When running a binary that exists on the host, the path must be absolute/
-    command = "/bin/sleep"
-    args = ["1"]
-  }
-}
+Node Events
+Time                  Subsystem  Message
+2019-01-23T18:25:18Z  Cluster    Node registered
+
+Allocated Resources
+CPU          Memory      Disk
+0/15576 MHz  0 B/55 GiB  0 B/28 GiB
+
+Allocation Resource Utilization
+CPU          Memory
+0/15576 MHz  0 B/55 GiB
+
+Host Resource Utilization
+CPU             Memory          Disk
+2674/15576 MHz  1.5 GiB/55 GiB  3.0 GiB/31 GiB
+
+Device Resource Utilization
+nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416]  0 / 11441 MiB
+
+Allocations
+No allocations placed
 ```
 
-To execute a binary downloaded from an [`artifact`](/docs/job-specification/artifact.html):
+Display detailed statistics on a node with a GPU:
 
+```sh
+$ nomad node status -stats 4d46e59f
+ID            = 4d46e59f
+Name          = nomad
+Class         =
+DC            = dc1
+Drain         = false
+Eligibility   = eligible
+Status        = ready
+Uptime        = 19m59s
+Driver Status = docker,mock_driver,raw_exec
+
+Node Events
+Time                  Subsystem  Message
+2019-01-23T18:25:18Z  Cluster    Node registered
+
+Allocated Resources
+CPU          Memory      Disk
+0/15576 MHz  0 B/55 GiB  0 B/28 GiB
+
+Allocation Resource Utilization
+CPU          Memory
+0/15576 MHz  0 B/55 GiB
+
+Host Resource Utilization
+CPU             Memory          Disk
+2673/15576 MHz  1.5 GiB/55 GiB  3.0 GiB/31 GiB
+
+Device Resource Utilization
+nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416]  0 / 11441 MiB
+
+// ...TRUNCATED...
+
+Device Stats
+Device              = nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416]
+BAR1 buffer state   = 2 / 16384 MiB
+Decoder utilization = 0 %
+ECC L1 errors       = 0
+ECC L2 errors       = 0
+ECC memory errors   = 0
+Encoder utilization = 0 %
+GPU utilization     = 0 %
+Memory state        = 0 / 11441 MiB
+Memory utilization  = 0 %
+Power usage         = 37 / 149 W
+Temperature         = 34 C
+
+Allocations
+No allocations placed
 ```
-task "example" {
-  driver = "raw_exec"
 
-  config {
-    command = "name-of-my-binary"
-  }
+Run the following example job to see that the GPU is mounted in the
+container:
 
-  artifact {
-    source = "https://internal.file.server/name-of-my-binary"
-    options {
-      checksum = "sha256:abd123445ds4555555555"
+```hcl
+job "gpu-test" {
+  datacenters = ["dc1"]
+  type        = "batch"
+
+  group "smi" {
+    task "smi" {
+      driver = "docker"
+
+      config {
+        image   = "nvidia/cuda:9.0-base"
+        command = "nvidia-smi"
+      }
+
+      resources {
+        device "nvidia/gpu/Tesla K80" {}
+      }
+    }
+  }
+}
+```
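+
+The `device` name above pins an exact model using the full
+`<vendor>/<type>/<model>` form. As a sketch, a job that accepts any Nvidia GPU
+can use the shorter `nvidia/gpu` form with an optional `count`:
+
+```hcl
+resources {
+  # Request two GPUs of any model fingerprinted by the Nvidia plugin.
+  device "nvidia/gpu" {
+    count = 2
+  }
+}
+```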
 
-## Client Requirements
 
+```sh
+$ nomad run example.nomad
+==> Monitoring evaluation "21bd7584"
+    Evaluation triggered by job "gpu-test"
+    Allocation "d250baed" created: node "4d46e59f", group "smi"
+    Evaluation status changed: "pending" -> "complete"
+==> Evaluation "21bd7584" finished with status "complete"
 
-The `raw_exec` driver can run on all supported operating systems. For security
-reasons, it is disabled by default. To enable raw exec, the Nomad client
-configuration must explicitly enable the `raw_exec` driver in the client's
-[options](/docs/configuration/client.html#options):
+$ nomad alloc status d250baed
+ID                  = d250baed
+Eval ID             = 21bd7584
+Name                = gpu-test.smi[0]
+Node ID             = 4d46e59f
+Job ID              = example
+Job Version         = 0
+Client Status       = complete
+Client Description  = All tasks have completed
+Desired Status      = run
+Desired Description =
+Created             = 7s ago
+Modified            = 2s ago
 
-```
-client {
-  options = {
-    "driver.raw_exec.enable" = "1"
-  }
-}
+Task "smi" is "dead"
+Task Resources
+CPU        Memory       Disk     Addresses
+0/100 MHz  0 B/300 MiB  300 MiB
+
+Device Stats
+nvidia/gpu/Tesla K80[GPU-e1f6f4f1-1ea5-7b9d-5f03-338a9dc32416]  0 / 11441 MiB
+
+Task Events:
+Started At     = 2019-01-23T18:25:32Z
+Finished At    = 2019-01-23T18:25:34Z
+Total Restarts = 0
+Last Restart   = N/A
+
+Recent Events:
+Time                  Type        Description
+2019-01-23T18:25:34Z  Terminated  Exit Code: 0
+2019-01-23T18:25:32Z  Started     Task started by client
+2019-01-23T18:25:29Z  Task Setup  Building Task Directory
+2019-01-23T18:25:29Z  Received    Task received by client
+
+$ nomad alloc logs d250baed
+Wed Jan 23 18:25:32 2019
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
+|-------------------------------+----------------------+----------------------+
+| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
+|===============================+======================+======================|
+|   0  Tesla K80           On   | 00004477:00:00.0 Off |                    0 |
+| N/A   33C    P8    37W / 149W |      0MiB / 11441MiB |      0%      Default |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes:                                                       GPU Memory |
+|  GPU       PID   Type   Process name                             Usage      |
+|=============================================================================|
+|  No running processes found                                                 |
++-----------------------------------------------------------------------------+
 ```
 
-## Client Options
-
-* `driver.raw_exec.enable` - Specifies whether the driver should be enabled or
-  disabled.
-
-* `driver.raw_exec.no_cgroups` - Specifies whether the driver should not use
-  cgroups to manage the process group launched by the driver. By default,
-  cgroups are used to manage the process tree to ensure full cleanup of all
-  processes started by the task. The driver only uses cgroups when Nomad is
-  launched as root, on Linux and when cgroups are detected.
-
-## Client Attributes
-
-The `raw_exec` driver will set the following client attributes:
-
-* `driver.raw_exec` - This will be set to "1", indicating the driver is available.
-
-## Resource Isolation
-
-The `raw_exec` driver provides no isolation.
-
-If the launched process creates a new process group, it is possible that Nomad
-will leak processes on shutdown unless the application forwards signals
-properly. Nomad will not leak any processes if cgroups are being used to manage
-the process tree. Cgroups are used on Linux when Nomad is being run with
-appropriate priviledges, the cgroup system is mounted and the operator hasn't
-disabled cgroups for the driver.
+[docker-driver]: /docs/drivers/docker.html "Nomad docker Driver"
+[exec-driver]: /docs/drivers/exec.html "Nomad exec Driver"
+[java-driver]: /docs/drivers/java.html "Nomad java Driver"
+[lxc-driver]: /docs/drivers/lxc.html "Nomad lxc Driver"