// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

syntax = "proto3";
package hashicorp.nomad.plugins.device;
option go_package = "proto";

import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";
import "plugins/shared/structs/proto/attribute.proto";
import "plugins/shared/structs/proto/stats.proto";

// DevicePlugin is the API exposed by device plugins.
service DevicePlugin {
  // Fingerprint allows the device plugin to return a set of
  // detected devices and provide a mechanism to update the state of
  // the device.
  rpc Fingerprint(FingerprintRequest) returns (stream FingerprintResponse) {}

  // Reserve is called by the client before starting an allocation
  // that requires access to the plugin's devices. The plugin can use
  // this to run any setup steps and provide the mounting details to
  // the Nomad client.
  rpc Reserve(ReserveRequest) returns (ReserveResponse) {}

  // Stats returns a stream of device statistics.
  rpc Stats(StatsRequest) returns (stream StatsResponse) {}
}

// FingerprintRequest is used to request for devices to be fingerprinted.
// It currently carries no fields.
message FingerprintRequest {}

// FingerprintResponse returns a set of detected devices.
message FingerprintResponse {
  // device_group is a group of devices that share a vendor, device_type, and
  // device_name. This is returned as a set so that a single plugin could
  // potentially detect several device types and models.
  repeated DeviceGroup device_group = 1;
}

// DeviceGroup is a group of devices that share a vendor, device type and name.
message DeviceGroup {
  // vendor is the name of the vendor of the device.
  string vendor = 1;

  // device_type is the type of the device (gpu, fpga, etc).
  string device_type = 2;

  // device_name is the name of the device.
  string device_name = 3;

  // devices is the set of devices detected by the plugin.
  repeated DetectedDevice devices = 4;

  // attributes allows adding attributes to be used for constraints or
  // affinities. Entries are keyed by a string name.
  map<string, hashicorp.nomad.plugins.shared.structs.Attribute> attributes = 5;
}

// DetectedDevice is a single detected device.
message DetectedDevice {
  // ID is the ID of the device. This ID is used during allocation and must be
  // stable across restarts of the device driver.
  // buf:lint:ignore FIELD_LOWER_SNAKE_CASE
  string ID = 1;

  // healthy is the health of the device.
  bool healthy = 2;

  // health_description allows the device plugin to optionally
  // annotate the health field with a human readable reason.
  string health_description = 3;

  // hw_locality is optionally set to expose hardware locality information for
  // more optimal placement decisions.
  DeviceLocality hw_locality = 4;
}

// DeviceLocality is used to expose HW locality information about a device.
message DeviceLocality {
  // pci_bus_id is the PCI bus ID for the device. If reported, it
  // allows Nomad to make NUMA aware optimizations.
  string pci_bus_id = 1;
}

// ReserveRequest is used to ask the device driver for information on
// how to allocate the requested devices.
message ReserveRequest {
  // device_ids are the requested devices.
  repeated string device_ids = 1;
}

// ReserveResponse informs Nomad how to expose the requested devices
// to the task.
message ReserveResponse {
  // container_res contains information on how to mount the device
  // into a task isolated using container technologies (where the
  // host is shared).
  ContainerReservation container_res = 1;
}

// ContainerReservation returns how to mount the device into a
// container that shares the host OS.
message ContainerReservation {
  // envs is the list of environment variables to be set.
  map<string, string> envs = 1;

  // mounts are the mounts for the task.
  repeated Mount mounts = 2;

  // devices are the devices for the task.
  repeated DeviceSpec devices = 3;
}

// Mount specifies a host volume to mount into a task, such as a location
// where device libraries or tools are installed on the host and task.
message Mount {
  // task_path is the path of the mount within the task.
  string task_path = 1;

  // host_path is the path of the mount on the host.
  string host_path = 2;

  // read_only, if set, makes the mount read-only.
  bool read_only = 3;
}

// DeviceSpec specifies a host device to mount into a task.
message DeviceSpec {
  // task_path is the path of the device within the task.
  string task_path = 1;

  // host_path is the path of the device on the host.
  string host_path = 2;

  // permissions are the cgroups permissions of the device. Candidates are
  // one or more of:
  //  * r - allows task to read from the specified device.
  //  * w - allows task to write to the specified device.
  //  * m - allows task to create device files that do not yet exist.
  string permissions = 3;
}

// StatsRequest is used to parameterize the retrieval of statistics.
message StatsRequest {
  // collection_interval is the duration in which to collect statistics.
  google.protobuf.Duration collection_interval = 1;
}

// StatsResponse returns the statistics for each device group.
message StatsResponse {
  // groups contains statistics for each device group.
  repeated DeviceGroupStats groups = 1;
}

// DeviceGroupStats contains statistics for each device of a particular
// device group, identified by the vendor, type and name of the device.
message DeviceGroupStats {
  // vendor is the vendor part of the device group's identity.
  string vendor = 1;

  // type is the device type part of the device group's identity.
  string type = 2;

  // name is the device name part of the device group's identity.
  string name = 3;

  // instance_stats is a mapping of each device ID to its statistics.
  map<string, DeviceStats> instance_stats = 4;
}

// DeviceStats is the statistics for an individual device.
message DeviceStats {
  // summary exposes a single summary metric that should be the most
  // informative to users.
  hashicorp.nomad.plugins.shared.structs.StatValue summary = 1;

  // stats contains the verbose statistics for the device.
  hashicorp.nomad.plugins.shared.structs.StatObject stats = 2;

  // timestamp is the time the statistics were collected.
  google.protobuf.Timestamp timestamp = 3;
}