2018-08-13 17:29:29 +00:00
|
|
|
package device
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"io"
|
2018-09-28 17:09:01 +00:00
|
|
|
"time"
|
2018-08-13 17:29:29 +00:00
|
|
|
|
2018-09-28 17:09:01 +00:00
|
|
|
"github.com/LK4D4/joincontext"
|
|
|
|
"github.com/golang/protobuf/ptypes"
|
2018-08-13 17:29:29 +00:00
|
|
|
"github.com/hashicorp/nomad/plugins/base"
|
|
|
|
"github.com/hashicorp/nomad/plugins/device/proto"
|
|
|
|
netctx "golang.org/x/net/context"
|
|
|
|
"google.golang.org/grpc/codes"
|
|
|
|
"google.golang.org/grpc/status"
|
|
|
|
)
|
|
|
|
|
|
|
|
// devicePluginClient implements the client side of a remote device plugin, using
|
|
|
|
// gRPC to communicate to the remote plugin.
|
|
|
|
type devicePluginClient struct {
|
|
|
|
// basePluginClient is embedded to give access to the base plugin methods.
|
|
|
|
*base.BasePluginClient
|
|
|
|
|
|
|
|
client proto.DevicePluginClient
|
2018-09-28 17:09:01 +00:00
|
|
|
|
|
|
|
// doneCtx is closed when the plugin exits
|
|
|
|
doneCtx context.Context
|
2018-08-13 17:29:29 +00:00
|
|
|
}
|
|
|
|
|
2018-08-20 22:19:08 +00:00
|
|
|
// Fingerprint is used to retrieve the set of devices and their health from the
|
|
|
|
// device plugin. An error may be immediately returned if the fingerprint call
|
|
|
|
// could not be made or as part of the streaming response. If the context is
|
|
|
|
// cancelled, the error will be propogated.
|
2018-08-13 17:29:29 +00:00
|
|
|
func (d *devicePluginClient) Fingerprint(ctx context.Context) (<-chan *FingerprintResponse, error) {
|
2018-09-28 17:09:01 +00:00
|
|
|
// Join the passed context and the shutdown context
|
|
|
|
ctx, _ = joincontext.Join(ctx, d.doneCtx)
|
|
|
|
|
2018-08-13 17:29:29 +00:00
|
|
|
var req proto.FingerprintRequest
|
|
|
|
stream, err := d.client.Fingerprint(ctx, &req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
out := make(chan *FingerprintResponse, 1)
|
|
|
|
go d.handleFingerprint(ctx, stream, out)
|
|
|
|
return out, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleFingerprint should be launched in a goroutine and handles converting
|
|
|
|
// the gRPC stream to a channel. Exits either when context is cancelled or the
|
|
|
|
// stream has an error.
|
|
|
|
func (d *devicePluginClient) handleFingerprint(
|
|
|
|
ctx netctx.Context,
|
|
|
|
stream proto.DevicePlugin_FingerprintClient,
|
|
|
|
out chan *FingerprintResponse) {
|
|
|
|
|
|
|
|
for {
|
|
|
|
resp, err := stream.Recv()
|
|
|
|
if err != nil {
|
|
|
|
if err != io.EOF {
|
|
|
|
out <- &FingerprintResponse{
|
2018-09-28 17:09:01 +00:00
|
|
|
Error: d.handleStreamErr(err, ctx),
|
2018-08-13 17:29:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// End the stream
|
|
|
|
close(out)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send the response
|
|
|
|
out <- &FingerprintResponse{
|
|
|
|
Devices: convertProtoDeviceGroups(resp.GetDeviceGroup()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *devicePluginClient) Reserve(deviceIDs []string) (*ContainerReservation, error) {
|
|
|
|
// Build the request
|
|
|
|
req := &proto.ReserveRequest{
|
|
|
|
DeviceIds: deviceIDs,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the request
|
|
|
|
resp, err := d.client.Reserve(context.Background(), req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Convert the response
|
|
|
|
out := convertProtoContainerReservation(resp.GetContainerRes())
|
|
|
|
return out, nil
|
|
|
|
}
|
2018-08-27 23:11:07 +00:00
|
|
|
|
|
|
|
// Stats is used to retrieve device statistics from the device plugin. An error
|
|
|
|
// may be immediately returned if the stats call could not be made or as part of
|
|
|
|
// the streaming response. If the context is cancelled, the error will be
|
|
|
|
// propogated.
|
2018-09-28 17:09:01 +00:00
|
|
|
func (d *devicePluginClient) Stats(ctx context.Context, interval time.Duration) (<-chan *StatsResponse, error) {
|
|
|
|
// Join the passed context and the shutdown context
|
|
|
|
ctx, _ = joincontext.Join(ctx, d.doneCtx)
|
|
|
|
|
|
|
|
req := proto.StatsRequest{
|
|
|
|
CollectionInterval: ptypes.DurationProto(interval),
|
|
|
|
}
|
2018-08-27 23:11:07 +00:00
|
|
|
stream, err := d.client.Stats(ctx, &req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
out := make(chan *StatsResponse, 1)
|
|
|
|
go d.handleStats(ctx, stream, out)
|
|
|
|
return out, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleStats should be launched in a goroutine and handles converting
|
|
|
|
// the gRPC stream to a channel. Exits either when context is cancelled or the
|
|
|
|
// stream has an error.
|
|
|
|
func (d *devicePluginClient) handleStats(
|
|
|
|
ctx netctx.Context,
|
|
|
|
stream proto.DevicePlugin_StatsClient,
|
|
|
|
out chan *StatsResponse) {
|
|
|
|
|
|
|
|
for {
|
|
|
|
resp, err := stream.Recv()
|
|
|
|
if err != nil {
|
|
|
|
if err != io.EOF {
|
|
|
|
out <- &StatsResponse{
|
2018-09-28 17:09:01 +00:00
|
|
|
Error: d.handleStreamErr(err, ctx),
|
2018-08-27 23:11:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// End the stream
|
|
|
|
close(out)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Send the response
|
|
|
|
out <- &StatsResponse{
|
|
|
|
Groups: convertProtoDeviceGroupsStats(resp.GetGroups()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-09-28 17:09:01 +00:00
|
|
|
|
|
|
|
// handleStreamErr is used to handle a non io.EOF error in a stream. It handles
|
|
|
|
// detecting if the plugin has shutdown
|
|
|
|
func (d *devicePluginClient) handleStreamErr(err error, ctx context.Context) error {
|
|
|
|
if err == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determine if the error is because the plugin shutdown
|
|
|
|
if errStatus, ok := status.FromError(err); ok && errStatus.Code() == codes.Unavailable {
|
|
|
|
// Potentially wait a little before returning an error so we can detect
|
|
|
|
// the exit
|
|
|
|
select {
|
|
|
|
case <-d.doneCtx.Done():
|
|
|
|
err = base.ErrPluginShutdown
|
|
|
|
case <-ctx.Done():
|
|
|
|
err = ctx.Err()
|
|
|
|
|
|
|
|
// There is no guarantee that the select will choose the
|
|
|
|
// doneCtx first so we have to double check
|
|
|
|
select {
|
|
|
|
case <-d.doneCtx.Done():
|
|
|
|
err = base.ErrPluginShutdown
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
case <-time.After(3 * time.Second):
|
|
|
|
// Its okay to wait a while since the connection isn't available and
|
|
|
|
// on local host it is likely shutting down. It is not expected for
|
|
|
|
// this to ever reach even close to 3 seconds.
|
|
|
|
}
|
|
|
|
|
|
|
|
// It is an error we don't know how to handle, so return it
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Context was cancelled
|
|
|
|
if errStatus := status.FromContextError(ctx.Err()); errStatus.Code() == codes.Canceled {
|
|
|
|
return context.Canceled
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|