open-nomad/plugins/drivers/client.go

509 lines
14 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package drivers
import (
"context"
"errors"
"io"
"time"
"github.com/LK4D4/joincontext"
"github.com/golang/protobuf/ptypes"
hclog "github.com/hashicorp/go-hclog"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/pluginutils/grpcutils"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers/proto"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
sproto "github.com/hashicorp/nomad/plugins/shared/structs/proto"
"google.golang.org/grpc/status"
)
var _ DriverPlugin = &driverPluginClient{}
type driverPluginClient struct {
*base.BasePluginClient
client proto.DriverClient
logger hclog.Logger
// doneCtx is closed when the plugin exits
doneCtx context.Context
}
func (d *driverPluginClient) TaskConfigSchema() (*hclspec.Spec, error) {
req := &proto.TaskConfigSchemaRequest{}
resp, err := d.client.TaskConfigSchema(d.doneCtx, req)
if err != nil {
return nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
return resp.Spec, nil
}
func (d *driverPluginClient) Capabilities() (*Capabilities, error) {
req := &proto.CapabilitiesRequest{}
resp, err := d.client.Capabilities(d.doneCtx, req)
if err != nil {
return nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
caps := &Capabilities{}
if resp.Capabilities != nil {
caps.SendSignals = resp.Capabilities.SendSignals
caps.Exec = resp.Capabilities.Exec
caps.MustInitiateNetwork = resp.Capabilities.MustCreateNetwork
for _, mode := range resp.Capabilities.NetworkIsolationModes {
caps.NetIsolationModes = append(caps.NetIsolationModes, netIsolationModeFromProto(mode))
}
switch resp.Capabilities.FsIsolation {
case proto.DriverCapabilities_NONE:
caps.FSIsolation = FSIsolationNone
case proto.DriverCapabilities_CHROOT:
caps.FSIsolation = FSIsolationChroot
case proto.DriverCapabilities_IMAGE:
caps.FSIsolation = FSIsolationImage
default:
caps.FSIsolation = FSIsolationNone
}
caps.MountConfigs = MountConfigSupport(resp.Capabilities.MountConfigs)
caps.RemoteTasks = resp.Capabilities.RemoteTasks
caps.DisableLogCollection = resp.Capabilities.DisableLogCollection
}
return caps, nil
}
// Fingerprint the driver, return a chan that will be pushed to periodically and on changes to health
func (d *driverPluginClient) Fingerprint(ctx context.Context) (<-chan *Fingerprint, error) {
req := &proto.FingerprintRequest{}
// Join the passed context and the shutdown context
joinedCtx, _ := joincontext.Join(ctx, d.doneCtx)
stream, err := d.client.Fingerprint(joinedCtx, req)
if err != nil {
return nil, grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx)
}
ch := make(chan *Fingerprint, 1)
go d.handleFingerprint(ctx, ch, stream)
return ch, nil
}
func (d *driverPluginClient) handleFingerprint(reqCtx context.Context, ch chan *Fingerprint, stream proto.Driver_FingerprintClient) {
defer close(ch)
for {
pb, err := stream.Recv()
if err != nil {
if err != io.EOF {
ch <- &Fingerprint{
Err: grpcutils.HandleReqCtxGrpcErr(err, reqCtx, d.doneCtx),
}
}
// End the stream
return
}
f := &Fingerprint{
Attributes: pstructs.ConvertProtoAttributeMap(pb.Attributes),
Health: healthStateFromProto(pb.Health),
HealthDescription: pb.HealthDescription,
}
select {
case <-reqCtx.Done():
return
case ch <- f:
}
}
}
// RecoverTask does internal state recovery to be able to control the task of
// the given TaskHandle
func (d *driverPluginClient) RecoverTask(h *TaskHandle) error {
req := &proto.RecoverTaskRequest{Handle: taskHandleToProto(h)}
_, err := d.client.RecoverTask(d.doneCtx, req)
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
// StartTask starts execution of a task with the given TaskConfig. A TaskHandle
// is returned to the caller that can be used to recover state of the task,
// should the driver crash or exit prematurely.
func (d *driverPluginClient) StartTask(c *TaskConfig) (*TaskHandle, *DriverNetwork, error) {
req := &proto.StartTaskRequest{
Task: taskConfigToProto(c),
}
resp, err := d.client.StartTask(d.doneCtx, req)
if err != nil {
st := status.Convert(err)
if len(st.Details()) > 0 {
if rec, ok := st.Details()[0].(*sproto.RecoverableError); ok {
return nil, nil, structs.NewRecoverableError(err, rec.Recoverable)
}
}
return nil, nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
var net *DriverNetwork
if resp.NetworkOverride != nil {
net = &DriverNetwork{
PortMap: map[string]int{},
IP: resp.NetworkOverride.Addr,
AutoAdvertise: resp.NetworkOverride.AutoAdvertise,
}
for k, v := range resp.NetworkOverride.PortMap {
net.PortMap[k] = int(v)
}
}
return taskHandleFromProto(resp.Handle), net, nil
}
// WaitTask returns a channel that will have an ExitResult pushed to it once when the task
// exits on its own or is killed. If WaitTask is called after the task has exited, the channel
// will immedialy return the ExitResult. WaitTask can be called multiple times for
// the same task without issue.
func (d *driverPluginClient) WaitTask(ctx context.Context, id string) (<-chan *ExitResult, error) {
ch := make(chan *ExitResult)
go d.handleWaitTask(ctx, id, ch)
return ch, nil
}
func (d *driverPluginClient) handleWaitTask(ctx context.Context, id string, ch chan *ExitResult) {
defer close(ch)
var result ExitResult
req := &proto.WaitTaskRequest{
TaskId: id,
}
// Join the passed context and the shutdown context
joinedCtx, joinedCtxCancel := joincontext.Join(ctx, d.doneCtx)
defer joinedCtxCancel()
resp, err := d.client.WaitTask(joinedCtx, req)
if err != nil {
result.Err = grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx)
} else {
result.ExitCode = int(resp.Result.ExitCode)
result.Signal = int(resp.Result.Signal)
result.OOMKilled = resp.Result.OomKilled
if len(resp.Err) > 0 {
result.Err = errors.New(resp.Err)
}
}
ch <- &result
}
// StopTask stops the task with the given taskID. A timeout and signal can be
// given to control a graceful termination of the task. The driver will send the
// given signal to the task and wait for the given timeout for it to exit. If the
// task does not exit within the timeout it will be forcefully killed.
func (d *driverPluginClient) StopTask(taskID string, timeout time.Duration, signal string) error {
req := &proto.StopTaskRequest{
TaskId: taskID,
Timeout: ptypes.DurationProto(timeout),
Signal: signal,
}
_, err := d.client.StopTask(d.doneCtx, req)
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
// DestroyTask removes the task from the driver's in memory state. The task
// cannot be running unless force is set to true. If force is set to true the
// driver will forcefully terminate the task before removing it.
func (d *driverPluginClient) DestroyTask(taskID string, force bool) error {
req := &proto.DestroyTaskRequest{
TaskId: taskID,
Force: force,
}
_, err := d.client.DestroyTask(d.doneCtx, req)
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
// InspectTask returns status information for a task
func (d *driverPluginClient) InspectTask(taskID string) (*TaskStatus, error) {
req := &proto.InspectTaskRequest{TaskId: taskID}
resp, err := d.client.InspectTask(d.doneCtx, req)
if err != nil {
return nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
status, err := taskStatusFromProto(resp.Task)
if err != nil {
return nil, err
}
if resp.Driver != nil {
status.DriverAttributes = resp.Driver.Attributes
}
if resp.NetworkOverride != nil {
status.NetworkOverride = &DriverNetwork{
PortMap: map[string]int{},
IP: resp.NetworkOverride.Addr,
AutoAdvertise: resp.NetworkOverride.AutoAdvertise,
}
for k, v := range resp.NetworkOverride.PortMap {
status.NetworkOverride.PortMap[k] = int(v)
}
}
return status, nil
}
// TaskStats returns resource usage statistics for the task
func (d *driverPluginClient) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
req := &proto.TaskStatsRequest{
TaskId: taskID,
CollectionInterval: ptypes.DurationProto(interval),
}
ctx, _ = joincontext.Join(ctx, d.doneCtx)
stream, err := d.client.TaskStats(ctx, req)
if err != nil {
st := status.Convert(err)
if len(st.Details()) > 0 {
if rec, ok := st.Details()[0].(*sproto.RecoverableError); ok {
return nil, structs.NewRecoverableError(err, rec.Recoverable)
}
}
return nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
ch := make(chan *cstructs.TaskResourceUsage, 1)
go d.handleStats(ctx, ch, stream)
return ch, nil
}
func (d *driverPluginClient) handleStats(ctx context.Context, ch chan<- *cstructs.TaskResourceUsage, stream proto.Driver_TaskStatsClient) {
defer close(ch)
for {
resp, err := stream.Recv()
if ctx.Err() != nil {
// Context canceled; exit gracefully
return
}
if err != nil {
if err != io.EOF {
d.logger.Error("error receiving stream from TaskStats driver RPC, closing stream", "error", err)
}
// End of stream
return
}
stats, err := TaskStatsFromProto(resp.Stats)
if err != nil {
d.logger.Error("failed to decode stats from RPC", "error", err, "stats", resp.Stats)
continue
}
select {
case ch <- stats:
case <-ctx.Done():
return
}
}
}
// TaskEvents returns a channel that will receive events from the driver about all
// tasks such as lifecycle events, terminal errors, etc.
func (d *driverPluginClient) TaskEvents(ctx context.Context) (<-chan *TaskEvent, error) {
req := &proto.TaskEventsRequest{}
// Join the passed context and the shutdown context
joinedCtx, _ := joincontext.Join(ctx, d.doneCtx)
stream, err := d.client.TaskEvents(joinedCtx, req)
if err != nil {
return nil, grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx)
}
ch := make(chan *TaskEvent, 1)
go d.handleTaskEvents(ctx, ch, stream)
return ch, nil
}
func (d *driverPluginClient) handleTaskEvents(reqCtx context.Context, ch chan *TaskEvent, stream proto.Driver_TaskEventsClient) {
defer close(ch)
for {
ev, err := stream.Recv()
if err != nil {
if err != io.EOF {
ch <- &TaskEvent{
Err: grpcutils.HandleReqCtxGrpcErr(err, reqCtx, d.doneCtx),
}
}
// End the stream
return
}
timestamp, _ := ptypes.Timestamp(ev.Timestamp)
event := &TaskEvent{
TaskID: ev.TaskId,
AllocID: ev.AllocId,
TaskName: ev.TaskName,
Annotations: ev.Annotations,
Message: ev.Message,
Timestamp: timestamp,
}
select {
case <-reqCtx.Done():
return
case ch <- event:
}
}
}
// SignalTask will send the given signal to the specified task
func (d *driverPluginClient) SignalTask(taskID string, signal string) error {
req := &proto.SignalTaskRequest{
TaskId: taskID,
Signal: signal,
}
_, err := d.client.SignalTask(d.doneCtx, req)
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
// ExecTask will run the given command within the execution context of the task.
// The driver will wait for the given timeout for the command to complete before
// terminating it. The stdout and stderr of the command will be return to the caller,
// along with other exit information such as exit code.
func (d *driverPluginClient) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) {
req := &proto.ExecTaskRequest{
TaskId: taskID,
Command: cmd,
Timeout: ptypes.DurationProto(timeout),
}
resp, err := d.client.ExecTask(d.doneCtx, req)
if err != nil {
return nil, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
result := &ExecTaskResult{
Stdout: resp.Stdout,
Stderr: resp.Stderr,
ExitResult: exitResultFromProto(resp.Result),
}
return result, nil
}
var _ ExecTaskStreamingRawDriver = (*driverPluginClient)(nil)
func (d *driverPluginClient) ExecTaskStreamingRaw(ctx context.Context,
taskID string,
command []string,
tty bool,
execStream ExecTaskStream) error {
stream, err := d.client.ExecTaskStreaming(ctx)
if err != nil {
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
err = stream.Send(&proto.ExecTaskStreamingRequest{
Setup: &proto.ExecTaskStreamingRequest_Setup{
TaskId: taskID,
Command: command,
Tty: tty,
},
})
if err != nil {
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
errCh := make(chan error, 1)
go func() {
for {
m, err := execStream.Recv()
if err == io.EOF {
return
} else if err != nil {
errCh <- err
return
}
if err := stream.Send(m); err != nil {
errCh <- err
return
}
}
}()
for {
select {
case err := <-errCh:
return err
default:
}
m, err := stream.Recv()
if err == io.EOF {
// Once we get to the end of stream successfully, we can ignore errCh:
// e.g. input write failures after process terminates shouldn't cause method to fail
return nil
} else if err != nil {
return err
}
if err := execStream.Send(m); err != nil {
return err
}
}
}
var _ DriverNetworkManager = (*driverPluginClient)(nil)
func (d *driverPluginClient) CreateNetwork(allocID string, _ *NetworkCreateRequest) (*NetworkIsolationSpec, bool, error) {
req := &proto.CreateNetworkRequest{
AllocId: allocID,
}
resp, err := d.client.CreateNetwork(d.doneCtx, req)
if err != nil {
return nil, false, grpcutils.HandleGrpcErr(err, d.doneCtx)
}
return NetworkIsolationSpecFromProto(resp.IsolationSpec), resp.Created, nil
}
func (d *driverPluginClient) DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error {
if spec == nil {
return nil
}
req := &proto.DestroyNetworkRequest{
AllocId: allocID,
IsolationSpec: NetworkIsolationSpecToProto(spec),
}
_, err := d.client.DestroyNetwork(d.doneCtx, req)
if err != nil {
return grpcutils.HandleGrpcErr(err, d.doneCtx)
}
return nil
}