csi: use a blocking initial connection with timeout (#7965)

The plugin supervisor lazily connects to plugins, but this means we
only get "Unavailable" back from the gRPC call in cases where the
plugin can never be reached (for example, if the Nomad client has the
wrong permissions for the socket).

This changeset improves the operator experience by switching to a
blocking `DialContext`. It eagerly connects so that we can
validate the connection is real and get a "failed to open" error in
cases where Nomad can't establish the initial connection.
This commit is contained in:
Tim Gross 2020-05-15 08:17:11 -04:00 committed by GitHub
commit 6a463dc13a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 2 deletions

View file

@ -335,10 +335,10 @@ func (h *csiPluginSupervisorHook) supervisorLoopOnce(ctx context.Context, socket
}
client, err := csi.NewClient(socketPath, h.logger.Named("csi_client").With("plugin.name", h.task.CSIPluginConfig.ID, "plugin.type", h.task.CSIPluginConfig.Type))
defer client.Close()
if err != nil {
return false, fmt.Errorf("failed to create csi client: %v", err)
}
defer client.Close()
healthy, err := client.PluginProbe(ctx)
if err != nil {

View file

@ -114,8 +114,12 @@ func NewClient(addr string, logger hclog.Logger) (CSIPlugin, error) {
}
func newGrpcConn(addr string, logger hclog.Logger) (*grpc.ClientConn, error) {
conn, err := grpc.Dial(
ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
defer cancel()
conn, err := grpc.DialContext(
ctx,
addr,
grpc.WithBlock(),
grpc.WithInsecure(),
grpc.WithUnaryInterceptor(logging.UnaryClientInterceptor(logger)),
grpc.WithStreamInterceptor(logging.StreamClientInterceptor(logger)),