csi: use a blocking initial connection with timeout (#7965)
The plugin supervisor lazily connects to plugins, but this means we only get "Unavailable" back from the gRPC call in cases where the plugin can never be reached (for example, if the Nomad client has the wrong permissions for the socket). This changeset improves the operator experience by switching to a blocking `grpc.DialContext` (using `grpc.WithBlock()` and a timeout). It eagerly connects so that we can validate that the connection is real and get a "failed to open" error in cases where Nomad can't establish the initial connection.
This commit is contained in:
commit
6a463dc13a
|
@ -335,10 +335,10 @@ func (h *csiPluginSupervisorHook) supervisorLoopOnce(ctx context.Context, socket
|
|||
}
|
||||
|
||||
client, err := csi.NewClient(socketPath, h.logger.Named("csi_client").With("plugin.name", h.task.CSIPluginConfig.ID, "plugin.type", h.task.CSIPluginConfig.Type))
|
||||
defer client.Close()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to create csi client: %v", err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
healthy, err := client.PluginProbe(ctx)
|
||||
if err != nil {
|
||||
|
|
|
@ -114,8 +114,12 @@ func NewClient(addr string, logger hclog.Logger) (CSIPlugin, error) {
|
|||
}
|
||||
|
||||
func newGrpcConn(addr string, logger hclog.Logger) (*grpc.ClientConn, error) {
|
||||
conn, err := grpc.Dial(
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
|
||||
defer cancel()
|
||||
conn, err := grpc.DialContext(
|
||||
ctx,
|
||||
addr,
|
||||
grpc.WithBlock(),
|
||||
grpc.WithInsecure(),
|
||||
grpc.WithUnaryInterceptor(logging.UnaryClientInterceptor(logger)),
|
||||
grpc.WithStreamInterceptor(logging.StreamClientInterceptor(logger)),
|
||||
|
|
Loading…
Reference in a new issue