csi: stub fingerprint on instance manager shutdown (#7388)
Run the plugin fingerprint one last time with a closed client during instance manager shutdown. This will return quickly and will give us a correctly-populated `PluginInfo` marked as unhealthy so the Nomad client can update the server about plugin health.
commit 32b94bf1a4 (parent 5a0bcd39d1)
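The pattern is roughly the following sketch (all types and helper names here are illustrative stand-ins, not Nomad's actual API): when the shutdown context fires, the run loop closes the plugin client, runs one final fingerprint under a short timeout, pushes the now-unhealthy info through the updater callback, and only then signals shutdown. The final fingerprint uses a one-second timeout rather than the regular fingerprint interval because the client is already closed and the calls return quickly; the timeout is only an upper bound.

// Sketch only: a stripped-down run loop showing the "one last fingerprint on
// shutdown" pattern. Everything here is an illustrative stand-in.
package main

import (
	"context"
	"fmt"
	"time"
)

type pluginInfo struct {
	Name    string
	Healthy bool
}

type manager struct {
	shutdownCtx context.Context
	shutdownCh  chan struct{}
	closeClient func()
	fingerprint func(context.Context) *pluginInfo // returns basic, unhealthy info once the client is closed
	updater     func(name string, info *pluginInfo)
}

func (m *manager) runLoop() {
	timer := time.NewTimer(0)
	for {
		select {
		case <-m.shutdownCtx.Done():
			m.closeClient()
			// The client is closed, so this final fingerprint returns quickly
			// with basic info marked unhealthy; report it before exiting.
			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
			info := m.fingerprint(ctx)
			cancel()
			if info != nil {
				m.updater(info.Name, info)
			}
			close(m.shutdownCh)
			return
		case <-timer.C:
			// Regular fingerprint on the normal (much longer) interval.
			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
			if info := m.fingerprint(ctx); info != nil {
				m.updater(info.Name, info)
			}
			cancel()
			timer.Reset(30 * time.Second)
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	m := &manager{
		shutdownCtx: ctx,
		shutdownCh:  make(chan struct{}),
		closeClient: func() {},
		fingerprint: func(context.Context) *pluginInfo { return &pluginInfo{Name: "example", Healthy: false} },
		updater:     func(name string, info *pluginInfo) { fmt.Printf("%s healthy=%v\n", name, info.Healthy) },
	}
	go m.runLoop()
	cancel() // shutdown: triggers the final fingerprint and the unhealthy update
	<-m.shutdownCh
}

The actual change, per file, follows.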
@@ -126,15 +126,26 @@ func (i *instanceManager) runLoop() {
				i.client.Close()
				i.client = nil
			}

			// run one last fingerprint so that we mark the plugin as unhealthy.
			// the client has been closed so this will return quickly with the
			// plugin's basic info
			ctx, cancelFn := i.requestCtxWithTimeout(time.Second)
			info := i.fp.fingerprint(ctx)
			cancelFn()
			if info != nil {
				i.updater(i.info.Name, info)
			}

			close(i.shutdownCh)
			return

		case <-timer.C:
			ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval)
			info := i.fp.fingerprint(ctx)
			cancelFn()
			if info != nil {
				i.updater(i.info.Name, info)
			}
			timer.Reset(managerFingerprintInterval)
		}
	}
@@ -1,11 +1,18 @@
package csimanager

import (
	"context"
	"fmt"
	"sync"
	"testing"
	"time"

	"github.com/hashicorp/nomad/client/dynamicplugins"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/csi"
	"github.com/hashicorp/nomad/plugins/csi/fake"
	"github.com/stretchr/testify/require"
)

func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager) {
@@ -21,10 +28,55 @@ func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager)
		info:   pinfo,
		client: tp,
		fp: &pluginFingerprinter{
			logger:                          logger.Named("fingerprinter"),
			info:                            pinfo,
			client:                          tp,
			fingerprintNode:                 true,
			hadFirstSuccessfulFingerprintCh: make(chan struct{}),
		},
	}
}

func TestInstanceManager_Shutdown(t *testing.T) {
	var pluginHealth bool
	var lock sync.Mutex
	ctx, cancelFn := context.WithCancel(context.Background())
	client, im := setupTestNodeInstanceManager(t)
	im.shutdownCtx = ctx
	im.shutdownCtxCancelFn = cancelFn
	im.shutdownCh = make(chan struct{})
	im.updater = func(_ string, info *structs.CSIInfo) {
		fmt.Println(info)
		lock.Lock()
		defer lock.Unlock()
		pluginHealth = info.Healthy
	}

	// set up a mock successful fingerprint so that we can get
	// a healthy plugin before shutting down
	client.NextPluginGetCapabilitiesResponse = &csi.PluginCapabilitySet{}
	client.NextPluginGetCapabilitiesErr = nil
	client.NextNodeGetInfoResponse = &csi.NodeGetInfoResponse{NodeID: "foo"}
	client.NextNodeGetInfoErr = nil
	client.NextNodeGetCapabilitiesResponse = &csi.NodeCapabilitySet{}
	client.NextNodeGetCapabilitiesErr = nil
	client.NextPluginProbeResponse = true

	go im.runLoop()

	require.Eventually(t, func() bool {
		lock.Lock()
		defer lock.Unlock()
		return pluginHealth
	}, 1*time.Second, 10*time.Millisecond)

	cancelFn() // fires im.shutdown()

	require.Eventually(t, func() bool {
		lock.Lock()
		defer lock.Unlock()
		return !pluginHealth
	}, 1*time.Second, 10*time.Millisecond)
}
@@ -5,6 +5,7 @@ package fake
import (
	"context"
	"errors"
	"fmt"
	"sync"

	"github.com/hashicorp/nomad/plugins/base"
@@ -232,5 +233,44 @@ func (c *Client) NodeUnpublishVolume(ctx context.Context, volumeID, targetPath string) error {

// Shutdown the client and ensure any connections are cleaned up.
func (c *Client) Close() error {
	c.NextPluginInfoResponse = nil
	c.NextPluginInfoErr = fmt.Errorf("closed client")

	c.NextPluginProbeResponse = false
	c.NextPluginProbeErr = fmt.Errorf("closed client")

	c.NextPluginGetInfoNameResponse = ""
	c.NextPluginGetInfoVersionResponse = ""
	c.NextPluginGetInfoErr = fmt.Errorf("closed client")

	c.NextPluginGetCapabilitiesResponse = nil
	c.NextPluginGetCapabilitiesErr = fmt.Errorf("closed client")

	c.NextControllerGetCapabilitiesResponse = nil
	c.NextControllerGetCapabilitiesErr = fmt.Errorf("closed client")

	c.NextControllerPublishVolumeResponse = nil
	c.NextControllerPublishVolumeErr = fmt.Errorf("closed client")

	c.NextControllerUnpublishVolumeResponse = nil
	c.NextControllerUnpublishVolumeErr = fmt.Errorf("closed client")

	c.NextControllerValidateVolumeErr = fmt.Errorf("closed client")

	c.NextNodeGetCapabilitiesResponse = nil
	c.NextNodeGetCapabilitiesErr = fmt.Errorf("closed client")

	c.NextNodeGetInfoResponse = nil
	c.NextNodeGetInfoErr = fmt.Errorf("closed client")

	c.NextNodeStageVolumeErr = fmt.Errorf("closed client")

	c.NextNodeUnstageVolumeErr = fmt.Errorf("closed client")

	c.NextNodePublishVolumeErr = fmt.Errorf("closed client")

	c.NextNodeUnpublishVolumeErr = fmt.Errorf("closed client")

	return nil
}
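For reference, a hypothetical use of the new Close behavior in a test (a sketch only, not taken from the Nomad test suite; the fake's method signatures here follow the csi.CSIPlugin interface and are assumptions): once Close is called, the stubbed responses are reset and every Next...Err reports "closed client", so any later RPC against the fake fails the way a terminated plugin would.

// Hypothetical sketch; assumes fake.Client exposes PluginProbe(ctx) (bool, error).
package csimanager_test

import (
	"context"
	"testing"

	"github.com/hashicorp/nomad/plugins/csi/fake"
	"github.com/stretchr/testify/require"
)

func TestFakeClientClose_sketch(t *testing.T) {
	c := &fake.Client{}
	c.NextPluginProbeResponse = true

	// Before Close, the probe returns the stubbed healthy response.
	healthy, err := c.PluginProbe(context.Background())
	require.NoError(t, err)
	require.True(t, healthy)

	require.NoError(t, c.Close())

	// After Close, every stubbed call reports the "closed client" error.
	healthy, err = c.PluginProbe(context.Background())
	require.Error(t, err)
	require.False(t, healthy)
}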