csi: stub fingerprint on instance manager shutdown (#7388)

Run the plugin fingerprint one last time with a closed client during
instance manager shutdown. This will return quickly and will give us a
correctly-populated `PluginInfo` marked as unhealthy so the Nomad
client can update the server about plugin health.
This commit is contained in:
Tim Gross 2020-03-19 17:09:49 -04:00 committed by Tim Gross
parent 5a0bcd39d1
commit 32b94bf1a4
3 changed files with 112 additions and 9 deletions

View file

@ -126,15 +126,26 @@ func (i *instanceManager) runLoop() {
i.client.Close()
i.client = nil
}
close(i.shutdownCh)
return
case <-timer.C:
ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval)
// run one last fingerprint so that we mark the plugin as unhealthy.
// the client has been closed so this will return quickly with the
// plugin's basic info
ctx, cancelFn := i.requestCtxWithTimeout(time.Second)
info := i.fp.fingerprint(ctx)
cancelFn()
i.updater(i.info.Name, info)
if info != nil {
i.updater(i.info.Name, info)
}
close(i.shutdownCh)
return
case <-timer.C:
ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval)
info := i.fp.fingerprint(ctx)
cancelFn()
if info != nil {
i.updater(i.info.Name, info)
}
timer.Reset(managerFingerprintInterval)
}
}

View file

@ -1,11 +1,18 @@
package csimanager
import (
"context"
"fmt"
"sync"
"testing"
"time"
"github.com/hashicorp/nomad/client/dynamicplugins"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/csi"
"github.com/hashicorp/nomad/plugins/csi/fake"
"github.com/stretchr/testify/require"
)
func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager) {
@ -21,10 +28,55 @@ func setupTestNodeInstanceManager(t *testing.T) (*fake.Client, *instanceManager)
info: pinfo,
client: tp,
fp: &pluginFingerprinter{
logger: logger.Named("fingerprinter"),
info: pinfo,
client: tp,
fingerprintNode: true,
logger: logger.Named("fingerprinter"),
info: pinfo,
client: tp,
fingerprintNode: true,
hadFirstSuccessfulFingerprintCh: make(chan struct{}),
},
}
}
func TestInstanceManager_Shutdown(t *testing.T) {
var pluginHealth bool
var lock sync.Mutex
ctx, cancelFn := context.WithCancel(context.Background())
client, im := setupTestNodeInstanceManager(t)
im.shutdownCtx = ctx
im.shutdownCtxCancelFn = cancelFn
im.shutdownCh = make(chan struct{})
im.updater = func(_ string, info *structs.CSIInfo) {
fmt.Println(info)
lock.Lock()
defer lock.Unlock()
pluginHealth = info.Healthy
}
// set up a mock successful fingerprint so that we can get
// a healthy plugin before shutting down
client.NextPluginGetCapabilitiesResponse = &csi.PluginCapabilitySet{}
client.NextPluginGetCapabilitiesErr = nil
client.NextNodeGetInfoResponse = &csi.NodeGetInfoResponse{NodeID: "foo"}
client.NextNodeGetInfoErr = nil
client.NextNodeGetCapabilitiesResponse = &csi.NodeCapabilitySet{}
client.NextNodeGetCapabilitiesErr = nil
client.NextPluginProbeResponse = true
go im.runLoop()
require.Eventually(t, func() bool {
lock.Lock()
defer lock.Unlock()
return pluginHealth
}, 1*time.Second, 10*time.Millisecond)
cancelFn() // fires im.shutdown()
require.Eventually(t, func() bool {
lock.Lock()
defer lock.Unlock()
return !pluginHealth
}, 1*time.Second, 10*time.Millisecond)
}

View file

@ -5,6 +5,7 @@ package fake
import (
"context"
"errors"
"fmt"
"sync"
"github.com/hashicorp/nomad/plugins/base"
@ -232,5 +233,44 @@ func (c *Client) NodeUnpublishVolume(ctx context.Context, volumeID, targetPath s
// Shutdown the client and ensure any connections are cleaned up.
func (c *Client) Close() error {
c.NextPluginInfoResponse = nil
c.NextPluginInfoErr = fmt.Errorf("closed client")
c.NextPluginProbeResponse = false
c.NextPluginProbeErr = fmt.Errorf("closed client")
c.NextPluginGetInfoNameResponse = ""
c.NextPluginGetInfoVersionResponse = ""
c.NextPluginGetInfoErr = fmt.Errorf("closed client")
c.NextPluginGetCapabilitiesResponse = nil
c.NextPluginGetCapabilitiesErr = fmt.Errorf("closed client")
c.NextControllerGetCapabilitiesResponse = nil
c.NextControllerGetCapabilitiesErr = fmt.Errorf("closed client")
c.NextControllerPublishVolumeResponse = nil
c.NextControllerPublishVolumeErr = fmt.Errorf("closed client")
c.NextControllerUnpublishVolumeResponse = nil
c.NextControllerUnpublishVolumeErr = fmt.Errorf("closed client")
c.NextControllerValidateVolumeErr = fmt.Errorf("closed client")
c.NextNodeGetCapabilitiesResponse = nil
c.NextNodeGetCapabilitiesErr = fmt.Errorf("closed client")
c.NextNodeGetInfoResponse = nil
c.NextNodeGetInfoErr = fmt.Errorf("closed client")
c.NextNodeStageVolumeErr = fmt.Errorf("closed client")
c.NextNodeUnstageVolumeErr = fmt.Errorf("closed client")
c.NextNodePublishVolumeErr = fmt.Errorf("closed client")
c.NextNodeUnpublishVolumeErr = fmt.Errorf("closed client")
return nil
}