open-nomad/client/allocrunner/state/state.go
Tim Gross ad7355e58b
CSI: persist previous mounts on client to restore during restart (#17840)
When claiming a CSI volume, we need to ensure the CSI node plugin is running
before we send any CSI RPCs. This extends even to the controller publish RPC
because it requires the storage provider's "external node ID" for the
client. This primarily impacts client restarts but is also a problem if the node
plugin exits (and fingerprints) while the allocation that needs a CSI volume
claim is being placed.

Unfortunately there's no mapping of volume to plugin ID available in the
jobspec, so we don't have enough information to wait on plugins until we either
get the volume from the server or retrieve the plugin ID from data we've
persisted on the client.

If we always require getting the volume from the server before making the claim,
a client restart for disconnected clients will cause all the allocations that
need CSI volumes to fail. Even while connected, checking in with the server to
verify the volume's plugin before trying to make a claim RPC is inherently racy,
so we'll leave that case as-is and it will fail the claim if the node plugin
needed to support a newly-placed allocation is flapping such that the node
fingerprint is changing.

This changeset persists a minimum subset of data about the volume and its plugin
in the client state DB, and retrieves that data during the CSI hook's prerun to
avoid re-claiming and remounting the volume unnecessarily.

This changeset also updates the RPC handler to use the external node ID from the
claim whenever it is available.

Fixes: #13028
2023-07-10 13:20:15 -04:00

94 lines
2.7 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package state
import (
"time"
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/nomad/structs"
)
// State captures the state of the allocation runner.
type State struct {
	// ClientStatus captures the overall state of the allocation. The values
	// considered terminal are enumerated in ClientTerminalStatus.
	ClientStatus string
	// ClientDescription is an optional human-readable description of the
	// allocation's client state.
	ClientDescription string
	// DeploymentStatus captures the status of the deployment. It may be nil;
	// SetDeploymentStatus allocates it on first use.
	DeploymentStatus *structs.AllocDeploymentStatus
	// TaskStates is a snapshot of task states.
	// NOTE(review): presumably keyed by task name -- confirm against callers.
	TaskStates map[string]*structs.TaskState
	// NetworkStatus captures network details not known until runtime.
	NetworkStatus *structs.AllocNetworkStatus
}
// SetDeploymentStatus records the client-controlled portion of the
// deployment status: the Healthy flag and its Timestamp. A DeploymentStatus
// is allocated first if the State does not have one yet. The Canary and
// ModifyIndex fields are left alone; they should only be updated by the
// server.
func (s *State) SetDeploymentStatus(timestamp time.Time, healthy bool) {
	status := s.DeploymentStatus
	if status == nil {
		// Lazily create the status so callers need not initialize it.
		status = &structs.AllocDeploymentStatus{}
		s.DeploymentStatus = status
	}

	status.Timestamp = timestamp
	status.Healthy = &healthy
}
// ClearDeploymentStatus resets the client-controlled portion of the
// deployment status: Healthy becomes nil and Timestamp becomes the zero
// time. It is a no-op when no DeploymentStatus is set. The Canary and
// ModifyIndex fields are left alone; they should only be updated by the
// server.
func (s *State) ClearDeploymentStatus() {
	if status := s.DeploymentStatus; status != nil {
		status.Timestamp = time.Time{}
		status.Healthy = nil
	}
}
// Copy returns a deep copy of State, or nil when called on a nil receiver.
//
// The returned TaskStates map is always non-nil, even when the source map is
// nil or empty. DeploymentStatus and NetworkStatus may be nil on the source;
// their Copy methods are invoked regardless and are relied upon to accept a
// nil receiver.
func (s *State) Copy() *State {
	// Guard against a nil receiver so that Copy is as forgiving as the
	// nested Copy calls it depends on, instead of panicking on s.TaskStates.
	if s == nil {
		return nil
	}

	taskStates := make(map[string]*structs.TaskState, len(s.TaskStates))
	for key, ts := range s.TaskStates {
		taskStates[key] = ts.Copy()
	}

	return &State{
		ClientStatus:      s.ClientStatus,
		ClientDescription: s.ClientDescription,
		DeploymentStatus:  s.DeploymentStatus.Copy(),
		TaskStates:        taskStates,
		NetworkStatus:     s.NetworkStatus.Copy(),
	}
}
// ClientTerminalStatus reports whether the client status is terminal, i.e.
// it will no longer transition. Complete, failed, and lost are the terminal
// statuses; anything else yields false.
func (s *State) ClientTerminalStatus() bool {
	status := s.ClientStatus
	return status == structs.AllocClientStatusComplete ||
		status == structs.AllocClientStatusFailed ||
		status == structs.AllocClientStatusLost
}
// AllocVolumes groups the CSIVolumeStub entries recorded for a set of volume
// requests, indexed by the volume request name.
type AllocVolumes struct {
	CSIVolumes map[string]*CSIVolumeStub // volume request name -> CSIVolumeStub
}
// CSIVolumeStub is a stripped-down version of the CSIVolume with just the
// relevant data that we need to persist about the volume.
type CSIVolumeStub struct {
	// VolumeID identifies the volume.
	// NOTE(review): presumably the Nomad-side volume ID -- confirm.
	VolumeID string
	// VolumeExternalID is the external identifier of the volume.
	// NOTE(review): presumably the storage provider's ID -- confirm.
	VolumeExternalID string
	// PluginID identifies the CSI plugin associated with the volume.
	PluginID string
	// ExternalNodeID is the external node identifier.
	// NOTE(review): presumably the storage provider's ID for this client
	// node -- confirm.
	ExternalNodeID string
	// MountInfo holds the csimanager mount details for the volume.
	// NOTE(review): presumably nil until the volume has been mounted --
	// confirm against the code that populates it.
	MountInfo *csimanager.MountInfo
}