open-nomad/client/allocrunner/state/state.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package state

import (
	"time"

	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/nomad/structs"
)

// State captures the state of the allocation runner.
//
// Copy duplicates every field of this struct, and ClientTerminalStatus
// classifies ClientStatus as terminal or not.
type State struct {
	// ClientStatus captures the overall state of the allocation.
	ClientStatus string

	// ClientDescription is an optional human-readable description of the
	// allocation's client state.
	ClientDescription string

	// DeploymentStatus captures the status of the deployment. It may be nil;
	// SetDeploymentStatus allocates it on first use and ClearDeploymentStatus
	// leaves a nil value untouched.
	DeploymentStatus *structs.AllocDeploymentStatus

	// TaskStates is a snapshot of task states.
	// NOTE(review): the map key is presumably the task name — confirm
	// against the code that populates it.
	TaskStates map[string]*structs.TaskState

	// NetworkStatus captures network details not known until runtime.
	NetworkStatus *structs.AllocNetworkStatus
}

// SetDeploymentStatus records a health result on the allocation's
// DeploymentStatus, allocating the status first when none exists yet.
//
// Only the client-controlled fields, Healthy and Timestamp, are written. The
// Canary and ModifyIndex fields should only be updated by the server.
func (s *State) SetDeploymentStatus(timestamp time.Time, healthy bool) {
	status := s.DeploymentStatus
	if status == nil {
		status = new(structs.AllocDeploymentStatus)
		s.DeploymentStatus = status
	}

	status.Timestamp = timestamp
	// Healthy is a pointer field, so store the address of the parameter copy.
	status.Healthy = &healthy
}

// ClearDeploymentStatus resets the client-controlled DeploymentStatus fields,
// Healthy and Timestamp, to their zero values. It is a no-op when there is no
// DeploymentStatus.
//
// The Canary and ModifyIndex fields should only be updated by the server and
// are left as they are.
func (s *State) ClearDeploymentStatus() {
	if status := s.DeploymentStatus; status != nil {
		status.Timestamp = time.Time{}
		status.Healthy = nil
	}
}

// Copy returns a deep copy of State.
//
// A nil receiver yields nil rather than panicking, so callers can copy a
// possibly-nil *State the same way this method calls Copy on the possibly-nil
// DeploymentStatus and NetworkStatus pointers.
//
// For a non-nil receiver the returned TaskStates map is always non-nil, even
// when the source map is nil or empty.
func (s *State) Copy() *State {
	if s == nil {
		return nil
	}

	// Pre-size the map; each task state is copied individually so the result
	// does not share *structs.TaskState values with the receiver.
	taskStates := make(map[string]*structs.TaskState, len(s.TaskStates))
	for k, v := range s.TaskStates {
		taskStates[k] = v.Copy()
	}

	return &State{
		ClientStatus:      s.ClientStatus,
		ClientDescription: s.ClientDescription,
		DeploymentStatus:  s.DeploymentStatus.Copy(),
		TaskStates:        taskStates,
		NetworkStatus:     s.NetworkStatus.Copy(),
	}
}

// ClientTerminalStatus reports whether the client status is terminal, meaning
// it will no longer transition. The complete, failed, and lost client
// statuses are terminal; every other value is not.
func (s *State) ClientTerminalStatus() bool {
	status := s.ClientStatus
	return status == structs.AllocClientStatusComplete ||
		status == structs.AllocClientStatusFailed ||
		status == structs.AllocClientStatusLost
}

// AllocVolumes groups CSIVolumeStub entries keyed by volume request name.
// NOTE(review): presumably one AllocVolumes is persisted per allocation —
// confirm against the code that stores it.
type AllocVolumes struct {
	CSIVolumes map[string]*CSIVolumeStub // volume request name -> CSIVolumeStub
}

// CSIVolumeStub is a stripped-down version of the CSIVolume with just the
// relevant data that we need to persist about the volume.
//
// The string fields carry identifiers for the volume (VolumeID and
// VolumeExternalID), its plugin (PluginID), and the node (ExternalNodeID).
// NOTE(review): ExternalNodeID is presumably the storage provider's ID for
// this client node — confirm against the code that populates it.
// MountInfo is a pointer and may therefore be nil; TODO confirm when it is
// expected to be set.
type CSIVolumeStub struct {
	VolumeID         string
	VolumeExternalID string
	PluginID         string
	ExternalNodeID   string
	MountInfo        *csimanager.MountInfo
}
[COMPLIANCE] Add Copyright and License Headers 2023-04-10 15:36:59 +00:00			`// Copyright (c) HashiCorp, Inc.`
			`// SPDX-License-Identifier: MPL-2.0`

Initial V2 alloc runner 2018-06-22 00:35:07 +00:00			`package state`

			`import (`
arv2: implement alloc health watching Also remove initial alloc from broadcaster as it just caused useless extra processing. 2018-08-30 21:33:50 +00:00			`"time"`

CSI: persist previous mounts on client to restore during restart (#17840) When claiming a CSI volume, we need to ensure the CSI node plugin is running before we send any CSI RPCs. This extends even to the controller publish RPC because it requires the storage provider's "external node ID" for the client. This primarily impacts client restarts but also is a problem if the node plugin exits (and fingerprints) while the allocation that needs a CSI volume claim is being placed. Unfortunately there's no mapping of volume to plugin ID available in the jobspec, so we don't have enough information to wait on plugins until we either get the volume from the server or retrieve the plugin ID from data we've persisted on the client. If we always require getting the volume from the server before making the claim, a client restart for disconnected clients will cause all the allocations that need CSI volumes to fail. Even while connected, checking in with the server to verify the volume's plugin before trying to make a claim RPC is inherently racy, so we'll leave that case as-is and it will fail the claim if the node plugin needed to support a newly-placed allocation is flapping such that the node fingerprint is changing. This changeset persists a minimum subset of data about the volume and its plugin in the client state DB, and retrieves that data during the CSI hook's prerun to avoid re-claiming and remounting the volume unnecessarily. This changeset also updates the RPC handler to use the external node ID from the claim whenever it is available. Fixes: #13028 2023-07-10 17:20:15 +00:00			`"github.com/hashicorp/nomad/client/pluginmanager/csimanager"`
Initial V2 alloc runner 2018-06-22 00:35:07 +00:00			`"github.com/hashicorp/nomad/nomad/structs"`
			`)`

			`// State captures the state of the allocation runner.`
			`type State struct {`
Update state with server 2018-07-19 00:06:44 +00:00			`// ClientStatus captures the overall state of the allocation`
			`ClientStatus string`
Initial V2 alloc runner 2018-06-22 00:35:07 +00:00
Update state with server 2018-07-19 00:06:44 +00:00			`// ClientDescription is an optional human readable description of the`
Initial V2 alloc runner 2018-06-22 00:35:07 +00:00			`// allocations client state`
Update state with server 2018-07-19 00:06:44 +00:00			`ClientDescription string`
Initial V2 alloc runner 2018-06-22 00:35:07 +00:00
			`// DeploymentStatus captures the status of the deployment`
			`DeploymentStatus *structs.AllocDeploymentStatus`
client: expose task state to client The interesting decision in this commit was to expose AR's state and not a fully materialized Allocation struct. AR.clientAlloc builds an Alloc that contains the task state, so I considered simply memoizing and exposing that method. However, that would lead to AR having two awkwardly similar methods: - Alloc() - which returns the server-sent alloc - ClientAlloc() - which returns the fully materialized client alloc Since ClientAlloc() could be memoized it would be just as cheap to call as Alloc(), so why not replace Alloc() entirely? Replacing Alloc() entirely would require Update() to immediately materialize the task states on server-sent Allocs as there may have been local task state changes since the server received an Alloc update. This quickly becomes difficult to reason about: should Update hooks use the TaskStates? Are state changes caused by TR Update hooks immediately reflected in the Alloc? Should AR persist its copy of the Alloc? If so, are its TaskStates canonical or the TaskStates on TR? So! Forget that. Let's separate the static Allocation from the dynamic AR & TR state! - AR.Alloc() is for static Allocation access (often for the Job) - AR.AllocState() is for the dynamic AR & TR runtime state (deployment status, task states, etc). If code needs to know the status of a task: AllocState() If code needs to know the names of tasks: Alloc() It should be very easy for a developer to reason about which method they should call and what they can do with the return values. 2018-09-27 00:08:43 +00:00
			`// TaskStates is a snapshot of task states.`
			`TaskStates map[string]*structs.TaskState`
client: add NetworkStatus to Allocation (#8657) 2020-10-12 17:43:04 +00:00
			`// NetworkStatus captures network details not known until runtime`
			`NetworkStatus *structs.AllocNetworkStatus`
Initial V2 alloc runner 2018-06-22 00:35:07 +00:00			`}`
arv2: implement alloc health watching Also remove initial alloc from broadcaster as it just caused useless extra processing. 2018-08-30 21:33:50 +00:00
			`// SetDeploymentStatus is a helper for updating the client-controlled`
			`// DeploymentStatus fields: Healthy and Timestamp. The Canary and ModifyIndex`
			`// fields should only be updated by the server.`
			`func (s *State) SetDeploymentStatus(timestamp time.Time, healthy bool) {`
			`if s.DeploymentStatus == nil {`
			`s.DeploymentStatus = &structs.AllocDeploymentStatus{}`
			`}`

			`s.DeploymentStatus.Healthy = &healthy`
			`s.DeploymentStatus.Timestamp = timestamp`
			`}`

			`// ClearDeploymentStatus is a helper to clear the client-controlled`
			`// DeploymentStatus fields: Healthy and Timestamp. The Canary and ModifyIndex`
			`// fields should only be updated by the server.`
			`func (s *State) ClearDeploymentStatus() {`
			`if s.DeploymentStatus == nil {`
			`return`
			`}`

			`s.DeploymentStatus.Healthy = nil`
			`s.DeploymentStatus.Timestamp = time.Time{}`
			`}`
client: expose task state to client The interesting decision in this commit was to expose AR's state and not a fully materialized Allocation struct. AR.clientAlloc builds an Alloc that contains the task state, so I considered simply memoizing and exposing that method. However, that would lead to AR having two awkwardly similar methods: - Alloc() - which returns the server-sent alloc - ClientAlloc() - which returns the fully materialized client alloc Since ClientAlloc() could be memoized it would be just as cheap to call as Alloc(), so why not replace Alloc() entirely? Replacing Alloc() entirely would require Update() to immediately materialize the task states on server-sent Allocs as there may have been local task state changes since the server received an Alloc update. This quickly becomes difficult to reason about: should Update hooks use the TaskStates? Are state changes caused by TR Update hooks immediately reflected in the Alloc? Should AR persist its copy of the Alloc? If so, are its TaskStates canonical or the TaskStates on TR? So! Forget that. Let's separate the static Allocation from the dynamic AR & TR state! - AR.Alloc() is for static Allocation access (often for the Job) - AR.AllocState() is for the dynamic AR & TR runtime state (deployment status, task states, etc). If code needs to know the status of a task: AllocState() If code needs to know the names of tasks: Alloc() It should be very easy for a developer to reason about which method they should call and what they can do with the return values. 2018-09-27 00:08:43 +00:00
			`// Copy returns a deep copy of State.`
			`func (s *State) Copy() *State {`
			`taskStates := make(map[string]*structs.TaskState, len(s.TaskStates))`
			`for k, v := range s.TaskStates {`
			`taskStates[k] = v.Copy()`
			`}`
			`return &State{`
			`ClientStatus: s.ClientStatus,`
			`ClientDescription: s.ClientDescription,`
			`DeploymentStatus: s.DeploymentStatus.Copy(),`
			`TaskStates: taskStates,`
client: add NetworkStatus to Allocation (#8657) 2020-10-12 17:43:04 +00:00			`NetworkStatus: s.NetworkStatus.Copy(),`
client: expose task state to client The interesting decision in this commit was to expose AR's state and not a fully materialized Allocation struct. AR.clientAlloc builds an Alloc that contains the task state, so I considered simply memoizing and exposing that method. However, that would lead to AR having two awkwardly similar methods: - Alloc() - which returns the server-sent alloc - ClientAlloc() - which returns the fully materialized client alloc Since ClientAlloc() could be memoized it would be just as cheap to call as Alloc(), so why not replace Alloc() entirely? Replacing Alloc() entirely would require Update() to immediately materialize the task states on server-sent Allocs as there may have been local task state changes since the server received an Alloc update. This quickly becomes difficult to reason about: should Update hooks use the TaskStates? Are state changes caused by TR Update hooks immediately reflected in the Alloc? Should AR persist its copy of the Alloc? If so, are its TaskStates canonical or the TaskStates on TR? So! Forget that. Let's separate the static Allocation from the dynamic AR & TR state! - AR.Alloc() is for static Allocation access (often for the Job) - AR.AllocState() is for the dynamic AR & TR runtime state (deployment status, task states, etc). If code needs to know the status of a task: AllocState() If code needs to know the names of tasks: Alloc() It should be very easy for a developer to reason about which method they should call and what they can do with the return values. 2018-09-27 00:08:43 +00:00			`}`
			`}`
stale allocation data leads to incorrect (and even negative) metrics (#5637) * client: was not using up-to-date client state in determining which alloc count towards allocated resources * Update client/client.go Co-Authored-By: cgbaker <cgbaker@hashicorp.com> 2019-05-07 19:54:36 +00:00
			`// ClientTerminalStatus returns if the client status is terminal and will no longer transition`
chore: fixup inconsistent method receiver names. (#11704) 2021-12-20 10:44:21 +00:00			`func (s *State) ClientTerminalStatus() bool {`
			`switch s.ClientStatus {`
stale allocation data leads to incorrect (and even negative) metrics (#5637) * client: was not using up-to-date client state in determining which alloc count towards allocated resources * Update client/client.go Co-Authored-By: cgbaker <cgbaker@hashicorp.com> 2019-05-07 19:54:36 +00:00			`case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed, structs.AllocClientStatusLost:`
			`return true`
			`default:`
			`return false`
			`}`
			`}`
CSI: persist previous mounts on client to restore during restart (#17840) When claiming a CSI volume, we need to ensure the CSI node plugin is running before we send any CSI RPCs. This extends even to the controller publish RPC because it requires the storage provider's "external node ID" for the client. This primarily impacts client restarts but also is a problem if the node plugin exits (and fingerprints) while the allocation that needs a CSI volume claim is being placed. Unfortunately there's no mapping of volume to plugin ID available in the jobspec, so we don't have enough information to wait on plugins until we either get the volume from the server or retrieve the plugin ID from data we've persisted on the client. If we always require getting the volume from the server before making the claim, a client restart for disconnected clients will cause all the allocations that need CSI volumes to fail. Even while connected, checking in with the server to verify the volume's plugin before trying to make a claim RPC is inherently racy, so we'll leave that case as-is and it will fail the claim if the node plugin needed to support a newly-placed allocation is flapping such that the node fingerprint is changing. This changeset persists a minimum subset of data about the volume and its plugin in the client state DB, and retrieves that data during the CSI hook's prerun to avoid re-claiming and remounting the volume unnecessarily. This changeset also updates the RPC handler to use the external node ID from the claim whenever it is available. Fixes: #13028 2023-07-10 17:20:15 +00:00
			`type AllocVolumes struct {`
			`CSIVolumes map[string]*CSIVolumeStub // volume request name -> CSIVolumeStub`
			`}`

			`// CSIVolumeStub is a stripped-down version of the CSIVolume with just the`
			`// relevant data that we need to persist about the volume.`
			`type CSIVolumeStub struct {`
			`VolumeID string`
			`VolumeExternalID string`
			`PluginID string`
			`ExternalNodeID string`
			`MountInfo *csimanager.MountInfo`
			`}`