drainer: defer CSI plugins until last (#12324)

When a node is drained, system jobs are left until last so that
operators can rely on things like log shippers running even as their
applications are getting drained off. Include CSI plugins in this set
so that Controller plugins deployed as services can be handled as
gracefully as Node plugins that are running as system jobs.
This commit is contained in:
Tim Gross 2022-03-22 10:26:56 -04:00 committed by GitHub
parent 4635be07ab
commit 60cfeacd76
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 18 additions and 4 deletions

3
.changelog/12324.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
drainer: defer draining CSI plugin jobs until last, alongside system jobs
```

View File

@ -139,7 +139,7 @@ func (n *drainingNode) DrainingJobs() ([]structs.NamespacedID, error) {
jobIDs := make(map[structs.NamespacedID]struct{})
var jobs []structs.NamespacedID
for _, alloc := range allocs {
if alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem {
if alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem || alloc.Job.IsPlugin() {
continue
}

View File

@ -4566,6 +4566,18 @@ func (j *Job) IsMultiregion() bool {
return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0
}
// IsPlugin reports whether the job implements a plugin. A job counts as a
// plugin when at least one task in any of its task groups carries a CSI
// plugin configuration; CSI is currently the only plugin kind checked.
func (j *Job) IsPlugin() bool {
	for gi := range j.TaskGroups {
		tasks := j.TaskGroups[gi].Tasks
		for ti := range tasks {
			if tasks[ti].CSIPluginConfig == nil {
				continue
			}
			// One plugin task is enough to classify the whole job.
			return true
		}
	}
	return false
}
// VaultPolicies returns the set of Vault policies per task group, per task
func (j *Job) VaultPolicies() map[string]map[string]*Vault {
policies := make(map[string]map[string]*Vault, len(j.TaskGroups))

View File

@ -70,9 +70,8 @@ capability.
without being force stopped after a certain deadline.
- `-ignore-system`: Ignore system allows the drain to complete without
stopping system job allocations. By default system jobs are stopped
last. You should always use this flag when draining a node running
[CSI node plugins][internals-csi].
stopping system job allocations. By default system jobs (and CSI
plugins) are stopped last, after the `deadline` time has expired.
- `-keep-ineligible`: Keep ineligible will maintain the node's scheduling
ineligibility even if the drain is being disabled. This is useful when an