scheduler: prevent panic in spread iterator during alloc stop

The spread iterator can panic when processing an evaluation, resulting
in an unrecoverable state in the cluster. Whenever a panicked server
restarts and quorum is restored, the next server to dequeue the
evaluation will panic.

To trigger this state:
* The job must have `max_parallel = 0` and a `canary >= 1`.
* The job must not have a `spread` block.
* The job must have a previous version.
* The previous version must have a `spread` block and at least one
  failed allocation.

In this scenario, the desired changes include `(place 1+) (stop
1+), (ignore n) (canary 1)`. Before the scheduler can place the canary
allocation, it tries to find out which allocations can be
stopped. This passes back through the stack so that we can determine
previous-node penalties, etc. We call `SetJob` on the stack with the
previous version of the job, which will include assessing the `spread`
block (even though the results are unused). The task group spread info
state from that pass through the spread iterator is not reset when we
call `SetJob` again. When the new job version iterates over the
`groupPropertySets`, it will get an empty `spreadAttributeMap`,
resulting in an unexpected nil pointer dereference.

This changeset resets the spread iterator internal state when setting
the job, logging with a bypass around the bug in case we hit similar
cases, and a test that panics the scheduler without the patch.
This commit is contained in:
Tim Gross 2022-02-02 13:26:05 -05:00 committed by Luiz Aoqui
parent 15f9d54dea
commit 74486d86fb
No known key found for this signature in database
GPG Key ID: 29F459C0D9CBB573
3 changed files with 113 additions and 0 deletions

3
.changelog/12039.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:security
Prevent panic in spread iterator during allocation stop. [CVE-2022-24684](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-24684)
```

View File

@ -71,6 +71,12 @@ func (iter *SpreadIterator) SetJob(job *structs.Job) {
if job.Spreads != nil {
iter.jobSpreads = job.Spreads
}
// reset group spread/property so that when we temporarily SetJob
// to an older version to calculate stops we don't leak old
// versions of spread/properties to the new job version
iter.tgSpreadInfo = make(map[string]spreadAttributeMap)
iter.groupPropertySets = make(map[string][]*propertySet)
}
func (iter *SpreadIterator) SetTaskGroup(tg *structs.TaskGroup) {
@ -134,6 +140,15 @@ func (iter *SpreadIterator) Next() *RankedNode {
spreadAttributeMap := iter.tgSpreadInfo[tgName]
spreadDetails := spreadAttributeMap[pset.targetAttribute]
if spreadDetails == nil {
iter.ctx.Logger().Named("spread").Error(
"error reading spread attribute map for task group",
"task_group", tgName,
"target", pset.targetAttribute,
)
continue
}
if len(spreadDetails.desiredCounts) == 0 {
// When desired counts map is empty the user didn't specify any targets
// Use even spreading scoring algorithm for this scenario

View File

@ -9,6 +9,7 @@ import (
"fmt"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
@ -811,3 +812,97 @@ func validateEqualSpread(h *Harness) error {
}
return fmt.Errorf("expected even distributon of allocs to racks, but got:\n%+v", countSet)
}
func TestSpreadPanicDowngrade(t *testing.T) {
h := NewHarness(t)
nodes := []*structs.Node{}
for i := 0; i < 5; i++ {
node := mock.Node()
nodes = append(nodes, node)
err := h.State.UpsertNode(structs.MsgTypeTestSetup,
h.NextIndex(), node)
require.NoError(t, err)
}
// job version 1
// max_parallel = 0, canary = 1, spread != nil, 1 failed alloc
job1 := mock.Job()
job1.Spreads = []*structs.Spread{
{
Attribute: "${node.unique.name}",
Weight: 50,
SpreadTarget: []*structs.SpreadTarget{},
},
}
job1.Update = structs.UpdateStrategy{
Stagger: time.Duration(30 * time.Second),
MaxParallel: 0,
}
job1.Status = structs.JobStatusRunning
job1.TaskGroups[0].Count = 4
job1.TaskGroups[0].Update = &structs.UpdateStrategy{
Stagger: time.Duration(30 * time.Second),
MaxParallel: 1,
HealthCheck: "checks",
MinHealthyTime: time.Duration(30 * time.Second),
HealthyDeadline: time.Duration(9 * time.Minute),
ProgressDeadline: time.Duration(10 * time.Minute),
AutoRevert: true,
Canary: 1,
}
job1.Version = 1
job1.TaskGroups[0].Count = 5
err := h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job1)
require.NoError(t, err)
allocs := []*structs.Allocation{}
for i := 0; i < 4; i++ {
alloc := mock.Alloc()
alloc.Job = job1
alloc.JobID = job1.ID
alloc.NodeID = nodes[i].ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
Timestamp: time.Now(),
Canary: false,
ModifyIndex: h.NextIndex(),
}
if i == 0 {
alloc.DeploymentStatus.Canary = true
}
if i == 1 {
alloc.ClientStatus = structs.AllocClientStatusFailed
}
allocs = append(allocs, alloc)
}
err = h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)
// job version 2
// max_parallel = 0, canary = 1, spread == nil
job2 := job1.Copy()
job2.Version = 2
job2.Spreads = nil
err = h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job2)
require.NoError(t, err)
eval := &structs.Evaluation{
Namespace: job2.Namespace,
ID: uuid.Generate(),
Priority: job2.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job2.ID,
Status: structs.EvalStatusPending,
}
err = h.State.UpsertEvals(structs.MsgTypeTestSetup,
h.NextIndex(), []*structs.Evaluation{eval})
require.NoError(t, err)
processErr := h.Process(NewServiceScheduler, eval)
require.NoError(t, processErr, "failed to process eval")
require.Len(t, h.Plans, 1)
}