// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package scheduler

import (
	"fmt"
	"math"
	"math/rand"
	"sort"
	"testing"
	"time"

	"github.com/shoenig/test"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"

	"github.com/hashicorp/go-set"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
)

func TestSpreadIterator_SingleAttribute(t *testing.T) {
	ci.Parallel(t)

	state, ctx := testContext(t)
	dcs := []string{"dc1", "dc2", "dc1", "dc1"}
	var nodes []*RankedNode

	// Add these nodes to the state store
	for i, dc := range dcs {
		node := mock.Node()
		node.Datacenter = dc
		if err := state.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node); err != nil {
			t.Fatalf("failed to upsert node: %v", err)
		}
		nodes = append(nodes, &RankedNode{Node: node})
	}

	static := NewStaticRankIterator(ctx, nodes)

	job := mock.Job()
	tg := job.TaskGroups[0]
	job.TaskGroups[0].Count = 10

	// add allocs to nodes in dc1
	upserting := []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			EvalID:    uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			EvalID:    uuid.Generate(),
			NodeID:    nodes[2].Node.ID,
		},
	}
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, upserting); err != nil {
		t.Fatalf("failed to UpsertAllocs: %v", err)
	}

	// Create spread target of 80% in dc1
	// Implicitly, this means 20% in dc2
	spread := &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "dc1",
				Percent: 80,
			},
		},
	}
	tg.Spreads = []*structs.Spread{spread}
	spreadIter := NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

	out := collectRanked(scoreNorm)

	// Expect nodes in dc1 with existing allocs to get a boost
	// Boost should be ((desiredCount-actual)/desired)*spreadWeight
	// For this test, that becomes dc1 = (8-3)/8 = 0.625, and dc2 = (2-1)/2 = 0.5
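	// The "actual" counts include the candidate allocation being scored: dc1
	// already has two upserted allocs plus the candidate (3 of a desired 8),
	// while dc2 only has the candidate (1 of a desired 2).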
	expectedScores := map[string]float64{
		"dc1": 0.625,
		"dc2": 0.5,
	}
	for _, rn := range out {
		require.Equal(t, expectedScores[rn.Node.Datacenter], rn.FinalScore)
	}

	// Update the plan to add more allocs to nodes in dc1
	// After this step there are enough allocs to meet the desired count in dc1
	ctx.plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
		// Should be ignored as it is a different job.
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: "bbb",
			JobID:     "ignore 2",
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
	}
	ctx.plan.NodeAllocation[nodes[3].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[3].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[3].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[3].Node.ID,
		},
	}

	// Reset the scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)
	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)
	out = collectRanked(scoreNorm)

	// Expect nodes in dc2 with existing allocs to get a boost
	// DC1 nodes are not boosted because there are enough allocs to meet
	// the desired count
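	// dc1 now accounts for the 2 upserted allocs plus 5 proposed in the plan
	// (the alloc belonging to the other job is ignored), so the candidate
	// placement reaches the desired count of 8 and the boost drops to 0.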
	expectedScores = map[string]float64{
		"dc1": 0,
		"dc2": 0.5,
	}
	for _, rn := range out {
		require.Equal(t, expectedScores[rn.Node.Datacenter], rn.FinalScore)
	}
}

func TestSpreadIterator_MultipleAttributes(t *testing.T) {
	ci.Parallel(t)

	state, ctx := testContext(t)
	dcs := []string{"dc1", "dc2", "dc1", "dc1"}
	rack := []string{"r1", "r1", "r2", "r2"}
	var nodes []*RankedNode

	// Add these nodes to the state store
	for i, dc := range dcs {
		node := mock.Node()
		node.Datacenter = dc
		node.Meta["rack"] = rack[i]
		if err := state.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node); err != nil {
			t.Fatalf("failed to upsert node: %v", err)
		}
		nodes = append(nodes, &RankedNode{Node: node})
	}

	static := NewStaticRankIterator(ctx, nodes)

	job := mock.Job()
	tg := job.TaskGroups[0]
	job.TaskGroups[0].Count = 10

	// add allocs to nodes in dc1
	upserting := []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			EvalID:    uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			EvalID:    uuid.Generate(),
			NodeID:    nodes[2].Node.ID,
		},
	}
	if err := state.UpsertAllocs(structs.MsgTypeTestSetup, 1000, upserting); err != nil {
		t.Fatalf("failed to UpsertAllocs: %v", err)
	}

	spread1 := &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "dc1",
				Percent: 60,
			},
			{
				Value:   "dc2",
				Percent: 40,
			},
		},
	}

	spread2 := &structs.Spread{
		Weight:    50,
		Attribute: "${meta.rack}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "r1",
				Percent: 40,
			},
			{
				Value:   "r2",
				Percent: 60,
			},
		},
	}

	tg.Spreads = []*structs.Spread{spread1, spread2}
	spreadIter := NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

	out := collectRanked(scoreNorm)

	// Score comes from combining two different spread factors
	// The second node should have the highest score because it has no allocs and it is in dc2/r1
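	// The combined score works out to a weight-proportional average of the
	// per-spread boosts. For nodes[1] (dc2/r1): dc boost (4-1)/4 = 0.75 at
	// weight 100 and rack boost (4-2)/4 = 0.5 at weight 50, giving
	// (75 + 25) / 150 ~ 0.667.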
	expectedScores := map[string]float64{
		nodes[0].Node.ID: 0.500,
		nodes[1].Node.ID: 0.667,
		nodes[2].Node.ID: 0.556,
		nodes[3].Node.ID: 0.556,
	}
	for _, rn := range out {
		require.Equal(t, fmt.Sprintf("%.3f", expectedScores[rn.Node.ID]), fmt.Sprintf("%.3f", rn.FinalScore))
	}
}

func TestSpreadIterator_EvenSpread(t *testing.T) {
	ci.Parallel(t)

	state, ctx := testContext(t)
	dcs := []string{"dc1", "dc2", "dc1", "dc2", "dc1", "dc2", "dc2", "dc1", "dc1", "dc1"}
	var nodes []*RankedNode

	// Add these nodes to the state store
	for i, dc := range dcs {
		node := mock.Node()
		node.Datacenter = dc
		if err := state.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node); err != nil {
			t.Fatalf("failed to upsert node: %v", err)
		}
		nodes = append(nodes, &RankedNode{Node: node})
	}

	static := NewStaticRankIterator(ctx, nodes)
	job := mock.Job()
	tg := job.TaskGroups[0]
	job.TaskGroups[0].Count = 10

	// Configure even spread across node.datacenter
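	// A spread block with no explicit targets asks the scheduler to balance
	// the task group's allocations evenly across every observed value of the
	// attribute (here, each datacenter).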
	spread := &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
	}
	tg.Spreads = []*structs.Spread{spread}
	spreadIter := NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

	out := collectRanked(scoreNorm)

	// Nothing has been placed yet, so nodes in both datacenters score 0
	expectedScores := map[string]float64{
		"dc1": 0,
		"dc2": 0,
	}
	for _, rn := range out {
		require.Equal(t, fmt.Sprintf("%.3f", expectedScores[rn.Node.Datacenter]), fmt.Sprintf("%.3f", rn.FinalScore))
	}

	// Update the plan to add allocs to nodes in dc1
	// After this step dc2 nodes should get boosted
	ctx.plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[0].Node.ID,
		},
	}
	ctx.plan.NodeAllocation[nodes[2].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[2].Node.ID,
		},
	}

	// Reset the scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)
	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)
	out = collectRanked(scoreNorm)

	// Expect nodes in dc2 to get a boost
	// dc1 nodes are penalized because they have allocs
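	// With an even spread, values carrying more allocations than the
	// least-loaded value are penalized while the least-loaded value gets the
	// boost, which is what the expected scores below reflect.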
	expectedScores = map[string]float64{
		"dc1": -1,
		"dc2": 1,
	}
	for _, rn := range out {
		require.Equal(t, expectedScores[rn.Node.Datacenter], rn.FinalScore)
	}

	// Update the plan to add more allocs to nodes in dc2
	// After this step dc1 nodes should get boosted
	ctx.plan.NodeAllocation[nodes[1].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[1].Node.ID,
		},
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[1].Node.ID,
		},
	}
	ctx.plan.NodeAllocation[nodes[3].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[3].Node.ID,
		},
	}

	// Reset the scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)
	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)
	out = collectRanked(scoreNorm)

	// Expect nodes in dc2 to be penalized because there are 3 allocs there now
	// dc1 nodes are boosted because dc1 only has 2 allocs
	expectedScores = map[string]float64{
		"dc1": 0.5,
		"dc2": -0.5,
	}
	for _, rn := range out {
		require.Equal(t, fmt.Sprintf("%3.3f", expectedScores[rn.Node.Datacenter]), fmt.Sprintf("%3.3f", rn.FinalScore))
	}

	// Add another node in dc3
	node := mock.Node()
	node.Datacenter = "dc3"
	if err := state.UpsertNode(structs.MsgTypeTestSetup, uint64(1111), node); err != nil {
		t.Fatalf("failed to upsert node: %v", err)
	}
	nodes = append(nodes, &RankedNode{Node: node})

	// Add another alloc to dc1, now its count matches dc2
	ctx.plan.NodeAllocation[nodes[4].Node.ID] = []*structs.Allocation{
		{
			Namespace: structs.DefaultNamespace,
			TaskGroup: tg.Name,
			JobID:     job.ID,
			Job:       job,
			ID:        uuid.Generate(),
			NodeID:    nodes[4].Node.ID,
		},
	}

	// Reset scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)
	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)
	out = collectRanked(scoreNorm)

	// Expect dc1 and dc2 to be penalized because they have 3 allocs
	// dc3 should get a boost because it has 0 allocs
	expectedScores = map[string]float64{
		"dc1": -1,
		"dc2": -1,
		"dc3": 1,
	}
	for _, rn := range out {
		require.Equal(t, fmt.Sprintf("%.3f", expectedScores[rn.Node.Datacenter]), fmt.Sprintf("%.3f", rn.FinalScore))
	}
}

// Test scenarios where the spread iterator sets maximum penalty (-1.0)
func TestSpreadIterator_MaxPenalty(t *testing.T) {
	ci.Parallel(t)

	state, ctx := testContext(t)
	var nodes []*RankedNode

	// Add nodes in dc3 to the state store
	for i := 0; i < 5; i++ {
		node := mock.Node()
		node.Datacenter = "dc3"
		if err := state.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node); err != nil {
			t.Fatalf("failed to upsert node: %v", err)
		}
		nodes = append(nodes, &RankedNode{Node: node})
	}

	static := NewStaticRankIterator(ctx, nodes)

	job := mock.Job()
	tg := job.TaskGroups[0]
	job.TaskGroups[0].Count = 5

	// Create spread target of 80% in dc1
	// and 20% in dc2
	spread := &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "dc1",
				Percent: 80,
			},
			{
				Value:   "dc2",
				Percent: 20,
			},
		},
	}
	tg.Spreads = []*structs.Spread{spread}
	spreadIter := NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

	out := collectRanked(scoreNorm)

	// All nodes are in dc3 so score should be -1
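	// dc3 is not a spread target and the explicit targets already consume
	// 100%, so every node here takes the spread iterator's maximum penalty.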
	for _, rn := range out {
		require.Equal(t, -1.0, rn.FinalScore)
	}

	// Reset scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}

	// Create spread on an attribute that doesn't exist on any nodes
	spread = &structs.Spread{
		Weight:    100,
		Attribute: "${meta.foo}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "bar",
				Percent: 80,
			},
			{
				Value:   "baz",
				Percent: 20,
			},
		},
	}

	tg.Spreads = []*structs.Spread{spread}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)
	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)
	out = collectRanked(scoreNorm)

	// None of the nodes have the spread attribute, so the score should be -1
	for _, rn := range out {
		require.Equal(t, -1.0, rn.FinalScore)
	}
}

func TestSpreadIterator_NoInfinity(t *testing.T) {
	ci.Parallel(t)

	store, ctx := testContext(t)
	var nodes []*RankedNode

	// Add 3 nodes in different DCs to the state store
	for i := 1; i < 4; i++ {
		node := mock.Node()
		node.Datacenter = fmt.Sprintf("dc%d", i)
		must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node))
		nodes = append(nodes, &RankedNode{Node: node})
	}

	static := NewStaticRankIterator(ctx, nodes)

	job := mock.Job()
	tg := job.TaskGroups[0]
	job.TaskGroups[0].Count = 8

	// Create spread target of 50% in dc1, 50% in dc2, and 0% in the implicit target
	spread := &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "dc1",
				Percent: 50,
			},
			{
				Value:   "dc2",
				Percent: 50,
			},
			{
				Value:   "*",
				Percent: 0,
			},
		},
	}
	tg.Spreads = []*structs.Spread{spread}
	spreadIter := NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

	out := collectRanked(scoreNorm)

	// Scores should be even between dc1 and dc2 nodes, without an -Inf on dc3
	must.Len(t, 3, out)
	test.Eq(t, 0.75, out[0].FinalScore)
	test.Eq(t, 0.75, out[1].FinalScore)
	test.Eq(t, -1, out[2].FinalScore)

	// Reset scores
	for _, node := range nodes {
		node.Scores = nil
		node.FinalScore = 0
	}

	// Create very unbalanced spread target to force large negative scores
	spread = &structs.Spread{
		Weight:    100,
		Attribute: "${node.datacenter}",
		SpreadTarget: []*structs.SpreadTarget{
			{
				Value:   "dc1",
				Percent: 99,
			},
			{
				Value:   "dc2",
				Percent: 1,
			},
			{
				Value:   "*",
				Percent: 0,
			},
		},
	}
	tg.Spreads = []*structs.Spread{spread}
	static = NewStaticRankIterator(ctx, nodes)
	spreadIter = NewSpreadIterator(ctx, static)
	spreadIter.SetJob(job)
	spreadIter.SetTaskGroup(tg)

	scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)

	out = collectRanked(scoreNorm)

	// Scores should heavily favor dc1, without an -Inf on dc3
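	// With Count = 8, dc1's desired count is 8 * 0.99 = 7.92, so one alloc
	// there scores (7.92-1)/7.92 ~ 0.874; dc2's desired count is only
	// 8 * 0.01 = 0.08, so one alloc there scores (0.08-1)/0.08 = -11.5.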
	must.Len(t, 3, out)
	desired := 8 * 0.99 // 8 allocs * 99%
	test.Eq(t, (desired-1)/desired, out[0].FinalScore)
	test.Eq(t, -11.5, out[1].FinalScore)
	test.LessEq(t, out[1].FinalScore, out[2].FinalScore,
		test.Sprintf("expected implicit dc3 to be <= dc2"))
}

func Test_evenSpreadScoreBoost(t *testing.T) {
	ci.Parallel(t)

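	// The property set below models a value ("dc2") that still has a proposed
	// allocation even though its allocations have also been cleared and there
	// are no existing ones; the IsInf assertion at the end guards against this
	// shape producing an infinite boost.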
	pset := &propertySet{
		existingValues: map[string]uint64{},
		proposedValues: map[string]uint64{
			"dc2": 1,
			"dc1": 1,
			"dc3": 1,
		},
		clearedValues: map[string]uint64{
			"dc2": 1,
			"dc3": 1,
		},
		targetAttribute: "${node.datacenter}",
		targetValues:    &set.Set[string]{},
	}

	opt := &structs.Node{
		Datacenter: "dc2",
	}
	boost := evenSpreadScoreBoost(pset, opt)
	require.False(t, math.IsInf(boost, 1))
	require.Equal(t, 1.0, boost)
}

// TestSpreadOnLargeCluster exercises potentially quadratic
// performance cases with spread scheduling when we have a large
// number of eligible nodes unless we limit the number that each
// MaxScore attempt considers. By reducing the total from MaxInt, we
// can prevent quadratic performance but then we need this test to
// verify we have satisfactory spread results.
func TestSpreadOnLargeCluster(t *testing.T) {
	ci.Parallel(t)
	cases := []struct {
		name      string
		nodeCount int
		racks     map[string]int
		allocs    int
	}{
		{
			name:      "nodes=10k even racks=100 allocs=500",
			nodeCount: 10000,
			racks:     generateEvenRacks(10000, 100),
			allocs:    500,
		},
		{
			name:      "nodes=10k even racks=100 allocs=50",
			nodeCount: 10000,
			racks:     generateEvenRacks(10000, 100),
			allocs:    50,
		},
		{
			name:      "nodes=10k even racks=10 allocs=500",
			nodeCount: 10000,
			racks:     generateEvenRacks(10000, 10),
			allocs:    500,
		},
		{
			name:      "nodes=10k even racks=10 allocs=50",
			nodeCount: 10000,
			racks:     generateEvenRacks(10000, 10),
			allocs:    500,
		},
		{
			name:      "nodes=10k small uneven racks allocs=500",
			nodeCount: 10000,
			racks:     generateUnevenRacks(t, 10000, 50),
			allocs:    500,
		},
		{
			name:      "nodes=10k small uneven racks allocs=50",
			nodeCount: 10000,
			racks:     generateUnevenRacks(t, 10000, 50),
			allocs:    500,
		},
		{
			name:      "nodes=10k many uneven racks allocs=500",
			nodeCount: 10000,
			racks:     generateUnevenRacks(t, 10000, 500),
			allocs:    500,
		},
		{
			name:      "nodes=10k many uneven racks allocs=50",
			nodeCount: 10000,
			racks:     generateUnevenRacks(t, 10000, 500),
			allocs:    50,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			ci.Parallel(t)
			h := NewHarness(t)
			err := upsertNodes(h, tc.nodeCount, tc.racks)
			require.NoError(t, err)
			job := generateJob(tc.allocs)
			eval, err := upsertJob(h, job)
			require.NoError(t, err)

			start := time.Now()
			err = h.Process(NewServiceScheduler, eval)
			require.NoError(t, err)
			require.LessOrEqual(t, time.Since(start), time.Duration(60*time.Second),
				"time to evaluate exceeded EvalNackTimeout")

			require.Len(t, h.Plans, 1)
			require.False(t, h.Plans[0].IsNoOp())
			require.NoError(t, validateEqualSpread(h))
		})
	}
}

// generateEvenRacks creates a map of rack names to a count of nodes
// evenly distributed in those racks
func generateEvenRacks(nodes int, rackCount int) map[string]int {
	racks := map[string]int{}
	for i := 0; i < nodes; i++ {
		racks[fmt.Sprintf("r%d", i%rackCount)]++
	}
	return racks
}

// generateUnevenRacks creates a random map of rack names to a count
// of nodes in that rack
func generateUnevenRacks(t *testing.T, nodes int, rackCount int) map[string]int {
	rackNames := []string{}
	for i := 0; i < rackCount; i++ {
		rackNames = append(rackNames, fmt.Sprintf("r%d", i))
	}

	// print this so that any future test flakes can be more easily
	// reproduced
	seed := time.Now().Unix()
	random := rand.NewSource(seed)
	t.Logf("nodes=%d racks=%d seed=%d\n", nodes, rackCount, seed)

	racks := map[string]int{}
	for i := 0; i < nodes; i++ {
		idx := int(random.Int63()) % len(rackNames)
		racks[rackNames[idx]]++
	}
	return racks
}

// upsertNodes creates a collection of Nodes in the state store,
// distributed among the racks
func upsertNodes(h *Harness, count int, racks map[string]int) error {

	datacenters := []string{"dc-1", "dc-2"}
	rackAssignments := []string{}
	for rack, count := range racks {
		for i := 0; i < count; i++ {
			rackAssignments = append(rackAssignments, rack)
		}
	}

	for i := 0; i < count; i++ {
		node := mock.Node()
		node.Datacenter = datacenters[i%2]
		node.Meta = map[string]string{}
		node.Meta["rack"] = fmt.Sprintf("r%s", rackAssignments[i])
		node.NodeResources.Cpu.CpuShares = 14000
		node.NodeResources.Memory.MemoryMB = 32000
		err := h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)
		if err != nil {
			return err
		}
	}
	return nil
}

func generateJob(jobSize int) *structs.Job {
	job := mock.Job()
	job.Datacenters = []string{"dc-1", "dc-2"}
	job.Spreads = []*structs.Spread{{Attribute: "${meta.rack}"}}
	job.Constraints = []*structs.Constraint{}
	job.TaskGroups[0].Count = jobSize
	job.TaskGroups[0].Networks = nil
	job.TaskGroups[0].Services = []*structs.Service{}
	job.TaskGroups[0].Tasks[0].Resources = &structs.Resources{
		CPU:      6000,
		MemoryMB: 6000,
	}
	return job
}

func upsertJob(h *Harness, job *structs.Job) (*structs.Evaluation, error) {
	err := h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)
	if err != nil {
		return nil, err
	}
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	err = h.State.UpsertEvals(structs.MsgTypeTestSetup,
		h.NextIndex(), []*structs.Evaluation{eval})
	if err != nil {
		return nil, err
	}
	return eval, nil
}

// validateEqualSpread compares the resulting plan to the node
// metadata to verify that each group of spread targets has an equal
// distribution.
func validateEqualSpread(h *Harness) error {

	iter, err := h.State.Nodes(nil)
	if err != nil {
		return err
	}
	i := 0
	nodesToRacks := map[string]string{}
	racksToAllocCount := map[string]int{}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		node := raw.(*structs.Node)
		rack, ok := node.Meta["rack"]
		if ok {
			nodesToRacks[node.ID] = rack
			racksToAllocCount[rack] = 0
		}
		i++
	}

	// Collapse the count of allocations per node into a list of
	// counts. The results should be clustered within one of each
	// other.
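	// For example, rack alloc counts of {r1: 3, r2: 4, r3: 3} collapse to the
	// count set {3: 2, 4: 1}; the two distinct counts differ by exactly one,
	// so the spread is considered even.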
	for nodeID, nodeAllocs := range h.Plans[0].NodeAllocation {
		racksToAllocCount[nodesToRacks[nodeID]] += len(nodeAllocs)
	}
	countSet := map[int]int{}
	for _, count := range racksToAllocCount {
		countSet[count]++
	}

	countSlice := []int{}
	for count := range countSet {
		countSlice = append(countSlice, count)
	}

	switch len(countSlice) {
	case 1:
		return nil
	case 2, 3:
		sort.Ints(countSlice)
		for i := 1; i < len(countSlice); i++ {
			if countSlice[i] != countSlice[i-1]+1 {
				return fmt.Errorf("expected even distribution of allocs to racks, but got:\n%+v", countSet)
			}
		}
		return nil
	}
	return fmt.Errorf("expected even distribution of allocs to racks, but got:\n%+v", countSet)
}

// TestSpreadPanicDowngrade covers a scenario in which the spread iterator
// could panic while processing an evaluation, leaving the cluster in an
// unrecoverable state: whenever a panicked server restarts and quorum is
// restored, the next server to dequeue the evaluation panics again.
//
// To trigger this state:
//   - The job must have max_parallel = 0 and a canary >= 1.
//   - The job must not have a spread block.
//   - The job must have a previous version.
//   - The previous version must have a spread block and at least one
//     failed allocation.
//
// In this scenario the desired changes include (place 1+) (stop 1+)
// (ignore n) (canary 1). Before the scheduler can place the canary
// allocation, it tries to find out which allocations can be stopped. This
// passes back through the stack so that we can determine previous-node
// penalties, etc. SetJob is called on the stack with the previous version
// of the job, which includes assessing the spread block (even though the
// results are unused). The task group spread info state from that pass
// through the spread iterator is not reset when SetJob is called again, so
// when the new job version iterates over the groupPropertySets it gets an
// empty spreadAttributeMap and dereferences a nil pointer. The fix resets
// the spread iterator's internal state when setting the job; without that
// patch this test panics the scheduler.
func TestSpreadPanicDowngrade(t *testing.T) {
	ci.Parallel(t)

	h := NewHarness(t)

	nodes := []*structs.Node{}
	for i := 0; i < 5; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		err := h.State.UpsertNode(structs.MsgTypeTestSetup,
			h.NextIndex(), node)
		require.NoError(t, err)
	}

	// job version 1
	// max_parallel = 0, canary = 1, spread != nil, 1 failed alloc

	job1 := mock.Job()
	job1.Spreads = []*structs.Spread{
		{
			Attribute:    "${node.unique.name}",
			Weight:       50,
			SpreadTarget: []*structs.SpreadTarget{},
		},
	}
	job1.Update = structs.UpdateStrategy{
		Stagger:     time.Duration(30 * time.Second),
		MaxParallel: 0,
	}
	job1.Status = structs.JobStatusRunning
	job1.TaskGroups[0].Count = 4
	job1.TaskGroups[0].Update = &structs.UpdateStrategy{
		Stagger:          time.Duration(30 * time.Second),
		MaxParallel:      1,
		HealthCheck:      "checks",
		MinHealthyTime:   time.Duration(30 * time.Second),
		HealthyDeadline:  time.Duration(9 * time.Minute),
		ProgressDeadline: time.Duration(10 * time.Minute),
		AutoRevert:       true,
		Canary:           1,
	}

	job1.Version = 1
	job1.TaskGroups[0].Count = 5
	err := h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job1)
	require.NoError(t, err)

	allocs := []*structs.Allocation{}
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job1
		alloc.JobID = job1.ID
		alloc.NodeID = nodes[i].ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy:     pointer.Of(true),
			Timestamp:   time.Now(),
			Canary:      false,
			ModifyIndex: h.NextIndex(),
		}
		if i == 0 {
			alloc.DeploymentStatus.Canary = true
		}
		if i == 1 {
			alloc.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, alloc)
	}
	err = h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(), allocs)
	require.NoError(t, err)

	// job version 2
	// max_parallel = 0, canary = 1, spread == nil
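	// Registering this spread-less version on top of the spread-carrying
	// version above is the downgrade that exercises the panic path described
	// in the comment on this test.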

	job2 := job1.Copy()
	job2.Version = 2
	job2.Spreads = nil
	err = h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job2)
	require.NoError(t, err)

	eval := &structs.Evaluation{
		Namespace:   job2.Namespace,
		ID:          uuid.Generate(),
		Priority:    job2.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job2.ID,
		Status:      structs.EvalStatusPending,
	}
	err = h.State.UpsertEvals(structs.MsgTypeTestSetup,
		h.NextIndex(), []*structs.Evaluation{eval})
	require.NoError(t, err)

	processErr := h.Process(NewServiceScheduler, eval)
	require.NoError(t, processErr, "failed to process eval")
	require.Len(t, h.Plans, 1)
}

func TestSpread_ImplicitTargets(t *testing.T) {

	dcs := []string{"dc1", "dc2", "dc3"}

	setupNodes := func(h *Harness) map[string]string {
		nodesToDcs := map[string]string{}
		var nodes []*RankedNode

		for i, dc := range dcs {
			for n := 0; n < 4; n++ {
				node := mock.Node()
				node.Datacenter = dc
				must.NoError(t, h.State.UpsertNode(
					structs.MsgTypeTestSetup, uint64(100+i), node))
				nodes = append(nodes, &RankedNode{Node: node})
				nodesToDcs[node.ID] = node.Datacenter
			}
		}
		return nodesToDcs
	}

	setupJob := func(h *Harness, testCaseSpread *structs.Spread) *structs.Evaluation {
		job := mock.MinJob()
		job.Datacenters = dcs
		job.TaskGroups[0].Count = 12

		job.TaskGroups[0].Spreads = []*structs.Spread{testCaseSpread}
		must.NoError(t, h.State.UpsertJob(
			structs.MsgTypeTestSetup, h.NextIndex(), nil, job))

		eval := &structs.Evaluation{
			Namespace:   structs.DefaultNamespace,
			ID:          uuid.Generate(),
			Priority:    job.Priority,
			TriggeredBy: structs.EvalTriggerJobRegister,
			JobID:       job.ID,
			Status:      structs.EvalStatusPending,
		}
		must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
			h.NextIndex(), []*structs.Evaluation{eval}))

		return eval
	}

	testCases := []struct {
		name   string
		spread *structs.Spread
		expect map[string]int
	}{
		{
			name: "empty implicit target",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
				},
			},
			expect: map[string]int{"dc1": 6},
		},
		{
			name: "wildcard implicit target",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
					{
						Value:   "*",
						Percent: 50,
					},
				},
			},
			expect: map[string]int{"dc1": 6},
		},
		{
			name: "explicit targets",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
					{
						Value:   "dc2",
						Percent: 25,
					},
					{
						Value:   "dc3",
						Percent: 25,
					},
				},
			},
			expect: map[string]int{"dc1": 6, "dc2": 3, "dc3": 3},
		},
	}
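
	// Each case schedules 12 allocations: dc1 at 50% should receive 6, and the
	// remaining 6 fall to the implicit targets, which the explicit-targets case
	// pins down as 3 in dc2 and 3 in dc3.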
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			h := NewHarness(t)
			nodesToDcs := setupNodes(h)
			eval := setupJob(h, tc.spread)
			must.NoError(t, h.Process(NewServiceScheduler, eval))
			must.Len(t, 1, h.Plans)

			plan := h.Plans[0]
			must.False(t, plan.IsNoOp())

			dcCounts := map[string]int{}
			for node, allocs := range plan.NodeAllocation {
				dcCounts[nodesToDcs[node]] += len(allocs)
			}
			for dc, expectVal := range tc.expect {
				// not using test.MapEqual here because we have incomplete
				// expectations for the implicit DCs on some tests.
				test.Eq(t, expectVal, dcCounts[dc],
					test.Sprintf("expected %d in %q", expectVal, dc))
			}
		})
	}
}