scheduler: count implicit spread targets as a single target (#17195)

When calculating the score in the `SpreadIterator`, the score boost is
proportional to the difference between the current and desired count. But when
there are implicit spread targets, the current count is the sum of the possible
implicit targets, which results in incorrect scoring unless there's only one
implicit target.
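
For example, a `spread` block that names only one datacenter leaves the job's remaining datacenters as implicit targets. A minimal sketch in Go (mirroring the `structs.Spread` fixture used in the new test below): dc2 and dc3 are implicit targets that share the remaining 50%.

```go
package example

import "github.com/hashicorp/nomad/nomad/structs"

// One explicit target (dc1 at 50%) in a three-datacenter job: dc2 and dc3
// become implicit targets that together share the remaining 50%.
var spread = &structs.Spread{
	Weight:    100,
	Attribute: "${node.datacenter}",
	SpreadTarget: []*structs.SpreadTarget{
		{Value: "dc1", Percent: 50},
	},
}
```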

This changeset updates the `propertySet` struct to accept a set of explicit
target values so it can detect when a property value falls into the implicit set
and should be combined with other implicit values.
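
Roughly, the counting now folds every property value that is not an explicit target into a single shared bucket before comparing against the desired count. A minimal, self-contained sketch of that idea (hypothetical values; the real implementation is the `targetedPropertyValue` helper in the diff below):

```go
package main

import "fmt"

// explicit holds the spread target values named in the job spec; any other
// observed value belongs to one of the implicit targets. (Hypothetical data
// for illustration; the real lookup uses a set.Set[string] in propertySet.)
var explicit = map[string]bool{"dc1": true}

// fold collapses values outside the explicit set into a single "*" bucket,
// which is roughly what the new targetedPropertyValue helper does.
func fold(v string) string {
	if len(explicit) == 0 || explicit[v] {
		return v
	}
	return "*"
}

func main() {
	// Datacenters of current/proposed allocations before scoring.
	observed := []string{"dc1", "dc2", "dc3", "dc2"}
	counts := map[string]uint64{}
	for _, v := range observed {
		counts[fold(v)]++
	}
	// Without folding: dc1=1, dc2=2, dc3=1 (each implicit DC counted separately).
	// With folding:    dc1=1, *=3   (all implicit DCs share one combined count).
	fmt.Println(counts) // map[*:3 dc1:1]
}
```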

Fixes: #11823
Tim Gross, 2023-05-17 10:25:00 -04:00 (committed via GitHub)
commit 5fc63ace0b, parent 710afecf61
4 changed files with 179 additions and 11 deletions

.changelog/17195.txt (new file)

@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where implicit `spread` targets were treated as separate targets for scoring
```

scheduler/propertyset.go

@@ -9,6 +9,8 @@ import (
	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
+	"github.com/hashicorp/go-set"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)
@@ -33,6 +35,10 @@ type propertySet struct {
	// targetAttribute is the attribute this property set is checking
	targetAttribute string

+	// targetValues are the set of attribute values that are explicitly expected,
+	// so we can combine the count of values that belong to any implicit targets.
+	targetValues *set.Set[string]
+
	// allowedCount is the allowed number of allocations that can have the
	// distinct property
	allowedCount uint64
@@ -62,6 +68,7 @@ func NewPropertySet(ctx Context, job *structs.Job) *propertySet {
		jobID:          job.ID,
		namespace:      job.Namespace,
		existingValues: make(map[string]uint64),
+		targetValues:   set.From([]string{}),
		logger:         ctx.Logger().Named("property_set"),
	}
@@ -130,6 +137,10 @@ func (p *propertySet) setTargetAttributeWithCount(targetAttribute string, allowe
	p.PopulateProposed()
}

+func (p *propertySet) SetTargetValues(values []string) {
+	p.targetValues = set.From(values)
+}
+
// populateExisting is a helper shared when setting the constraint to populate
// the existing values.
func (p *propertySet) populateExisting() {
@@ -231,7 +242,7 @@ func (p *propertySet) SatisfiesDistinctProperties(option *structs.Node, tg strin
// UsedCount returns the number of times the value of the attribute being tracked by this
// property set is used across current and proposed allocations. It also returns the resolved
// attribute value for the node, and an error message if it couldn't be resolved correctly
-func (p *propertySet) UsedCount(option *structs.Node, tg string) (string, string, uint64) {
+func (p *propertySet) UsedCount(option *structs.Node, _ string) (string, string, uint64) {
	// Check if there was an error building
	if p.errorBuilding != nil {
		return "", p.errorBuilding.Error(), 0
@@ -239,12 +250,13 @@ func (p *propertySet) UsedCount(option *structs.Node, tg string) (string, string
	// Get the nodes property value
	nValue, ok := getProperty(option, p.targetAttribute)
+	targetPropertyValue := p.targetedPropertyValue(nValue)
	if !ok {
		return nValue, fmt.Sprintf("missing property %q", p.targetAttribute), 0
	}
	combinedUse := p.GetCombinedUseMap()
-	usedCount := combinedUse[nValue]
-	return nValue, "", usedCount
+	usedCount := combinedUse[targetPropertyValue]
+	return targetPropertyValue, "", usedCount
}

// GetCombinedUseMap counts how many times the property has been used by
@@ -254,23 +266,25 @@ func (p *propertySet) GetCombinedUseMap() map[string]uint64 {
	combinedUse := make(map[string]uint64, helper.Max(len(p.existingValues), len(p.proposedValues)))
	for _, usedValues := range []map[string]uint64{p.existingValues, p.proposedValues} {
		for propertyValue, usedCount := range usedValues {
-			combinedUse[propertyValue] += usedCount
+			targetPropertyValue := p.targetedPropertyValue(propertyValue)
+			combinedUse[targetPropertyValue] += usedCount
		}
	}

	// Go through and discount the combined count when the value has been
	// cleared by a proposed stop.
	for propertyValue, clearedCount := range p.clearedValues {
-		combined, ok := combinedUse[propertyValue]
+		targetPropertyValue := p.targetedPropertyValue(propertyValue)
+		combined, ok := combinedUse[targetPropertyValue]
		if !ok {
			continue
		}

		// Don't clear below 0.
		if combined >= clearedCount {
-			combinedUse[propertyValue] = combined - clearedCount
+			combinedUse[targetPropertyValue] = combined - clearedCount
		} else {
-			combinedUse[propertyValue] = 0
+			combinedUse[targetPropertyValue] = 0
		}
	}

	return combinedUse
@@ -335,7 +349,8 @@ func (p *propertySet) populateProperties(allocs []*structs.Allocation, nodes map
			continue
		}

-		properties[nProperty]++
+		targetPropertyValue := p.targetedPropertyValue(nProperty)
+		properties[targetPropertyValue]++
	}
}
@@ -347,3 +362,14 @@ func getProperty(n *structs.Node, property string) (string, bool) {
	return resolveTarget(property, n)
}
+
+// targetedPropertyValue transforms the property value to combine all implicit
+// target values into a single wildcard placeholder so that we get accurate
+// counts when we compare an explicitly-defined target against multiple implicit
+// targets.
+func (p *propertySet) targetedPropertyValue(propertyValue string) string {
+	if p.targetValues.Empty() || p.targetValues.Contains(propertyValue) {
+		return propertyValue
+	}
+	return "*"
+}

View File

@@ -4,6 +4,7 @@
package scheduler

import (
+	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)
@@ -95,6 +96,8 @@ func (iter *SpreadIterator) SetTaskGroup(tg *structs.TaskGroup) {
	for _, spread := range iter.jobSpreads {
		pset := NewPropertySet(iter.ctx, iter.job)
		pset.SetTargetAttribute(spread.Attribute, tg.Name)
+		pset.SetTargetValues(helper.ConvertSlice(spread.SpreadTarget,
+			func(t *structs.SpreadTarget) string { return t.Value }))
		iter.groupPropertySets[tg.Name] = append(iter.groupPropertySets[tg.Name], pset)
	}
@@ -102,6 +105,8 @@ func (iter *SpreadIterator) SetTaskGroup(tg *structs.TaskGroup) {
	for _, spread := range tg.Spreads {
		pset := NewPropertySet(iter.ctx, iter.job)
		pset.SetTargetAttribute(spread.Attribute, tg.Name)
+		pset.SetTargetValues(helper.ConvertSlice(spread.SpreadTarget,
+			func(t *structs.SpreadTarget) string { return t.Value }))
		iter.groupPropertySets[tg.Name] = append(iter.groupPropertySets[tg.Name], pset)
	}
}

scheduler/spread_test.go

@@ -11,14 +11,16 @@ import (
	"testing"
	"time"

+	"github.com/shoenig/test"
+	"github.com/shoenig/test/must"
+	"github.com/stretchr/testify/require"
+
+	"github.com/hashicorp/go-set"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
-	"github.com/shoenig/test"
-	"github.com/shoenig/test/must"
-	"github.com/stretchr/testify/require"
)

func TestSpreadIterator_SingleAttribute(t *testing.T) {
@@ -676,6 +678,7 @@ func Test_evenSpreadScoreBoost(t *testing.T) {
			"dc3": 1,
		},
		targetAttribute: "${node.datacenter}",
+		targetValues:    &set.Set[string]{},
	}

	opt := &structs.Node{
@@ -1021,3 +1024,134 @@ func TestSpreadPanicDowngrade(t *testing.T) {
	require.NoError(t, processErr, "failed to process eval")
	require.Len(t, h.Plans, 1)
}
func TestSpread_ImplicitTargets(t *testing.T) {

	dcs := []string{"dc1", "dc2", "dc3"}
	setupNodes := func(h *Harness) map[string]string {
		nodesToDcs := map[string]string{}
		var nodes []*RankedNode
		for i, dc := range dcs {
			for n := 0; n < 4; n++ {
				node := mock.Node()
				node.Datacenter = dc
				must.NoError(t, h.State.UpsertNode(
					structs.MsgTypeTestSetup, uint64(100+i), node))
				nodes = append(nodes, &RankedNode{Node: node})
				nodesToDcs[node.ID] = node.Datacenter
			}
		}
		return nodesToDcs
	}

	setupJob := func(h *Harness, testCaseSpread *structs.Spread) *structs.Evaluation {
		job := mock.MinJob()
		job.Datacenters = dcs
		job.TaskGroups[0].Count = 12
		job.TaskGroups[0].Spreads = []*structs.Spread{testCaseSpread}
		must.NoError(t, h.State.UpsertJob(
			structs.MsgTypeTestSetup, h.NextIndex(), nil, job))

		eval := &structs.Evaluation{
			Namespace:   structs.DefaultNamespace,
			ID:          uuid.Generate(),
			Priority:    job.Priority,
			TriggeredBy: structs.EvalTriggerJobRegister,
			JobID:       job.ID,
			Status:      structs.EvalStatusPending,
		}
		must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
			h.NextIndex(), []*structs.Evaluation{eval}))

		return eval
	}

	testCases := []struct {
		name   string
		spread *structs.Spread
		expect map[string]int
	}{
		{
			name: "empty implicit target",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
				},
			},
			expect: map[string]int{"dc1": 6},
		},
		{
			name: "wildcard implicit target",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
					{
						Value:   "*",
						Percent: 50,
					},
				},
			},
			expect: map[string]int{"dc1": 6},
		},
		{
			name: "explicit targets",
			spread: &structs.Spread{
				Weight:    100,
				Attribute: "${node.datacenter}",
				SpreadTarget: []*structs.SpreadTarget{
					{
						Value:   "dc1",
						Percent: 50,
					},
					{
						Value:   "dc2",
						Percent: 25,
					},
					{
						Value:   "dc3",
						Percent: 25,
					},
				},
			},
			expect: map[string]int{"dc1": 6, "dc2": 3, "dc3": 3},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			h := NewHarness(t)
			nodesToDcs := setupNodes(h)
			eval := setupJob(h, tc.spread)

			must.NoError(t, h.Process(NewServiceScheduler, eval))
			must.Len(t, 1, h.Plans)
			plan := h.Plans[0]
			must.False(t, plan.IsNoOp())

			dcCounts := map[string]int{}
			for node, allocs := range plan.NodeAllocation {
				dcCounts[nodesToDcs[node]] += len(allocs)
			}
			for dc, expectVal := range tc.expect {
				// not using test.MapEqual here because we have incomplete
				// expectations for the implicit DCs on some tests.
				test.Eq(t, expectVal, dcCounts[dc],
					test.Sprintf("expected %d in %q", expectVal, dc))
			}
		})
	}
}