This change fixes a bug within the generic scheduler that allowed duplicate alloc indexes (names) to be submitted to the plan applier and written to state. The bug originates from the placements calculation assuming that the names of allocations being replaced can be blindly copied to their replacements. This is not correct in all cases, particularly when dealing with canaries. The fix updates the alloc name index tracker to include lightweight duplicate tracking, which is used when computing placements to ensure duplicates are found and a new name is picked before the plan is submitted. The name index tracking is now passed from the reconciler to the generic scheduler via the reconcile results, so it does not have to be regenerated and no additional data structure is needed.

Co-authored-by: James Rasell <jrasell@users.noreply.github.com>
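As a rough illustration of the approach described above, here is a minimal, self-contained Go sketch of duplicate alloc index tracking. It is not the Nomad implementation: the `nameIndex` type and its `isDuplicate`/`next` methods are hypothetical stand-ins for Nomad's `allocNameIndex`, which tracks used indexes with a bitmap rather than a plain map.

```go
// Illustrative sketch only (not the Nomad code): shows the idea of detecting
// duplicate allocation indexes and picking a fresh index before a placement
// is submitted. All identifiers here are hypothetical.
package main

import "fmt"

type nameIndex struct {
	used       map[uint]bool
	duplicates map[uint]int
}

// newNameIndex builds the tracker from the indexes of existing allocations,
// recording any index that is seen more than once as a duplicate.
func newNameIndex(existing []uint) *nameIndex {
	ni := &nameIndex{used: map[uint]bool{}, duplicates: map[uint]int{}}
	for _, idx := range existing {
		if ni.used[idx] {
			ni.duplicates[idx]++
		} else {
			ni.used[idx] = true
		}
	}
	return ni
}

func (ni *nameIndex) isDuplicate(idx uint) bool { return ni.duplicates[idx] > 0 }

// next returns the lowest unused index and marks it as used.
func (ni *nameIndex) next() uint {
	for idx := uint(0); ; idx++ {
		if !ni.used[idx] {
			ni.used[idx] = true
			return idx
		}
	}
}

func main() {
	// Two allocations were both named with index 0, e.g. "example.cache[0]".
	ni := newNameIndex([]uint{0, 0, 1})
	if ni.isDuplicate(0) {
		fmt.Printf("index 0 is duplicated; using index %d instead\n", ni.next()) // prints 2
	}
}
```

The same shape appears in the diff below: bitmapFrom now reports duplicate indexes, IsDuplicate exposes them to the placement path, and computePlacements picks a fresh name before the plan is submitted.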
parent 50c9af53b7
commit 90cb544835
@@ -0,0 +1,3 @@
+```release-note:bug
+scheduler: Ensure duplicate allocation IDs are tracked and fixed when performing job updates
+```
@@ -10544,13 +10544,19 @@ func (a *Allocation) JobNamespacedID() NamespacedID {
 // Index returns the index of the allocation. If the allocation is from a task
 // group with count greater than 1, there will be multiple allocations for it.
 func (a *Allocation) Index() uint {
-	l := len(a.Name)
-	prefix := len(a.JobID) + len(a.TaskGroup) + 2
+	return AllocIndexFromName(a.Name, a.JobID, a.TaskGroup)
+}
+
+// AllocIndexFromName returns the index of an allocation given its name, the
+// jobID and the task group name.
+func AllocIndexFromName(allocName, jobID, taskGroup string) uint {
+	l := len(allocName)
+	prefix := len(jobID) + len(taskGroup) + 2
 	if l <= 3 || l <= prefix {
 		return uint(0)
 	}
 
-	strNum := a.Name[prefix : len(a.Name)-1]
+	strNum := allocName[prefix : len(allocName)-1]
 	num, _ := strconv.Atoi(strNum)
 	return uint(num)
 }
@@ -466,7 +466,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
 		s.queuedAllocs[p.placeTaskGroup.Name] += 1
 		destructive = append(destructive, p)
 	}
-	return s.computePlacements(destructive, place)
+	return s.computePlacements(destructive, place, results.taskGroupAllocNameIndexes)
 }
 
 // downgradedJobForPlacement returns the job appropriate for non-canary placement replacement
@@ -508,7 +508,8 @@ func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string,
 
 // computePlacements computes placements for allocations. It is given the set of
 // destructive updates to place and the set of new placements to place.
-func (s *GenericScheduler) computePlacements(destructive, place []placementResult) error {
+func (s *GenericScheduler) computePlacements(destructive, place []placementResult, nameIndex map[string]*allocNameIndex) error {
+
 	// Get the base nodes
 	nodes, byDC, err := s.setNodes(s.job)
 	if err != nil {
@@ -531,6 +532,12 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
 		// Get the task group
 		tg := missing.TaskGroup()
 
+		// This is populated from the reconciler via the compute results,
+		// therefore we cannot have an allocation belonging to a task group
+		// that has not generated and been through allocation name index
+		// tracking.
+		taskGroupNameIndex := nameIndex[tg.Name]
+
 		var downgradedJob *structs.Job
 
 		if missing.DowngradeNonCanary() {
@@ -628,12 +635,34 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
 				resources.Shared.Ports = option.AllocResources.Ports
 			}
 
+			// Pull the allocation name as a new variables, so we can alter
+			// this as needed without making changes to the original
+			// object.
+			newAllocName := missing.Name()
+
+			// Identify the index from the name, so we can check this
+			// against the allocation name index tracking for duplicates.
+			allocIndex := structs.AllocIndexFromName(newAllocName, s.job.ID, tg.Name)
+
+			// If the allocation index is a duplicate, we cannot simply
+			// create a new allocation with the same name. We need to
+			// generate a new index and use this. The log message is useful
+			// for debugging and development, but could be removed in a
+			// future version of Nomad.
+			if taskGroupNameIndex.IsDuplicate(allocIndex) {
+				oldAllocName := newAllocName
+				newAllocName = taskGroupNameIndex.Next(1)[0]
+				taskGroupNameIndex.UnsetIndex(allocIndex)
+				s.logger.Debug("duplicate alloc index found and changed",
+					"old_alloc_name", oldAllocName, "new_alloc_name", newAllocName)
+			}
+
 			// Create an allocation for this
 			alloc := &structs.Allocation{
 				ID:        uuid.Generate(),
 				Namespace: s.job.Namespace,
 				EvalID:    s.eval.ID,
-				Name:      missing.Name(),
+				Name:      newAllocName,
 				JobID:     s.job.ID,
 				TaskGroup: tg.Name,
 				Metrics:   s.ctx.Metrics(),
@@ -1979,6 +1979,305 @@ func TestServiceSched_JobModify(t *testing.T) {
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
+func TestServiceSched_JobModify_ExistingDuplicateAllocIndex(t *testing.T) {
+	ci.Parallel(t)
+
+	testHarness := NewHarness(t)
+
+	// Create some nodes
+	var nodes []*structs.Node
+	for i := 0; i < 10; i++ {
+		node := mock.Node()
+		nodes = append(nodes, node)
+		must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
+	}
+
+	// Generate a fake job with allocations
+	mockJob := mock.Job()
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
+
+	// Generate some allocations which will represent our pre-existing
+	// allocations. These have aggressive duplicate names.
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = mockJob
+		alloc.JobID = mockJob.ID
+		alloc.NodeID = nodes[i].ID
+
+		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+
+		if i%2 == 0 {
+			alloc.Name = "my-job.web[0]"
+		}
+		allocs = append(allocs, alloc)
+	}
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
+
+	// Generate a job modification which will force a destructive update.
+	mockJob2 := mock.Job()
+	mockJob2.ID = mockJob.ID
+	mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
+
+	// Create a mock evaluation which represents work to reconcile the job
+	// update.
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       mockJob2.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and ensure we get a single plan as a result.
+	must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, testHarness.Plans)
+
+	// Iterate and track the node allocations to ensure we have the correct
+	// amount, and that there a now no duplicate names.
+	totalNodeAllocations := 0
+	allocIndexNames := make(map[string]int)
+
+	for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
+		for _, nodeAlloc := range planNodeAlloc {
+			totalNodeAllocations++
+			allocIndexNames[nodeAlloc.Name]++
+
+			if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
+				t.Fatalf("found duplicate alloc name %q found", nodeAlloc.Name)
+			}
+		}
+	}
+	must.Eq(t, 10, totalNodeAllocations)
+
+	testHarness.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestServiceSched_JobModify_ProposedDuplicateAllocIndex(t *testing.T) {
+	ci.Parallel(t)
+
+	testHarness := NewHarness(t)
+
+	// Create some nodes
+	var nodes []*structs.Node
+	for i := 0; i < 10; i++ {
+		node := mock.Node()
+		nodes = append(nodes, node)
+		must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
+	}
+
+	// Generate a job which includes a canary update strategy.
+	mockJob := mock.MinJob()
+	mockJob.TaskGroups[0].Count = 3
+	mockJob.Update = structs.UpdateStrategy{
+		Canary:      1,
+		MaxParallel: 3,
+	}
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
+
+	// Generate some allocations which will represent our pre-existing
+	// allocations.
+	var allocs []*structs.Allocation
+	for i := 0; i < 3; i++ {
+		alloc := mock.MinAlloc()
+		alloc.Namespace = structs.DefaultNamespace
+		alloc.Job = mockJob
+		alloc.JobID = mockJob.ID
+		alloc.NodeID = nodes[i].ID
+		alloc.Name = structs.AllocName(mockJob.ID, mockJob.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+	}
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
+
+	// Generate a job modification which will force a destructive update as
+	// well as a scaling.
+	mockJob2 := mockJob.Copy()
+	mockJob2.Version++
+	mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
+	mockJob2.TaskGroups[0].Count++
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
+
+	nextRaftIndex := testHarness.NextIndex()
+	deploymentID := uuid.Generate()
+
+	// Upsert a canary into state, this represents the first stage of the
+	// deployment process and jumps us to the point where duplicate allocation
+	// indexes could be produced.
+	canaryAlloc := mock.MinAlloc()
+	canaryAlloc.Namespace = structs.DefaultNamespace
+	canaryAlloc.Job = mockJob2
+	canaryAlloc.JobID = mockJob2.ID
+	canaryAlloc.NodeID = nodes[1].ID
+	canaryAlloc.Name = structs.AllocName(mockJob2.ID, mockJob2.TaskGroups[0].Name, uint(0))
+	canaryAlloc.DeploymentID = deploymentID
+	canaryAlloc.ClientStatus = structs.AllocClientStatusRunning
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, nextRaftIndex, []*structs.Allocation{
+		canaryAlloc,
+	}))
+
+	// Craft our deployment object which represents the post-canary state. This
+	// unblocks the rest of the deployment process, where we replace the old
+	// job version allocations.
+	canaryDeployment := structs.Deployment{
+		ID:         deploymentID,
+		Namespace:  mockJob2.Namespace,
+		JobID:      mockJob2.ID,
+		JobVersion: mockJob2.Version,
+		TaskGroups: map[string]*structs.DeploymentState{
+			mockJob2.TaskGroups[0].Name: {
+				Promoted:        true,
+				DesiredTotal:    4,
+				HealthyAllocs:   1,
+				PlacedAllocs:    1,
+				PlacedCanaries:  []string{canaryAlloc.ID},
+			},
+		},
+		Status:            structs.DeploymentStatusRunning,
+		StatusDescription: structs.DeploymentStatusDescriptionRunning,
+		EvalPriority:      50,
+		JobCreateIndex:    mockJob2.CreateIndex,
+	}
+	must.NoError(t, testHarness.State.UpsertDeployment(nextRaftIndex, &canaryDeployment))
+
+	// Create a mock evaluation which represents work to reconcile the job
+	// update.
+	eval := &structs.Evaluation{
+		Namespace:    structs.DefaultNamespace,
+		ID:           uuid.Generate(),
+		Priority:     50,
+		TriggeredBy:  structs.EvalTriggerJobRegister,
+		JobID:        mockJob2.ID,
+		Status:       structs.EvalStatusPending,
+		DeploymentID: deploymentID,
+	}
+	must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and ensure we get a single plan as a result.
+	must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, testHarness.Plans)
+
+	// Iterate and track the node allocations to ensure we have the correct
+	// amount, and that there a now no duplicate names. Before the duplicate
+	// allocation name fix, this section of testing would fail.
+	totalNodeAllocations := 0
+	allocIndexNames := map[string]int{canaryAlloc.Name: 1}
+
+	for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
+		for _, nodeAlloc := range planNodeAlloc {
+			totalNodeAllocations++
+			allocIndexNames[nodeAlloc.Name]++
+
+			if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
+				t.Fatalf("found duplicate alloc name %q found", nodeAlloc.Name)
+			}
+		}
+	}
+	must.Eq(t, 3, totalNodeAllocations)
+
+	// Ensure the correct number of destructive node updates.
+	totalNodeUpdates := 0
+
+	for _, planNodeUpdate := range testHarness.Plans[0].NodeUpdate {
+		totalNodeUpdates += len(planNodeUpdate)
+	}
+	must.Eq(t, 3, totalNodeUpdates)
+
+	testHarness.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestServiceSched_JobModify_ExistingDuplicateAllocIndexNonDestructive(t *testing.T) {
+	ci.Parallel(t)
+
+	testHarness := NewHarness(t)
+
+	// Create some nodes
+	var nodes []*structs.Node
+	for i := 0; i < 10; i++ {
+		node := mock.Node()
+		nodes = append(nodes, node)
+		must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
+	}
+
+	// Generate a fake job with allocations
+	mockJob := mock.MinJob()
+	mockJob.TaskGroups[0].Count = 10
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
+
+	// Generate some allocations which will represent our pre-existing
+	// allocations. These have aggressive duplicate names.
+	var (
+		allocs   []*structs.Allocation
+		allocIDs []string
+	)
+	for i := 0; i < 10; i++ {
+		alloc := mock.MinAlloc()
+		alloc.Namespace = structs.DefaultNamespace
+		alloc.Job = mockJob
+		alloc.JobID = mockJob.ID
+		alloc.NodeID = nodes[i].ID
+
+		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+
+		if i%2 == 0 {
+			alloc.Name = "my-job.web[0]"
+		}
+		allocs = append(allocs, alloc)
+		allocIDs = append(allocIDs, alloc.ID)
+	}
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
+
+	// Generate a job modification which will be an in-place update.
+	mockJob2 := mockJob.Copy()
+	mockJob2.ID = mockJob.ID
+	mockJob2.Update.MaxParallel = 2
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
+
+	// Create a mock evaluation which represents work to reconcile the job
+	// update.
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       mockJob2.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and ensure we get a single plan as a result.
+	must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, testHarness.Plans)
+
+	// Ensure the plan did not want to perform any destructive updates.
+	var nodeUpdateCount int
+
+	for _, nodeUpdateAllocs := range testHarness.Plans[0].NodeUpdate {
+		nodeUpdateCount += len(nodeUpdateAllocs)
+	}
+	must.Zero(t, nodeUpdateCount)
+
+	// Ensure the plan updated the existing allocs by checking the count, the
+	// job object, and the allocation IDs.
+	var (
+		nodeAllocationCount int
+		nodeAllocationIDs   []string
+	)
+
+	for _, nodeAllocs := range testHarness.Plans[0].NodeAllocation {
+		nodeAllocationCount += len(nodeAllocs)
+
+		for _, nodeAlloc := range nodeAllocs {
+			must.Eq(t, mockJob2, nodeAlloc.Job)
+			nodeAllocationIDs = append(nodeAllocationIDs, nodeAlloc.ID)
+		}
+	}
+	must.Eq(t, 10, nodeAllocationCount)
+	must.SliceContainsAll(t, allocIDs, nodeAllocationIDs)
+}
+
 func TestServiceSched_JobModify_Datacenters(t *testing.T) {
 	ci.Parallel(t)
 
@@ -141,6 +141,13 @@ type reconcileResults struct {
 	// desiredFollowupEvals is the map of follow up evaluations to create per task group
 	// This is used to create a delayed evaluation for rescheduling failed allocations.
 	desiredFollowupEvals map[string][]*structs.Evaluation
+
+	// taskGroupAllocNameIndexes is a tracking of the allocation name index,
+	// keyed by the task group name. This is stored within the results, so the
+	// generic scheduler can use this to perform duplicate alloc index checks
+	// before submitting the plan. This is always non-nil and is handled within
+	// a single routine, so does not require a mutex.
+	taskGroupAllocNameIndexes map[string]*allocNameIndex
 }
 
 // delayedRescheduleInfo contains the allocation id and a time when its eligible to be rescheduled.
@@ -193,11 +200,12 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch
 		supportsDisconnectedClients: supportsDisconnectedClients,
 		now:                         time.Now(),
 		result: &reconcileResults{
-			attributeUpdates:     make(map[string]*structs.Allocation),
-			disconnectUpdates:    make(map[string]*structs.Allocation),
-			reconnectUpdates:     make(map[string]*structs.Allocation),
-			desiredTGUpdates:     make(map[string]*structs.DesiredUpdates),
-			desiredFollowupEvals: make(map[string][]*structs.Evaluation),
+			attributeUpdates:          make(map[string]*structs.Allocation),
+			disconnectUpdates:         make(map[string]*structs.Allocation),
+			reconnectUpdates:          make(map[string]*structs.Allocation),
+			desiredTGUpdates:          make(map[string]*structs.DesiredUpdates),
+			desiredFollowupEvals:      make(map[string][]*structs.Evaluation),
+			taskGroupAllocNameIndexes: make(map[string]*allocNameIndex),
 		},
 	}
 }
@@ -494,6 +502,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 	// which is the union of untainted, rescheduled, allocs on migrating
 	// nodes, and allocs on down nodes (includes canaries)
 	nameIndex := newAllocNameIndex(a.jobID, groupName, tg.Count, untainted.union(migrate, rescheduleNow, lost))
+	a.result.taskGroupAllocNameIndexes[groupName] = nameIndex
 
 	// Stop any unneeded allocations and update the untainted set to not
 	// include stopped allocations.
@@ -988,6 +997,13 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc
 	knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown)
 
 	// Hot path the nothing to do case
+	//
+	// Note that this path can result in duplication allocation indexes in a
+	// scenario with a destructive job change (ex. image update) happens with
+	// an increased group count. Once the canary is replaced, and we compute
+	// the next set of stops, the untainted set equals the new group count,
+	// which results is missing one removal. The duplicate alloc index is
+	// corrected in `computePlacements`
 	remove := len(knownUntainted) + len(migrate) - group.Count
 	if remove <= 0 {
 		return stop
@@ -614,24 +614,35 @@ type allocNameIndex struct {
 	job, taskGroup string
 	count          int
 	b              structs.Bitmap
+
+	// duplicates is used to store duplicate allocation indexes which are
+	// currently present within the index tracking. The map key is the index,
+	// and the current count of duplicates. The map is only accessed within a
+	// single routine and multiple times per job scheduler invocation,
+	// therefore no lock is used.
+	duplicates map[uint]int
 }
 
 // newAllocNameIndex returns an allocNameIndex for use in selecting names of
 // allocations to create or stop. It takes the job and task group name, desired
 // count and any existing allocations as input.
 func newAllocNameIndex(job, taskGroup string, count int, in allocSet) *allocNameIndex {
+
+	bitMap, duplicates := bitmapFrom(in, uint(count))
+
 	return &allocNameIndex{
-		count:     count,
-		b:         bitmapFrom(in, uint(count)),
-		job:       job,
-		taskGroup: taskGroup,
+		count:      count,
+		b:          bitMap,
+		job:        job,
+		taskGroup:  taskGroup,
+		duplicates: duplicates,
 	}
 }
 
 // bitmapFrom creates a bitmap from the given allocation set and a minimum size
 // maybe given. The size of the bitmap is as the larger of the passed minimum
 // and the maximum alloc index of the passed input (byte aligned).
-func bitmapFrom(input allocSet, minSize uint) structs.Bitmap {
+func bitmapFrom(input allocSet, minSize uint) (structs.Bitmap, map[uint]int) {
 	var max uint
 	for _, a := range input {
 		if num := a.Index(); num > max {
@@ -666,11 +677,25 @@ func bitmapFrom(input allocSet, minSize uint) structs.Bitmap {
 		panic(err)
 	}
 
+	// Initialize our duplicates mapping, allowing us to store a non-nil map
+	// at the cost of 48 bytes.
+	duplicates := make(map[uint]int)
+
+	// Iterate through the allocSet input and hydrate the bitmap. We check that
+	// the bitmap does not contain the index first, so we can track duplicate
+	// entries.
 	for _, a := range input {
-		bitmap.Set(a.Index())
+
+		allocIndex := a.Index()
+
+		if bitmap.Check(allocIndex) {
+			duplicates[allocIndex]++
+		} else {
+			bitmap.Set(allocIndex)
+		}
 	}
 
-	return bitmap
+	return bitmap, duplicates
 }
 
 // Highest removes and returns the highest n used names. The returned set
@@ -689,9 +714,25 @@ func (a *allocNameIndex) Highest(n uint) map[string]struct{} {
 	return h
 }
 
+// IsDuplicate checks whether the passed allocation index is duplicated within
+// the tracking.
+func (a *allocNameIndex) IsDuplicate(idx uint) bool {
+	val, ok := a.duplicates[idx]
+	return ok && val > 0
+}
+
 // UnsetIndex unsets the index as having its name used
 func (a *allocNameIndex) UnsetIndex(idx uint) {
-	a.b.Unset(idx)
+
+	// If this index is a duplicate, remove the duplicate entry. Otherwise, we
+	// can remove it from the bitmap tracking.
+	if num, ok := a.duplicates[idx]; ok {
+		if num--; num == 0 {
+			delete(a.duplicates, idx)
+		}
+	} else {
+		a.b.Unset(idx)
+	}
 }
 
 // NextCanaries returns the next n names for use as canaries and sets them as
@@ -702,9 +743,11 @@ func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) []
 	// Create a name index
 	existingNames := existing.nameSet()
 
-	// First select indexes from the allocations that are undergoing destructive
-	// updates. This way we avoid duplicate names as they will get replaced.
-	dmap := bitmapFrom(destructive, uint(a.count))
+	// First select indexes from the allocations that are undergoing
+	// destructive updates. This way we avoid duplicate names as they will get
+	// replaced. As this process already takes into account duplicate checking,
+	// we can discard the duplicate mapping when generating the bitmap.
+	dmap, _ := bitmapFrom(destructive, uint(a.count))
 	remainder := n
 	for _, idx := range dmap.IndexesInRange(true, uint(0), uint(a.count)-1) {
 		name := structs.AllocName(a.job, a.taskGroup, uint(idx))
@@ -948,11 +948,13 @@ func TestBitmapFrom(t *testing.T) {
 			Name: "foo.bar[8]",
 		},
 	}
-	b := bitmapFrom(input, 1)
+	b, dups := bitmapFrom(input, 1)
 	must.Eq(t, 16, b.Size())
+	must.MapEmpty(t, dups)
 
-	b = bitmapFrom(input, 8)
+	b, dups = bitmapFrom(input, 8)
 	must.Eq(t, 16, b.Size())
+	must.MapEmpty(t, dups)
 }
 
 func Test_allocNameIndex_Highest(t *testing.T) {
@@ -1168,6 +1170,51 @@ func Test_allocNameIndex_Next(t *testing.T) {
 	}
 }
 
+func Test_allocNameIndex_Duplicates(t *testing.T) {
+	ci.Parallel(t)
+
+	inputAllocSet := map[string]*structs.Allocation{
+		"6b255fa3-c2cb-94de-5ddd-41aac25a6851": {
+			Name:      "example.cache[0]",
+			JobID:     "example",
+			TaskGroup: "cache",
+		},
+		"e24771e6-8900-5d2d-ec93-e7076284774a": {
+			Name:      "example.cache[1]",
+			JobID:     "example",
+			TaskGroup: "cache",
+		},
+		"d7842822-32c4-1a1c-bac8-66c3f20dfb0f": {
+			Name:      "example.cache[2]",
+			JobID:     "example",
+			TaskGroup: "cache",
+		},
+		"76a6a487-016b-2fc2-8295-d811473ca93d": {
+			Name:      "example.cache[0]",
+			JobID:     "example",
+			TaskGroup: "cache",
+		},
+	}
+
+	// Build the tracker, and check some key information.
+	allocNameIndexTracker := newAllocNameIndex("example", "cache", 4, inputAllocSet)
+	must.Eq(t, 8, allocNameIndexTracker.b.Size())
+	must.MapLen(t, 1, allocNameIndexTracker.duplicates)
+	must.True(t, allocNameIndexTracker.IsDuplicate(0))
+
+	// Unsetting the index should remove the duplicate entry, but not the entry
+	// from the underlying bitmap.
+	allocNameIndexTracker.UnsetIndex(0)
+	must.MapLen(t, 0, allocNameIndexTracker.duplicates)
+	must.True(t, allocNameIndexTracker.b.Check(0))
+
+	// If we now select a new index, having previously checked for a duplicate,
+	// we should get a non-duplicate.
+	nextAllocNames := allocNameIndexTracker.Next(1)
+	must.Len(t, 1, nextAllocNames)
+	must.Eq(t, "example.cache[3]", nextAllocNames[0])
+}
+
 func TestAllocSet_filterByRescheduleable(t *testing.T) {
 	ci.Parallel(t)
 