scheduler: Fix bug where the would treat multiregion jobs as paused for job types that don't use deployments (#14659)

* scheduler: Fix bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments

Co-authored-by: Tim Gross <tgross@hashicorp.com>

Co-authored-by: Tim Gross <tgross@hashicorp.com>
This commit is contained in:
Derek Strickland 2022-09-22 14:31:27 -04:00 committed by GitHub
parent 92158a1c62
commit 6874997f91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 134 additions and 4 deletions

3
.changelog/14659.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments
```

View File

@ -5747,6 +5747,17 @@ func (j *Job) GetScalingPolicies() []*ScalingPolicy {
return ret
}
// UsesDeployments returns a boolean indicating whether the job configuration
// results in a deployment during scheduling.
func (j *Job) UsesDeployments() bool {
switch j.Type {
case JobTypeService:
return true
default:
return false
}
}
// ScalingPolicyListStub is used to return a subset of scaling policy information
// for the scaling policy list
type ScalingPolicyListStub struct {

View File

@ -265,6 +265,16 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) {
}
}
// computeDeploymentPaused is responsible for setting flags on the
// allocReconciler that indicate the state of the deployment if one
// is required. The flags that are managed are:
// 1. deploymentFailed: Did the current deployment fail just as named.
// 2. deploymentPaused: Multiregion job types that use deployments run
// the deployments later during the fan-out stage. When the deployment
// is created it will be in a pending state. If an invariant violation
// is detected by the deploymentWatcher during it will enter a paused
// state. This flag tells Compute we're paused or pending, so we should
// not make placements on the deployment.
func (a *allocReconciler) computeDeploymentPaused() {
if a.deployment != nil {
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused ||
@ -272,10 +282,10 @@ func (a *allocReconciler) computeDeploymentPaused() {
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
}
if a.deployment == nil {
// When we create the deployment later, it will be in a pending
// state. But we also need to tell Compute we're paused, otherwise we
// make placements on the paused deployment.
if a.job.IsMultiregion() && !(a.job.IsPeriodic() || a.job.IsParameterized()) {
if a.job.IsMultiregion() &&
a.job.UsesDeployments() &&
!(a.job.IsPeriodic() || a.job.IsParameterized()) {
a.deploymentPaused = true
}
}

View File

@ -5995,3 +5995,109 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
})
}
}
// Tests the reconciler properly handles the logic for computeDeploymentPaused
// for various job types.
func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
ci.Parallel(t)
type testCase struct {
name string
jobType string
isMultiregion bool
isPeriodic bool
isParameterized bool
expected bool
}
multiregionCfg := mock.MultiregionJob().Multiregion
periodicCfg := mock.PeriodicJob().Periodic
parameterizedCfg := &structs.ParameterizedJobConfig{
Payload: structs.DispatchPayloadRequired,
}
testCases := []testCase{
{
name: "single region service is not paused",
jobType: structs.JobTypeService,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion service is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
expected: true,
},
{
name: "multiregion periodic service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: true,
isParameterized: false,
expected: false,
},
{
name: "multiregion parameterized service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: true,
expected: false,
},
{
name: "single region batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var job *structs.Job
if tc.jobType == structs.JobTypeService {
job = mock.Job()
} else if tc.jobType == structs.JobTypeBatch {
job = mock.BatchJob()
}
require.NotNil(t, job, "invalid job type", tc.jobType)
if tc.isMultiregion {
job.Multiregion = multiregionCfg
}
if tc.isPeriodic {
job.Periodic = periodicCfg
}
if tc.isParameterized {
job.ParameterizedJob = parameterizedCfg
}
reconciler := NewAllocReconciler(
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, nil, nil, "", job.Priority, true)
_ = reconciler.Compute()
require.Equal(t, tc.expected, reconciler.deploymentPaused)
})
}
}