scheduler: Fix bug where the would treat multiregion jobs as paused for job types that don't use deployments (#14659)
* scheduler: Fix bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments Co-authored-by: Tim Gross <tgross@hashicorp.com> Co-authored-by: Tim Gross <tgross@hashicorp.com>
This commit is contained in:
parent
92158a1c62
commit
6874997f91
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
scheduler: Fixed bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments
|
||||
```
|
|
@ -5747,6 +5747,17 @@ func (j *Job) GetScalingPolicies() []*ScalingPolicy {
|
|||
return ret
|
||||
}
|
||||
|
||||
// UsesDeployments returns a boolean indicating whether the job configuration
|
||||
// results in a deployment during scheduling.
|
||||
func (j *Job) UsesDeployments() bool {
|
||||
switch j.Type {
|
||||
case JobTypeService:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// ScalingPolicyListStub is used to return a subset of scaling policy information
|
||||
// for the scaling policy list
|
||||
type ScalingPolicyListStub struct {
|
||||
|
|
|
@ -265,6 +265,16 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) {
|
|||
}
|
||||
}
|
||||
|
||||
// computeDeploymentPaused is responsible for setting flags on the
|
||||
// allocReconciler that indicate the state of the deployment if one
|
||||
// is required. The flags that are managed are:
|
||||
// 1. deploymentFailed: Did the current deployment fail just as named.
|
||||
// 2. deploymentPaused: Multiregion job types that use deployments run
|
||||
// the deployments later during the fan-out stage. When the deployment
|
||||
// is created it will be in a pending state. If an invariant violation
|
||||
// is detected by the deploymentWatcher during it will enter a paused
|
||||
// state. This flag tells Compute we're paused or pending, so we should
|
||||
// not make placements on the deployment.
|
||||
func (a *allocReconciler) computeDeploymentPaused() {
|
||||
if a.deployment != nil {
|
||||
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused ||
|
||||
|
@ -272,10 +282,10 @@ func (a *allocReconciler) computeDeploymentPaused() {
|
|||
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
|
||||
}
|
||||
if a.deployment == nil {
|
||||
// When we create the deployment later, it will be in a pending
|
||||
// state. But we also need to tell Compute we're paused, otherwise we
|
||||
// make placements on the paused deployment.
|
||||
if a.job.IsMultiregion() && !(a.job.IsPeriodic() || a.job.IsParameterized()) {
|
||||
if a.job.IsMultiregion() &&
|
||||
a.job.UsesDeployments() &&
|
||||
!(a.job.IsPeriodic() || a.job.IsParameterized()) {
|
||||
|
||||
a.deploymentPaused = true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5995,3 +5995,109 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Tests the reconciler properly handles the logic for computeDeploymentPaused
|
||||
// for various job types.
|
||||
func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
type testCase struct {
|
||||
name string
|
||||
jobType string
|
||||
isMultiregion bool
|
||||
isPeriodic bool
|
||||
isParameterized bool
|
||||
expected bool
|
||||
}
|
||||
|
||||
multiregionCfg := mock.MultiregionJob().Multiregion
|
||||
periodicCfg := mock.PeriodicJob().Periodic
|
||||
parameterizedCfg := &structs.ParameterizedJobConfig{
|
||||
Payload: structs.DispatchPayloadRequired,
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
name: "single region service is not paused",
|
||||
jobType: structs.JobTypeService,
|
||||
isMultiregion: false,
|
||||
isPeriodic: false,
|
||||
isParameterized: false,
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "multiregion service is paused",
|
||||
jobType: structs.JobTypeService,
|
||||
isMultiregion: true,
|
||||
isPeriodic: false,
|
||||
isParameterized: false,
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "multiregion periodic service is not paused",
|
||||
jobType: structs.JobTypeService,
|
||||
isMultiregion: true,
|
||||
isPeriodic: true,
|
||||
isParameterized: false,
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "multiregion parameterized service is not paused",
|
||||
jobType: structs.JobTypeService,
|
||||
isMultiregion: true,
|
||||
isPeriodic: false,
|
||||
isParameterized: true,
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "single region batch job is not paused",
|
||||
jobType: structs.JobTypeBatch,
|
||||
isMultiregion: false,
|
||||
isPeriodic: false,
|
||||
isParameterized: false,
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "multiregion batch job is not paused",
|
||||
jobType: structs.JobTypeBatch,
|
||||
isMultiregion: false,
|
||||
isPeriodic: false,
|
||||
isParameterized: false,
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var job *structs.Job
|
||||
|
||||
if tc.jobType == structs.JobTypeService {
|
||||
job = mock.Job()
|
||||
} else if tc.jobType == structs.JobTypeBatch {
|
||||
job = mock.BatchJob()
|
||||
}
|
||||
|
||||
require.NotNil(t, job, "invalid job type", tc.jobType)
|
||||
|
||||
if tc.isMultiregion {
|
||||
job.Multiregion = multiregionCfg
|
||||
}
|
||||
|
||||
if tc.isPeriodic {
|
||||
job.Periodic = periodicCfg
|
||||
}
|
||||
|
||||
if tc.isParameterized {
|
||||
job.ParameterizedJob = parameterizedCfg
|
||||
}
|
||||
|
||||
reconciler := NewAllocReconciler(
|
||||
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
|
||||
nil, nil, nil, "", job.Priority, true)
|
||||
|
||||
_ = reconciler.Compute()
|
||||
|
||||
require.Equal(t, tc.expected, reconciler.deploymentPaused)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue