Merge pull request #5645 from hashicorp/b-deploy-health

Update deployment health on failed allocations only if health is unset
Preetha 2019-05-03 17:04:18 -05:00 committed by GitHub
commit 116b7d0a61
2 changed files with 54 additions and 3 deletions
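
The change hinges on the difference between deployment health that is unset and health that has been reported as unhealthy: the client should only back-fill a verdict when the deployment watcher has said nothing yet. A minimal, self-contained sketch of that tri-state, using an illustrative stand-in for Nomad's AllocDeploymentStatus (the type name and method bodies below are assumptions for the example, not code from this pull request):

package main

import "fmt"

// allocDeploymentStatus is a simplified stand-in for Nomad's
// structs.AllocDeploymentStatus: a nil Healthy pointer means the
// deployment watcher has not reported health yet.
type allocDeploymentStatus struct {
	Healthy *bool
}

// HasHealth reports whether any health result (healthy or not) is set.
func (d *allocDeploymentStatus) HasHealth() bool {
	return d != nil && d.Healthy != nil
}

// IsUnhealthy reports whether health is set and explicitly false.
func (d *allocDeploymentStatus) IsUnhealthy() bool {
	return d != nil && d.Healthy != nil && !*d.Healthy
}

func main() {
	healthy := true
	set := &allocDeploymentStatus{Healthy: &healthy}
	var unset *allocDeploymentStatus

	// An already-healthy alloc: HasHealth is true, IsUnhealthy is false.
	fmt.Println(set.HasHealth(), set.IsUnhealthy())     // true false
	// No report yet: both predicates are false.
	fmt.Println(unset.HasHealth(), unset.IsUnhealthy()) // false false
}

A nil status or a nil Healthy pointer means the watcher has not reported anything yet; only in that case should the client fill a result in.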


@@ -551,10 +551,11 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st
 	if a.ClientTerminalStatus() {
 		alloc := ar.Alloc()
 
-		// If we are part of a deployment and the task has failed, mark the
+		// If we are part of a deployment and the alloc has failed, mark the
 		// alloc as unhealthy. This guards against the watcher not be started.
+		// If the health status is already set then terminal allocations should not
 		if a.ClientStatus == structs.AllocClientStatusFailed &&
-			alloc.DeploymentID != "" && !a.DeploymentStatus.IsUnhealthy() {
+			alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() {
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: helper.BoolToPtr(false),
			}
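
To make the guard change concrete: under the old !IsUnhealthy() condition, an allocation the watcher had already marked healthy would still pass the guard and be overwritten to unhealthy when it later failed, while the new !HasHealth() condition only back-fills a verdict when none exists. A hedged sketch of that guard as a standalone function (the names, signature, and status type are hypothetical, written only to illustrate the hunk above):

package main

import "fmt"

// status is a stand-in for the alloc's deployment status; a nil Healthy
// pointer means the deployment watcher has not reported anything yet.
type status struct{ Healthy *bool }

func (s *status) hasHealth() bool { return s != nil && s.Healthy != nil }

// backfillOnFailure applies the new guard: a failed alloc that belongs to
// a deployment is marked unhealthy only when no health has been recorded.
func backfillOnFailure(failed bool, deploymentID string, s *status) *status {
	if failed && deploymentID != "" && !s.hasHealth() {
		unhealthy := false
		return &status{Healthy: &unhealthy}
	}
	return s
}

func main() {
	healthy := true

	// Watcher already said "healthy": the failure no longer overwrites it.
	kept := backfillOnFailure(true, "dep-1", &status{Healthy: &healthy})
	fmt.Println(*kept.Healthy) // true

	// Nothing recorded yet: the client back-fills Healthy=false.
	filled := backfillOnFailure(true, "dep-1", nil)
	fmt.Println(*filled.Healthy) // false
}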


@@ -780,7 +780,7 @@ func TestAllocRunner_HandlesArtifactFailure(t *testing.T) {
 // Test that alloc runner kills tasks in task group when another task fails
 func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
-	alloc := mock.BatchAlloc()
+	alloc := mock.Alloc()
 	tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name]
 	alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0
@@ -792,6 +792,22 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
 	task.Config = map[string]interface{}{
 		"run_for": "10s",
 	}
+	// Set a service with check
+	task.Services = []*structs.Service{
+		{
+			Name:      "fakservice",
+			PortLabel: "http",
+			Checks: []*structs.ServiceCheck{
+				{
+					Name:     "fakecheck",
+					Type:     structs.ServiceCheckScript,
+					Command:  "true",
+					Interval: 30 * time.Second,
+					Timeout:  5 * time.Second,
+				},
+			},
+		},
+	}
 
 	task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy()
 	task2.Name = "task 2"
@@ -803,8 +819,38 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
 	alloc.AllocatedResources.Tasks[task.Name] = tr
 	alloc.AllocatedResources.Tasks[task2.Name] = tr
 
+	// Make the alloc be part of a deployment
+	alloc.DeploymentID = uuid.Generate()
+	alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
+	alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks
+	alloc.Job.TaskGroups[0].Update.MaxParallel = 1
+	alloc.Job.TaskGroups[0].Update.MinHealthyTime = 10 * time.Millisecond
+	alloc.Job.TaskGroups[0].Update.HealthyDeadline = 2 * time.Second
+
+	checkHealthy := &api.AgentCheck{
+		CheckID: uuid.Generate(),
+		Status:  api.HealthPassing,
+	}
+
 	conf, cleanup := testAllocRunnerConfig(t, alloc)
 	defer cleanup()
+
+	consulClient := conf.Consul.(*cconsul.MockConsulServiceClient)
+	consulClient.AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) {
+		return &consul.AllocRegistration{
+			Tasks: map[string]*consul.TaskRegistration{
+				task.Name: {
+					Services: map[string]*consul.ServiceRegistration{
+						"123": {
+							Service: &api.AgentService{Service: "fakeservice"},
+							Checks:  []*api.AgentCheck{checkHealthy},
+						},
+					},
+				},
+			},
+		}, nil
+	}
+
 	ar, err := NewAllocRunner(conf)
 	require.NoError(t, err)
 	defer destroy(ar)
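
The test switches the task group's update strategy to check-based health (UpdateStrategyHealthCheck_Checks) and stubs the Consul client so the watcher sees a single passing check. The idea behind check-based health, reduced to a sketch (the function name and shape are hypothetical, not Nomad's health watcher):

package main

import "fmt"

// checksHealthy mirrors the idea behind the "checks" health mode: an alloc
// counts as healthy only when every registered Consul check is passing.
// Simplified and illustrative only.
func checksHealthy(statuses []string) bool {
	if len(statuses) == 0 {
		return false // nothing registered yet, no verdict
	}
	for _, s := range statuses {
		if s != "passing" {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(checksHealthy([]string{"passing"}))             // true
	fmt.Println(checksHealthy([]string{"passing", "critical"})) // false
}

In the test, the stubbed AllocRegistrationsFn above is what lets the watcher observe that passing check.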
@@ -850,6 +896,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) {
 			return false, fmt.Errorf("task2 should have failed")
 		}
 
+		if !last.DeploymentStatus.HasHealth() {
+			return false, fmt.Errorf("Expected deployment health to be non nil")
+		}
+
 		return true, nil
 	}, func(err error) {
 		require.Fail(t, "err: %v", err)