[ui, deployments] Promote Canary and Unhealthy Allocations in the deployment status panel (#17547)

* A wild health status appears

* autoPromote notification conditions

* Legend fixes etc

* Acceptance tests for new canary alerts
This commit is contained in:
Phil Renaud 2023-06-19 12:06:18 -04:00 committed by GitHub
parent cfb3bb517f
commit 8e41380f72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 296 additions and 17 deletions

View File

@ -39,6 +39,8 @@
<span class="alloc-health-indicator">
{{#if (eq @health "healthy")}}
<FlightIcon @name="check" @color="#25ba81" />
{{else if (eq @health "unhealthy")}}
<FlightIcon @name="x" @color="#c84034" />
{{else}}
<FlightIcon @name="running" @color="black" />
{{/if}}

View File

@ -19,6 +19,8 @@
<span class="alloc-health-indicator">
{{#if (eq @health "healthy")}}
<FlightIcon @name="check" @color="white" />
{{else if (eq @health "unhealthy")}}
<FlightIcon @name="x" @color="white" />
{{else}}
<FlightIcon @name="running" @color="white" />
{{/if}}

View File

@ -27,18 +27,38 @@
@disabled={{this.fail.isRunning}}
@onConfirm={{perform this.fail}} />
{{/if}}
</div>
</div>
</div>
<div class="boxed-section-body {{if @job.latestDeployment.requiresPromotion "requires-promotion"}}">
{{#if @job.latestDeployment.requiresPromotion}}
<button
data-test-promote-canary
type="button"
class="button is-warning is-small {{if this.promote.isRunning "is-loading"}}"
disabled={{this.promote.isRunning}}
onclick={{perform this.promote}}>Promote Canary</button>
<div class="canary-promotion-alert">
{{#if this.canariesHealthy}}
<Hds::Alert @type="inline" @color="warning" as |A|>
<A.Title>Deployment requires promotion</A.Title>
<A.Description>Your deployment requires manual promotion — all canary allocations have passed their health checks.</A.Description>
<A.Button data-test-promote-canary @text="Promote Canary" @color="primary" {{on "click" (perform this.promote)}} />
</Hds::Alert>
{{else}}
{{#if this.someCanariesHaveFailed}}
<Hds::Alert @type="inline" @color="critical" as |A|>
<A.Title>Some Canaries have failed</A.Title>
<A.Description>Your canary allocations have failed their health checks. Please have a look at the error logs and task events for the allocations in question.</A.Description>
</Hds::Alert>
{{else}}
<Hds::Alert @type="inline" @color="highlight" as |A|>
<A.Title>Checking Canary health</A.Title>
{{#if this.deploymentIsAutoPromoted}}
<A.Description>Your canary allocations are being placed and health-checked. If they pass, they will be automatically promoted and your deployment will continue.</A.Description>
{{else}}
<A.Description>Your job requires manual promotion, and your canary allocations are being placed and health-checked.</A.Description>
{{/if}}
</Hds::Alert>
{{/if}}
{{/if}}
</div>
</div>
</div>
<div class="boxed-section-body">
{{/if}}
<div class="deployment-allocations">
{{#if this.oldVersionAllocBlockIDs.length}}
<h4 class="title is-5" data-test-old-allocation-tally>Previous allocations: {{#if this.oldVersionAllocBlocks.running}}{{this.oldRunningHealthyAllocBlocks.length}} running{{/if}}</h4>
@ -86,16 +106,18 @@
{{#each-in this.newAllocsByHealth as |health count|}}
<span class="legend-item {{if (eq count 0) "faded"}}">
<span class="represented-allocation legend-example">
<span class="represented-allocation legend-example {{health}}">
<span class="alloc-health-indicator">
{{#if (eq health "healthy")}}
<FlightIcon @name="check" @color="#25ba81" />
{{else if (eq health "unhealthy")}}
<FlightIcon @name="x" @color="#c84034" />
{{else}}
<FlightIcon @name="running" @color="black" class="not-animated" />
{{/if}}
</span>
</span>
{{count}} {{capitalize health}}
{{count}} {{humanize health}}
</span>
{{/each-in}}

View File

@ -33,6 +33,32 @@ export default class JobStatusPanelDeployingComponent extends Component {
);
}
/**
* Promotion of a deployment will error if the canary allocations are not of status "Healthy";
* this function will check for that and disable the promote button if necessary.
* @returns {boolean}
*/
get canariesHealthy() {
const relevantAllocs = this.job.allocations.filter(
(a) => !a.isOld && a.isCanary && !a.hasBeenRescheduled
);
return relevantAllocs.every(
(a) => a.clientStatus === 'running' && a.isHealthy
);
}
get someCanariesHaveFailed() {
const relevantAllocs = this.job.allocations.filter(
(a) => !a.isOld && a.isCanary && !a.hasBeenRescheduled
);
return relevantAllocs.some(
(a) =>
a.clientStatus === 'failed' ||
a.clientStatus === 'lost' ||
a.isUnhealthy
);
}
@task(function* () {
try {
yield this.job.latestDeployment.content.promote();
@ -70,6 +96,7 @@ export default class JobStatusPanelDeployingComponent extends Component {
alloGroups[status] = {
healthy: { nonCanary: [] },
unhealthy: { nonCanary: [] },
health_unknown: { nonCanary: [] },
};
}
alloGroups[status].healthy.nonCanary.push(currentAlloc);
@ -88,6 +115,7 @@ export default class JobStatusPanelDeployingComponent extends Component {
categories[type.label] = {
healthy: { canary: [], nonCanary: [] },
unhealthy: { canary: [], nonCanary: [] },
health_unknown: { canary: [], nonCanary: [] },
};
return categories;
}, {});
@ -107,8 +135,10 @@ export default class JobStatusPanelDeployingComponent extends Component {
status === 'running'
? alloc.isHealthy
? 'healthy'
: 'unhealthy'
: 'unhealthy';
: alloc.isUnhealthy
? 'unhealthy'
: 'health_unknown'
: 'health_unknown';
if (allocationCategories[status]) {
// If status is failed or lost, we only want to show it IF it's used up its restarts/rescheds.
@ -129,6 +159,7 @@ export default class JobStatusPanelDeployingComponent extends Component {
allocationCategories['unplaced'] = {
healthy: { canary: [], nonCanary: [] },
unhealthy: { canary: [], nonCanary: [] },
health_unknown: { canary: [], nonCanary: [] },
};
allocationCategories['unplaced']['healthy']['nonCanary'] = Array(
availableSlotsToFill
@ -149,6 +180,13 @@ export default class JobStatusPanelDeployingComponent extends Component {
];
}
get newRunningUnhealthyAllocBlocks() {
return [
...this.newVersionAllocBlocks['running']['unhealthy']['canary'],
...this.newVersionAllocBlocks['running']['unhealthy']['nonCanary'],
];
}
get rescheduledAllocs() {
return this.job.allocations.filter((a) => !a.isOld && a.hasBeenRescheduled);
}
@ -183,8 +221,11 @@ export default class JobStatusPanelDeployingComponent extends Component {
/**
 * Legend tallies keyed by health state. `healthy` and `unhealthy` are the
 * sizes of the corresponding running alloc-block lists; the "unknown" entries
 * are derived by subtracting those counts from `totalAllocs`.
 * @returns {Object<string, number>}
 */
get newAllocsByHealth() {
return {
healthy: this.newRunningHealthyAllocBlocks.length,
// NOTE(review): both a 'health unknown' and a `health_unknown` key are
// emitted. This one does not subtract the unhealthy count, so it overlaps
// with `unhealthy` — confirm whether it is a leftover that should be removed.
'health unknown':
this.totalAllocs - this.newRunningHealthyAllocBlocks.length,
unhealthy: this.newRunningUnhealthyAllocBlocks.length,
// Whatever is neither running-healthy nor running-unhealthy.
health_unknown:
this.totalAllocs -
this.newRunningHealthyAllocBlocks.length -
this.newRunningUnhealthyAllocBlocks.length,
};
}
// #endregion legend
@ -205,4 +246,8 @@ export default class JobStatusPanelDeployingComponent extends Component {
// v----- Realistic method: Tally a job's task groups' "count" property
return this.args.job.taskGroups.reduce((sum, tg) => sum + tg.count, 0);
}
get deploymentIsAutoPromoted() {
return this.job.latestDeployment?.get('isAutoPromoted');
}
}

View File

@ -29,6 +29,7 @@ export default class JobStatusPanelSteadyComponent extends Component {
* @typedef {Object} AllocationStatus
* @property {HealthStatus} healthy
* @property {HealthStatus} unhealthy
* @property {HealthStatus} health unknown
*/
/**

View File

@ -54,10 +54,15 @@ export default class Allocation extends Model {
return this.deploymentStatus?.Canary;
}
// deploymentStatus.Healthy can be true, false, or null. Null implies pending
get isHealthy() {
return this.deploymentStatus?.Healthy;
}
get isUnhealthy() {
return this.deploymentStatus?.Healthy === false;
}
get willNotRestart() {
return this.clientStatus === 'failed' || this.clientStatus === 'lost';
}

View File

@ -23,7 +23,7 @@ export default class Deployment extends Model {
// If any task group is not promoted yet requires promotion and the deployment
// is still running, the deployment needs promotion.
@computed('status', 'taskGroupSummaries.@each.promoted')
@computed('status', 'taskGroupSummaries.@each.{promoted,requiresPromotion}')
get requiresPromotion() {
return (
this.status === 'running' &&
@ -36,6 +36,13 @@ export default class Deployment extends Model {
);
}
@computed('taskGroupSummaries.@each.autoPromote')
get isAutoPromoted() {
return this.taskGroupSummaries
.toArray()
.every((summary) => summary.get('autoPromote'));
}
@attr('string') status;
@attr('string') statusDescription;

View File

@ -14,6 +14,7 @@ export default class TaskGroupDeploymentSummary extends Fragment {
@attr('string') name;
@attr('boolean') autoRevert;
@attr('boolean') autoPromote;
@attr('boolean') promoted;
@gt('desiredCanaries', 0) requiresPromotion;

View File

@ -33,6 +33,28 @@
gap: 1rem;
grid-auto-columns: 100%;
// Layout variant used while the deployment requires promotion: declares four
// stacked grid areas, with `promotion-alert` as the first row.
&.requires-promotion {
grid-template-areas:
'promotion-alert'
'deployment-allocations'
'legend-and-summary'
'history-and-params';
// Buttons inside the canary alert are recoloured orange, slightly darker on
// the border and on hover.
& > .canary-promotion-alert {
button {
background-color: $orange;
border-color: darken($orange, 5%);
&:hover {
background-color: darken($orange, 5%);
}
}
}
}
// Assign the promotion alert to the `promotion-alert` grid area.
// The alert wrapper is rendered with the class `canary-promotion-alert`, so
// that selector is required for the placement to take effect; the original
// `.promotion-alert` selector is kept so any markup still using it lands in
// the same area.
& > .promotion-alert,
& > .canary-promotion-alert {
  grid-area: promotion-alert;
}
& > .deployment-allocations {
grid-area: deployment-allocations;
display: grid;
@ -68,6 +90,7 @@
legend {
display: grid;
grid-template-columns: repeat(4, 1fr);
grid-auto-rows: max-content;
gap: 0.5rem;
}
.versions {

View File

@ -5,7 +5,7 @@
import { module, test } from 'qunit';
import { setupRenderingTest } from 'ember-qunit';
import { find, render } from '@ember/test-helpers';
import { find, render, settled } from '@ember/test-helpers';
import hbs from 'htmlbars-inline-precompile';
import { startMirage } from 'nomad-ui/initializers/ember-cli-mirage';
import { initialize as fragmentSerializerInitializer } from 'nomad-ui/initializers/fragment-serializer';
@ -471,6 +471,177 @@ module(
);
});
// Walks the deployment status panel through each canary alert state:
//   1. no canaries            -> no alert rendered
//   2. canaries health-pending -> "Checking Canary health"
//   3. a canary is unhealthy   -> "Some Canaries have failed"
//   4. it recovers, peers still pending -> "Checking Canary health"
//   5. a canary's clientStatus fails    -> "Some Canaries have failed"
//   6. all canaries running + healthy   -> "Deployment requires promotion"
// Every mutation is followed by `settled()` so the assertion runs against the
// re-rendered DOM rather than racing the render.
test('During a deployment with canaries, canary alerts are handled', async function (assert) {
  this.server.create('node');

  const NUMBER_OF_GROUPS = 1;
  const ALLOCS_PER_GROUP = 10;
  const allocStatusDistribution = {
    running: 0.9,
    failed: 0.1,
    unknown: 0,
    lost: 0,
    complete: 0,
    pending: 0,
  };

  const job = await this.server.create('job', {
    type: 'service',
    createAllocations: true,
    noDeployments: true, // manually created below
    activeDeployment: true,
    groupTaskCount: ALLOCS_PER_GROUP,
    shallow: true,
    resourceSpec: Array(NUMBER_OF_GROUPS).fill(['M: 257, C: 500']), // length of this array determines number of groups
    allocStatusDistribution,
  });

  const jobRecord = await this.store.find(
    'job',
    JSON.stringify([job.id, 'default'])
  );

  const deployment = await this.server.create(
    'deployment',
    false,
    'active',
    {
      jobId: job.id,
      groupDesiredTotal: ALLOCS_PER_GROUP,
      versionNumber: 1,
      status: 'failed',
      // requiresPromotion: false,
    }
  );

  // requiresPromotion goes to false
  deployment.deploymentTaskGroupSummaries.models.forEach((d) => {
    d.update({
      desiredCanaries: 0,
      requiresPromotion: false,
      promoted: false,
    });
  });

  // All allocations set to Healthy and non-canary.
  // Uses `this.server` (not the `server` global) like the rest of this test.
  const activelyDeployingJobAllocs = this.server.schema.allocations
    .all()
    .filter((a) => a.jobId === job.id);

  activelyDeployingJobAllocs.models.forEach((a) => {
    a.update({ deploymentStatus: { Healthy: true, Canary: false } });
  });

  this.set('job', jobRecord);
  await this.get('job.latestDeployment');
  await this.set('job.latestDeployment.status', 'running');
  await this.get('job.allocations');

  await render(hbs`
    <JobStatus::Panel @job={{this.job}} />
  `);

  assert
    .dom(find('.legend-item .represented-allocation.running').parentElement)
    .hasText('9 Running');
  assert
    .dom(find('.legend-item .represented-allocation.healthy').parentElement)
    .hasText('9 Healthy');

  assert
    .dom('.canary-promotion-alert')
    .doesNotExist('No canary promotion alert when no canaries');

  // Set 3 allocations to health-pending canaries
  await Promise.all(
    this.get('job.allocations')
      .filterBy('clientStatus', 'running')
      .slice(0, 3)
      .map(async (a) => {
        await a.set('deploymentStatus', { Healthy: null, Canary: true });
      })
  );

  // Set the deployment's requiresPromotion to true
  await Promise.all(
    this.get('job.latestDeployment.taskGroupSummaries').map(async (a) => {
      await a.set('desiredCanaries', 3);
      await a.set('requiresPromotion', true);
    })
  );

  await settled();

  assert
    .dom('.canary-promotion-alert')
    .exists('Canary promotion alert when canaries are present');

  assert
    .dom('.canary-promotion-alert')
    .containsText('Checking Canary health');

  // Fail the health check on 1 canary
  await Promise.all(
    this.get('job.allocations')
      .filterBy('clientStatus', 'running')
      .slice(0, 1)
      .map(async (a) => {
        await a.set('deploymentStatus', { Healthy: false, Canary: true });
      })
  );

  await settled();

  assert
    .dom('.canary-promotion-alert')
    .containsText('Some Canaries have failed');

  // That 1 passes its health checks, but two peers remain pending
  await Promise.all(
    this.get('job.allocations')
      .filterBy('clientStatus', 'running')
      .slice(0, 1)
      .map(async (a) => {
        await a.set('deploymentStatus', { Healthy: true, Canary: true });
      })
  );

  await settled();

  assert
    .dom('.canary-promotion-alert')
    .containsText('Checking Canary health');

  // Fail one of the running canaries, but don't specifically touch its deploymentStatus.Healthy
  await Promise.all(
    this.get('job.allocations')
      .filterBy('clientStatus', 'running')
      .slice(0, 1)
      .map(async (a) => {
        await a.set('clientStatus', 'failed');
      })
  );

  await settled();

  assert
    .dom('.canary-promotion-alert')
    .containsText('Some Canaries have failed');

  // Canaries all running and healthy
  await Promise.all(
    this.get('job.allocations')
      .slice(0, 3)
      .map(async (a) => {
        await a.setProperties({
          deploymentStatus: { Healthy: true, Canary: true },
          clientStatus: 'running',
        });
      })
  );

  await settled();

  assert
    .dom('.canary-promotion-alert')
    .containsText('Deployment requires promotion');
});
test('when there is no running deployment, the latest deployment section shows up for the last deployment', async function (assert) {
this.server.create('job', {
type: 'service',