From a8411976a8fee08c432a9439eb09493d2d35336d Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" <4903+rboyer@users.noreply.github.com> Date: Wed, 30 Nov 2022 11:29:21 -0600 Subject: [PATCH] peering: better represent non-passing states during peer check flattening (#15615) During peer stream replication we flatten checks from the source cluster and build one thin overall check to hide the irrelevant details from the consuming cluster. This flattening logic did correctly flip to non-passing if there were any non-passing checks, but WHICH status it got during that was random (warn/error). Also it didn't represent "maintenance" operations. There is an api package call AggregatedStatus which more correctly flattened check statuses. This PR replicated the more complete logic into the peer stream package. --- .changelog/15615.txt | 3 + .../peerstream/subscription_manager.go | 27 +++- .../peerstream/subscription_manager_test.go | 152 +++++++++++++++++- 3 files changed, 177 insertions(+), 5 deletions(-) create mode 100644 .changelog/15615.txt diff --git a/.changelog/15615.txt b/.changelog/15615.txt new file mode 100644 index 000000000..e8b0cd30b --- /dev/null +++ b/.changelog/15615.txt @@ -0,0 +1,3 @@ +```release-note:bug +peering: better represent non-passing states during peer check flattening +``` diff --git a/agent/grpc-external/services/peerstream/subscription_manager.go b/agent/grpc-external/services/peerstream/subscription_manager.go index 8290688a6..e8a4e48b4 100644 --- a/agent/grpc-external/services/peerstream/subscription_manager.go +++ b/agent/grpc-external/services/peerstream/subscription_manager.go @@ -664,6 +664,21 @@ func createDiscoChainHealth( } } +var statusScores = map[string]int{ + // 0 is reserved for unknown + api.HealthMaint: 1, + api.HealthCritical: 2, + api.HealthWarning: 3, + api.HealthPassing: 4, +} + +func getMostImportantStatus(a, b string) string { + if statusScores[a] < statusScores[b] { + return a + } + return b +} + func flattenChecks( nodeName string, serviceID string, @@ -675,10 +690,16 @@ func flattenChecks( return nil } + // Similar logic to (api.HealthChecks).AggregatedStatus() healthStatus := api.HealthPassing - for _, chk := range checks { - if chk.Status != api.HealthPassing { - healthStatus = chk.Status + if len(checks) > 0 { + for _, chk := range checks { + id := chk.CheckID + if id == api.NodeMaint || strings.HasPrefix(id, api.ServiceMaintPrefix) { + healthStatus = api.HealthMaint + break // always wins + } + healthStatus = getMostImportantStatus(healthStatus, chk.Status) } } diff --git a/agent/grpc-external/services/peerstream/subscription_manager_test.go b/agent/grpc-external/services/peerstream/subscription_manager_test.go index cad1c1385..6d7a41979 100644 --- a/agent/grpc-external/services/peerstream/subscription_manager_test.go +++ b/agent/grpc-external/services/peerstream/subscription_manager_test.go @@ -10,8 +10,6 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/hashicorp/consul/types" - "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/connect" @@ -19,12 +17,14 @@ import ( "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/stream" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/proto/pbcommon" "github.com/hashicorp/consul/proto/pbpeering" "github.com/hashicorp/consul/proto/pbpeerstream" "github.com/hashicorp/consul/proto/pbservice" "github.com/hashicorp/consul/proto/prototest" "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/consul/types" ) func TestSubscriptionManager_RegisterDeregister(t *testing.T) { @@ -837,6 +837,154 @@ func TestSubscriptionManager_ServerAddrs(t *testing.T) { }) } +func TestFlattenChecks(t *testing.T) { + type testcase struct { + checks []*pbservice.HealthCheck + expect string + expectNoResult bool + } + + run := func(t *testing.T, tc testcase) { + t.Helper() + got := flattenChecks( + "node-name", "service-id", "service-name", nil, tc.checks, + ) + if tc.expectNoResult { + require.Empty(t, got) + } else { + require.Len(t, got, 1) + require.Equal(t, tc.expect, got[0].Status) + } + } + + cases := map[string]testcase{ + "empty": { + checks: nil, + expectNoResult: true, + }, + "passing": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthPassing, + }, + }, + expect: api.HealthPassing, + }, + "warning": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthWarning, + }, + }, + expect: api.HealthWarning, + }, + "critical": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthCritical, + }, + }, + expect: api.HealthCritical, + }, + "node_maintenance": { + checks: []*pbservice.HealthCheck{ + { + CheckID: api.NodeMaint, + Status: api.HealthPassing, + }, + }, + expect: api.HealthMaint, + }, + "service_maintenance": { + checks: []*pbservice.HealthCheck{ + { + CheckID: api.ServiceMaintPrefix + "service", + Status: api.HealthPassing, + }, + }, + expect: api.HealthMaint, + }, + "unknown": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: "nope-nope-noper", + }, + }, + expect: "nope-nope-noper", + }, + "maintenance_over_critical": { + checks: []*pbservice.HealthCheck{ + { + CheckID: api.NodeMaint, + Status: api.HealthPassing, + }, + { + CheckID: "check-id", + Status: api.HealthCritical, + }, + }, + expect: api.HealthMaint, + }, + "critical_over_warning": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthCritical, + }, + { + CheckID: "check-id", + Status: api.HealthWarning, + }, + }, + expect: api.HealthCritical, + }, + "warning_over_passing": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthWarning, + }, + { + CheckID: "check-id", + Status: api.HealthPassing, + }, + }, + expect: api.HealthWarning, + }, + "lots": { + checks: []*pbservice.HealthCheck{ + { + CheckID: "check-id", + Status: api.HealthPassing, + }, + { + CheckID: "check-id", + Status: api.HealthPassing, + }, + { + CheckID: "check-id", + Status: api.HealthPassing, + }, + { + CheckID: "check-id", + Status: api.HealthWarning, + }, + }, + expect: api.HealthWarning, + }, + } + + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + run(t, tc) + }) + } +} + type testSubscriptionBackend struct { state.EventPublisher store *state.Store