diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 96c312a2a..855eed077 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -146,9 +146,11 @@ func (s *HTTPServer) AgentServices(resp http.ResponseWriter, req *http.Request) } // Use empty list instead of nil - for _, s := range services { + for id, s := range services { if s.Tags == nil { - s.Tags = make([]string, 0) + clone := *s + clone.Tags = make([]string, 0) + services[id] = &clone } } @@ -170,10 +172,11 @@ func (s *HTTPServer) AgentChecks(resp http.ResponseWriter, req *http.Request) (i } // Use empty list instead of nil - // checks needs to be a deep copy for this not be racy - for _, c := range checks { + for id, c := range checks { if c.ServiceTags == nil { - c.ServiceTags = make([]string, 0) + clone := *c + clone.ServiceTags = make([]string, 0) + checks[id] = &clone } } diff --git a/agent/agent_test.go b/agent/agent_test.go index ff74aff3f..69a7d124a 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -547,11 +547,26 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) { } } -// TestAgent_ServiceWithTagChurn is designed to detect a class of issues where -// we would have unnecessary catalog churn for services with tags. See issues -// #3259, #3642, and #3845. -func TestAgent_ServiceWithTagChurn(t *testing.T) { +// TestAgent_IndexChurn is designed to detect a class of issues where +// we would have unnecessary catalog churn from anti-entropy. See issues +// #3259, #3642, #3845, and #3866. +func TestAgent_IndexChurn(t *testing.T) { t.Parallel() + + t.Run("no tags", func(t *testing.T) { + verifyIndexChurn(t, nil) + }) + + t.Run("with tags", func(t *testing.T) { + verifyIndexChurn(t, []string{"foo", "bar"}) + }) +} + +// verifyIndexChurn registers some things and runs anti-entropy a bunch of times +// in a row to make sure there are no index bumps. +func verifyIndexChurn(t *testing.T, tags []string) { + t.Helper() + a := NewTestAgent(t.Name(), "") defer a.Shutdown() @@ -559,7 +574,7 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) { ID: "redis", Service: "redis", Port: 8000, - Tags: []string{"has", "tags"}, + Tags: tags, } if err := a.AddService(svc, nil, true, ""); err != nil { t.Fatalf("err: %v", err) @@ -567,6 +582,7 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) { chk := &structs.HealthCheck{ CheckID: "redis-check", + Name: "Service-level check", ServiceID: "redis", Status: api.HealthCritical, } @@ -577,18 +593,34 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) { t.Fatalf("err: %v", err) } + chk = &structs.HealthCheck{ + CheckID: "node-check", + Name: "Node-level check", + Status: api.HealthCritical, + } + chkt = &structs.CheckType{ + TTL: time.Hour, + } + if err := a.AddCheck(chk, chkt, true, ""); err != nil { + t.Fatalf("err: %v", err) + } + if err := a.sync.State.SyncFull(); err != nil { t.Fatalf("err: %v", err) } + args := &structs.ServiceSpecificRequest{ Datacenter: "dc1", ServiceName: "redis", } - var before structs.IndexedHealthChecks - if err := a.RPC("Health.ServiceChecks", args, &before); err != nil { + var before structs.IndexedCheckServiceNodes + if err := a.RPC("Health.ServiceNodes", args, &before); err != nil { t.Fatalf("err: %v", err) } - if got, want := len(before.HealthChecks), 1; got != want { + if got, want := len(before.Nodes), 1; got != want { + t.Fatalf("got %d want %d", got, want) + } + if got, want := len(before.Nodes[0].Checks), 3; /* incl. serfHealth */ got != want { t.Fatalf("got %d want %d", got, want) } @@ -598,8 +630,8 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) { } } - var after structs.IndexedHealthChecks - if err := a.RPC("Health.ServiceChecks", args, &after); err != nil { + var after structs.IndexedCheckServiceNodes + if err := a.RPC("Health.ServiceNodes", args, &after); err != nil { t.Fatalf("err: %v", err) } verify.Values(t, "", after, before) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index d1e6bfdd4..ca781b36f 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -201,9 +201,11 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req if out.ServiceNodes == nil { out.ServiceNodes = make(structs.ServiceNodes, 0) } - for _, s := range out.ServiceNodes { + for i, s := range out.ServiceNodes { if s.ServiceTags == nil { - s.ServiceTags = make([]string, 0) + clone := *s + clone.ServiceTags = make([]string, 0) + out.ServiceNodes[i] = &clone } } metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_service_nodes"}, 1, @@ -244,6 +246,15 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req s.agent.TranslateAddresses(args.Datacenter, out.NodeServices.Node) } + // TODO: The NodeServices object in IndexedNodeServices is a pointer to + // something that's created for each request by the state store way down + // in https://github.com/hashicorp/consul/blob/v1.0.4/agent/consul/state/catalog.go#L953-L963. + // Since this isn't a pointer to a real state store object, it's safe to + // modify out.NodeServices.Services in the loop below without making a + // copy here. Same for the Tags in each service entry, since that was + // created by .ToNodeService() which made a copy. This is safe as-is but + // this whole business is tricky and subtle. See #3867 for more context. + // Use empty list instead of nil if out.NodeServices != nil { for _, s := range out.NodeServices.Services { diff --git a/agent/health_endpoint.go b/agent/health_endpoint.go index 59ea6c02f..e3cf4539a 100644 --- a/agent/health_endpoint.go +++ b/agent/health_endpoint.go @@ -42,9 +42,11 @@ func (s *HTTPServer) HealthChecksInState(resp http.ResponseWriter, req *http.Req if out.HealthChecks == nil { out.HealthChecks = make(structs.HealthChecks, 0) } - for _, c := range out.HealthChecks { + for i, c := range out.HealthChecks { if c.ServiceTags == nil { - c.ServiceTags = make([]string, 0) + clone := *c + clone.ServiceTags = make([]string, 0) + out.HealthChecks[i] = &clone } } return out.HealthChecks, nil @@ -80,9 +82,11 @@ func (s *HTTPServer) HealthNodeChecks(resp http.ResponseWriter, req *http.Reques if out.HealthChecks == nil { out.HealthChecks = make(structs.HealthChecks, 0) } - for _, c := range out.HealthChecks { + for i, c := range out.HealthChecks { if c.ServiceTags == nil { - c.ServiceTags = make([]string, 0) + clone := *c + clone.ServiceTags = make([]string, 0) + out.HealthChecks[i] = &clone } } return out.HealthChecks, nil @@ -120,9 +124,11 @@ func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Req if out.HealthChecks == nil { out.HealthChecks = make(structs.HealthChecks, 0) } - for _, c := range out.HealthChecks { + for i, c := range out.HealthChecks { if c.ServiceTags == nil { - c.ServiceTags = make([]string, 0) + clone := *c + clone.ServiceTags = make([]string, 0) + out.HealthChecks[i] = &clone } } return out.HealthChecks, nil @@ -194,19 +200,20 @@ func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Requ out.Nodes = make(structs.CheckServiceNodes, 0) } for i := range out.Nodes { - // TODO (slackpad) It's lame that this isn't a slice of pointers - // but it's not a well-scoped change to fix this. We should - // change this at the next opportunity. if out.Nodes[i].Checks == nil { out.Nodes[i].Checks = make(structs.HealthChecks, 0) } - for _, c := range out.Nodes[i].Checks { + for j, c := range out.Nodes[i].Checks { if c.ServiceTags == nil { - c.ServiceTags = make([]string, 0) + clone := *c + clone.ServiceTags = make([]string, 0) + out.Nodes[i].Checks[j] = &clone } } if out.Nodes[i].Service != nil && out.Nodes[i].Service.Tags == nil { - out.Nodes[i].Service.Tags = make([]string, 0) + clone := *out.Nodes[i].Service + clone.Tags = make([]string, 0) + out.Nodes[i].Service = &clone } } return out.Nodes, nil diff --git a/agent/structs/structs.go b/agent/structs/structs.go index b50f912f8..907a7a4ab 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -600,6 +600,8 @@ type IndexedServiceNodes struct { } type IndexedNodeServices struct { + // TODO: This should not be a pointer, see comments in + // agent/catalog_endpoint.go. NodeServices *NodeServices QueryMeta }