From 7d483830414b6fa1101ee13386199889c179d48c Mon Sep 17 00:00:00 2001 From: Freddy Date: Tue, 20 Jul 2021 10:09:29 -0600 Subject: [PATCH] Avoid panic on concurrent writes to cached service config map (#10647) If multiple instances of a service are co-located on the same node then their proxies will all share a cache entry for their resolved service configuration. This is because the cache key contains the name of the watched service but does not take into account the ID of the watching proxies. This means that there will be multiple agent service manager watches that can wake up on the same cache update. These watchers then concurrently modify the value in the cache when merging the resolved config into the local proxy definitions. To avoid this concurrent map write we will only delete the key from opaque config in the local proxy definition after the merge, rather than from the cached value before the merge. --- .changelog/10647.txt | 3 +++ agent/service_manager.go | 18 +++++++++--------- agent/service_manager_test.go | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 .changelog/10647.txt diff --git a/.changelog/10647.txt b/.changelog/10647.txt new file mode 100644 index 000000000..80ceef3a5 --- /dev/null +++ b/.changelog/10647.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: fix crash that would result from multiple instances of a service resolving service config on a single agent. +``` \ No newline at end of file diff --git a/agent/service_manager.go b/agent/service_manager.go index d112fc2d0..7223e98ac 100644 --- a/agent/service_manager.go +++ b/agent/service_manager.go @@ -396,12 +396,6 @@ func mergeServiceConfig(defaults *structs.ServiceConfigResponse, service *struct return nil, fmt.Errorf("failed to parse upstream config map for %s: %v", us.Upstream.String(), err) } - // Delete the mesh gateway key since this is the only place it is read from an opaque map. - // Later reads use Proxy.MeshGateway. - // Note that we use the "mesh_gateway" key and not other variants like "MeshGateway" because - // UpstreamConfig.MergeInto and ResolveServiceConfig only use "mesh_gateway". - delete(us.Config, "mesh_gateway") - remoteUpstreams[us.Upstream] = structs.Upstream{ DestinationNamespace: us.Upstream.NamespaceOrDefault(), DestinationName: us.Upstream.ID, @@ -425,7 +419,7 @@ func mergeServiceConfig(defaults *structs.ServiceConfigResponse, service *struct } localUpstreams[us.DestinationID()] = struct{}{} - usCfg, ok := remoteUpstreams[us.DestinationID()] + remoteCfg, ok := remoteUpstreams[us.DestinationID()] if !ok { // No config defaults to merge continue @@ -433,13 +427,19 @@ func mergeServiceConfig(defaults *structs.ServiceConfigResponse, service *struct // The local upstream config mode has the highest precedence, so only overwrite when it's set to the default if us.MeshGateway.Mode == structs.MeshGatewayModeDefault { - us.MeshGateway.Mode = usCfg.MeshGateway.Mode + us.MeshGateway.Mode = remoteCfg.MeshGateway.Mode } // Merge in everything else that is read from the map - if err := mergo.Merge(&us.Config, usCfg.Config); err != nil { + if err := mergo.Merge(&us.Config, remoteCfg.Config); err != nil { return nil, err } + + // Delete the mesh gateway key from opaque config since this is the value that was resolved from + // the servers and NOT the final merged value for this upstream. + // Note that we use the "mesh_gateway" key and not other variants like "MeshGateway" because + // UpstreamConfig.MergeInto and ResolveServiceConfig only use "mesh_gateway". + delete(us.Config, "mesh_gateway") } // Ensure upstreams present in central config are represented in the local configuration. diff --git a/agent/service_manager_test.go b/agent/service_manager_test.go index f6b134593..8ab54bc5b 100644 --- a/agent/service_manager_test.go +++ b/agent/service_manager_test.go @@ -3,6 +3,7 @@ package agent import ( "encoding/json" "fmt" + "github.com/mitchellh/copystructure" "io/ioutil" "os" "path/filepath" @@ -1188,9 +1189,16 @@ func Test_mergeServiceConfig_UpstreamOverrides(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + defaultsCopy, err := copystructure.Copy(tt.args.defaults) + require.NoError(t, err) + got, err := mergeServiceConfig(tt.args.defaults, tt.args.service) require.NoError(t, err) assert.Equal(t, tt.want, got) + + // The input defaults must not be modified by the merge. + // See PR #10647 + assert.Equal(t, tt.args.defaults, defaultsCopy) }) } } @@ -1281,9 +1289,16 @@ func Test_mergeServiceConfig_TransparentProxy(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + defaultsCopy, err := copystructure.Copy(tt.args.defaults) + require.NoError(t, err) + got, err := mergeServiceConfig(tt.args.defaults, tt.args.service) require.NoError(t, err) assert.Equal(t, tt.want, got) + + // The input defaults must not be modified by the merge. + // See PR #10647 + assert.Equal(t, tt.args.defaults, defaultsCopy) }) } }