open-consul/agent/consul/config_replication_test.go

package consul

import (
	"fmt"
	"os"
	"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
)
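// TestReplication_ConfigSort exercises configSort, the ordering applied to
// config entries before the replication code's differential comparison. The
// cases below pin the expected order: by kind first, then by name, and the
// sort must be stable. A rough sketch consistent with these cases (the real
// configSort is defined elsewhere in this package; note also that per the
// fix in #9024, the cross-datacenter comparison must take namespaces into
// account as well, or entries sharing a kind and name in different
// namespaces can wedge replication in a delete/restore loop):
//
//	sort.SliceStable(configs, func(i, j int) bool {
//		a, b := configs[i], configs[j]
//		if a.GetKind() != b.GetKind() {
//			return a.GetKind() < b.GetKind()
//		}
//		return a.GetName() < b.GetName()
//	})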
func TestReplication_ConfigSort(t *testing.T) {
	newDefaults := func(name, protocol string) *structs.ServiceConfigEntry {
		return &structs.ServiceConfigEntry{
			Kind:     structs.ServiceDefaults,
			Name:     name,
			Protocol: protocol,
		}
	}
	newResolver := func(name string, timeout time.Duration) *structs.ServiceResolverConfigEntry {
		return &structs.ServiceResolverConfigEntry{
			Kind:           structs.ServiceResolver,
			Name:           name,
			ConnectTimeout: timeout,
		}
	}

	type testcase struct {
		configs []structs.ConfigEntry
		expect  []structs.ConfigEntry
	}

	cases := map[string]testcase{
		"none": {},
		"one": {
			configs: []structs.ConfigEntry{
				newDefaults("web", "grpc"),
			},
			expect: []structs.ConfigEntry{
				newDefaults("web", "grpc"),
			},
		},
		"just kinds": {
			configs: []structs.ConfigEntry{
				newResolver("web", 33*time.Second),
				newDefaults("web", "grpc"),
			},
			expect: []structs.ConfigEntry{
				newDefaults("web", "grpc"),
				newResolver("web", 33*time.Second),
			},
		},
		"just names": {
			configs: []structs.ConfigEntry{
				newDefaults("db", "grpc"),
				newDefaults("api", "http2"),
			},
			expect: []structs.ConfigEntry{
				newDefaults("api", "http2"),
				newDefaults("db", "grpc"),
			},
		},
		"all": {
			configs: []structs.ConfigEntry{
				newResolver("web", 33*time.Second),
				newDefaults("web", "grpc"),
				newDefaults("db", "grpc"),
				newDefaults("api", "http2"),
			},
			expect: []structs.ConfigEntry{
				newDefaults("api", "http2"),
				newDefaults("db", "grpc"),
				newDefaults("web", "grpc"),
				newResolver("web", 33*time.Second),
			},
		},
	}

	for name, tc := range cases {
		tc := tc
		t.Run(name, func(t *testing.T) {
			configSort(tc.configs)
			require.Equal(t, tc.expect, tc.configs)
			// and it should be stable
			configSort(tc.configs)
			require.Equal(t, tc.expect, tc.configs)
		})
	}
}
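// TestReplication_DisallowedConfigEntries verifies that config entry kinds
// excluded from replication never reach a secondary datacenter: of the four
// entries written to the primary, only proxy-defaults, service-defaults,
// and mesh should appear in dc2, while partition-exports stays local.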
func TestReplication_DisallowedConfigEntries(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.PrimaryDatacenter = "dc1"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	client := rpcClient(t, s1)
	defer client.Close()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc2"
		c.PrimaryDatacenter = "dc1"
		c.ConfigReplicationRate = 100
		c.ConfigReplicationBurst = 100
		c.ConfigReplicationApplyLimit = 1000000
	})
	testrpc.WaitForLeader(t, s2.RPC, "dc2")
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	// Try to join.
	joinWAN(t, s2, s1)
	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	testrpc.WaitForLeader(t, s1.RPC, "dc2")
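	// Write one config entry of each kind under test to the primary. All of
	// them except the partition-exports entry should be replicated to dc2.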
	args := []structs.ConfigEntryRequest{
		{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.ServiceConfigEntry{
				Kind:     structs.ServiceDefaults,
				Name:     "foo",
				Protocol: "http2",
			},
		},
		{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.PartitionExportsConfigEntry{
				Name: "default",
				Services: []structs.ExportedService{
					{
						Name: structs.WildcardSpecifier,
						Consumers: []structs.ServiceConsumer{
							{
								Partition: "non-default",
							},
						},
					},
				},
			},
		},
		{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.ProxyConfigEntry{
				Kind: structs.ProxyDefaults,
				Name: "global",
				Config: map[string]interface{}{
					"Protocol": "http",
				},
			},
		},
		{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.MeshConfigEntry{
				TransparentProxy: structs.TransparentProxyMeshConfig{
					MeshDestinationsOnly: true,
				},
			},
		},
	}
	for _, arg := range args {
		out := false
		require.NoError(t, s1.RPC("ConfigEntry.Apply", &arg, &out))
	}

	retry.Run(t, func(r *retry.R) {
		_, local, err := s2.fsm.State().ConfigEntries(nil, structs.ReplicationEnterpriseMeta())
		require.NoError(r, err)
		require.Len(r, local, 3)

		localKinds := make([]string, 0)
		for _, entry := range local {
			localKinds = append(localKinds, entry.GetKind())
		}
		// Should have all inserted kinds except for partition-exports.
		expectKinds := []string{
			structs.ProxyDefaults, structs.ServiceDefaults, structs.MeshConfig,
		}
		require.ElementsMatch(r, expectKinds, localKinds)
	})
}
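// TestReplication_ConfigEntries walks a full replication lifecycle: create
// 50 service-defaults entries plus a global proxy-defaults entry in the
// primary, wait for the secondary to converge, update them all and converge
// again, then delete everything and confirm the secondary drains to match.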
func TestReplication_ConfigEntries(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()

	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.PrimaryDatacenter = "dc1"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	client := rpcClient(t, s1)
	defer client.Close()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc2"
		c.PrimaryDatacenter = "dc1"
		c.ConfigReplicationRate = 100
		c.ConfigReplicationBurst = 100
		c.ConfigReplicationApplyLimit = 1000000
	})
	testrpc.WaitForLeader(t, s2.RPC, "dc2")
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	// Try to join.
	joinWAN(t, s2, s1)
	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	testrpc.WaitForLeader(t, s1.RPC, "dc2")

	// Create some new configuration entries
	var entries []structs.ConfigEntry
	for i := 0; i < 50; i++ {
		arg := structs.ConfigEntryRequest{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.ServiceConfigEntry{
				Kind:     structs.ServiceDefaults,
				Name:     fmt.Sprintf("svc-%d", i),
				Protocol: "tcp",
			},
		}
		out := false
		require.NoError(t, s1.RPC("ConfigEntry.Apply", &arg, &out))
		entries = append(entries, arg.Entry)
	}

	arg := structs.ConfigEntryRequest{
		Datacenter: "dc1",
		Op:         structs.ConfigEntryUpsert,
		Entry: &structs.ProxyConfigEntry{
			Kind: structs.ProxyDefaults,
			Name: "global",
			Config: map[string]interface{}{
				"foo": "bar",
				"bar": 1,
			},
		},
	}
	out := false
	require.NoError(t, s1.RPC("ConfigEntry.Apply", &arg, &out))
	entries = append(entries, arg.Entry)
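	// checkSame asserts that the primary (s1) and the secondary (s2) hold
	// the same set of config entries, comparing kind, name, and a few
	// kind-specific fields.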
	checkSame := func(t *retry.R) error {
		_, remote, err := s1.fsm.State().ConfigEntries(nil, structs.ReplicationEnterpriseMeta())
		require.NoError(t, err)
		_, local, err := s2.fsm.State().ConfigEntries(nil, structs.ReplicationEnterpriseMeta())
		require.NoError(t, err)

		require.Len(t, local, len(remote))
		for i, entry := range remote {
			require.Equal(t, entry.GetKind(), local[i].GetKind())
			require.Equal(t, entry.GetName(), local[i].GetName())

			// more validations
			switch entry.GetKind() {
			case structs.ServiceDefaults:
				localSvc, ok := local[i].(*structs.ServiceConfigEntry)
				require.True(t, ok)
				remoteSvc, ok := entry.(*structs.ServiceConfigEntry)
				require.True(t, ok)
				require.Equal(t, remoteSvc.Protocol, localSvc.Protocol)
			case structs.ProxyDefaults:
				localProxy, ok := local[i].(*structs.ProxyConfigEntry)
				require.True(t, ok)
				remoteProxy, ok := entry.(*structs.ProxyConfigEntry)
				require.True(t, ok)
				require.Equal(t, remoteProxy.Config, localProxy.Config)
			}
		}
		return nil
	}

	// Wait for the replica to converge.
	retry.Run(t, func(r *retry.R) {
		checkSame(r)
	})

	// Update those config entries
	for i := 0; i < 50; i++ {
		arg := structs.ConfigEntryRequest{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryUpsert,
			Entry: &structs.ServiceConfigEntry{
				Kind:     structs.ServiceDefaults,
				Name:     fmt.Sprintf("svc-%d", i),
				Protocol: "udp",
			},
		}
		out := false
		require.NoError(t, s1.RPC("ConfigEntry.Apply", &arg, &out))
	}

	arg = structs.ConfigEntryRequest{
		Datacenter: "dc1",
		Op:         structs.ConfigEntryUpsert,
		Entry: &structs.ProxyConfigEntry{
			Kind: structs.ProxyDefaults,
			Name: "global",
			Config: map[string]interface{}{
				"foo": "baz",
				"baz": 2,
			},
		},
	}
	require.NoError(t, s1.RPC("ConfigEntry.Apply", &arg, &out))

	// Wait for the replica to converge.
	retry.Run(t, func(r *retry.R) {
		checkSame(r)
	})

	for _, entry := range entries {
		arg := structs.ConfigEntryRequest{
			Datacenter: "dc1",
			Op:         structs.ConfigEntryDelete,
			Entry:      entry,
		}

		var out structs.ConfigEntryDeleteResponse
		require.NoError(t, s1.RPC("ConfigEntry.Delete", &arg, &out))
	}

	// Wait for the replica to converge.
	retry.Run(t, func(r *retry.R) {
		checkSame(r)
	})
}