Fix tproxy failover issue with sameness groups (#17533)

Sameness groups with default-for-failover enabled did not function properly with
tproxy whenever all instances of the service disappeared from the local cluster.
This occured, because there were no corresponding resolvers (due to the implicit
failover policy) which caused VIPs to be deallocated.

This ticket expands upon the VIP allocations so that both service-defaults and
service-intentions (without destination wildcards) will ensure that the virtual
IP exists.
This commit is contained in:
Derek Menteer 2023-05-31 15:40:06 -05:00 committed by GitHub
parent 22cf348c24
commit ae28875d55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 83 additions and 29 deletions

View File

@ -44,6 +44,8 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
StorageBackend: storageBackend,
})
fsm.state.SystemMetadataSet(10, &structs.SystemMetadataEntry{Key: structs.SystemMetadataVirtualIPsEnabled, Value: "true"})
// Add some state
node1 := &structs.Node{
ID: "610918a6-464f-fa9b-1a95-03bd6e88ed92",
@ -79,8 +81,14 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
Connect: connectConf,
})
psn := structs.PeeredServiceName{ServiceName: structs.NewServiceName("web", nil)}
vip, err := fsm.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.1")
fsm.state.EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"primary"}, Address: "127.0.0.1", Port: 5000})
fsm.state.EnsureService(5, "baz", &structs.NodeService{ID: "web", Service: "web", Tags: nil, Address: "127.0.0.2", Port: 80})
fsm.state.EnsureService(6, "baz", &structs.NodeService{ID: "db", Service: "db", Tags: []string{"secondary"}, Address: "127.0.0.2", Port: 5000})
fsm.state.EnsureCheck(7, &structs.HealthCheck{
Node: "foo",
@ -442,6 +450,10 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
},
}
require.NoError(t, fsm.state.EnsureConfigEntry(26, serviceIxn))
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("foo", nil)}
vip, err = fsm.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.2")
// mesh config entry
meshConfig := &structs.MeshConfigEntry{
@ -465,10 +477,10 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
Port: 8000,
Connect: connectConf,
})
psn := structs.PeeredServiceName{ServiceName: structs.NewServiceName("frontend", nil)}
vip, err := fsm.state.VirtualIPForService(psn)
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("frontend", nil)}
vip, err = fsm.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.1")
require.Equal(t, vip, "240.0.0.3")
fsm.state.EnsureService(30, "foo", &structs.NodeService{
ID: "backend",
@ -480,7 +492,7 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("backend", nil)}
vip, err = fsm.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.2")
require.Equal(t, vip, "240.0.0.4")
_, serviceNames, err := fsm.state.ServiceNamesOfKind(nil, structs.ServiceKindTypical)
require.NoError(t, err)
@ -534,15 +546,15 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
},
}))
// Add a service-resolver entry to get a virtual IP for service foo
// Add a service-resolver entry to get a virtual IP for service goo
resolverEntry := &structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "foo",
Name: "goo",
}
require.NoError(t, fsm.state.EnsureConfigEntry(34, resolverEntry))
vip, err = fsm.state.VirtualIPForService(structs.PeeredServiceName{ServiceName: structs.NewServiceName("foo", nil)})
vip, err = fsm.state.VirtualIPForService(structs.PeeredServiceName{ServiceName: structs.NewServiceName("goo", nil)})
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.3")
require.Equal(t, vip, "240.0.0.5")
// Resources
resource, err := storageBackend.WriteCAS(context.Background(), &pbresource.Resource{
@ -665,18 +677,26 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
require.Equal(t, uint64(25), checks[0].ModifyIndex)
// Verify virtual IPs are consistent.
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("frontend", nil)}
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("web", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.1")
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("backend", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.2")
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("foo", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.2")
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("frontend", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.3")
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("backend", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.4")
psn = structs.PeeredServiceName{ServiceName: structs.NewServiceName("goo", nil)}
vip, err = fsm2.state.VirtualIPForService(psn)
require.NoError(t, err)
require.Equal(t, vip, "240.0.0.5")
// Verify key is set
_, d, err := fsm2.state.KVSGet(nil, "/test", nil)

View File

@ -915,7 +915,7 @@ func ensureServiceTxn(tx WriteTxn, idx uint64, node string, preserveIndexes bool
if err != nil {
return err
}
if supported {
if supported && sn.Name != "" {
psn := structs.PeeredServiceName{Peer: svc.PeerName, ServiceName: sn}
vip, err := assignServiceVirtualIP(tx, idx, psn)
if err != nil {
@ -2110,7 +2110,13 @@ func freeServiceVirtualIP(
// Don't deregister the virtual IP if at least one resolver/router/splitter config entry still
// references this service.
configEntryVIPKinds := []string{structs.ServiceResolver, structs.ServiceRouter, structs.ServiceSplitter}
configEntryVIPKinds := []string{
structs.ServiceResolver,
structs.ServiceRouter,
structs.ServiceSplitter,
structs.ServiceDefaults,
structs.ServiceIntentions,
}
for _, kind := range configEntryVIPKinds {
_, entry, err := configEntryTxn(tx, nil, kind, psn.ServiceName.Name, &psn.ServiceName.EnterpriseMeta)
if err != nil {
@ -3051,14 +3057,15 @@ func (s *Store) ServiceVirtualIPs() (uint64, []ServiceVirtualIP, error) {
tx := s.db.Txn(false)
defer tx.Abort()
return servicesVirtualIPsTxn(tx)
return servicesVirtualIPsTxn(tx, nil)
}
func servicesVirtualIPsTxn(tx ReadTxn) (uint64, []ServiceVirtualIP, error) {
func servicesVirtualIPsTxn(tx ReadTxn, ws memdb.WatchSet) (uint64, []ServiceVirtualIP, error) {
iter, err := tx.Get(tableServiceVirtualIPs, indexID)
if err != nil {
return 0, nil, err
}
ws.Add(iter.WatchCh())
var vips []ServiceVirtualIP
for raw := iter.Next(); raw != nil; raw = iter.Next() {

View File

@ -6,6 +6,7 @@ package state
import (
"errors"
"fmt"
"strings"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/go-multierror"
@ -465,9 +466,8 @@ func deleteConfigEntryTxn(tx WriteTxn, idx uint64, kind, name string, entMeta *a
return fmt.Errorf("failed updating index: %s", err)
}
// If this is a resolver/router/splitter, attempt to delete the virtual IP associated
// with this service.
if kind == structs.ServiceResolver || kind == structs.ServiceRouter || kind == structs.ServiceSplitter {
// Attempt to delete the virtual IP associated with this service, if applicable.
if configEntryHasVirtualIP(c) {
psn := structs.PeeredServiceName{ServiceName: sn}
if err := freeServiceVirtualIP(tx, idx, psn, nil); err != nil {
return fmt.Errorf("failed to clean up virtual IP for %q: %v", psn.String(), err)
@ -519,11 +519,14 @@ func insertConfigEntryWithTxn(tx WriteTxn, idx uint64, conf structs.ConfigEntry)
if err != nil {
return err
}
case structs.ServiceResolver:
fallthrough
case structs.ServiceRouter:
fallthrough
case structs.ServiceSplitter:
}
// Assign virtual-ips, if needed
supported, err := virtualIPsSupported(tx, nil)
if err != nil {
return err
}
if supported && configEntryHasVirtualIP(conf) {
psn := structs.PeeredServiceName{ServiceName: structs.NewServiceName(conf.GetName(), conf.GetEnterpriseMeta())}
if _, err := assignServiceVirtualIP(tx, idx, psn); err != nil {
return err
@ -541,6 +544,28 @@ func insertConfigEntryWithTxn(tx WriteTxn, idx uint64, conf structs.ConfigEntry)
return nil
}
func configEntryHasVirtualIP(c structs.ConfigEntry) bool {
if c == nil || c.GetName() == "" {
return false
}
switch c.GetKind() {
case structs.ServiceRouter:
return true
case structs.ServiceResolver:
return true
case structs.ServiceSplitter:
return true
case structs.ServiceDefaults:
return true
case structs.ServiceIntentions:
entMeta := c.GetEnterpriseMeta()
return !strings.Contains(c.GetName(), "*") &&
!strings.Contains(entMeta.NamespaceOrDefault(), "*") &&
!strings.Contains(entMeta.PartitionOrDefault(), "*")
}
return false
}
// validateProposedConfigEntryInGraph can be used to verify graph integrity for
// a proposed graph create/update/delete.
//

View File

@ -1106,7 +1106,7 @@ func (s *Store) intentionTopologyTxn(
// We only need to do this for upstreams currently, so that tproxy can find which discovery chains should be
// contacted for failover scenarios. Virtual services technically don't need to be considered as downstreams,
// because they will take on the identity of the calling service, rather than the chain itself.
vipIndex, vipServices, err := servicesVirtualIPsTxn(tx)
vipIndex, vipServices, err := servicesVirtualIPsTxn(tx, ws)
if err != nil {
return index, nil, fmt.Errorf("failed to list service virtual IPs: %v", err)
}

View File

@ -2097,6 +2097,7 @@ func disableLegacyIntentions(s *Store) error {
func testConfigStateStore(t *testing.T) *Store {
s := testStateStore(t)
s.SystemMetadataSet(5, &structs.SystemMetadataEntry{Key: structs.SystemMetadataVirtualIPsEnabled, Value: "true"})
disableLegacyIntentions(s)
return s
}
@ -2651,6 +2652,7 @@ func TestStore_IntentionTopology_Destination(t *testing.T) {
func TestStore_IntentionTopology_Watches(t *testing.T) {
s := testConfigStateStore(t)
s.SystemMetadataSet(10, &structs.SystemMetadataEntry{Key: structs.SystemMetadataVirtualIPsEnabled, Value: "true"})
var i uint64 = 1
require.NoError(t, s.EnsureNode(i, &structs.Node{
@ -2687,7 +2689,8 @@ func TestStore_IntentionTopology_Watches(t *testing.T) {
index, got, err = s.IntentionTopology(ws, target, false, acl.Deny, structs.IntentionTargetService)
require.NoError(t, err)
require.Equal(t, uint64(2), index)
require.Empty(t, got)
// Because API is a virtual service, it is included in this output.
require.Equal(t, structs.ServiceList{structs.NewServiceName("api", nil)}, got)
// Watch should not fire after unrelated intention changes
require.NoError(t, s.EnsureConfigEntry(i, &structs.ServiceIntentionsConfigEntry{
@ -2701,7 +2704,6 @@ func TestStore_IntentionTopology_Watches(t *testing.T) {
},
}))
i++
// TODO(freddy) Why is this firing?
// require.False(t, watchFired(ws))
@ -2709,7 +2711,7 @@ func TestStore_IntentionTopology_Watches(t *testing.T) {
index, got, err = s.IntentionTopology(ws, target, false, acl.Deny, structs.IntentionTargetService)
require.NoError(t, err)
require.Equal(t, uint64(3), index)
require.Empty(t, got)
require.Equal(t, structs.ServiceList{structs.NewServiceName("api", nil)}, got)
// Watch should fire after service list changes
require.NoError(t, s.EnsureService(i, "foo", &structs.NodeService{