package consul

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"

	"github.com/hashicorp/consul/agent/structs"
)

func configSort(configs []structs.ConfigEntry) {
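	// Sort by kind, then enterprise metadata (namespace), then name so that
	// diffConfigEntries can walk the local and remote lists in lockstep.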
	sort.SliceStable(configs, func(i, j int) bool {
		return cmpConfigLess(configs[i], configs[j])
	})
}

func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry, lastRemoteIndex uint64) ([]structs.ConfigEntry, []structs.ConfigEntry) {
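	// Sort both sets of entries and then compute which local entries must be
	// deleted (present only locally) and which must be upserted locally
	// (present only remotely, or present in both with a remote ModifyIndex
	// newer than lastRemoteIndex).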
	configSort(local)
	configSort(remote)

	var (
		deletions []structs.ConfigEntry
		updates   []structs.ConfigEntry
		localIdx  int
		remoteIdx int
	)

	for localIdx, remoteIdx = 0, 0; localIdx < len(local) && remoteIdx < len(remote); {
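		// Merge-walk the two sorted lists, advancing whichever side compares
		// lower; entries with the same ID are compared by raft ModifyIndex.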
		if configSameID(local[localIdx], remote[remoteIdx]) {
			// config is in both the local and remote state - need to check raft indices
			if remote[remoteIdx].GetRaftIndex().ModifyIndex > lastRemoteIndex {
				updates = append(updates, remote[remoteIdx])
			}

			// increment both indices when equal
			localIdx += 1
			remoteIdx += 1
		} else if cmpConfigLess(local[localIdx], remote[remoteIdx]) {
			// config no longer in remote state - needs deleting
			deletions = append(deletions, local[localIdx])

			// increment just the local index
			localIdx += 1
		} else {
			// local state doesn't have this config - needs updating
			updates = append(updates, remote[remoteIdx])

			// increment just the remote index
			remoteIdx += 1
		}
	}

	for ; localIdx < len(local); localIdx += 1 {
		deletions = append(deletions, local[localIdx])
	}

	for ; remoteIdx < len(remote); remoteIdx += 1 {
		updates = append(updates, remote[remoteIdx])
	}

	return deletions, updates
}

func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool {
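	// Order by kind, then enterprise metadata (namespace), then name. The
	// enterprise metadata must participate in the comparison: ignoring it can
	// mis-pair entries that share a kind and name but live in different
	// namespaces, causing the diff to emit spurious deletes and updates.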
	if first.GetKind() < second.GetKind() {
		return true
	}
	if first.GetKind() > second.GetKind() {
		return false
	}

	if first.GetEnterpriseMeta().LessThan(second.GetEnterpriseMeta()) {
		return true
	}
	if second.GetEnterpriseMeta().LessThan(first.GetEnterpriseMeta()) {
		return false
	}

	return first.GetName() < second.GetName()
}

func configSameID(e1, e2 structs.ConfigEntry) bool {
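	// Two entries refer to the same config when kind, enterprise metadata,
	// and name all match.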
	return e1.GetKind() == e2.GetKind() &&
		e1.GetEnterpriseMeta().IsSame(e2.GetEnterpriseMeta()) &&
		e1.GetName() == e2.GetName()
}

func (s *Server) reconcileLocalConfig(ctx context.Context, configs []structs.ConfigEntry, op structs.ConfigEntryOp) (bool, error) {
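	// Apply the given operation to each entry through a leader raft apply,
	// rate limited to ConfigReplicationApplyLimit applies per second. The
	// returned bool is true when the context was cancelled mid-way through.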
	ticker := time.NewTicker(time.Second / time.Duration(s.config.ConfigReplicationApplyLimit))
	defer ticker.Stop()

	rpcServiceMethod := "ConfigEntry.Apply"
	if op == structs.ConfigEntryDelete || op == structs.ConfigEntryDeleteCAS {
		rpcServiceMethod = "ConfigEntry.Delete"
	}

	var merr error
	for i, entry := range configs {
		// Exported services only apply to the primary datacenter.
		if entry.GetKind() == structs.ExportedServices {
			continue
		}
		req := structs.ConfigEntryRequest{
			Op:         op,
			Datacenter: s.config.Datacenter,
			Entry:      entry,
		}

		_, err := s.leaderRaftApply(rpcServiceMethod, structs.ConfigEntryRequestType, &req)
		if err != nil {
			merr = multierror.Append(merr, fmt.Errorf("Failed to apply config entry %s: %w", op, err))
		}

		if i < len(configs)-1 {
			select {
			case <-ctx.Done():
				return true, nil
			case <-ticker.C:
				// do nothing - ready for the next batch
			}
		}
	}

	return false, merr
}

func (s *Server) fetchConfigEntries(lastRemoteIndex uint64) (*structs.IndexedGenericConfigEntries, error) {
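	// Run a (potentially blocking) ConfigEntry.ListAll query against the
	// primary datacenter using the replication token, waiting for an index
	// newer than lastRemoteIndex.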
	defer metrics.MeasureSince([]string{"leader", "replication", "config-entries", "fetch"}, time.Now())

	req := structs.ConfigEntryListAllRequest{
		Datacenter: s.config.PrimaryDatacenter,
		Kinds:      structs.AllConfigEntryKinds,
		QueryOptions: structs.QueryOptions{
			AllowStale:    true,
			MinQueryIndex: lastRemoteIndex,
			Token:         s.tokens.ReplicationToken(),
		},
		EnterpriseMeta: *s.replicationEnterpriseMeta(),
	}

	var response structs.IndexedGenericConfigEntries
	if err := s.RPC(context.Background(), "ConfigEntry.ListAll", &req, &response); err != nil {
		return nil, err
	}

	return &response, nil
}

func (s *Server) replicateConfig(ctx context.Context, lastRemoteIndex uint64, logger hclog.Logger) (uint64, bool, error) {
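	// Perform one round of config entry replication: fetch the remote entries
	// from the primary datacenter, diff them against the local state, apply
	// deletions and then updates, and return the remote index to resume from.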
	remote, err := s.fetchConfigEntries(lastRemoteIndex)
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve remote config entries: %v", err)
	}

	logger.Debug("finished fetching config entries", "amount", len(remote.Entries))

	// Need to check if we should be stopping. This will be common as the fetching process is a blocking
	// RPC which could have been hanging around for a long time and during that time leadership could
	// have been lost.
	select {
	case <-ctx.Done():
		return 0, true, nil
	default:
		// do nothing
	}

	// Measure everything after the remote query, which can block for long
	// periods of time. This metric is a good measure of how expensive the
	// replication process is.
	defer metrics.MeasureSince([]string{"leader", "replication", "config", "apply"}, time.Now())

	_, local, err := s.fsm.State().ConfigEntries(nil, s.replicationEnterpriseMeta())
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve local config entries: %v", err)
	}

	// If the remote index ever goes backwards, it's a good indication that
	// the remote side was rebuilt and we should do a full sync since we
	// can't make any assumptions about what's going on.
	//
	// Resetting lastRemoteIndex to 0 will work because we never consider local
	// raft indices. Instead we compare the raft modify index in the response object
	// with the lastRemoteIndex (only when we already have a config entry of the same
	// kind/name/enterprise meta) to determine if an update is needed. Resetting
	// lastRemoteIndex to 0 then has the effect of making us think all the local state
	// is out of date and any matching entries should still be updated.
	//
	// The lastRemoteIndex is not used when the entry exists either only in the local state or
	// only in the remote state. In those situations we need to either delete it or create it.
	if remote.QueryMeta.Index < lastRemoteIndex {
		logger.Warn("Config Entry replication remote index moved backwards, forcing a full Config Entry sync",
			"from", lastRemoteIndex,
			"to", remote.QueryMeta.Index,
		)
		lastRemoteIndex = 0
	}

	logger.Debug("Config Entry replication",
		"local", len(local),
		"remote", len(remote.Entries),
	)

	// Calculate the changes required to bring the state into sync and then
	// apply them.
	deletions, updates := diffConfigEntries(local, remote.Entries, lastRemoteIndex)

	logger.Debug("Config Entry replication",
		"deletions", len(deletions),
		"updates", len(updates),
	)

	var merr error
	if len(deletions) > 0 {
		logger.Debug("Deleting local config entries",
			"deletions", len(deletions),
		)

		exit, err := s.reconcileLocalConfig(ctx, deletions, structs.ConfigEntryDelete)
		if exit {
			return 0, true, nil
		}
		if err != nil {
			merr = multierror.Append(merr, err)
		} else {
			logger.Debug("Config Entry replication - finished deletions")
		}
	}

	if len(updates) > 0 {
		logger.Debug("Updating local config entries",
			"updates", len(updates),
		)
		exit, err := s.reconcileLocalConfig(ctx, updates, structs.ConfigEntryUpsert)
		if exit {
			return 0, true, nil
		}
		if err != nil {
			merr = multierror.Append(merr, err)
		} else {
			logger.Debug("Config Entry replication - finished updates")
		}
	}

	if merr != nil {
		return 0, false, merr
	}

	// Return the index we got back from the remote side, since we've synced
	// up with the remote state as of that index.
	return remote.QueryMeta.Index, false, nil
}