510 lines
16 KiB
Go
510 lines
16 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package consul
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
"github.com/hashicorp/consul/logging"
|
|
)
|
|
|
|
const (
|
|
// maxIntentionTxnSize is the maximum size (in bytes) of a transaction used during
|
|
// Intention replication.
|
|
maxIntentionTxnSize = raftWarnSize / 4
|
|
)
|
|
|
|
func (s *Server) startIntentionConfigEntryMigration(ctx context.Context) error {
|
|
if !s.config.ConnectEnabled {
|
|
return nil
|
|
}
|
|
|
|
// Check for the system metadata first, as that's the most trustworthy in
|
|
// both the primary and secondaries.
|
|
intentionFormat, err := s.getSystemMetadata(structs.SystemMetadataIntentionFormatKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if intentionFormat == structs.SystemMetadataIntentionFormatConfigValue {
|
|
// Bypass the serf component and jump right to the final state.
|
|
s.setDatacenterSupportsIntentionsAsConfigEntries()
|
|
return nil // nothing to migrate
|
|
}
|
|
|
|
if s.config.PrimaryDatacenter == s.config.Datacenter {
|
|
// Do a quick legacy intentions check to see if it's even worth
|
|
// spinning up the routine at all. This only applies if the primary
|
|
// datacenter is composed entirely of compatible servers and there are
|
|
// no more legacy intentions.
|
|
if s.DatacenterSupportsIntentionsAsConfigEntries() {
|
|
// NOTE: we only have to migrate legacy intentions from the default
|
|
// partition because partitions didn't exist when legacy intentions
|
|
// were canonical
|
|
_, ixns, err := s.fsm.State().LegacyIntentions(nil, structs.WildcardEnterpriseMetaInDefaultPartition())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(ixns) == 0 {
|
|
// Though there's nothing to migrate, still trigger the special
|
|
// delete-all operation which should update various indexes and
|
|
// drop some system metadata so we can skip all of this next
|
|
// time.
|
|
//
|
|
// This is done inline with leader election so that new
|
|
// clusters on 1.9.0 with no legacy intentions will immediately
|
|
// transition to intentions-as-config-entries mode.
|
|
return s.legacyIntentionsMigrationCleanupPhase(true)
|
|
}
|
|
}
|
|
|
|
// When running in the primary we do all of the real work.
|
|
s.leaderRoutineManager.Start(ctx, intentionMigrationRoutineName, s.legacyIntentionMigration)
|
|
} else {
|
|
// When running in the secondary we mostly just wait until the
|
|
// primary finishes, and then wait until we're pretty sure the main
|
|
// config entry replication thread has seen all of the
|
|
// migration-related config entry edits before zeroing OUR copy of
|
|
// the old intentions table.
|
|
s.leaderRoutineManager.Start(ctx, intentionMigrationRoutineName, s.legacyIntentionMigrationInSecondaryDC)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// This function is only intended to be run as a managed go routine, it will block until
|
|
// the context passed in indicates that it should exit.
|
|
func (s *Server) legacyIntentionMigration(ctx context.Context) error {
|
|
if s.config.PrimaryDatacenter != s.config.Datacenter {
|
|
return nil
|
|
}
|
|
|
|
connectLogger := s.loggers.Named(logging.Connect)
|
|
|
|
loopCtx, loopCancel := context.WithCancel(ctx)
|
|
defer loopCancel()
|
|
|
|
retryLoopBackoff(loopCtx, func() error {
|
|
// We have to wait until all of our sibling servers are upgraded.
|
|
if !s.DatacenterSupportsIntentionsAsConfigEntries() {
|
|
return nil
|
|
}
|
|
|
|
state := s.fsm.State()
|
|
// NOTE: we only have to migrate legacy intentions from the default
|
|
// partition because partitions didn't exist when legacy intentions
|
|
// were canonical
|
|
_, ixns, err := state.LegacyIntentions(nil, structs.WildcardEnterpriseMetaInDefaultPartition())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// NOTE: do not early abort here if the list is empty, let it run to completion.
|
|
|
|
entries, err := convertLegacyIntentionsToConfigEntries(ixns)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
entries, err = s.filterMigratedLegacyIntentions(entries)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Totally cheat and repurpose one part of config entry replication
|
|
// here so we automatically get our writes rate limited.
|
|
_, err = s.reconcileLocalConfig(ctx, entries, structs.ConfigEntryUpsert)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Wrap up
|
|
if err := s.legacyIntentionsMigrationCleanupPhase(false); err != nil {
|
|
return err
|
|
}
|
|
|
|
loopCancel()
|
|
connectLogger.Info("intention migration complete")
|
|
return nil
|
|
|
|
}, func(err error) {
|
|
connectLogger.Error(
|
|
"error migrating intentions to config entries, will retry",
|
|
"routine", intentionMigrationRoutineName,
|
|
"error", err,
|
|
)
|
|
})
|
|
|
|
return nil
|
|
}
|
|
|
|
func convertLegacyIntentionsToConfigEntries(ixns structs.Intentions) ([]structs.ConfigEntry, error) {
|
|
entries := migrateIntentionsToConfigEntries(ixns)
|
|
genericEntries := make([]structs.ConfigEntry, 0, len(entries))
|
|
for _, entry := range entries {
|
|
if err := entry.LegacyNormalize(); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := entry.LegacyValidate(); err != nil {
|
|
return nil, err
|
|
}
|
|
genericEntries = append(genericEntries, entry)
|
|
}
|
|
return genericEntries, nil
|
|
}
|
|
|
|
// legacyIntentionsMigrationCleanupPhase will delete all legacy intentions and
|
|
// also record a piece of system metadata indicating that the migration has
|
|
// been completed.
|
|
func (s *Server) legacyIntentionsMigrationCleanupPhase(quiet bool) error {
|
|
if !quiet {
|
|
s.loggers.Named(logging.Connect).
|
|
Info("finishing up intention migration by clearing the legacy store")
|
|
}
|
|
|
|
// This is a special intention op that ensures we bind the raft indexes
|
|
// associated with both the legacy table and the config entry table.
|
|
//
|
|
// We also update a piece of system metadata to reflect that we are
|
|
// definitely in a post-migration world.
|
|
req := structs.IntentionRequest{
|
|
Op: structs.IntentionOpDeleteAll,
|
|
}
|
|
|
|
if _, err := s.leaderRaftApply("Intentions.DeleteAll", structs.IntentionRequestType, req); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Bypass the serf component and jump right to the final state.
|
|
s.setDatacenterSupportsIntentionsAsConfigEntries()
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Server) legacyIntentionMigrationInSecondaryDC(ctx context.Context) error {
|
|
if s.config.PrimaryDatacenter == s.config.Datacenter {
|
|
return nil
|
|
}
|
|
|
|
const (
|
|
stateReplicateLegacy = iota
|
|
stateWaitForPrimary
|
|
stateWaitForConfigReplication
|
|
stateDoCleanup
|
|
)
|
|
|
|
var (
|
|
connectLogger = s.loggers.Named(logging.Connect)
|
|
|
|
currentState = stateReplicateLegacy
|
|
lastLegacyReplicationFetchIndex uint64
|
|
legacyReplicationDisabled bool
|
|
lastLegacyOnlyFetchIndex uint64
|
|
)
|
|
|
|
// This loop does several things:
|
|
//
|
|
// (1) Until we know for certain that the all of the servers in the primary
|
|
// DC and all of the servers in our DC are running a Consul version that
|
|
// can support intentions as config entries we have to continue to do
|
|
// legacy intention replication.
|
|
//
|
|
// (2) Once we know all versions of Consul are compatible, we cease to
|
|
// replicate legacy intentions as that table is frozen in the primary DC.
|
|
// We do a special blocking query back to exclusively the legacy intentions
|
|
// table in the primary to detect when it is zeroed out. We capture the max
|
|
// raft index of this zeroing.
|
|
//
|
|
// (3) We wait until our own config entry replication crosses the primary
|
|
// index from (2) so we know that we have replicated all of the new forms
|
|
// of the existing intentions.
|
|
|
|
// (1) Legacy intention replication. A blocking query back to the primary
|
|
// asking for intentions to replicate is both needed if the primary is OLD
|
|
// since we still need to replicate new writes, but also if the primary is
|
|
// NEW to know when the migration code in the primary has completed and
|
|
// zeroed the legacy memdb table.
|
|
//
|
|
// (2) If the primary has finished migration, we have to wait until our own
|
|
// config entry replication catches up.
|
|
//
|
|
// (3) After config entry replication catches up we should zero out own own
|
|
// legacy intentions memdb table.
|
|
|
|
loopCtx, loopCancel := context.WithCancel(ctx)
|
|
defer loopCancel()
|
|
|
|
retryLoopBackoff(loopCtx, func() error {
|
|
// This for loop only exists to avoid backoff every state transition.
|
|
// Only trigger the loop if the state changes, otherwise return a nil
|
|
// error.
|
|
for {
|
|
// Check for the system metadata first, as that's the most trustworthy.
|
|
intentionFormat, err := s.getSystemMetadata(structs.SystemMetadataIntentionFormatKey)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if intentionFormat == structs.SystemMetadataIntentionFormatConfigValue {
|
|
// Bypass the serf component and jump right to the final state.
|
|
s.setDatacenterSupportsIntentionsAsConfigEntries()
|
|
loopCancel()
|
|
return nil // nothing to migrate
|
|
}
|
|
|
|
switch currentState {
|
|
case stateReplicateLegacy:
|
|
if s.DatacenterSupportsIntentionsAsConfigEntries() {
|
|
// Now all nodes in this datacenter and the primary are totally
|
|
// ready for intentions as config entries, so disable legacy
|
|
// replication and transition to the next phase.
|
|
currentState = stateWaitForPrimary
|
|
|
|
// Explicitly zero these out as they are now unused but could
|
|
// be at worst misleading.
|
|
lastLegacyReplicationFetchIndex = 0
|
|
legacyReplicationDisabled = false
|
|
|
|
} else if !legacyReplicationDisabled {
|
|
// This is the embedded legacy intention replication.
|
|
index, outOfLegacyMode, err := s.replicateLegacyIntentionsOnce(ctx, lastLegacyReplicationFetchIndex)
|
|
if err != nil {
|
|
return err
|
|
} else if outOfLegacyMode {
|
|
// We chill out and wait until all of the nodes in this
|
|
// datacenter are ready for intentions as config entries.
|
|
//
|
|
// It's odd that we get this to happen before serf gives us
|
|
// the feature flag, but gossip isn't immediate so it's
|
|
// technically possible.
|
|
legacyReplicationDisabled = true
|
|
} else {
|
|
lastLegacyReplicationFetchIndex = nextIndexVal(lastLegacyReplicationFetchIndex, index)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
case stateWaitForPrimary:
|
|
// Loop until we see the primary has finished migrating to config entries.
|
|
index, numIxns, err := s.fetchLegacyIntentionsSummary(ctx, lastLegacyOnlyFetchIndex)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
lastLegacyOnlyFetchIndex = nextIndexVal(lastLegacyOnlyFetchIndex, index)
|
|
if numIxns == 0 {
|
|
connectLogger.Debug("intention migration in secondary status", "last_primary_index", lastLegacyOnlyFetchIndex)
|
|
currentState = stateWaitForConfigReplication
|
|
// do not clear lastLegacyOnlyFetchIndex!
|
|
} else {
|
|
return nil
|
|
}
|
|
|
|
case stateWaitForConfigReplication:
|
|
|
|
// manually list replicated config entries by kind
|
|
|
|
// lastLegacyOnlyFetchIndex is now the raft commit index that
|
|
// zeroed out the intentions memdb table.
|
|
//
|
|
// We compare that with the last raft commit index we have replicated
|
|
// config entries for and use that to determine if we have caught up.
|
|
lastReplicatedConfigIndex := s.configReplicator.Index()
|
|
connectLogger.Debug(
|
|
"intention migration in secondary status",
|
|
"last_primary_intention_index", lastLegacyOnlyFetchIndex,
|
|
"last_primary_replicated_config_index", lastReplicatedConfigIndex,
|
|
)
|
|
if lastReplicatedConfigIndex >= lastLegacyOnlyFetchIndex {
|
|
currentState = stateDoCleanup
|
|
} else {
|
|
return nil
|
|
}
|
|
|
|
case stateDoCleanup:
|
|
if err := s.legacyIntentionsMigrationCleanupPhase(false); err != nil {
|
|
return err
|
|
}
|
|
|
|
loopCancel()
|
|
return nil
|
|
|
|
default:
|
|
return fmt.Errorf("impossible state: %v", currentState)
|
|
}
|
|
}
|
|
}, func(err error) {
|
|
connectLogger.Error(
|
|
"error performing intention migration in secondary datacenter, will retry",
|
|
"routine", intentionMigrationRoutineName,
|
|
"error", err,
|
|
)
|
|
})
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Server) fetchLegacyIntentionsSummary(_ context.Context, lastFetchIndex uint64) (uint64, int, error) {
|
|
args := structs.IntentionListRequest{
|
|
Datacenter: s.config.PrimaryDatacenter,
|
|
Legacy: true,
|
|
QueryOptions: structs.QueryOptions{
|
|
MinQueryIndex: lastFetchIndex,
|
|
Token: s.tokens.ReplicationToken(),
|
|
},
|
|
}
|
|
|
|
var remote structs.IndexedIntentions
|
|
if err := s.forwardDC("Intention.List", s.config.PrimaryDatacenter, &args, &remote); err != nil {
|
|
return 0, 0, err
|
|
}
|
|
|
|
return remote.Index, len(remote.Intentions), nil
|
|
}
|
|
|
|
// replicateLegacyIntentionsOnce executes a blocking query to the primary
|
|
// datacenter to replicate the intentions there to the local state one time.
|
|
func (s *Server) replicateLegacyIntentionsOnce(ctx context.Context, lastFetchIndex uint64) (uint64, bool, error) {
|
|
args := structs.DCSpecificRequest{
|
|
Datacenter: s.config.PrimaryDatacenter,
|
|
EnterpriseMeta: *s.replicationEnterpriseMeta(),
|
|
QueryOptions: structs.QueryOptions{
|
|
MinQueryIndex: lastFetchIndex,
|
|
Token: s.tokens.ReplicationToken(),
|
|
},
|
|
}
|
|
|
|
var remote structs.IndexedIntentions
|
|
if err := s.forwardDC("Intention.List", s.config.PrimaryDatacenter, &args, &remote); err != nil {
|
|
return 0, false, err
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return 0, false, ctx.Err()
|
|
default:
|
|
}
|
|
|
|
if remote.DataOrigin == structs.IntentionDataOriginConfigEntries {
|
|
return 0, true, nil
|
|
}
|
|
|
|
_, local, err := s.fsm.State().LegacyIntentions(nil, s.replicationEnterpriseMeta())
|
|
if err != nil {
|
|
return 0, false, err
|
|
}
|
|
|
|
// Do a quick sanity check that somehow Permissions didn't slip through.
|
|
// This shouldn't be necessary, but one extra check isn't going to hurt
|
|
// anything.
|
|
for _, ixn := range local {
|
|
if len(ixn.Permissions) > 0 {
|
|
// Assume that the data origin has switched to config entries.
|
|
return 0, true, nil
|
|
}
|
|
}
|
|
|
|
// Compute the diff between the remote and local intentions.
|
|
deletes, updates := diffIntentions(local, remote.Intentions)
|
|
txnOpSets := batchLegacyIntentionUpdates(deletes, updates)
|
|
|
|
// Apply batched updates to the state store.
|
|
for _, ops := range txnOpSets {
|
|
txnReq := structs.TxnRequest{Ops: ops}
|
|
|
|
// TODO(rpc-metrics-improv) -- verify labels
|
|
resp, err := s.leaderRaftApply("Txn.Apply", structs.TxnRequestType, &txnReq)
|
|
|
|
if err != nil {
|
|
return 0, false, err
|
|
}
|
|
|
|
if txnResp, ok := resp.(structs.TxnResponse); ok {
|
|
if len(txnResp.Errors) > 0 {
|
|
return 0, false, txnResp.Error()
|
|
}
|
|
} else {
|
|
return 0, false, fmt.Errorf("unexpected return type %T", resp)
|
|
}
|
|
}
|
|
|
|
return remote.QueryMeta.Index, false, nil
|
|
}
|
|
|
|
// diffIntentions computes the difference between the local and remote intentions
|
|
// and returns lists of deletes and updates.
|
|
func diffIntentions(local, remote structs.Intentions) (structs.Intentions, structs.Intentions) {
|
|
localIdx := make(map[string][]byte, len(local))
|
|
remoteIdx := make(map[string]struct{}, len(remote))
|
|
|
|
var deletes structs.Intentions
|
|
var updates structs.Intentions
|
|
|
|
for _, intention := range local {
|
|
localIdx[intention.ID] = intention.Hash
|
|
}
|
|
for _, intention := range remote {
|
|
remoteIdx[intention.ID] = struct{}{}
|
|
}
|
|
|
|
for _, intention := range local {
|
|
if _, ok := remoteIdx[intention.ID]; !ok {
|
|
deletes = append(deletes, intention)
|
|
}
|
|
}
|
|
|
|
for _, intention := range remote {
|
|
existingHash, ok := localIdx[intention.ID]
|
|
if !ok {
|
|
updates = append(updates, intention)
|
|
} else if bytes.Compare(existingHash, intention.Hash) != 0 {
|
|
updates = append(updates, intention)
|
|
}
|
|
}
|
|
|
|
return deletes, updates
|
|
}
|
|
|
|
// batchLegacyIntentionUpdates breaks up the given updates into sets of TxnOps based
|
|
// on the estimated size of the operations.
|
|
//
|
|
//nolint:staticcheck
|
|
func batchLegacyIntentionUpdates(deletes, updates structs.Intentions) []structs.TxnOps {
|
|
var txnOps structs.TxnOps
|
|
for _, delete := range deletes {
|
|
deleteOp := &structs.TxnIntentionOp{
|
|
Op: structs.IntentionOpDelete,
|
|
Intention: delete,
|
|
}
|
|
txnOps = append(txnOps, &structs.TxnOp{Intention: deleteOp})
|
|
}
|
|
|
|
for _, update := range updates {
|
|
updateOp := &structs.TxnIntentionOp{
|
|
Op: structs.IntentionOpUpdate,
|
|
Intention: update,
|
|
}
|
|
txnOps = append(txnOps, &structs.TxnOp{Intention: updateOp})
|
|
}
|
|
|
|
// Divide the operations into chunks according to maxIntentionTxnSize.
|
|
var batchedOps []structs.TxnOps
|
|
for batchStart := 0; batchStart < len(txnOps); {
|
|
// inner loop finds the last element to include in this batch.
|
|
batchSize := 0
|
|
batchEnd := batchStart
|
|
for ; batchEnd < len(txnOps) && batchSize < maxIntentionTxnSize; batchEnd += 1 {
|
|
batchSize += txnOps[batchEnd].Intention.Intention.LegacyEstimateSize()
|
|
}
|
|
|
|
batchedOps = append(batchedOps, txnOps[batchStart:batchEnd])
|
|
|
|
// txnOps[batchEnd] wasn't included as the slicing doesn't include the element at the stop index
|
|
batchStart = batchEnd
|
|
}
|
|
|
|
return batchedOps
|
|
}
|