open-consul/agent/consul/config_replication.go

package consul

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/armon/go-metrics"

	"github.com/hashicorp/consul/agent/structs"
)

// cmpConfigLess orders config entries by kind first and then by name within a
// kind, giving the total order that configSort and diffConfigEntries rely on.
func cmpConfigLess(first structs.ConfigEntry, second structs.ConfigEntry) bool {
	return first.GetKind() < second.GetKind() || (first.GetKind() == second.GetKind() && first.GetName() < second.GetName())
}

// configSort sorts config entries in place so that the local and remote
// slices can be diffed in a single linear merge pass.
func configSort(configs []structs.ConfigEntry) {
	sort.Slice(configs, func(i, j int) bool {
		return cmpConfigLess(configs[i], configs[j])
	})
}

// diffConfigEntries compares the local and remote config entries and returns
// the entries that must be deleted locally and the entries that must be
// upserted locally to bring the two into sync.
func diffConfigEntries(local []structs.ConfigEntry, remote []structs.ConfigEntry, lastRemoteIndex uint64) ([]structs.ConfigEntry, []structs.ConfigEntry) {
	configSort(local)
	configSort(remote)

	var deletions []structs.ConfigEntry
	var updates []structs.ConfigEntry

	var localIdx int
	var remoteIdx int

	for localIdx, remoteIdx = 0, 0; localIdx < len(local) && remoteIdx < len(remote); {
		if local[localIdx].GetKind() == remote[remoteIdx].GetKind() && local[localIdx].GetName() == remote[remoteIdx].GetName() {
			// config is in both the local and remote state - need to check raft indices
			if remote[remoteIdx].GetRaftIndex().ModifyIndex > lastRemoteIndex {
				updates = append(updates, remote[remoteIdx])
			}

			// increment both indices when equal
			localIdx += 1
			remoteIdx += 1
		} else if cmpConfigLess(local[localIdx], remote[remoteIdx]) {
			// config no longer in remote state - needs deleting
			deletions = append(deletions, local[localIdx])

			// increment just the local index
			localIdx += 1
		} else {
			// local state doesn't have this config - needs updating
			updates = append(updates, remote[remoteIdx])

			// increment just the remote index
			remoteIdx += 1
		}
	}

	// any remaining local entries have no remote counterpart and must be deleted
	for ; localIdx < len(local); localIdx += 1 {
		deletions = append(deletions, local[localIdx])
	}

	// any remaining remote entries are missing locally and must be upserted
	for ; remoteIdx < len(remote); remoteIdx += 1 {
		updates = append(updates, remote[remoteIdx])
	}

	return deletions, updates
}
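
// Illustrative walk-through of diffConfigEntries (hypothetical entries, not
// taken from real state): given the sorted inputs
//
//	local:  proxy-defaults/global, service-defaults/api
//	remote: service-defaults/api (ModifyIndex 50), service-defaults/web
//
// a call with lastRemoteIndex = 40 returns deletions = [proxy-defaults/global]
// (present only locally) and updates = [service-defaults/api,
// service-defaults/web] - the former because its ModifyIndex 50 exceeds 40,
// the latter because it is missing locally.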

// reconcileLocalConfig applies the given deletions or upserts through raft,
// rate limited to ConfigReplicationApplyLimit operations per second. It
// returns true when the context is cancelled before all entries are applied.
func (s *Server) reconcileLocalConfig(ctx context.Context, configs []structs.ConfigEntry, op structs.ConfigEntryOp) (bool, error) {
	ticker := time.NewTicker(time.Second / time.Duration(s.config.ConfigReplicationApplyLimit))
	defer ticker.Stop()

	for i, entry := range configs {
		req := structs.ConfigEntryRequest{
			Op:         op,
			Datacenter: s.config.Datacenter,
			Entry:      entry,
		}

		resp, err := s.raftApply(structs.ConfigEntryRequestType, &req)
		if err != nil {
			return false, fmt.Errorf("failed to apply config %s: %v", op, err)
		}
		// raftApply can also surface an error as the response value; the
		// original condition here (ok && err != nil) could never be true
		// because err was already handled above.
		if respErr, ok := resp.(error); ok {
			return false, fmt.Errorf("failed to apply config %s: %v", op, respErr)
		}

		if i < len(configs)-1 {
			select {
			case <-ctx.Done():
				return true, nil
			case <-ticker.C:
				// do nothing - ready for the next batch
			}
		}
	}

	return false, nil
}
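
// As a concrete illustration of the rate limit above: with
// ConfigReplicationApplyLimit set to 64 (an illustrative value, not a default
// asserted here), the ticker fires roughly every 15.6ms, so a large batch of
// deletions or upserts is applied at about 64 raft operations per second
// rather than all at once.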

// fetchConfigEntries issues a blocking query against the primary datacenter
// for all config entries. MinQueryIndex makes the RPC block until the remote
// index advances past lastRemoteIndex, and AllowStale lets any remote server
// answer rather than only the remote leader.
func (s *Server) fetchConfigEntries(lastRemoteIndex uint64) (*structs.IndexedGenericConfigEntries, error) {
	defer metrics.MeasureSince([]string{"leader", "replication", "config-entries", "fetch"}, time.Now())

	req := structs.DCSpecificRequest{
		Datacenter: s.config.PrimaryDatacenter,
		QueryOptions: structs.QueryOptions{
			AllowStale:    true,
			MinQueryIndex: lastRemoteIndex,
			Token:         s.tokens.ReplicationToken(),
		},
	}

	var response structs.IndexedGenericConfigEntries
	if err := s.RPC("ConfigEntry.ListAll", &req, &response); err != nil {
		return nil, err
	}

	return &response, nil
}
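
// As an illustration of the blocking behavior (standard Consul blocking-query
// semantics, not specific to this file): if lastRemoteIndex is 100 and no
// config entry has changed in the primary, the ListAll RPC above parks until
// a write bumps the remote index past 100 or the server's maximum blocking
// time elapses, and only then returns.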

// replicateConfig performs one round of config entry replication from the
// primary datacenter. It returns the remote raft index that was synced to,
// whether the caller should exit, and any error.
func (s *Server) replicateConfig(ctx context.Context, lastRemoteIndex uint64) (uint64, bool, error) {
	remote, err := s.fetchConfigEntries(lastRemoteIndex)
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve remote config entries: %v", err)
	}

	s.logger.Printf("[DEBUG] replication: finished fetching config entries: %d", len(remote.Entries))

	// Need to check if we should be stopping. This will be common as the fetching process is a blocking
	// RPC which could have been hanging around for a long time and during that time leadership could
	// have been lost.
	select {
	case <-ctx.Done():
		return 0, true, nil
	default:
		// do nothing
	}

	// Measure everything after the remote query, which can block for long
	// periods of time. This metric is a good measure of how expensive the
	// replication process is.
	defer metrics.MeasureSince([]string{"leader", "replication", "config", "apply"}, time.Now())

	_, local, err := s.fsm.State().ConfigEntries(nil)
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve local config entries: %v", err)
	}

	// If the remote index ever goes backwards, it's a good indication that
	// the remote side was rebuilt and we should do a full sync since we
	// can't make any assumptions about what's going on.
	//
	// Resetting lastRemoteIndex to 0 will work because we never consider local
	// raft indices. Instead we compare the raft modify index in the response object
	// with the lastRemoteIndex (only when we already have a config entry of the same kind/name)
	// to determine if an update is needed. Resetting lastRemoteIndex to 0 then has the effect
	// of making us think all the local state is out of date and any matching entries should
	// still be updated.
	//
	// The lastRemoteIndex is not used when the entry exists either only in the local state or
	// only in the remote state. In those situations we need to either delete it or create it.
	if remote.QueryMeta.Index < lastRemoteIndex {
		s.logger.Printf("[WARN] replication: Config Entry replication remote index moved backwards (%d to %d), forcing a full Config Entry sync", lastRemoteIndex, remote.QueryMeta.Index)
		lastRemoteIndex = 0
	}

	s.logger.Printf("[DEBUG] replication: Config Entry replication - local: %d, remote: %d", len(local), len(remote.Entries))

	// Calculate the changes required to bring the state into sync and then
	// apply them.
	deletions, updates := diffConfigEntries(local, remote.Entries, lastRemoteIndex)

	s.logger.Printf("[DEBUG] replication: Config Entry replication - deletions: %d, updates: %d", len(deletions), len(updates))

	if len(deletions) > 0 {
		s.logger.Printf("[DEBUG] replication: Config Entry replication - performing %d deletions", len(deletions))

		exit, err := s.reconcileLocalConfig(ctx, deletions, structs.ConfigEntryDelete)
		if exit {
			return 0, true, nil
		}
		if err != nil {
			return 0, false, fmt.Errorf("failed to delete local config entries: %v", err)
		}
		s.logger.Printf("[DEBUG] replication: Config Entry replication - finished deletions")
	}

	if len(updates) > 0 {
		s.logger.Printf("[DEBUG] replication: Config Entry replication - performing %d updates", len(updates))

		exit, err := s.reconcileLocalConfig(ctx, updates, structs.ConfigEntryUpsert)
		if exit {
			return 0, true, nil
		}
		if err != nil {
			return 0, false, fmt.Errorf("failed to update local config entries: %v", err)
		}
		s.logger.Printf("[DEBUG] replication: Config Entry replication - finished updates")
	}

	// Return the index we got back from the remote side, since we've synced
	// up with the remote state as of that index.
	return remote.QueryMeta.Index, false, nil
}
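
// A leader routine would typically drive replicateConfig in a loop, feeding
// the returned index back in as lastRemoteIndex. A minimal sketch (assuming a
// surrounding retry/backoff wrapper similar to Consul's other replication
// loops; this driver is not part of this file):
//
//	var lastRemoteIndex uint64
//	for {
//		index, exit, err := s.replicateConfig(ctx, lastRemoteIndex)
//		if exit {
//			return // shutdown or leadership lost
//		}
//		if err != nil {
//			// log and retry after a backoff; lastRemoteIndex is unchanged,
//			// so the next fetch blocks at the same point
//			continue
//		}
//		lastRemoteIndex = index
//	}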