f926c7643f
Enforce lowercase peer names. Prior to this change peer names could be mixed case. This can cause issues, as peer names are used as DNS labels in various locations. It also caused issues with envoy configuration.
1059 lines
35 KiB
Go
1059 lines
35 KiB
Go
package peering
|
|
|
|
import (
|
|
"container/ring"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/hashicorp/go-hclog"
|
|
"github.com/hashicorp/go-memdb"
|
|
"github.com/hashicorp/go-multierror"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/codes"
|
|
grpcstatus "google.golang.org/grpc/status"
|
|
"google.golang.org/protobuf/proto"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
|
|
"github.com/hashicorp/consul/lib/retry"
|
|
|
|
"github.com/hashicorp/consul/acl"
|
|
"github.com/hashicorp/consul/acl/resolver"
|
|
"github.com/hashicorp/consul/agent/consul/state"
|
|
"github.com/hashicorp/consul/agent/consul/stream"
|
|
external "github.com/hashicorp/consul/agent/grpc-external"
|
|
"github.com/hashicorp/consul/agent/grpc-external/services/peerstream"
|
|
"github.com/hashicorp/consul/agent/structs"
|
|
"github.com/hashicorp/consul/lib"
|
|
"github.com/hashicorp/consul/proto/pbpeering"
|
|
"github.com/hashicorp/consul/proto/pbpeerstream"
|
|
)
|
|
|
|
var (
|
|
errPeeringTokenInvalidCA = errors.New("peering token CA value is invalid")
|
|
errPeeringTokenEmptyServerAddresses = errors.New("peering token server addresses value is empty")
|
|
errPeeringTokenEmptyServerName = errors.New("peering token server name value is empty")
|
|
errPeeringTokenEmptyPeerID = errors.New("peering token peer ID value is empty")
|
|
)
|
|
|
|
const (
|
|
// meshGatewayWait is the initial wait on calls to exchange a secret with a peer when dialing through a gateway.
|
|
// This wait provides some time for the first gateway address to configure a route to the peer servers.
|
|
// This study shows latency distribution https://www.hashicorp.com/cgsb.
|
|
// With 1s we cover ~p96, then we initiate the 3-second retry loop.
|
|
meshGatewayWait = 1 * time.Second
|
|
establishmentTimeout = 3 * time.Second
|
|
)
|
|
|
|
// errPeeringInvalidServerAddress is returned when an establish request contains
|
|
// an invalid server address.
|
|
type errPeeringInvalidServerAddress struct {
|
|
addr string
|
|
}
|
|
|
|
// Error implements the error interface
|
|
func (e *errPeeringInvalidServerAddress) Error() string {
|
|
return fmt.Sprintf("%s is not a valid peering server address", e.addr)
|
|
}
|
|
|
|
// For private/internal gRPC handlers, protoc-gen-rpc-glue generates the
|
|
// requisite methods to satisfy the structs.RPCInfo interface using fields
|
|
// from the pbcommon package. This service is public, so we can't use those
|
|
// fields in our proto definition. Instead, we construct our RPCInfo manually.
|
|
var writeRequest struct {
|
|
structs.WriteRequest
|
|
structs.DCSpecificRequest
|
|
}
|
|
|
|
var readRequest struct {
|
|
structs.QueryOptions
|
|
structs.DCSpecificRequest
|
|
}
|
|
|
|
// Server implements pbpeering.PeeringService to provide RPC operations for
|
|
// managing peering relationships.
|
|
type Server struct {
|
|
Config
|
|
}
|
|
|
|
type Config struct {
|
|
Backend Backend
|
|
Tracker *peerstream.Tracker
|
|
Logger hclog.Logger
|
|
ForwardRPC func(structs.RPCInfo, func(*grpc.ClientConn) error) (bool, error)
|
|
Datacenter string
|
|
ConnectEnabled bool
|
|
PeeringEnabled bool
|
|
}
|
|
|
|
func NewServer(cfg Config) *Server {
|
|
requireNotNil(cfg.Backend, "Backend")
|
|
requireNotNil(cfg.Tracker, "Tracker")
|
|
requireNotNil(cfg.Logger, "Logger")
|
|
requireNotNil(cfg.ForwardRPC, "ForwardRPC")
|
|
if cfg.Datacenter == "" {
|
|
panic("Datacenter is required")
|
|
}
|
|
return &Server{
|
|
Config: cfg,
|
|
}
|
|
}
|
|
|
|
func requireNotNil(v interface{}, name string) {
|
|
if v == nil {
|
|
panic(name + " is required")
|
|
}
|
|
}
|
|
|
|
var _ pbpeering.PeeringServiceServer = (*Server)(nil)
|
|
|
|
func (s *Server) Register(grpcServer *grpc.Server) {
|
|
pbpeering.RegisterPeeringServiceServer(grpcServer, s)
|
|
}
|
|
|
|
// Backend defines the core integrations the Peering endpoint depends on. A
|
|
// functional implementation will integrate with various subcomponents of Consul
|
|
// such as the State store for reading and writing data, the CA machinery for
|
|
// providing access to CA data and the RPC system for forwarding requests to
|
|
// other servers.
|
|
type Backend interface {
|
|
// ResolveTokenAndDefaultMeta returns an acl.Authorizer which authorizes
|
|
// actions based on the permissions granted to the token.
|
|
// If either entMeta or authzContext are non-nil they will be populated with the
|
|
// partition and namespace from the token.
|
|
ResolveTokenAndDefaultMeta(token string, entMeta *acl.EnterpriseMeta, authzCtx *acl.AuthorizerContext) (resolver.Result, error)
|
|
|
|
// GetTLSMaterials returns the TLS materials for the dialer to dial the acceptor using TLS.
|
|
// It returns the server name to validate, and the CA certificate to validate with.
|
|
GetTLSMaterials(generatingToken bool) (string, []string, error)
|
|
|
|
// GetLocalServerAddresses returns the addresses used for establishing a peering connection.
|
|
// These may be server addresses or mesh gateway addresses if peering through mesh gateways.
|
|
GetLocalServerAddresses() ([]string, error)
|
|
|
|
// EncodeToken packages a peering token into a slice of bytes.
|
|
EncodeToken(tok *structs.PeeringToken) ([]byte, error)
|
|
|
|
// DecodeToken unpackages a peering token from a slice of bytes.
|
|
DecodeToken([]byte) (*structs.PeeringToken, error)
|
|
|
|
// GetDialAddresses returns: the addresses to cycle through when dialing a peer's servers,
|
|
// an optional buffer of just gateway addresses, and an optional error.
|
|
// The resulting ring buffer is front-loaded with the local mesh gateway addresses if the local
|
|
// datacenter is configured to dial through mesh gateways.
|
|
GetDialAddresses(logger hclog.Logger, ws memdb.WatchSet, peerID string) (*ring.Ring, *ring.Ring, error)
|
|
|
|
EnterpriseCheckPartitions(partition string) error
|
|
|
|
EnterpriseCheckNamespaces(namespace string) error
|
|
|
|
Subscribe(req *stream.SubscribeRequest) (*stream.Subscription, error)
|
|
|
|
// IsLeader indicates whether the consul server is in a leader state or not.
|
|
IsLeader() bool
|
|
|
|
// SetLeaderAddress is called on a raft.LeaderObservation in a go routine
|
|
// in the consul server; see trackLeaderChanges()
|
|
SetLeaderAddress(string)
|
|
|
|
// GetLeaderAddress provides the best hint for the current address of the
|
|
// leader. There is no guarantee that this is the actual address of the
|
|
// leader.
|
|
GetLeaderAddress() string
|
|
|
|
// CheckPeeringUUID returns true if the given UUID is not associated with
|
|
// an existing peering.
|
|
CheckPeeringUUID(id string) (bool, error)
|
|
|
|
ValidateProposedPeeringSecret(id string) (bool, error)
|
|
|
|
PeeringWrite(req *pbpeering.PeeringWriteRequest) error
|
|
|
|
Store() Store
|
|
}
|
|
|
|
// Store provides a read-only interface for querying Peering data.
|
|
type Store interface {
|
|
PeeringRead(ws memdb.WatchSet, q state.Query) (uint64, *pbpeering.Peering, error)
|
|
PeeringReadByID(ws memdb.WatchSet, id string) (uint64, *pbpeering.Peering, error)
|
|
PeeringList(ws memdb.WatchSet, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.Peering, error)
|
|
PeeringTrustBundleRead(ws memdb.WatchSet, q state.Query) (uint64, *pbpeering.PeeringTrustBundle, error)
|
|
PeeringTrustBundleList(ws memdb.WatchSet, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.PeeringTrustBundle, error)
|
|
TrustBundleListByService(ws memdb.WatchSet, service, dc string, entMeta acl.EnterpriseMeta) (uint64, []*pbpeering.PeeringTrustBundle, error)
|
|
}
|
|
|
|
var peeringNotEnabledErr = grpcstatus.Error(codes.FailedPrecondition, "peering must be enabled to use this endpoint")
|
|
|
|
// GenerateToken implements the PeeringService RPC method to generate a
|
|
// peering token which is the initial step in establishing a peering relationship
|
|
// with other Consul clusters.
|
|
func (s *Server) GenerateToken(
|
|
ctx context.Context,
|
|
req *pbpeering.GenerateTokenRequest,
|
|
) (*pbpeering.GenerateTokenResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
// validate prior to forwarding to the leader, this saves a network hop
|
|
if err := validatePeerName(req.PeerName); err != nil {
|
|
return nil, fmt.Errorf("%s is not a valid peer name: %w", req.PeerName, err)
|
|
}
|
|
|
|
if err := structs.ValidateMetaTags(req.Meta); err != nil {
|
|
return nil, fmt.Errorf("meta tags failed validation: %w", err)
|
|
}
|
|
|
|
resp := &pbpeering.GenerateTokenResponse{}
|
|
handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).GenerateToken(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "generate_token"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
serverName, caPEMs, err := s.Backend.GetTLSMaterials(true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var (
|
|
peering *pbpeering.Peering
|
|
secretID string
|
|
)
|
|
|
|
// This loop ensures at most one retry in the case of a race condition.
|
|
for canRetry := true; canRetry; canRetry = false {
|
|
peering, err = s.getExistingPeering(req.PeerName, entMeta.PartitionOrDefault())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if peering == nil {
|
|
id, err := lib.GenerateUUID(s.Backend.CheckPeeringUUID)
|
|
if err != nil {
|
|
return resp, err
|
|
}
|
|
peering = &pbpeering.Peering{
|
|
ID: id,
|
|
Name: req.PeerName,
|
|
Meta: req.Meta,
|
|
|
|
// PartitionOrEmpty is used to avoid writing "default" in OSS.
|
|
Partition: entMeta.PartitionOrEmpty(),
|
|
}
|
|
} else {
|
|
// validate that this peer name is not being used as a dialer already
|
|
if err := validatePeer(peering, false); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// A new establishment secret is generated on every GenerateToken request.
|
|
// This allows for rotating secrets by generating a new token for a peering and then
|
|
// using the new token to re-establish the peering.
|
|
secretID, err = s.generateNewEstablishmentSecret()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to generate secret for peering establishment: %w", err)
|
|
}
|
|
|
|
writeReq := &pbpeering.PeeringWriteRequest{
|
|
Peering: peering,
|
|
SecretsRequest: &pbpeering.SecretsWriteRequest{
|
|
PeerID: peering.ID,
|
|
Request: &pbpeering.SecretsWriteRequest_GenerateToken{
|
|
GenerateToken: &pbpeering.SecretsWriteRequest_GenerateTokenRequest{
|
|
EstablishmentSecret: secretID,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
if err := s.Backend.PeeringWrite(writeReq); err != nil {
|
|
// There's a possible race where two servers call Generate Token at the
|
|
// same time with the same peer name for the first time. They both
|
|
// generate an ID and try to insert and only one wins. This detects the
|
|
// collision and forces the loser to discard its generated ID and use
|
|
// the one from the other server.
|
|
if strings.Contains(err.Error(), "A peering already exists with the name") {
|
|
// retry to fetch existing peering
|
|
continue
|
|
}
|
|
return nil, fmt.Errorf("failed to write peering: %w", err)
|
|
}
|
|
// write succeeded, break loop early
|
|
break
|
|
}
|
|
|
|
serverAddrs, err := s.Backend.GetLocalServerAddresses()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tok := structs.PeeringToken{
|
|
// Store the UUID so that we can do a global search when handling inbound streams.
|
|
PeerID: peering.ID,
|
|
CA: caPEMs,
|
|
ManualServerAddresses: req.ServerExternalAddresses,
|
|
ServerAddresses: serverAddrs,
|
|
ServerName: serverName,
|
|
EstablishmentSecret: secretID,
|
|
Remote: structs.PeeringTokenRemote{
|
|
Partition: req.PartitionOrDefault(),
|
|
Datacenter: s.Datacenter,
|
|
},
|
|
}
|
|
|
|
encoded, err := s.Backend.EncodeToken(&tok)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
resp.PeeringToken = string(encoded)
|
|
return resp, err
|
|
}
|
|
|
|
// Establish implements the PeeringService RPC method to finalize peering
|
|
// registration. Given a valid token output from a peer's GenerateToken endpoint,
|
|
// a peering is registered.
|
|
func (s *Server) Establish(
|
|
ctx context.Context,
|
|
req *pbpeering.EstablishRequest,
|
|
) (*pbpeering.EstablishResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
// validate prior to forwarding to the leader, this saves a network hop
|
|
if err := validatePeerName(req.PeerName); err != nil {
|
|
return nil, fmt.Errorf("%s is not a valid peer name: %w", req.PeerName, err)
|
|
}
|
|
tok, err := s.Backend.DecodeToken([]byte(req.PeeringToken))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := validatePeeringToken(tok); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := structs.ValidateMetaTags(req.Meta); err != nil {
|
|
return nil, fmt.Errorf("meta tags failed validation: %w", err)
|
|
}
|
|
|
|
resp := &pbpeering.EstablishResponse{}
|
|
handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).Establish(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "establish"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
existing, err := s.getExistingPeering(req.PeerName, entMeta.PartitionOrDefault())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := s.validatePeeringLocality(tok); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var id string
|
|
serverAddrs := tok.ServerAddresses
|
|
if existing == nil {
|
|
id, err = lib.GenerateUUID(s.Backend.CheckPeeringUUID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
id = existing.ID
|
|
// If there is a connected stream, assume that the existing ServerAddresses
|
|
// are up to date and do not try to overwrite them with the token's addresses.
|
|
if status, ok := s.Tracker.StreamStatus(id); ok && status.Connected {
|
|
serverAddrs = existing.PeerServerAddresses
|
|
}
|
|
}
|
|
|
|
// validate that this peer name is not being used as an acceptor already
|
|
if err := validatePeer(existing, true); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
peering := &pbpeering.Peering{
|
|
ID: id,
|
|
Name: req.PeerName,
|
|
PeerCAPems: tok.CA,
|
|
ManualServerAddresses: tok.ManualServerAddresses,
|
|
PeerServerAddresses: serverAddrs,
|
|
PeerServerName: tok.ServerName,
|
|
PeerID: tok.PeerID,
|
|
Meta: req.Meta,
|
|
|
|
// State is intentionally not set until after the secret exchange succeeds.
|
|
// This is to prevent a scenario where an active peering is re-established,
|
|
// the secret exchange fails, and the peering state gets stuck in "Establishing"
|
|
// while the original connection is still active.
|
|
// State: pbpeering.PeeringState_ESTABLISHING,
|
|
|
|
// PartitionOrEmpty is used to avoid writing "default" in OSS.
|
|
Partition: entMeta.PartitionOrEmpty(),
|
|
Remote: &pbpeering.RemoteInfo{
|
|
Partition: tok.Remote.Partition,
|
|
Datacenter: tok.Remote.Datacenter,
|
|
},
|
|
}
|
|
|
|
// Write the peering ahead of the ExchangeSecret handshake to give
|
|
// mesh gateways in the default partition an opportunity
|
|
// to update their config with an outbound route to this peer server.
|
|
//
|
|
// If the request to exchange a secret fails then the peering will continue to exist.
|
|
// We do not undo this write because this call to establish may actually be a re-establish call
|
|
// for an active peering.
|
|
writeReq := &pbpeering.PeeringWriteRequest{
|
|
Peering: peering,
|
|
}
|
|
if err := s.Backend.PeeringWrite(writeReq); err != nil {
|
|
return nil, fmt.Errorf("failed to write peering: %w", err)
|
|
}
|
|
|
|
exchangeResp, dialErrors := s.exchangeSecret(ctx, peering, tok.EstablishmentSecret)
|
|
if exchangeResp == nil {
|
|
return nil, dialErrors
|
|
}
|
|
peering.State = pbpeering.PeeringState_ESTABLISHING
|
|
|
|
// As soon as a peering is written with a non-empty list of ServerAddresses
|
|
// and an active stream secret, a leader routine will see the peering and
|
|
// attempt to establish a peering stream with the remote peer.
|
|
writeReq = &pbpeering.PeeringWriteRequest{
|
|
Peering: peering,
|
|
SecretsRequest: &pbpeering.SecretsWriteRequest{
|
|
PeerID: peering.ID,
|
|
Request: &pbpeering.SecretsWriteRequest_Establish{
|
|
Establish: &pbpeering.SecretsWriteRequest_EstablishRequest{
|
|
ActiveStreamSecret: exchangeResp.StreamSecret,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
if err := s.Backend.PeeringWrite(writeReq); err != nil {
|
|
return nil, fmt.Errorf("failed to write peering: %w", err)
|
|
}
|
|
return resp, nil
|
|
}
|
|
|
|
// validatePeeringLocality makes sure that we don't create a peering in the same cluster it was generated.
|
|
// If the ServerName of the PeeringToken overlaps with our own, we do not accept it.
|
|
func (s *Server) validatePeeringLocality(token *structs.PeeringToken) error {
|
|
serverName, _, err := s.Backend.GetTLSMaterials(false)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to fetch TLS materials: %w", err)
|
|
}
|
|
if serverName == token.ServerName {
|
|
return fmt.Errorf(
|
|
"cannot create a peering within the same cluster %q. Refer to the `exported-services` documentation if you want to export between partitions without peering",
|
|
serverName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// exchangeSecret will continuously attempt to exchange the given establishment secret with the peer, up to a timeout.
|
|
// This function will attempt to dial through mesh gateways if the local DC is configured to peer through gateways,
|
|
// but will fall back to server addresses if not.
|
|
func (s *Server) exchangeSecret(ctx context.Context, peering *pbpeering.Peering, establishmentSecret string) (*pbpeerstream.ExchangeSecretResponse, error) {
|
|
req := pbpeerstream.ExchangeSecretRequest{
|
|
PeerID: peering.PeerID,
|
|
EstablishmentSecret: establishmentSecret,
|
|
}
|
|
|
|
tlsOption, err := peering.TLSDialOption()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to build TLS dial option from peering: %w", err)
|
|
}
|
|
|
|
allAddrs, gatewayAddrs, err := s.Backend.GetDialAddresses(s.Logger, nil, peering.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get addresses to dial peer: %w", err)
|
|
}
|
|
|
|
if gatewayAddrs != nil {
|
|
// If we are dialing through local gateways we sleep before issuing the first request.
|
|
// This gives the local gateways some time to configure a route to the peer servers.
|
|
time.Sleep(meshGatewayWait)
|
|
|
|
// Exclusively try
|
|
resp, _ := retryExchange(ctx, &req, gatewayAddrs, tlsOption)
|
|
if resp != nil {
|
|
return resp, nil
|
|
}
|
|
}
|
|
|
|
return retryExchange(ctx, &req, allAddrs, tlsOption)
|
|
}
|
|
|
|
// retryExchange attempts a secret exchange in a retry loop, taking a new address from the ring buffer on each iteration
|
|
func retryExchange(ctx context.Context, req *pbpeerstream.ExchangeSecretRequest, ringBuf *ring.Ring, tlsOption grpc.DialOption) (*pbpeerstream.ExchangeSecretResponse, error) {
|
|
var (
|
|
resp *pbpeerstream.ExchangeSecretResponse
|
|
dialErrors error
|
|
)
|
|
|
|
retryWait := 150 * time.Millisecond
|
|
jitter := retry.NewJitter(25)
|
|
|
|
retryCtx, cancel := context.WithTimeout(ctx, establishmentTimeout)
|
|
defer cancel()
|
|
|
|
for retryCtx.Err() == nil {
|
|
addr := ringBuf.Value.(string)
|
|
|
|
dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
|
defer cancel()
|
|
|
|
conn, err := grpc.DialContext(dialCtx, addr,
|
|
tlsOption,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to dial peer: %w", err)
|
|
}
|
|
defer conn.Close()
|
|
|
|
client := pbpeerstream.NewPeerStreamServiceClient(conn)
|
|
resp, err = client.ExchangeSecret(ctx, req)
|
|
|
|
// If we got a permission denied error that means out establishment secret is invalid, so we do not retry.
|
|
grpcErr, ok := grpcstatus.FromError(err)
|
|
if ok && grpcErr.Code() == codes.PermissionDenied {
|
|
return nil, grpcstatus.Errorf(codes.PermissionDenied, "a new peering token must be generated: %s", grpcErr.Message())
|
|
}
|
|
if err != nil {
|
|
dialErrors = multierror.Append(dialErrors, fmt.Errorf("failed to exchange peering secret through address %q: %w", addr, err))
|
|
}
|
|
if resp != nil {
|
|
// Got a valid response. We're done.
|
|
break
|
|
}
|
|
|
|
time.Sleep(jitter(retryWait))
|
|
|
|
// Cycle to the next possible address.
|
|
ringBuf = ringBuf.Next()
|
|
}
|
|
return resp, dialErrors
|
|
}
|
|
|
|
// OPTIMIZE: Handle blocking queries
|
|
func (s *Server) PeeringRead(ctx context.Context, req *pbpeering.PeeringReadRequest) (*pbpeering.PeeringReadResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
|
|
var resp *pbpeering.PeeringReadResponse
|
|
handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringRead(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "read"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringReadAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
q := state.Query{
|
|
Value: strings.ToLower(req.Name),
|
|
EnterpriseMeta: *entMeta,
|
|
}
|
|
_, peering, err := s.Backend.Store().PeeringRead(nil, q)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if peering == nil {
|
|
return &pbpeering.PeeringReadResponse{Peering: nil}, nil
|
|
}
|
|
|
|
cp := s.reconcilePeering(peering)
|
|
return &pbpeering.PeeringReadResponse{Peering: cp}, nil
|
|
}
|
|
|
|
// OPTIMIZE: Handle blocking queries
|
|
func (s *Server) PeeringList(ctx context.Context, req *pbpeering.PeeringListRequest) (*pbpeering.PeeringListResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
|
|
var resp *pbpeering.PeeringListResponse
|
|
handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringList(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringReadAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "list"}, time.Now())
|
|
|
|
idx, peerings, err := s.Backend.Store().PeeringList(nil, *entMeta)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// reconcile the actual peering state; need to copy over the ds for peering
|
|
var cPeerings []*pbpeering.Peering
|
|
for _, p := range peerings {
|
|
cp := s.reconcilePeering(p)
|
|
cPeerings = append(cPeerings, cp)
|
|
}
|
|
|
|
return &pbpeering.PeeringListResponse{Peerings: cPeerings, Index: idx}, nil
|
|
}
|
|
|
|
// TODO(peering): Get rid of this func when we stop using the stream tracker for imported/ exported services and the peering state
|
|
// reconcilePeering enriches the peering with the following information:
|
|
// -- PeeringState.Active if the peering is active
|
|
// -- ImportedServicesCount and ExportedServicesCount
|
|
// NOTE: we return a new peering with this additional data
|
|
func (s *Server) reconcilePeering(peering *pbpeering.Peering) *pbpeering.Peering {
|
|
streamState, found := s.Tracker.StreamStatus(peering.ID)
|
|
if !found {
|
|
// TODO(peering): this may be noise on non-leaders
|
|
s.Logger.Warn("did not find peer in stream tracker; cannot populate imported and"+
|
|
" exported services count or reconcile peering state", "peerID", peering.ID)
|
|
peering.StreamStatus = &pbpeering.StreamStatus{}
|
|
return peering
|
|
} else {
|
|
cp := copyPeering(peering)
|
|
|
|
// reconcile pbpeering.PeeringState_Active
|
|
if streamState.Connected {
|
|
cp.State = pbpeering.PeeringState_ACTIVE
|
|
} else if streamState.DisconnectErrorMessage != "" {
|
|
cp.State = pbpeering.PeeringState_FAILING
|
|
}
|
|
|
|
latest := func(tt ...*time.Time) *time.Time {
|
|
latest := time.Time{}
|
|
for _, t := range tt {
|
|
if t == nil {
|
|
continue
|
|
}
|
|
if t.After(latest) {
|
|
latest = *t
|
|
}
|
|
}
|
|
return &latest
|
|
}
|
|
|
|
lastRecv := latest(streamState.LastRecvHeartbeat, streamState.LastRecvError, streamState.LastRecvResourceSuccess)
|
|
lastSend := latest(streamState.LastSendError, streamState.LastSendSuccess)
|
|
|
|
cp.StreamStatus = &pbpeering.StreamStatus{
|
|
ImportedServices: streamState.ImportedServices,
|
|
ExportedServices: streamState.ExportedServices,
|
|
LastHeartbeat: pbpeering.TimePtrToProto(streamState.LastRecvHeartbeat),
|
|
LastReceive: pbpeering.TimePtrToProto(lastRecv),
|
|
LastSend: pbpeering.TimePtrToProto(lastSend),
|
|
}
|
|
|
|
return cp
|
|
}
|
|
}
|
|
|
|
// TODO(peering): As of writing, this method is only used in tests to set up Peerings in the state store.
|
|
// Consider removing if we can find another way to populate state store in peering_endpoint_test.go
|
|
func (s *Server) PeeringWrite(ctx context.Context, req *pbpeering.PeeringWriteRequest) (*pbpeering.PeeringWriteResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Peering.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
|
|
var resp *pbpeering.PeeringWriteResponse
|
|
handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringWrite(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "write"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Peering.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if req.Peering == nil {
|
|
return nil, fmt.Errorf("missing required peering body")
|
|
}
|
|
|
|
var id string
|
|
peering, err := s.getExistingPeering(req.Peering.Name, entMeta.PartitionOrDefault())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if peering == nil {
|
|
id, err = lib.GenerateUUID(s.Backend.CheckPeeringUUID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
id = peering.ID
|
|
}
|
|
req.Peering.ID = id
|
|
|
|
err = s.Backend.PeeringWrite(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &pbpeering.PeeringWriteResponse{}, nil
|
|
}
|
|
|
|
func (s *Server) PeeringDelete(ctx context.Context, req *pbpeering.PeeringDeleteRequest) (*pbpeering.PeeringDeleteResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
|
|
var resp *pbpeering.PeeringDeleteResponse
|
|
handled, err := s.ForwardRPC(&writeRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).PeeringDelete(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "delete"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().PeeringWriteAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
q := state.Query{
|
|
Value: strings.ToLower(req.Name),
|
|
EnterpriseMeta: *entMeta,
|
|
}
|
|
_, existing, err := s.Backend.Store().PeeringRead(nil, q)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if existing == nil || existing.State == pbpeering.PeeringState_DELETING {
|
|
// Return early when the Peering doesn't exist or is already marked for deletion.
|
|
// We don't return nil because the pb will fail to marshal.
|
|
return &pbpeering.PeeringDeleteResponse{}, nil
|
|
}
|
|
|
|
// We are using a write request due to needing to perform a deferred deletion.
|
|
// The peering gets marked for deletion by setting the DeletedAt field,
|
|
// and a leader routine will handle deleting the peering.
|
|
writeReq := &pbpeering.PeeringWriteRequest{
|
|
Peering: &pbpeering.Peering{
|
|
// We only need to include the name and partition for the peering to be identified.
|
|
// All other data associated with the peering can be discarded because once marked
|
|
// for deletion the peering is effectively gone.
|
|
ID: existing.ID,
|
|
Name: req.Name,
|
|
State: pbpeering.PeeringState_DELETING,
|
|
ManualServerAddresses: existing.ManualServerAddresses,
|
|
PeerServerAddresses: existing.PeerServerAddresses,
|
|
DeletedAt: timestamppb.New(time.Now().UTC()),
|
|
|
|
// PartitionOrEmpty is used to avoid writing "default" in OSS.
|
|
Partition: entMeta.PartitionOrEmpty(),
|
|
},
|
|
}
|
|
err = s.Backend.PeeringWrite(writeReq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &pbpeering.PeeringDeleteResponse{}, nil
|
|
}
|
|
|
|
// OPTIMIZE: Handle blocking queries
|
|
func (s *Server) TrustBundleRead(ctx context.Context, req *pbpeering.TrustBundleReadRequest) (*pbpeering.TrustBundleReadResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var resp *pbpeering.TrustBundleReadResponse
|
|
handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).TrustBundleRead(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "trust_bundle_read"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := structs.DefaultEnterpriseMetaInPartition(req.Partition)
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().ServiceWriteAnyAllowed(&authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
idx, trustBundle, err := s.Backend.Store().PeeringTrustBundleRead(nil, state.Query{
|
|
Value: req.Name,
|
|
EnterpriseMeta: *entMeta,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to read trust bundle for peer %s: %w", req.Name, err)
|
|
}
|
|
|
|
return &pbpeering.TrustBundleReadResponse{
|
|
Index: idx,
|
|
Bundle: trustBundle,
|
|
}, nil
|
|
}
|
|
|
|
// TODO(peering): rename rpc & request/response to drop the "service" part
|
|
// OPTIMIZE: Handle blocking queries
|
|
func (s *Server) TrustBundleListByService(ctx context.Context, req *pbpeering.TrustBundleListByServiceRequest) (*pbpeering.TrustBundleListByServiceResponse, error) {
|
|
if !s.Config.PeeringEnabled {
|
|
return nil, peeringNotEnabledErr
|
|
}
|
|
|
|
if err := s.Backend.EnterpriseCheckPartitions(req.Partition); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
if err := s.Backend.EnterpriseCheckNamespaces(req.Namespace); err != nil {
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, err.Error())
|
|
}
|
|
if req.ServiceName == "" {
|
|
return nil, errors.New("missing service name")
|
|
}
|
|
|
|
var resp *pbpeering.TrustBundleListByServiceResponse
|
|
handled, err := s.ForwardRPC(&readRequest, func(conn *grpc.ClientConn) error {
|
|
ctx := external.ForwardMetadataContext(ctx)
|
|
var err error
|
|
resp, err = pbpeering.NewPeeringServiceClient(conn).TrustBundleListByService(ctx, req)
|
|
return err
|
|
})
|
|
if handled || err != nil {
|
|
return resp, err
|
|
}
|
|
|
|
defer metrics.MeasureSince([]string{"peering", "trust_bundle_list_by_service"}, time.Now())
|
|
|
|
var authzCtx acl.AuthorizerContext
|
|
entMeta := acl.NewEnterpriseMetaWithPartition(req.Partition, req.Namespace)
|
|
options, err := external.QueryOptionsFromContext(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
authz, err := s.Backend.ResolveTokenAndDefaultMeta(options.Token, &entMeta, &authzCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := authz.ToAllowAuthorizer().ServiceWriteAllowed(req.ServiceName, &authzCtx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var (
|
|
idx uint64
|
|
bundles []*pbpeering.PeeringTrustBundle
|
|
)
|
|
|
|
switch {
|
|
case req.Kind == string(structs.ServiceKindMeshGateway):
|
|
idx, bundles, err = s.Backend.Store().PeeringTrustBundleList(nil, entMeta)
|
|
case req.ServiceName != "":
|
|
idx, bundles, err = s.Backend.Store().TrustBundleListByService(nil, req.ServiceName, s.Datacenter, entMeta)
|
|
case req.Kind != "":
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, "kind must be mesh-gateway if set")
|
|
default:
|
|
return nil, grpcstatus.Error(codes.InvalidArgument, "one of service or kind is required")
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &pbpeering.TrustBundleListByServiceResponse{Index: idx, Bundles: bundles}, nil
|
|
}
|
|
|
|
func (s *Server) getExistingPeering(peerName, partition string) (*pbpeering.Peering, error) {
|
|
q := state.Query{
|
|
Value: strings.ToLower(peerName),
|
|
EnterpriseMeta: *structs.NodeEnterpriseMetaInPartition(partition),
|
|
}
|
|
_, peering, err := s.Backend.Store().PeeringRead(nil, q)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return peering, nil
|
|
}
|
|
|
|
func (s *Server) generateNewEstablishmentSecret() (string, error) {
|
|
id, err := lib.GenerateUUID(s.Backend.ValidateProposedPeeringSecret)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return id, nil
|
|
}
|
|
|
|
// validatePeer enforces the following rule for an existing peering:
|
|
// - if a peering already exists, it can only be used as an acceptor or dialer
|
|
//
|
|
// We define a DIALER as a peering that has server addresses (or a peering that is created via the Establish endpoint)
|
|
// Conversely, we define an ACCEPTOR as a peering that is created via the GenerateToken endpoint
|
|
func validatePeer(peering *pbpeering.Peering, shouldDial bool) error {
|
|
if peering != nil && peering.ShouldDial() != shouldDial {
|
|
if shouldDial {
|
|
return fmt.Errorf("cannot create peering with name: %q; there is an existing peering expecting to be dialed", peering.Name)
|
|
} else {
|
|
return fmt.Errorf("cannot create peering with name: %q; there is already an established peering", peering.Name)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func copyPeering(p *pbpeering.Peering) *pbpeering.Peering {
|
|
var copyP pbpeering.Peering
|
|
proto.Merge(©P, p)
|
|
|
|
return ©P
|
|
}
|