open-consul/agent/consul/auto_config_endpoint.go

429 lines
14 KiB
Go
Raw Normal View History

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package consul
import (
"context"
"crypto/x509"
"encoding/base64"
"fmt"
"regexp"
"github.com/hashicorp/consul/acl"
bexpr "github.com/hashicorp/go-bexpr"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib/template"
Protobuf Refactoring for Multi-Module Cleanliness (#16302) Protobuf Refactoring for Multi-Module Cleanliness This commit includes the following: Moves all packages that were within proto/ to proto/private Rewrites imports to account for the packages being moved Adds in buf.work.yaml to enable buf workspaces Names the proto-public buf module so that we can override the Go package imports within proto/buf.yaml Bumps the buf version dependency to 1.14.0 (I was trying out the version to see if it would get around an issue - it didn't but it also doesn't break things and it seemed best to keep up with the toolchain changes) Why: In the future we will need to consume other protobuf dependencies such as the Google HTTP annotations for openapi generation or grpc-gateway usage. There were some recent changes to have our own ratelimiting annotations. The two combined were not working when I was trying to use them together (attempting to rebase another branch) Buf workspaces should be the solution to the problem Buf workspaces means that each module will have generated Go code that embeds proto file names relative to the proto dir and not the top level repo root. This resulted in proto file name conflicts in the Go global protobuf type registry. The solution to that was to add in a private/ directory into the path within the proto/ directory. That then required rewriting all the imports. Is this safe? AFAICT yes The gRPC wire protocol doesn't seem to care about the proto file names (although the Go grpc code does tack on the proto file name as Metadata in the ServiceDesc) Other than imports, there were no changes to any generated code as a result of this.
2023-02-17 21:14:46 +00:00
"github.com/hashicorp/consul/proto/private/pbautoconf"
"github.com/hashicorp/consul/proto/private/pbconfig"
"github.com/hashicorp/consul/proto/private/pbconnect"
"github.com/hashicorp/consul/tlsutil"
)
type AutoConfigOptions struct {
NodeName string
SegmentName string
Partition string
CSR *x509.CertificateRequest
SpiffeID *connect.SpiffeIDAgent
}
func (opts AutoConfigOptions) PartitionOrDefault() string {
return acl.PartitionOrDefault(opts.Partition)
}
type AutoConfigAuthorizer interface {
// Authorizes the request and returns a struct containing the various
// options for how to generate the configuration.
Authorize(*pbautoconf.AutoConfigRequest) (AutoConfigOptions, error)
}
type disabledAuthorizer struct{}
func (_ *disabledAuthorizer) Authorize(_ *pbautoconf.AutoConfigRequest) (AutoConfigOptions, error) {
return AutoConfigOptions{}, fmt.Errorf("Auto Config is disabled")
}
type jwtAuthorizer struct {
validator *ssoauth.Validator
allowReuse bool
claimAssertions []string
}
// Invalidate any quote or whitespace characters that could cause an escape with bexpr.
// This includes an extra single-quote character not specified in the grammar for safety in case it is later added.
// https://github.com/hashicorp/go-bexpr/blob/v0.1.11/grammar/grammar.peg#L188-L191
var invalidSegmentName = regexp.MustCompile("[`'\"\\s]+")
var InvalidNodeName = invalidSegmentName
func (a *jwtAuthorizer) Authorize(req *pbautoconf.AutoConfigRequest) (AutoConfigOptions, error) {
// perform basic JWT Authorization
identity, err := a.validator.ValidateLogin(context.Background(), req.JWT)
if err != nil {
// TODO (autoconf) maybe we should add a more generic permission denied error not tied to the ACL package?
return AutoConfigOptions{}, acl.PermissionDenied("Failed JWT authorization: %v", err)
}
// Ensure provided data cannot escape the RHS of a bexpr for security.
// This is not the cleanest way to prevent this behavior. Ideally, the bexpr would allow us to
// inject a variable on the RHS for comparison as well, but it would be a complex change to implement
// that would likely break backwards-compatibility in certain circumstances.
if InvalidNodeName.MatchString(req.Node) {
return AutoConfigOptions{}, fmt.Errorf("Invalid request field. %v = `%v`", "node", req.Node)
}
if invalidSegmentName.MatchString(req.Segment) {
return AutoConfigOptions{}, fmt.Errorf("Invalid request field. %v = `%v`", "segment", req.Segment)
}
if req.Partition != "" && !dns.IsValidLabel(req.Partition) {
return AutoConfigOptions{}, fmt.Errorf("Invalid request field. %v = `%v`", "partition", req.Partition)
}
// Ensure that every value in this mapping is safe to interpolate before using it.
varMap := map[string]string{
"node": req.Node,
"segment": req.Segment,
"partition": req.PartitionOrDefault(),
}
for _, raw := range a.claimAssertions {
// validate and fill any HIL
filled, err := template.InterpolateHIL(raw, varMap, true)
if err != nil {
return AutoConfigOptions{}, fmt.Errorf("Failed to render claim assertion template %q: %w", raw, err)
}
evaluator, err := bexpr.CreateEvaluatorForType(filled, nil, identity.SelectableFields)
if err != nil {
return AutoConfigOptions{}, fmt.Errorf("Failed to create evaluator for claim assertion %q: %w", filled, err)
}
ok, err := evaluator.Evaluate(identity.SelectableFields)
if err != nil {
return AutoConfigOptions{}, fmt.Errorf("Failed to execute claim assertion %q: %w", filled, err)
}
if !ok {
return AutoConfigOptions{}, acl.PermissionDenied("Failed JWT claim assertion")
}
}
opts := AutoConfigOptions{
NodeName: req.Node,
SegmentName: req.Segment,
Partition: req.Partition,
}
if req.CSR != "" {
csr, id, err := parseAutoConfigCSR(req.CSR)
if err != nil {
return AutoConfigOptions{}, err
}
if id.Agent != req.Node || !acl.EqualPartitions(id.Partition, req.Partition) {
return AutoConfigOptions{},
fmt.Errorf("Spiffe ID agent name (%s) of the certificate signing request is not for the correct node (%s)",
printNodeName(id.Agent, id.Partition),
printNodeName(req.Node, req.Partition),
)
}
opts.CSR = csr
opts.SpiffeID = id
}
return opts, nil
}
type AutoConfigBackend interface {
CreateACLToken(template *structs.ACLToken) (*structs.ACLToken, error)
DatacenterJoinAddresses(partition, segment string) ([]string, error)
ForwardRPC(method string, info structs.RPCInfo, reply interface{}) (bool, error)
GetCARoots() (*structs.IndexedCARoots, error)
SignCertificate(csr *x509.CertificateRequest, id connect.CertURI) (*structs.IssuedCert, error)
}
// AutoConfig endpoint is used for cluster auto configuration operations
type AutoConfig struct {
// currently AutoConfig does not support pushing down any configuration that would be reloadable on the servers
// (outside of some TLS settings such as the configured CA certs which are retrieved via the TLS configurator)
// If that changes then we will need to change this to use an atomic.Value and provide means of reloading it.
config *Config
tlsConfigurator *tlsutil.Configurator
backend AutoConfigBackend
authorizer AutoConfigAuthorizer
}
func NewAutoConfig(conf *Config, tlsConfigurator *tlsutil.Configurator, backend AutoConfigBackend, authz AutoConfigAuthorizer) *AutoConfig {
if conf == nil {
conf = DefaultConfig()
}
return &AutoConfig{
config: conf,
tlsConfigurator: tlsConfigurator,
backend: backend,
authorizer: authz,
}
}
// updateTLSCertificatesInConfig will ensure that the TLS settings regarding how an agent is
// made aware of its certificates are populated. This will only work if connect is enabled and
// in some cases only if auto_encrypt is enabled on the servers. This endpoint has the option
// to configure auto_encrypt or potentially in the future to generate the certificates inline.
func (ac *AutoConfig) updateTLSCertificatesInConfig(opts AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
// nothing to be done as we cannot generate certificates
if !ac.config.ConnectEnabled {
return nil
}
if opts.CSR != nil {
cert, err := ac.backend.SignCertificate(opts.CSR, opts.SpiffeID)
if err != nil {
return fmt.Errorf("Failed to sign CSR: %w", err)
}
// convert to the protobuf form of the issued certificate
2022-03-16 16:12:29 +00:00
pbcert, err := pbconnect.NewIssuedCertFromStructs(cert)
if err != nil {
return err
}
resp.Certificate = pbcert
}
connectRoots, err := ac.backend.GetCARoots()
if err != nil {
return fmt.Errorf("Failed to lookup the CA roots: %w", err)
}
// convert to the protobuf form of the issued certificate
2022-03-16 16:12:29 +00:00
pbroots, err := pbconnect.NewCARootsFromStructs(connectRoots)
if err != nil {
return err
}
resp.CARoots = pbroots
// get the non-connect CA certs from the TLS Configurator
if ac.tlsConfigurator != nil {
resp.ExtraCACertificates = ac.tlsConfigurator.ManualCAPems()
}
return nil
}
// updateACLtokensInConfig will configure all of the agents ACL settings and will populate
// the configuration with an agent token usable for all default agent operations.
func (ac *AutoConfig) updateACLsInConfig(opts AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
acl := &pbconfig.ACL{
Enabled: ac.config.ACLsEnabled,
PolicyTTL: ac.config.ACLResolverSettings.ACLPolicyTTL.String(),
RoleTTL: ac.config.ACLResolverSettings.ACLRoleTTL.String(),
TokenTTL: ac.config.ACLResolverSettings.ACLTokenTTL.String(),
DownPolicy: ac.config.ACLResolverSettings.ACLDownPolicy,
DefaultPolicy: ac.config.ACLResolverSettings.ACLDefaultPolicy,
EnableKeyListPolicy: ac.config.ACLEnableKeyListPolicy,
}
// when ACLs are enabled we want to create a local token with a node identity
if ac.config.ACLsEnabled {
// set up the token template - the ids and create
template := structs.ACLToken{
Description: fmt.Sprintf("Auto Config Token for Node %q", printNodeName(opts.NodeName, opts.Partition)),
Local: true,
NodeIdentities: []*structs.ACLNodeIdentity{
{
NodeName: opts.NodeName,
Datacenter: ac.config.Datacenter,
},
},
EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(opts.PartitionOrDefault()),
}
token, err := ac.backend.CreateACLToken(&template)
if err != nil {
return fmt.Errorf("Failed to generate an ACL token for node %q: %w", printNodeName(opts.NodeName, opts.Partition), err)
}
acl.Tokens = &pbconfig.ACLTokens{Agent: token.SecretID}
}
resp.Config.ACL = acl
return nil
}
// updateJoinAddressesInConfig determines the correct gossip endpoints that clients should
// be connecting to for joining the cluster based on the segment given in the opts parameter.
func (ac *AutoConfig) updateJoinAddressesInConfig(opts AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
joinAddrs, err := ac.backend.DatacenterJoinAddresses(opts.Partition, opts.SegmentName)
if err != nil {
return err
}
if resp.Config.Gossip == nil {
resp.Config.Gossip = &pbconfig.Gossip{}
}
resp.Config.Gossip.RetryJoinLAN = joinAddrs
return nil
}
// updateGossipEncryptionInConfig will populate the gossip encryption configuration settings
func (ac *AutoConfig) updateGossipEncryptionInConfig(_ AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
// Add gossip encryption settings if there is any key loaded
memberlistConfig := ac.config.SerfLANConfig.MemberlistConfig
if lanKeyring := memberlistConfig.Keyring; lanKeyring != nil {
if resp.Config.Gossip == nil {
resp.Config.Gossip = &pbconfig.Gossip{}
}
if resp.Config.Gossip.Encryption == nil {
resp.Config.Gossip.Encryption = &pbconfig.GossipEncryption{}
}
pk := lanKeyring.GetPrimaryKey()
if len(pk) > 0 {
resp.Config.Gossip.Encryption.Key = base64.StdEncoding.EncodeToString(pk)
}
resp.Config.Gossip.Encryption.VerifyIncoming = memberlistConfig.GossipVerifyIncoming
resp.Config.Gossip.Encryption.VerifyOutgoing = memberlistConfig.GossipVerifyOutgoing
}
return nil
}
// updateTLSSettingsInConfig will populate the TLS configuration settings but will not
// populate leaf or ca certficiates.
func (ac *AutoConfig) updateTLSSettingsInConfig(_ AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
if ac.tlsConfigurator == nil {
// TLS is not enabled?
return nil
}
var err error
agent: convert listener config to TLS types (#12522) * tlsutil: initial implementation of types/TLSVersion tlsutil: add test for parsing deprecated agent TLS version strings tlsutil: return TLSVersionInvalid with error tlsutil: start moving tlsutil cipher suite lookups over to types/tls tlsutil: rename tlsLookup to ParseTLSVersion, add cipherSuiteLookup agent: attempt to use types in runtime config agent: implement b.tlsVersion validation in config builder agent: fix tlsVersion nil check in builder tlsutil: update to renamed ParseTLSVersion and goTLSVersions tlsutil: fixup TestConfigurator_CommonTLSConfigTLSMinVersion tlsutil: disable invalid config parsing tests tlsutil: update tests auto_config: lookup old config strings from base.TLSMinVersion auto_config: update endpoint tests to use TLS types agent: update runtime_test to use TLS types agent: update TestRuntimeCinfig_Sanitize.golden agent: update config runtime tests to expect TLS types * website: update Consul agent tls_min_version values * agent: fixup TLS parsing and compilation errors * test: fixup lint issues in agent/config_runtime_test and tlsutil/config_test * tlsutil: add CHACHA20_POLY1305 cipher suites to goTLSCipherSuites * test: revert autoconfig tls min version fixtures to old format * types: add TLSVersions public function * agent: add warning for deprecated TLS version strings * agent: move agent config specific logic from tlsutil.ParseTLSVersion into agent config builder * tlsutil(BREAKING): change default TLS min version to TLS 1.2 * agent: move ParseCiphers logic from tlsutil into agent config builder * tlsutil: remove unused CipherString function * agent: fixup import for types package * Revert "tlsutil: remove unused CipherString function" This reverts commit 6ca7f6f58d268e617501b7db9500113c13bae70c. * agent: fixup config builder and runtime tests * tlsutil: fixup one remaining ListenerConfig -> ProtocolConfig * test: move TLS cipher suites parsing test from tlsutil into agent config builder tests * agent: remove parseCiphers helper from auto_config_endpoint_test * test: remove unused imports from tlsutil * agent: remove resolved FIXME comment * tlsutil: remove TODO and FIXME in cipher suite validation * agent: prevent setting inherited cipher suite config when TLS 1.3 is specified * changelog: add entry for converting agent config to TLS types * agent: remove FIXME in runtime test, this is covered in builder tests with invalid tls9 value now * tlsutil: remove config tests for values checked at agent config builder boundary * tlsutil: remove tls version check from loadProtocolConfig * tlsutil: remove tests and TODOs for logic checked in TestBuilder_tlsVersion and TestBuilder_tlsCipherSuites * website: update search link for supported Consul agent cipher suites * website: apply review suggestions for tls_min_version description * website: attempt to clean up markdown list formatting for tls_min_version * website: moar linebreaks to fix tls_min_version formatting * Revert "website: moar linebreaks to fix tls_min_version formatting" This reverts commit 38585927422f73ebf838a7663e566ac245f2a75c. * autoconfig: translate old values for TLSMinVersion * agent: rename var for translated value of deprecated TLS version value * Update agent/config/deprecated.go Co-authored-by: Dan Upton <daniel@floppy.co> * agent: fix lint issue * agent: fixup deprecated config test assertions for updated warning Co-authored-by: Dan Upton <daniel@floppy.co>
2022-03-24 19:32:25 +00:00
resp.Config.TLS, err = ac.tlsConfigurator.AutoConfigTLSSettings()
return err
}
// baseConfig will populate the configuration with some base settings such as the
// datacenter names, node name etc.
func (ac *AutoConfig) baseConfig(opts AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error {
if opts.NodeName == "" {
return fmt.Errorf("Cannot generate auto config response without a node name")
}
resp.Config.Datacenter = ac.config.Datacenter
resp.Config.PrimaryDatacenter = ac.config.PrimaryDatacenter
resp.Config.NodeName = opts.NodeName
resp.Config.SegmentName = opts.SegmentName
resp.Config.Partition = opts.Partition
return nil
}
type autoConfigUpdater func(c *AutoConfig, opts AutoConfigOptions, resp *pbautoconf.AutoConfigResponse) error
var (
// variable holding the list of config updating functions to execute when generating
// the auto config response. This will allow for more easily adding extra self-contained
// configurators here in the future.
autoConfigUpdaters []autoConfigUpdater = []autoConfigUpdater{
(*AutoConfig).baseConfig,
(*AutoConfig).updateJoinAddressesInConfig,
(*AutoConfig).updateGossipEncryptionInConfig,
(*AutoConfig).updateTLSSettingsInConfig,
(*AutoConfig).updateACLsInConfig,
(*AutoConfig).updateTLSCertificatesInConfig,
}
)
// AgentAutoConfig will authorize the incoming request and then generate the configuration
// to push down to the client
func (ac *AutoConfig) InitialConfiguration(req *pbautoconf.AutoConfigRequest, resp *pbautoconf.AutoConfigResponse) error {
// default the datacenter to our datacenter - agents do not have to specify this as they may not
// yet know the datacenter name they are going to be in.
if req.Datacenter == "" {
req.Datacenter = ac.config.Datacenter
}
// TODO (autoconf) Is performing auto configuration over the WAN really a bad idea?
if req.Datacenter != ac.config.Datacenter {
return fmt.Errorf("invalid datacenter %q - agent auto configuration cannot target a remote datacenter", req.Datacenter)
}
// TODO (autoconf) maybe panic instead?
if ac.backend == nil {
return fmt.Errorf("No Auto Config backend is configured")
}
// forward to the leader
if done, err := ac.backend.ForwardRPC("AutoConfig.InitialConfiguration", req, resp); done {
return err
}
// TODO (autoconf) maybe panic instead?
if ac.authorizer == nil {
return fmt.Errorf("No Auto Config authorizer is configured")
}
// authorize the request with the configured authorizer
opts, err := ac.authorizer.Authorize(req)
if err != nil {
return err
}
resp.Config = &pbconfig.Config{}
// update all the configurations
for _, configFn := range autoConfigUpdaters {
if err := configFn(ac, opts, resp); err != nil {
return err
}
}
return nil
}
func parseAutoConfigCSR(csr string) (*x509.CertificateRequest, *connect.SpiffeIDAgent, error) {
// Parse the CSR string into the x509 CertificateRequest struct
x509CSR, err := connect.ParseCSR(csr)
if err != nil {
return nil, nil, fmt.Errorf("Failed to parse CSR: %w", err)
}
// ensure that exactly one URI SAN is present
if len(x509CSR.URIs) != 1 {
return nil, nil, fmt.Errorf("CSR SAN contains an invalid number of URIs: %v", len(x509CSR.URIs))
}
if len(x509CSR.EmailAddresses) > 0 {
return nil, nil, fmt.Errorf("CSR SAN does not allow specifying email addresses")
}
// Parse the SPIFFE ID
spiffeID, err := connect.ParseCertURI(x509CSR.URIs[0])
if err != nil {
return nil, nil, fmt.Errorf("Failed to parse the SPIFFE URI: %w", err)
}
agentID, isAgent := spiffeID.(*connect.SpiffeIDAgent)
if !isAgent {
return nil, nil, fmt.Errorf("SPIFFE ID is not an Agent ID")
}
return x509CSR, agentID, nil
}
func printNodeName(nodeName, partition string) string {
if acl.IsDefaultPartition(partition) {
return nodeName
}
return partition + "/" + nodeName
}