156db8d368
Co-authored-by: James Rasell <jrasell@users.noreply.github.com>
355 lines
12 KiB
Go
355 lines
12 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package client
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/armon/go-metrics"
|
|
"github.com/hashicorp/go-set"
|
|
"github.com/hashicorp/nomad/acl"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// Sizes of the client-side ACL caches. Each cache keeps the most recently
// used entries so that repeated token resolution avoids refetching or
// rebuilding the same objects.
const (
	// policyCacheSize bounds the number of cached ACL policies. Fetching a
	// policy has a cost, so hot policies are kept around to speed up ACL
	// token resolution.
	policyCacheSize = 64

	// aclCacheSize bounds the number of cached ACL objects. Parsing and
	// constructing an ACL has a cost, so hot objects are kept around to
	// speed up ACL token resolution.
	aclCacheSize = 64

	// tokenCacheSize bounds the number of cached bearer tokens, covering
	// both ACL tokens and workload identities. Fetching a token has a cost,
	// so hot tokens are kept around to reduce lookups.
	tokenCacheSize = 128

	// roleCacheSize bounds the number of cached ACL roles. Looking up a
	// role requires an RPC call, so hot roles are kept around to reduce the
	// number of lookups.
	roleCacheSize = 64
)
|
|
|
|
// clientACLResolver holds the state required for client resolution
|
|
// of ACLs
|
|
type clientACLResolver struct {
|
|
// aclCache is used to maintain the parsed ACL objects
|
|
aclCache *structs.ACLCache[*acl.ACL]
|
|
|
|
// policyCache is used to maintain the fetched policy objects
|
|
policyCache *structs.ACLCache[*structs.ACLPolicy]
|
|
|
|
// tokenCache is used to maintain the fetched token objects
|
|
tokenCache *structs.ACLCache[*structs.AuthenticatedIdentity]
|
|
|
|
// roleCache is used to maintain a cache of the fetched ACL roles. Each
|
|
// entry is keyed by the role ID.
|
|
roleCache *structs.ACLCache[*structs.ACLRole]
|
|
}
|
|
|
|
// init is used to setup the client resolver state
|
|
func (c *clientACLResolver) init() {
|
|
c.aclCache = structs.NewACLCache[*acl.ACL](aclCacheSize)
|
|
c.policyCache = structs.NewACLCache[*structs.ACLPolicy](policyCacheSize)
|
|
c.tokenCache = structs.NewACLCache[*structs.AuthenticatedIdentity](tokenCacheSize)
|
|
c.roleCache = structs.NewACLCache[*structs.ACLRole](roleCacheSize)
|
|
}
|
|
|
|
// ResolveToken is used to translate an ACL Token Secret ID or workload
|
|
// identity into an ACL object, nil if ACLs are disabled, or an error.
|
|
func (c *Client) ResolveToken(bearerToken string) (*acl.ACL, error) {
|
|
a, _, err := c.resolveTokenAndACL(bearerToken)
|
|
return a, err
|
|
}
|
|
|
|
func (c *Client) resolveTokenAndACL(bearerToken string) (*acl.ACL, *structs.AuthenticatedIdentity, error) {
|
|
// Fast-path if ACLs are disabled
|
|
if !c.GetConfig().ACLEnabled {
|
|
return nil, nil, nil
|
|
}
|
|
defer metrics.MeasureSince([]string{"client", "acl", "resolve_token"}, time.Now())
|
|
|
|
// Resolve the token value
|
|
ident, err := c.resolveTokenValue(bearerToken)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
// Only allow ACLs and workload identities to call client RPCs
|
|
if ident.ACLToken == nil && ident.Claims == nil {
|
|
return nil, nil, structs.ErrTokenNotFound
|
|
}
|
|
|
|
// Give the token expiry some slight leeway in case the client and server
|
|
// clocks are skewed.
|
|
if ident.IsExpired(time.Now().Add(2 * time.Second)) {
|
|
return nil, nil, structs.ErrTokenExpired
|
|
}
|
|
|
|
var policies []*structs.ACLPolicy
|
|
|
|
// Resolve token policies
|
|
if token := ident.ACLToken; token != nil {
|
|
// Check if this is a management token
|
|
if ident.ACLToken.Type == structs.ACLManagementToken {
|
|
return acl.ManagementACL, ident, nil
|
|
}
|
|
|
|
// Resolve the policy links within the token ACL roles.
|
|
policyNames, err := c.resolveTokenACLRoles(bearerToken, token.Roles)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
// Generate a slice of all policy names included within the token, taken
|
|
// from both the ACL roles and the direct assignments.
|
|
policyNames = append(policyNames, token.Policies...)
|
|
|
|
// Resolve ACL token policies
|
|
if policies, err = c.resolvePolicies(token.SecretID, policyNames); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
} else {
|
|
// Resolve policies for workload identities
|
|
policyArgs := structs.GenericRequest{
|
|
QueryOptions: structs.QueryOptions{
|
|
AuthToken: bearerToken,
|
|
Region: c.Region(),
|
|
},
|
|
}
|
|
policyReply := structs.ACLPolicySetResponse{}
|
|
if err := c.RPC("ACL.GetClaimPolicies", &policyArgs, &policyReply); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
policies = make([]*structs.ACLPolicy, 0, len(policyReply.Policies))
|
|
for _, p := range policyReply.Policies {
|
|
policies = append(policies, p)
|
|
}
|
|
}
|
|
|
|
// Resolve the ACL object
|
|
aclObj, err := structs.CompileACLObject(c.aclCache, policies)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
return aclObj, ident, nil
|
|
}
|
|
|
|
// resolveTokenValue is used to translate a bearer token, either an ACL token's
|
|
// secret or a workload identity, into an ACL token with caching We use a local
|
|
// cache up to the TTL limit, and then resolve via a server. If we cannot
|
|
// reach a server, but have a cached value we extend the TTL to gracefully handle outages.
|
|
func (c *Client) resolveTokenValue(bearerToken string) (*structs.AuthenticatedIdentity, error) {
|
|
// Hot-path the anonymous token
|
|
if bearerToken == "" {
|
|
return &structs.AuthenticatedIdentity{ACLToken: structs.AnonymousACLToken}, nil
|
|
}
|
|
|
|
// Lookup the token entry in the cache
|
|
entry, ok := c.tokenCache.Get(bearerToken)
|
|
if ok {
|
|
if entry.Age() <= c.GetConfig().ACLTokenTTL {
|
|
return entry.Get(), nil
|
|
}
|
|
}
|
|
|
|
// Lookup the token
|
|
req := structs.GenericRequest{
|
|
QueryOptions: structs.QueryOptions{
|
|
AuthToken: bearerToken,
|
|
Region: c.Region(),
|
|
AllowStale: true,
|
|
},
|
|
}
|
|
var resp structs.ACLWhoAmIResponse
|
|
if err := c.RPC("ACL.WhoAmI", &req, &resp); err != nil {
|
|
// If we encounter an error but have a cached value, mask the error and extend the cache
|
|
if ok {
|
|
c.logger.Warn("failed to resolve token, using expired cached value", "error", err)
|
|
return entry.Get(), nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
// Cache the response (positive or negative)
|
|
c.tokenCache.Add(bearerToken, resp.Identity)
|
|
return resp.Identity, nil
|
|
}
|
|
|
|
// resolvePolicies is used to translate a set of named ACL policies into the objects.
|
|
// We cache the policies locally, and fault them from a server as necessary. Policies
|
|
// are cached for a TTL, and then refreshed. If a server cannot be reached, the cache TTL
|
|
// will be ignored to gracefully handle outages.
|
|
func (c *Client) resolvePolicies(secretID string, policies []string) ([]*structs.ACLPolicy, error) {
|
|
var out []*structs.ACLPolicy
|
|
var expired []*structs.ACLPolicy
|
|
var missing []string
|
|
|
|
// Scan the cache for each policy
|
|
for _, policyName := range policies {
|
|
// Lookup the policy in the cache
|
|
entry, ok := c.policyCache.Get(policyName)
|
|
if !ok {
|
|
missing = append(missing, policyName)
|
|
continue
|
|
}
|
|
|
|
// Check if the cached value is valid or expired
|
|
if entry.Age() <= c.GetConfig().ACLPolicyTTL {
|
|
out = append(out, entry.Get())
|
|
} else {
|
|
expired = append(expired, entry.Get())
|
|
}
|
|
}
|
|
|
|
// Hot-path if we have no missing or expired policies
|
|
if len(missing)+len(expired) == 0 {
|
|
return out, nil
|
|
}
|
|
|
|
// Lookup the missing and expired policies
|
|
fetch := missing
|
|
for _, p := range expired {
|
|
fetch = append(fetch, p.Name)
|
|
}
|
|
req := structs.ACLPolicySetRequest{
|
|
Names: fetch,
|
|
QueryOptions: structs.QueryOptions{
|
|
Region: c.Region(),
|
|
AuthToken: secretID,
|
|
AllowStale: true,
|
|
},
|
|
}
|
|
var resp structs.ACLPolicySetResponse
|
|
if err := c.RPC("ACL.GetPolicies", &req, &resp); err != nil {
|
|
// If we encounter an error but have cached policies, mask the error and extend the cache
|
|
if len(missing) == 0 {
|
|
c.logger.Warn("failed to resolve policies, using expired cached value", "error", err)
|
|
out = append(out, expired...)
|
|
return out, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
// Handle each output
|
|
for _, policy := range resp.Policies {
|
|
c.policyCache.Add(policy.Name, policy)
|
|
out = append(out, policy)
|
|
}
|
|
|
|
// Return the valid policies
|
|
return out, nil
|
|
}
|
|
|
|
// resolveTokenACLRoles is used to unpack an ACL roles and their policy
|
|
// assignments into a list of ACL policy names. This can then be used to
|
|
// compile an ACL object.
|
|
//
|
|
// When roles need to be looked up from state via server RPC, we may use the
|
|
// expired cache version. This can only occur if we can fully resolve the role
|
|
// via the cache.
|
|
func (c *Client) resolveTokenACLRoles(secretID string, roleLinks []*structs.ACLTokenRoleLink) ([]string, error) {
|
|
|
|
var (
|
|
// missingRoleIDs are the roles linked which are not found within our
|
|
// cache. These must be looked up from the server via and RPC, so we
|
|
// can correctly identify the policy links.
|
|
missingRoleIDs []string
|
|
|
|
// expiredRoleIDs are the roles linked which have been found within our
|
|
// cache, but are expired. These must be looked up from the server via
|
|
// and RPC, so we can correctly identify the policy links.
|
|
expiredRoleIDs []string
|
|
)
|
|
|
|
// policyNames tracks the resolved ACL policies which are linked to the
|
|
// role as a deduplicated list. This is the output object and represents
|
|
// the authorisation this role provides token bearers.
|
|
policyNames := set.New[string](0)
|
|
|
|
for _, roleLink := range roleLinks {
|
|
|
|
// Look within the cache to see if the role is already present. If we
|
|
// do not find it, add the ID to our tracking, so we look this up via
|
|
// RPC.
|
|
entry, ok := c.roleCache.Get(roleLink.ID)
|
|
if !ok {
|
|
missingRoleIDs = append(missingRoleIDs, roleLink.ID)
|
|
continue
|
|
}
|
|
|
|
// If the cached value is expired, add the ID to our tracking, so we
|
|
// look this up via RPC. Otherwise, iterate the policy links and add
|
|
// each policy name to our return object tracking.
|
|
if entry.Age() <= c.GetConfig().ACLRoleTTL {
|
|
for _, policyLink := range entry.Get().Policies {
|
|
policyNames.Insert(policyLink.Name)
|
|
}
|
|
} else {
|
|
expiredRoleIDs = append(expiredRoleIDs, entry.Get().ID)
|
|
}
|
|
}
|
|
|
|
// Hot-path: we were able to resolve all ACL roles via the cache and
|
|
// generate a list of linked policy names. Therefore, we can avoid making
|
|
// any RPC calls.
|
|
if len(missingRoleIDs)+len(expiredRoleIDs) == 0 {
|
|
return policyNames.Slice(), nil
|
|
}
|
|
|
|
// Created a combined list of role IDs that we need to lookup from server
|
|
// state.
|
|
roleIDsToFetch := missingRoleIDs
|
|
roleIDsToFetch = append(roleIDsToFetch, expiredRoleIDs...)
|
|
|
|
// Generate an RPC request to detail all the ACL roles that we did not find
|
|
// or were expired within the cache.
|
|
roleByIDReq := structs.ACLRolesByIDRequest{
|
|
ACLRoleIDs: roleIDsToFetch,
|
|
QueryOptions: structs.QueryOptions{
|
|
Region: c.Region(),
|
|
AuthToken: secretID,
|
|
AllowStale: true,
|
|
},
|
|
}
|
|
|
|
var roleByIDResp structs.ACLRolesByIDResponse
|
|
|
|
// Perform the RPC call to detail the required ACL roles. If the RPC call
|
|
// fails, and we are only updating expired cache entries, use the expired
|
|
// entries. This allows use to handle intermittent failures.
|
|
err := c.RPC(structs.ACLGetRolesByIDRPCMethod, &roleByIDReq, &roleByIDResp)
|
|
if err != nil {
|
|
if len(missingRoleIDs) == 0 {
|
|
c.logger.Warn("failed to resolve ACL roles, using expired cached value", "error", err)
|
|
for _, aclRole := range roleByIDResp.ACLRoles {
|
|
for _, rolePolicyLink := range aclRole.Policies {
|
|
policyNames.Insert(rolePolicyLink.Name)
|
|
}
|
|
}
|
|
return policyNames.Slice(), nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
// Generate a timestamp for the cache entry. We do not need to use a
|
|
// timestamp per ACL role response integration.
|
|
now := time.Now()
|
|
for _, aclRole := range roleByIDResp.ACLRoles {
|
|
// Add an entry to the cache using the generated timestamp for future
|
|
// expiry calculations. Any existing, expired entry will be
|
|
// overwritten.
|
|
c.roleCache.AddAtTime(aclRole.ID, aclRole, now)
|
|
|
|
// Iterate the role policy links, extracting the name and adding this
|
|
// to our return response tracking.
|
|
for _, rolePolicyLink := range aclRole.Policies {
|
|
policyNames.Insert(rolePolicyLink.Name)
|
|
}
|
|
}
|
|
|
|
return policyNames.Slice(), nil
|
|
}
|