users: create cache for user lookups (#16100)
* users: create cache for user lookups. This PR introduces a global cache for OS user lookups. This should relieve pressure on the OS domain/directory lookups, which will be queried more often now that the Task API exists. Hits are cached for 1 hour, and misses are cached for 1 minute. These values are fairly arbitrary - we can tweak them if there is any reason to. Closes #16010. * users: delete expired negative entry from cache
This commit is contained in:
parent
0d37892024
commit
af28ac1610
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:improvement
|
||||||
|
users: Added a cache for OS user lookups
|
||||||
|
```
|
|
@ -0,0 +1,85 @@
|
||||||
|
package users
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os/user"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/nomad/lib/lang"
|
||||||
|
"oss.indeed.com/go/libtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lifetimes applied to cached lookup results.
const (
	// cacheTTL bounds how long a successful user lookup is served from
	// the cache.
	cacheTTL = time.Hour

	// failureTTL bounds how long a failed user lookup is served from
	// the cache.
	failureTTL = time.Minute
)
|
||||||
|
|
||||||
|
// entry pairs a cached value (First) with a timestamp (Second); the expired
// method measures the entry's age from that timestamp.
type entry[T any] lang.Pair[T, time.Time]
|
||||||
|
|
||||||
|
func (e *entry[T]) expired(now time.Time, ttl time.Duration) bool {
|
||||||
|
return now.After(e.Second.Add(ttl))
|
||||||
|
}
|
||||||
|
|
||||||
|
type (
|
||||||
|
userCache map[string]*entry[*user.User]
|
||||||
|
userFailureCache map[string]*entry[error]
|
||||||
|
)
|
||||||
|
|
||||||
|
// lookupUserFunc resolves a username to a *user.User. The cache stores one as
// a field and calls it whenever it has no usable cached answer.
type lookupUserFunc func(string) (*user.User, error)
|
||||||
|
|
||||||
|
type cache struct {
|
||||||
|
clock libtime.Clock
|
||||||
|
lookupUser lookupUserFunc
|
||||||
|
|
||||||
|
lock sync.Mutex
|
||||||
|
users userCache
|
||||||
|
userFailures userFailureCache
|
||||||
|
}
|
||||||
|
|
||||||
|
func newCache() *cache {
|
||||||
|
return &cache{
|
||||||
|
clock: libtime.SystemClock(),
|
||||||
|
lookupUser: internalLookupUser,
|
||||||
|
users: make(userCache),
|
||||||
|
userFailures: make(userFailureCache),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *cache) GetUser(username string) (*user.User, error) {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
// record this moment as "now" for further cache operations
|
||||||
|
now := c.clock.Now()
|
||||||
|
|
||||||
|
// first check if the user is in the cache and the entry we have
|
||||||
|
// is not yet expired
|
||||||
|
usr, exists := c.users[username]
|
||||||
|
if exists && !usr.expired(now, cacheTTL) {
|
||||||
|
return usr.First, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// next check if there was a recent failure already, so we
|
||||||
|
// avoid spamming the OS with dead user lookups
|
||||||
|
failure, exists2 := c.userFailures[username]
|
||||||
|
if exists2 {
|
||||||
|
if !failure.expired(now, failureTTL) {
|
||||||
|
return nil, failure.First
|
||||||
|
}
|
||||||
|
// may as well cleanup expired case
|
||||||
|
delete(c.userFailures, username)
|
||||||
|
}
|
||||||
|
|
||||||
|
// need to perform an OS lookup
|
||||||
|
u, err := c.lookupUser(username)
|
||||||
|
|
||||||
|
// lookup was a failure, populate the failure cache
|
||||||
|
if err != nil {
|
||||||
|
c.userFailures[username] = &entry[error]{err, now}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// lookup was a success, populate the user cache
|
||||||
|
c.users[username] = &entry[*user.User]{u, now}
|
||||||
|
return u, nil
|
||||||
|
}
|
|
@ -0,0 +1,153 @@
|
||||||
|
//go:build unix
|
||||||
|
|
||||||
|
package users
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os/user"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/nomad/ci"
|
||||||
|
"github.com/shoenig/test/must"
|
||||||
|
"oss.indeed.com/go/libtime/libtimetest"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCache_real_hit(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
c := newCache()
|
||||||
|
|
||||||
|
// fresh lookup
|
||||||
|
u, err := c.GetUser("nobody")
|
||||||
|
must.NoError(t, err)
|
||||||
|
must.NotNil(t, u)
|
||||||
|
|
||||||
|
// hit again, cached value
|
||||||
|
u2, err2 := c.GetUser("nobody")
|
||||||
|
must.NoError(t, err2)
|
||||||
|
must.NotNil(t, u2)
|
||||||
|
must.True(t, u == u2) // compare pointers
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_real_miss(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
c := newCache()
|
||||||
|
|
||||||
|
// fresh lookup
|
||||||
|
u, err := c.GetUser("doesnotexist")
|
||||||
|
must.Error(t, err)
|
||||||
|
must.Nil(t, u)
|
||||||
|
|
||||||
|
// hit again, cached value
|
||||||
|
u2, err2 := c.GetUser("doesnotexist")
|
||||||
|
must.Error(t, err2)
|
||||||
|
must.Nil(t, u2)
|
||||||
|
must.True(t, err == err2) // compare pointers
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_mock_hit(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
c := newCache()
|
||||||
|
|
||||||
|
lookupCount := 0
|
||||||
|
|
||||||
|
// hijack the underlying lookup function with our own mock
|
||||||
|
c.lookupUser = func(username string) (*user.User, error) {
|
||||||
|
lookupCount++
|
||||||
|
return &user.User{Name: username}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// hijack the clock with our own mock
|
||||||
|
t0 := time.Now()
|
||||||
|
clockCount := 0
|
||||||
|
c.clock = libtimetest.NewClockMock(t).NowMock.Set(func() time.Time {
|
||||||
|
clockCount++
|
||||||
|
switch clockCount {
|
||||||
|
case 1:
|
||||||
|
return t0
|
||||||
|
case 2:
|
||||||
|
return t0.Add(59 * time.Minute)
|
||||||
|
default:
|
||||||
|
return t0.Add(61 * time.Minute)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const username = "armon"
|
||||||
|
|
||||||
|
// initial lookup
|
||||||
|
u, err := c.GetUser(username)
|
||||||
|
must.NoError(t, err)
|
||||||
|
must.Eq(t, "armon", u.Name)
|
||||||
|
must.Eq(t, 1, lookupCount)
|
||||||
|
must.Eq(t, 1, clockCount)
|
||||||
|
|
||||||
|
// second lookup, 59 minutes after initil lookup
|
||||||
|
u2, err2 := c.GetUser(username)
|
||||||
|
must.NoError(t, err2)
|
||||||
|
must.Eq(t, "armon", u2.Name)
|
||||||
|
must.Eq(t, 1, lookupCount) // was in cache
|
||||||
|
must.Eq(t, 2, clockCount)
|
||||||
|
|
||||||
|
// third lookup, 61 minutes after initial lookup (expired)
|
||||||
|
u3, err3 := c.GetUser(username)
|
||||||
|
must.NoError(t, err3)
|
||||||
|
must.Eq(t, "armon", u3.Name)
|
||||||
|
must.Eq(t, 2, lookupCount)
|
||||||
|
must.Eq(t, 3, clockCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_mock_miss(t *testing.T) {
|
||||||
|
ci.Parallel(t)
|
||||||
|
|
||||||
|
c := newCache()
|
||||||
|
|
||||||
|
lookupCount := 0
|
||||||
|
lookupErr := errors.New("lookup error")
|
||||||
|
|
||||||
|
// hijack the underlying lookup function with our own mock
|
||||||
|
c.lookupUser = func(username string) (*user.User, error) {
|
||||||
|
lookupCount++
|
||||||
|
return nil, lookupErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// hijack the clock with our own mock
|
||||||
|
t0 := time.Now()
|
||||||
|
clockCount := 0
|
||||||
|
c.clock = libtimetest.NewClockMock(t).NowMock.Set(func() time.Time {
|
||||||
|
clockCount++
|
||||||
|
switch clockCount {
|
||||||
|
case 1:
|
||||||
|
return t0
|
||||||
|
case 2:
|
||||||
|
return t0.Add(59 * time.Second)
|
||||||
|
default:
|
||||||
|
return t0.Add(61 * time.Second)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const username = "armon"
|
||||||
|
|
||||||
|
// initial lookup
|
||||||
|
u, err := c.GetUser(username)
|
||||||
|
must.ErrorIs(t, err, lookupErr)
|
||||||
|
must.Nil(t, u)
|
||||||
|
must.Eq(t, 1, lookupCount)
|
||||||
|
must.Eq(t, 1, clockCount)
|
||||||
|
|
||||||
|
// second lookup, 59 seconds after initial (still in cache)
|
||||||
|
u2, err2 := c.GetUser(username)
|
||||||
|
must.ErrorIs(t, err2, lookupErr)
|
||||||
|
must.Nil(t, u2)
|
||||||
|
must.Eq(t, 1, lookupCount) // in cache
|
||||||
|
must.Eq(t, 2, clockCount)
|
||||||
|
|
||||||
|
// third lookup, 61 seconds after initial (expired)
|
||||||
|
u3, err3 := c.GetUser(username)
|
||||||
|
must.ErrorIs(t, err3, lookupErr)
|
||||||
|
must.Nil(t, u3)
|
||||||
|
must.Eq(t, 2, lookupCount)
|
||||||
|
must.Eq(t, 3, clockCount)
|
||||||
|
}
|
|
@ -12,12 +12,21 @@ import (
|
||||||
"github.com/hashicorp/go-multierror"
|
"github.com/hashicorp/go-multierror"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// globalCache is the package-level cache that Lookup consults.
var globalCache = newCache()
|
||||||
|
|
||||||
|
// Lookup returns the user.User entry associated with the given username.
|
||||||
|
//
|
||||||
|
// Values are cached up to 1 hour, or 1 minute for failure cases.
|
||||||
|
func Lookup(username string) (*user.User, error) {
|
||||||
|
return globalCache.GetUser(username)
|
||||||
|
}
|
||||||
|
|
||||||
// lock is used to serialize all user lookup at the process level, because
|
// lock is used to serialize all user lookup at the process level, because
|
||||||
// some NSS implementations are not concurrency safe
|
// some NSS implementations are not concurrency safe
|
||||||
var lock sync.Mutex
|
var lock sync.Mutex
|
||||||
|
|
||||||
// Lookup username while holding a global process lock.
|
// internalLookupUser looks up username while holding a global process lock.
|
||||||
func Lookup(username string) (*user.User, error) {
|
func internalLookupUser(username string) (*user.User, error) {
|
||||||
lock.Lock()
|
lock.Lock()
|
||||||
defer lock.Unlock()
|
defer lock.Unlock()
|
||||||
return user.Lookup(username)
|
return user.Lookup(username)
|
||||||
|
|
|
@ -32,7 +32,7 @@ func NobodyIDs() (uint32, uint32) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
u, err := Lookup("nobody")
|
u, err := internalLookupUser("nobody")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("failed to lookup nobody user: %v", err))
|
panic(fmt.Sprintf("failed to lookup nobody user: %v", err))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue