users: create cache for user lookups (#16100)
* users: create cache for user lookups. This PR introduces a global cache for OS user lookups. This should relieve pressure on the OS domain/directory lookups, which are queried more often now that the Task API exists. Hits are cached for 1 hour, and misses are cached for 1 minute. These values are fairly arbitrary; we can tweak them if there is any reason to. Closes #16010. * users: delete expired negative entry from cache.
This commit is contained in:
parent
0d37892024
commit
af28ac1610
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
users: Added a cache for OS user lookups
|
||||
```
|
|
@ -0,0 +1,85 @@
|
|||
package users
|
||||
|
||||
import (
|
||||
"os/user"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/lib/lang"
|
||||
"oss.indeed.com/go/libtime"
|
||||
)
|
||||
|
||||
const (
|
||||
cacheTTL = 1 * time.Hour
|
||||
failureTTL = 1 * time.Minute
|
||||
)
|
||||
|
||||
type entry[T any] lang.Pair[T, time.Time]
|
||||
|
||||
func (e *entry[T]) expired(now time.Time, ttl time.Duration) bool {
|
||||
return now.After(e.Second.Add(ttl))
|
||||
}
|
||||
|
||||
type (
|
||||
userCache map[string]*entry[*user.User]
|
||||
userFailureCache map[string]*entry[error]
|
||||
)
|
||||
|
||||
type lookupUserFunc func(string) (*user.User, error)
|
||||
|
||||
type cache struct {
|
||||
clock libtime.Clock
|
||||
lookupUser lookupUserFunc
|
||||
|
||||
lock sync.Mutex
|
||||
users userCache
|
||||
userFailures userFailureCache
|
||||
}
|
||||
|
||||
func newCache() *cache {
|
||||
return &cache{
|
||||
clock: libtime.SystemClock(),
|
||||
lookupUser: internalLookupUser,
|
||||
users: make(userCache),
|
||||
userFailures: make(userFailureCache),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cache) GetUser(username string) (*user.User, error) {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
// record this moment as "now" for further cache operations
|
||||
now := c.clock.Now()
|
||||
|
||||
// first check if the user is in the cache and the entry we have
|
||||
// is not yet expired
|
||||
usr, exists := c.users[username]
|
||||
if exists && !usr.expired(now, cacheTTL) {
|
||||
return usr.First, nil
|
||||
}
|
||||
|
||||
// next check if there was a recent failure already, so we
|
||||
// avoid spamming the OS with dead user lookups
|
||||
failure, exists2 := c.userFailures[username]
|
||||
if exists2 {
|
||||
if !failure.expired(now, failureTTL) {
|
||||
return nil, failure.First
|
||||
}
|
||||
// may as well cleanup expired case
|
||||
delete(c.userFailures, username)
|
||||
}
|
||||
|
||||
// need to perform an OS lookup
|
||||
u, err := c.lookupUser(username)
|
||||
|
||||
// lookup was a failure, populate the failure cache
|
||||
if err != nil {
|
||||
c.userFailures[username] = &entry[error]{err, now}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// lookup was a success, populate the user cache
|
||||
c.users[username] = &entry[*user.User]{u, now}
|
||||
return u, nil
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
//go:build unix
|
||||
|
||||
package users
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os/user"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/ci"
|
||||
"github.com/shoenig/test/must"
|
||||
"oss.indeed.com/go/libtime/libtimetest"
|
||||
)
|
||||
|
||||
func TestCache_real_hit(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
c := newCache()
|
||||
|
||||
// fresh lookup
|
||||
u, err := c.GetUser("nobody")
|
||||
must.NoError(t, err)
|
||||
must.NotNil(t, u)
|
||||
|
||||
// hit again, cached value
|
||||
u2, err2 := c.GetUser("nobody")
|
||||
must.NoError(t, err2)
|
||||
must.NotNil(t, u2)
|
||||
must.True(t, u == u2) // compare pointers
|
||||
}
|
||||
|
||||
func TestCache_real_miss(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
c := newCache()
|
||||
|
||||
// fresh lookup
|
||||
u, err := c.GetUser("doesnotexist")
|
||||
must.Error(t, err)
|
||||
must.Nil(t, u)
|
||||
|
||||
// hit again, cached value
|
||||
u2, err2 := c.GetUser("doesnotexist")
|
||||
must.Error(t, err2)
|
||||
must.Nil(t, u2)
|
||||
must.True(t, err == err2) // compare pointers
|
||||
}
|
||||
|
||||
func TestCache_mock_hit(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
c := newCache()
|
||||
|
||||
lookupCount := 0
|
||||
|
||||
// hijack the underlying lookup function with our own mock
|
||||
c.lookupUser = func(username string) (*user.User, error) {
|
||||
lookupCount++
|
||||
return &user.User{Name: username}, nil
|
||||
}
|
||||
|
||||
// hijack the clock with our own mock
|
||||
t0 := time.Now()
|
||||
clockCount := 0
|
||||
c.clock = libtimetest.NewClockMock(t).NowMock.Set(func() time.Time {
|
||||
clockCount++
|
||||
switch clockCount {
|
||||
case 1:
|
||||
return t0
|
||||
case 2:
|
||||
return t0.Add(59 * time.Minute)
|
||||
default:
|
||||
return t0.Add(61 * time.Minute)
|
||||
}
|
||||
})
|
||||
|
||||
const username = "armon"
|
||||
|
||||
// initial lookup
|
||||
u, err := c.GetUser(username)
|
||||
must.NoError(t, err)
|
||||
must.Eq(t, "armon", u.Name)
|
||||
must.Eq(t, 1, lookupCount)
|
||||
must.Eq(t, 1, clockCount)
|
||||
|
||||
// second lookup, 59 minutes after initil lookup
|
||||
u2, err2 := c.GetUser(username)
|
||||
must.NoError(t, err2)
|
||||
must.Eq(t, "armon", u2.Name)
|
||||
must.Eq(t, 1, lookupCount) // was in cache
|
||||
must.Eq(t, 2, clockCount)
|
||||
|
||||
// third lookup, 61 minutes after initial lookup (expired)
|
||||
u3, err3 := c.GetUser(username)
|
||||
must.NoError(t, err3)
|
||||
must.Eq(t, "armon", u3.Name)
|
||||
must.Eq(t, 2, lookupCount)
|
||||
must.Eq(t, 3, clockCount)
|
||||
}
|
||||
|
||||
func TestCache_mock_miss(t *testing.T) {
|
||||
ci.Parallel(t)
|
||||
|
||||
c := newCache()
|
||||
|
||||
lookupCount := 0
|
||||
lookupErr := errors.New("lookup error")
|
||||
|
||||
// hijack the underlying lookup function with our own mock
|
||||
c.lookupUser = func(username string) (*user.User, error) {
|
||||
lookupCount++
|
||||
return nil, lookupErr
|
||||
}
|
||||
|
||||
// hijack the clock with our own mock
|
||||
t0 := time.Now()
|
||||
clockCount := 0
|
||||
c.clock = libtimetest.NewClockMock(t).NowMock.Set(func() time.Time {
|
||||
clockCount++
|
||||
switch clockCount {
|
||||
case 1:
|
||||
return t0
|
||||
case 2:
|
||||
return t0.Add(59 * time.Second)
|
||||
default:
|
||||
return t0.Add(61 * time.Second)
|
||||
}
|
||||
})
|
||||
|
||||
const username = "armon"
|
||||
|
||||
// initial lookup
|
||||
u, err := c.GetUser(username)
|
||||
must.ErrorIs(t, err, lookupErr)
|
||||
must.Nil(t, u)
|
||||
must.Eq(t, 1, lookupCount)
|
||||
must.Eq(t, 1, clockCount)
|
||||
|
||||
// second lookup, 59 seconds after initial (still in cache)
|
||||
u2, err2 := c.GetUser(username)
|
||||
must.ErrorIs(t, err2, lookupErr)
|
||||
must.Nil(t, u2)
|
||||
must.Eq(t, 1, lookupCount) // in cache
|
||||
must.Eq(t, 2, clockCount)
|
||||
|
||||
// third lookup, 61 seconds after initial (expired)
|
||||
u3, err3 := c.GetUser(username)
|
||||
must.ErrorIs(t, err3, lookupErr)
|
||||
must.Nil(t, u3)
|
||||
must.Eq(t, 2, lookupCount)
|
||||
must.Eq(t, 3, clockCount)
|
||||
}
|
|
@ -12,12 +12,21 @@ import (
|
|||
"github.com/hashicorp/go-multierror"
|
||||
)
|
||||
|
||||
var globalCache = newCache()
|
||||
|
||||
// Lookup returns the user.User entry associated with the given username.
|
||||
//
|
||||
// Values are cached up to 1 hour, or 1 minute for failure cases.
|
||||
func Lookup(username string) (*user.User, error) {
|
||||
return globalCache.GetUser(username)
|
||||
}
|
||||
|
||||
// lock is used to serialize all user lookup at the process level, because
|
||||
// some NSS implementations are not concurrency safe
|
||||
var lock sync.Mutex
|
||||
|
||||
// Lookup username while holding a global process lock.
|
||||
func Lookup(username string) (*user.User, error) {
|
||||
// internalLookupUser username while holding a global process lock.
|
||||
func internalLookupUser(username string) (*user.User, error) {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
return user.Lookup(username)
|
||||
|
|
|
@ -32,7 +32,7 @@ func NobodyIDs() (uint32, uint32) {
|
|||
}
|
||||
|
||||
func init() {
|
||||
u, err := Lookup("nobody")
|
||||
u, err := internalLookupUser("nobody")
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("failed to lookup nobody user: %v", err))
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue