Merge pull request #2973 from hashicorp/b-2087
Fixes panic in tombstone GC.
This commit is contained in:
commit
622b7d3e8f
|
@ -6,49 +6,51 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// TombstoneGC is used to track creation of tombstones
|
||||
// so that they can be garbage collected after their TTL
|
||||
// expires. The tombstones allow queries to provide monotonic
|
||||
// index values within the TTL window. The GC is used to
|
||||
// prevent monotonic growth in storage usage. This is a trade off
|
||||
// between the length of the TTL and the storage overhead.
|
||||
// TombstoneGC is used to track creation of tombstones so that they can be
|
||||
// garbage collected after their TTL expires. The tombstones allow queries to
|
||||
// provide monotonic index values within the TTL window. The GC is used to
|
||||
// prevent monotonic growth in storage usage. This is a trade-off between the
|
||||
// length of the TTL and the storage overhead.
|
||||
//
|
||||
// In practice, this is required to fix the issue of delete
|
||||
// visibility. When data is deleted from the KV store, the
|
||||
// "latest" row can go backwards if the newest row is removed.
|
||||
// The tombstones provide a way to ensure time doesn't move
|
||||
// backwards within some interval.
|
||||
// In practice, this is required to fix the issue of delete visibility. When
|
||||
// data is deleted from the KV store, the "latest" row can go backwards if the
|
||||
// newest row is removed. The tombstones provide a way to ensure time doesn't
|
||||
// move backwards within some interval.
|
||||
//
|
||||
type TombstoneGC struct {
|
||||
ttl time.Duration
|
||||
// ttl sets the TTL for tombstones.
|
||||
ttl time.Duration
|
||||
|
||||
// granularity determines how we bin TTLs into timers.
|
||||
granularity time.Duration
|
||||
|
||||
// enabled controls if we actually set up any timers.
|
||||
enabled bool
|
||||
|
||||
// expires maps the time of expiration to the highest
|
||||
// tombstone value that should be expired.
|
||||
// expires maps the time of expiration to the highest tombstone value
|
||||
// that should be expired.
|
||||
expires map[time.Time]*expireInterval
|
||||
|
||||
// expireCh is used to stream expiration
|
||||
// expireCh is used to stream expiration to the leader for processing.
|
||||
expireCh chan uint64
|
||||
|
||||
// lock is used to ensure safe access to all the fields
|
||||
lock sync.Mutex
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
// expireInterval is used to track the maximum index
|
||||
// to expire in a given interval with a timer
|
||||
// expireInterval is used to track the maximum index to expire in a given
|
||||
// interval with a timer.
|
||||
type expireInterval struct {
|
||||
// maxIndex has the highest tombstone index that should be GC-d.
|
||||
maxIndex uint64
|
||||
timer *time.Timer
|
||||
|
||||
// timer is the timer tracking this bin.
|
||||
timer *time.Timer
|
||||
}
|
||||
|
||||
// NewTombstoneGC is used to construct a new TombstoneGC given
|
||||
// a TTL for tombstones and a tracking granularity. Longer TTLs
|
||||
// ensure correct behavior for more time, but use more storage.
|
||||
// A shorter granularity increases the number of Raft transactions
|
||||
// and reduce how far past the TTL we perform GC.
|
||||
// NewTombstoneGC is used to construct a new TombstoneGC given a TTL for
|
||||
// tombstones and a tracking granularity. Longer TTLs ensure correct behavior
|
||||
// for more time, but use more storage. A shorter granularity increases the
|
||||
// number of Raft transactions and reduces how far past the TTL we perform GC.
|
||||
func NewTombstoneGC(ttl, granularity time.Duration) (*TombstoneGC, error) {
|
||||
// Sanity check the inputs
|
||||
if ttl <= 0 || granularity <= 0 {
|
||||
|
@ -58,15 +60,14 @@ func NewTombstoneGC(ttl, granularity time.Duration) (*TombstoneGC, error) {
|
|||
t := &TombstoneGC{
|
||||
ttl: ttl,
|
||||
granularity: granularity,
|
||||
enabled: false,
|
||||
expires: make(map[time.Time]*expireInterval),
|
||||
expireCh: make(chan uint64, 1),
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// ExpireCh is used to return a channel that streams the next index
|
||||
// that should be expired
|
||||
// ExpireCh is used to return a channel that streams the next index that should
|
||||
// be expired.
|
||||
func (t *TombstoneGC) ExpireCh() <-chan uint64 {
|
||||
return t.expireCh
|
||||
}
|
||||
|
@ -74,8 +75,8 @@ func (t *TombstoneGC) ExpireCh() <-chan uint64 {
|
|||
// SetEnabled is used to control if the tombstone GC is
|
||||
// enabled. Should only be enabled by the leader node.
|
||||
func (t *TombstoneGC) SetEnabled(enabled bool) {
|
||||
t.lock.Lock()
|
||||
defer t.lock.Unlock()
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
if enabled == t.enabled {
|
||||
return
|
||||
}
|
||||
|
@ -97,23 +98,23 @@ func (t *TombstoneGC) SetEnabled(enabled bool) {
|
|||
func (t *TombstoneGC) Hint(index uint64) {
|
||||
expires := t.nextExpires()
|
||||
|
||||
t.lock.Lock()
|
||||
defer t.lock.Unlock()
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
if !t.enabled {
|
||||
return
|
||||
}
|
||||
|
||||
// Check for an existing expiration timer
|
||||
// Check for an existing expiration timer and bump its index if we
|
||||
// find one.
|
||||
exp, ok := t.expires[expires]
|
||||
if ok {
|
||||
// Increment the highest index to be expired at that time
|
||||
if index > exp.maxIndex {
|
||||
exp.maxIndex = index
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Create new expiration time
|
||||
// Create a new expiration timer.
|
||||
t.expires[expires] = &expireInterval{
|
||||
maxIndex: index,
|
||||
timer: time.AfterFunc(expires.Sub(time.Now()), func() {
|
||||
|
@ -122,14 +123,17 @@ func (t *TombstoneGC) Hint(index uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
// PendingExpiration is used to check if any expirations are pending
|
||||
// PendingExpiration is used to check if any expirations are pending.
|
||||
func (t *TombstoneGC) PendingExpiration() bool {
|
||||
t.lock.Lock()
|
||||
defer t.lock.Unlock()
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
|
||||
return len(t.expires) > 0
|
||||
}
|
||||
|
||||
// nextExpires is used to calculate the next expiration time
|
||||
// nextExpires is used to calculate the next expiration time, based on the
|
||||
// granularity that is set. This allows us to bin expirations and avoid a ton
|
||||
// of timers.
|
||||
func (t *TombstoneGC) nextExpires() time.Time {
|
||||
expires := time.Now().Add(t.ttl)
|
||||
remain := expires.UnixNano() % int64(t.granularity)
|
||||
|
@ -137,14 +141,19 @@ func (t *TombstoneGC) nextExpires() time.Time {
|
|||
return adj
|
||||
}
|
||||
|
||||
// expireTime is used to expire the entries at the given time
|
||||
// expireTime is used to expire the entries at the given time.
|
||||
func (t *TombstoneGC) expireTime(expires time.Time) {
|
||||
// Get the maximum index and clear the entry
|
||||
t.lock.Lock()
|
||||
exp := t.expires[expires]
|
||||
delete(t.expires, expires)
|
||||
t.lock.Unlock()
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
|
||||
// Notify the expires channel
|
||||
// Get the maximum index and clear the entry. It's possible that the GC
|
||||
// has been shut down while this timer fired and got blocked on the lock,
|
||||
// so if there's nothing in the map for us we just exit out since there
|
||||
// is no work to do.
|
||||
exp, ok := t.expires[expires]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
delete(t.expires, expires)
|
||||
t.expireCh <- exp.maxIndex
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ func TestTombstoneGC(t *testing.T) {
|
|||
gran := 5 * time.Millisecond
|
||||
gc, err := NewTombstoneGC(ttl, gran)
|
||||
if err != nil {
|
||||
t.Fatalf("should fail")
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
gc.SetEnabled(true)
|
||||
|
||||
|
@ -81,7 +81,7 @@ func TestTombstoneGC_Expire(t *testing.T) {
|
|||
gran := 5 * time.Millisecond
|
||||
gc, err := NewTombstoneGC(ttl, gran)
|
||||
if err != nil {
|
||||
t.Fatalf("should fail")
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
gc.SetEnabled(true)
|
||||
|
||||
|
|
Loading…
Reference in a new issue