cache: remove data race in agent cache

In normal operations there is a read/write race related to request
QueryOptions fields. An example race:

    WARNING: DATA RACE
    Read at 0x00c000836950 by goroutine 30:
      github.com/hashicorp/consul/agent/structs.(*ServiceConfigRequest).CacheInfo()
          /go/src/github.com/hashicorp/consul/agent/structs/config_entry.go:506 +0x109
      github.com/hashicorp/consul/agent/cache.(*Cache).getWithIndex()
          /go/src/github.com/hashicorp/consul/agent/cache/cache.go:262 +0x5c
      github.com/hashicorp/consul/agent/cache.(*Cache).notifyBlockingQuery()
          /go/src/github.com/hashicorp/consul/agent/cache/watch.go:89 +0xd7

    Previous write at 0x00c000836950 by goroutine 147:
      github.com/hashicorp/consul/agent/cache-types.(*ResolvedServiceConfig).Fetch()
          /go/src/github.com/hashicorp/consul/agent/cache-types/resolved_service_config.go:31 +0x219
      github.com/hashicorp/consul/agent/cache.(*Cache).fetch.func1()
          /go/src/github.com/hashicorp/consul/agent/cache/cache.go:495 +0x112

This patch does a lightweight copy of the request struct so that the
embedded QueryOptions fields that are mutated during Fetch() are scoped
to just that one RPC.
This commit is contained in:
R.B. Boyer 2019-09-09 13:04:30 -05:00 committed by R.B. Boyer
parent ccd5b241ba
commit c17e417cc8
13 changed files with 52 additions and 0 deletions

View File

@ -25,6 +25,10 @@ func (c *CatalogDatacenters) Fetch(opts cache.FetchOptions, req cache.Request) (
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Allways allow stale - there's no point in hitting leader if the request is
// going to be served from cache and endup arbitrarily stale anyway. This
// allows cached service-discover to automatically read scale across all

View File

@ -25,6 +25,10 @@ func (c *CatalogListServices) Fetch(opts cache.FetchOptions, req cache.Request)
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -26,6 +26,10 @@ func (c *CatalogServices) Fetch(opts cache.FetchOptions, req cache.Request) (cac
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -25,6 +25,10 @@ func (c *ConfigEntries) Fetch(opts cache.FetchOptions, req cache.Request) (cache
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -298,6 +298,10 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Do we already have a cert in the cache?
var existing *structs.IssuedCert
// Really important this is not a pointer type since otherwise we would set it

View File

@ -27,6 +27,10 @@ func (c *ConnectCARoot) Fetch(opts cache.FetchOptions, req cache.Request) (cache
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -26,6 +26,10 @@ func (c *CompiledDiscoveryChain) Fetch(opts cache.FetchOptions, req cache.Reques
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -26,6 +26,10 @@ func (c *HealthServices) Fetch(opts cache.FetchOptions, req cache.Request) (cach
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -25,6 +25,10 @@ func (c *IntentionMatch) Fetch(opts cache.FetchOptions, req cache.Request) (cach
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.MinQueryIndex = opts.MinIndex
reqReal.MaxQueryTime = opts.Timeout

View File

@ -26,6 +26,10 @@ func (c *NodeServices) Fetch(opts cache.FetchOptions, req cache.Request) (cache.
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -26,6 +26,10 @@ func (c *PreparedQuery) Fetch(opts cache.FetchOptions, req cache.Request) (cache
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Allways allow stale - there's no point in hitting leader if the request is
// going to be served from cache and endup arbitrarily stale anyway. This
// allows cached service-discover to automatically read scale across all

View File

@ -26,6 +26,10 @@ func (c *ResolvedServiceConfig) Fetch(opts cache.FetchOptions, req cache.Request
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout

View File

@ -25,6 +25,10 @@ func (c *InternalServiceDump) Fetch(opts cache.FetchOptions, req cache.Request)
"Internal cache failure: request wrong type: %T", req)
}
// Lightweight copy this object so that manipulating QueryOptions doesn't race.
dup := *reqReal
reqReal = &dup
// Set the minimum query index to our current index so we block
reqReal.QueryOptions.MinQueryIndex = opts.MinIndex
reqReal.QueryOptions.MaxQueryTime = opts.Timeout