Merge pull request #3195 from hashicorp/b-node-locking

Non-locked accessors to common Node fields
This commit is contained in:
Alex Dadgar 2017-09-14 14:09:35 -07:00 committed by GitHub
commit 90a3c20017
3 changed files with 27 additions and 19 deletions

View file

@ -8,6 +8,8 @@ BUG FIXES:
* core: *Fix restoration of stopped periodic jobs [GH-3201]
* api: Fix search handling of jobs with more than four hyphens and case were
length could cause lookup error [GH-3203]
* client: Fix lock contention that could cause a node to miss a heartbeat and
be marked as down [GH-3195]
## 0.6.3 (September 11, 2017)

View file

@ -296,7 +296,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
// Start collecting stats
go c.emitStats()
c.logger.Printf("[INFO] client: Node ID %q", c.Node().ID)
c.logger.Printf("[INFO] client: Node ID %q", c.NodeID())
return c, nil
}
@ -369,10 +369,7 @@ func (c *Client) Leave() error {
// Datacenter returns the datacenter for the given client
func (c *Client) Datacenter() string {
c.configLock.RLock()
dc := c.configCopy.Node.Datacenter
c.configLock.RUnlock()
return dc
return c.config.Node.Datacenter
}
// Region returns the region for the given client
@ -380,6 +377,16 @@ func (c *Client) Region() string {
return c.config.Region
}
// NodeID returns the node ID for the given client
func (c *Client) NodeID() string {
return c.config.Node.ID
}
// secretNodeID returns the secret node ID for the given client
func (c *Client) secretNodeID() string {
return c.config.Node.SecretID
}
// RPCMajorVersion returns the structs.ApiMajorVersion supported by the
// client.
func (c *Client) RPCMajorVersion() int {
@ -467,7 +474,7 @@ func (c *Client) Stats() map[string]map[string]string {
defer c.heartbeatLock.Unlock()
stats := map[string]map[string]string{
"client": map[string]string{
"node_id": c.Node().ID,
"node_id": c.NodeID(),
"known_servers": c.servers.all().String(),
"num_allocations": strconv.Itoa(c.NumAllocs()),
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
@ -1159,9 +1166,8 @@ func (c *Client) updateNodeStatus() error {
c.heartbeatLock.Lock()
defer c.heartbeatLock.Unlock()
node := c.Node()
req := structs.NodeUpdateStatusRequest{
NodeID: node.ID,
NodeID: c.NodeID(),
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: c.Region()},
}
@ -1226,7 +1232,7 @@ func (c *Client) updateAllocStatus(alloc *structs.Allocation) {
// send the fields that are updatable by the client.
stripped := new(structs.Allocation)
stripped.ID = alloc.ID
stripped.NodeID = c.Node().ID
stripped.NodeID = c.NodeID()
stripped.TaskStates = alloc.TaskStates
stripped.ClientStatus = alloc.ClientStatus
stripped.ClientDescription = alloc.ClientDescription
@ -1303,10 +1309,9 @@ func (c *Client) watchAllocations(updates chan *allocUpdates) {
// The request and response for getting the map of allocations that should
// be running on the Node to their AllocModifyIndex which is incremented
// when the allocation is updated by the servers.
n := c.Node()
req := structs.NodeSpecificRequest{
NodeID: n.ID,
SecretID: n.SecretID,
NodeID: c.NodeID(),
SecretID: c.secretNodeID(),
QueryOptions: structs.QueryOptions{
Region: c.Region(),
AllowStale: true,
@ -1664,8 +1669,8 @@ func (c *Client) deriveToken(alloc *structs.Allocation, taskNames []string, vcli
// DeriveVaultToken of nomad server can take in a set of tasks and
// creates tokens for all the tasks.
req := &structs.DeriveVaultTokenRequest{
NodeID: c.Node().ID,
SecretID: c.Node().SecretID,
NodeID: c.NodeID(),
SecretID: c.secretNodeID(),
AllocID: alloc.ID,
Tasks: verifiedTasks,
QueryOptions: structs.QueryOptions{
@ -1849,7 +1854,7 @@ DISCOLOOP:
func (c *Client) emitStats() {
// Assign labels directly before emitting stats so the information expected
// is ready
c.baseLabels = []metrics.Label{{Name: "node_id", Value: c.Node().ID}, {Name: "datacenter", Value: c.Node().Datacenter}}
c.baseLabels = []metrics.Label{{Name: "node_id", Value: c.NodeID()}, {Name: "datacenter", Value: c.Datacenter()}}
// Start collecting host stats right away and then keep collecting every
// collection interval
@ -2027,7 +2032,7 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats) {
// emitHostStats pushes host resource usage stats to remote metrics collection sinks
func (c *Client) emitHostStats() {
nodeID := c.Node().ID
nodeID := c.NodeID()
hStats := c.hostStatsCollector.Stats()
c.setGaugeForMemoryStats(nodeID, hStats)
@ -2041,7 +2046,7 @@ func (c *Client) emitHostStats() {
// emitClientMetrics emits lower volume client metrics
func (c *Client) emitClientMetrics() {
nodeID := c.Node().ID
nodeID := c.NodeID()
// Emit allocation metrics
blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0

View file

@ -92,6 +92,9 @@ func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollec
// Collect collects stats related to resource usage of a host
func (h *HostStatsCollector) Collect() error {
h.hostStatsLock.Lock()
defer h.hostStatsLock.Unlock()
hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
// Determine up-time
@ -131,9 +134,7 @@ func (h *HostStatsCollector) Collect() error {
hs.AllocDirStats = h.toDiskStats(usage, nil)
// Update the collected status object.
h.hostStatsLock.Lock()
h.hostStats = hs
h.hostStatsLock.Unlock()
return nil
}