Merge branch 'master' into b-reserved-scoring

Michael Schurter 2020-04-30 14:48:14 -07:00 committed by GitHub
commit c901d0e7dd
154 changed files with 5992 additions and 1727 deletions

View File

@ -1,9 +1,30 @@
## 0.11.1 (Unreleased)
## 0.11.2 (Unreleased)
FEATURES:
* **Task dependencies UI**: task lifecycle charts and details
BUG FIXES:
* api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
* core: job scale status endpoint was returning incorrect counts [[GH-7789](https://github.com/hashicorp/nomad/issues/7789)]
* core: Fixed a bug where scores for allocations were biased toward nodes with resource reservations [[GH-7730](https://github.com/hashicorp/nomad/issues/7730)]
* jobspec: autoscaling policy block should return a parsing error when multiple `policy` blocks are provided [[GH-7716](https://github.com/hashicorp/nomad/issues/7716)]
* ui: Fixed a bug where exec popup had incorrect URL for jobs where name ≠ id [[GH-7814](https://github.com/hashicorp/nomad/issues/7814)]
## 0.11.1 (April 22, 2020)
BUG FIXES:
* core: Fixed a bug that only ran a task `shutdown_delay` if the task had a registered service [[GH-7663](https://github.com/hashicorp/nomad/issues/7663)]
* core: Fixed a bug where scores for allocations were biased toward nodes with resource reservations [[GH-7730](https://github.com/hashicorp/nomad/issues/7730)]
* core: Fixed a panic when garbage collecting a job with allocations spanning multiple versions [[GH-7758](https://github.com/hashicorp/nomad/issues/7758)]
* agent: Fixed a bug where http server logs did not honor json log formatting, and reduced http server logging level to Trace [[GH-7748](https://github.com/hashicorp/nomad/issues/7748)]
* connect: Fixed bugs where some connect parameters would be ignored [[GH-7690](https://github.com/hashicorp/nomad/pull/7690)] [[GH-7684](https://github.com/hashicorp/nomad/pull/7684)]
* connect: Fixed a bug where an absent connect sidecar_service stanza would trigger panic [[GH-7683](https://github.com/hashicorp/nomad/pull/7683)]
* connect: Fixed a bug where some connect proxy fields would be dropped from 'job inspect' output [[GH-7397](https://github.com/hashicorp/nomad/issues/7397)]
* csi: Fixed a panic when claiming a volume for an allocation that was already garbage collected [[GH-7760](https://github.com/hashicorp/nomad/issues/7760)]
* csi: Fixed a bug where CSI plugins with `NODE_STAGE_VOLUME` capabilities were receiving an incorrect volume ID [[GH-7754](https://github.com/hashicorp/nomad/issues/7754)]
* driver/docker: Fixed a bug where retrying failed docker creation may in rare cases trigger a panic [[GH-7749](https://github.com/hashicorp/nomad/issues/7749)]
* scheduler: Fixed a bug in managing allocated devices for a job allocation in in-place update scenarios [[GH-7762](https://github.com/hashicorp/nomad/issues/7762)]
* vault: Upgrade http2 library to fix Vault API calls that fail with `http2: no cached connection was available` [[GH-7673](https://github.com/hashicorp/nomad/issues/7673)]
## 0.11.0 (April 8, 2020)
@ -62,7 +83,7 @@ BUG FIXES:
SECURITY:
* server: Override content-type headers for unsafe content. CVE-TBD [[GH-7468](https://github.com/hashicorp/nomad/issues/7468)]
* server: Override content-type headers for unsafe content. CVE-2020-10944 [[GH-7468](https://github.com/hashicorp/nomad/issues/7468)]
## 0.10.4 (February 19, 2020)

View File

@ -175,11 +175,7 @@ deps: ## Install build and development dependencies
GO111MODULE=on go get -u gotest.tools/gotestsum
GO111MODULE=on go get -u github.com/fatih/hclfmt
GO111MODULE=on go get -u github.com/golang/protobuf/protoc-gen-go@v1.3.4
# The tag here must correspond to the codec version nomad uses, e.g. v1.1.5.
# Though, v1.1.5 codecgen has a bug in code generator, so using a specific sha
# here instead.
GO111MODULE=on go get -u github.com/hashicorp/go-msgpack/codec/codecgen@f51b5189210768cf0d476580cf287620374d4f02
GO111MODULE=on go get -u github.com/hashicorp/go-msgpack/codec/codecgen@v1.1.5
.PHONY: lint-deps
lint-deps: ## Install linter dependencies
@ -200,11 +196,15 @@ check: ## Lint the source code
@golangci-lint run -j 1
@echo "==> Spell checking website..."
@misspell -error -source=text website/source/
@misspell -error -source=text website/pages/
@echo "==> Check proto files are in-sync..."
@$(MAKE) proto
@if (git status | grep -q .pb.go); then echo the following proto files are out of sync; git status |grep .pb.go; exit 1; fi
@if (git status -s | grep -q .pb.go); then echo the following proto files are out of sync; git status -s | grep .pb.go; exit 1; fi
@echo "==> Check format of jobspecs and HCL files..."
@$(MAKE) hclfmt
@if (git status -s | grep -q -e '\.hcl$$' -e '\.nomad$$'); then echo the following HCL files are out of sync; git status -s | grep -e '\.hcl$$' -e '\.nomad$$'; exit 1; fi
@echo "==> Check API package is isolated from rest"
@if go list --test -f '{{ join .Deps "\n" }}' ./api | grep github.com/hashicorp/nomad/ | grep -v -e /vendor/ -e /nomad/api/ -e nomad/api.test; then echo " /api package depends on the ^^ above internal nomad packages. Remove such dependency"; exit 1; fi
@ -229,7 +229,7 @@ generate-structs: ## Update generated code
.PHONY: proto
proto:
@echo "--> Generating proto bindings..."
@for file in $$(git ls-files "*.proto" | grep -v "vendor\/.*.proto"); do \
@for file in $$(git ls-files "*.proto" | grep -E -v -- "vendor\/.*.proto|demo\/.*.proto"); do \
protoc -I . -I ../../.. --go_out=plugins=grpc:. $$file; \
done

View File

@ -10,7 +10,7 @@ import (
const (
// The following levels are the only valid values for the `policy = "read"` stanza.
// When policies are merged together, the most privilege is granted, except for deny
// which always takes precedence and supercedes.
// which always takes precedence and supersedes.
PolicyDeny = "deny"
PolicyRead = "read"
PolicyList = "list"
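
As an illustration of the merge rule described in the comment above, here is a minimal standalone sketch; it is not the package's actual merge code, and the `PolicyWrite` level plus the relative ranking of `list` and `read` are assumptions.

```go
package main

import "fmt"

const (
	PolicyDeny  = "deny"
	PolicyRead  = "read"
	PolicyList  = "list"
	PolicyWrite = "write" // assumed to exist alongside the levels shown above
)

// rank orders the non-deny levels from least to most privileged (assumed order).
var rank = map[string]int{PolicyList: 1, PolicyRead: 2, PolicyWrite: 3}

// mergeLevel applies the documented precedence: deny always wins,
// otherwise the most privileged of the two levels is kept.
func mergeLevel(a, b string) string {
	if a == PolicyDeny || b == PolicyDeny {
		return PolicyDeny
	}
	if rank[a] >= rank[b] {
		return a
	}
	return b
}

func main() {
	fmt.Println(mergeLevel(PolicyRead, PolicyWrite)) // write
	fmt.Println(mergeLevel(PolicyWrite, PolicyDeny)) // deny
}
```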

View File

@ -45,7 +45,7 @@ type Jobs struct {
client *Client
}
// JobsParseRequest is used for arguments of the /vi/jobs/parse endpoint
// JobsParseRequest is used for arguments of the /v1/jobs/parse endpoint
type JobsParseRequest struct {
// JobHCL is an hcl jobspec
JobHCL string
@ -60,7 +60,7 @@ func (c *Client) Jobs() *Jobs {
return &Jobs{client: c}
}
// Parse is used to convert the HCL representation of a Job to JSON server side.
// ParseHCL is used to convert the HCL representation of a Job to JSON server side.
// To parse the HCL client side see package github.com/hashicorp/nomad/jobspec
func (j *Jobs) ParseHCL(jobHCL string, canonicalize bool) (*Job, error) {
var job Job

View File

@ -125,7 +125,7 @@ func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents
a.ar.allocBroadcaster.Send(calloc)
}
// initRunnerHooks intializes the runners hooks.
// initRunnerHooks initializes the runners hooks.
func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
hookLogger := ar.logger.Named("runner_hook")

View File

@ -104,6 +104,7 @@ func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error)
req := &structs.CSIVolumeClaimRequest{
VolumeID: pair.request.Source,
AllocationID: c.alloc.ID,
NodeID: c.alloc.NodeID,
Claim: claimType,
}
req.Region = c.alloc.Job.Region

View File

@ -143,12 +143,12 @@ func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolum
csiReq := req.ToCSIRequest()
// Submit the request for a volume to the CSI Plugin.
ctx, cancelFn := context.WithTimeout(context.Background(), 30*time.Second)
ctx, cancelFn := c.requestContext()
defer cancelFn()
// CSI ControllerUnpublishVolume errors for timeout, codes.Unavailable and
// codes.ResourceExhausted are retried; all other errors are fatal.
_, err = plugin.ControllerUnpublishVolume(ctx, csiReq,
grpc_retry.WithPerRetryTimeout(10*time.Second),
grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout),
grpc_retry.WithMax(3),
grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
if err != nil {

View File

@ -21,14 +21,14 @@ import (
const (
// AwsMetadataTimeout is the timeout used when contacting the AWS metadata
// service
// services.
AwsMetadataTimeout = 2 * time.Second
)
// map of instance type to approximate speed, in Mbits/s
// Estimates from http://stackoverflow.com/a/35806587
// This data is meant for a loose approximation
var ec2InstanceSpeedMap = map[*regexp.Regexp]int{
var ec2NetSpeedTable = map[*regexp.Regexp]int{
regexp.MustCompile("t2.nano"): 30,
regexp.MustCompile("t2.micro"): 70,
regexp.MustCompile("t2.small"): 125,
@ -46,6 +46,353 @@ var ec2InstanceSpeedMap = map[*regexp.Regexp]int{
regexp.MustCompile(`.*\.32xlarge`): 10000,
}
type ec2Specs struct {
mhz float64
cores int
model string
}
func (e ec2Specs) ticks() int {
return int(e.mhz) * e.cores
}
func specs(ghz float64, vCores int, model string) ec2Specs {
return ec2Specs{
mhz: ghz * 1000,
cores: vCores,
model: model,
}
}
// Map of instance type to documented CPU speed.
//
// Most values are taken from https://aws.amazon.com/ec2/instance-types/.
// Values for a1 & m6g (Graviton) are taken from https://en.wikichip.org/wiki/annapurna_labs/alpine/al73400
// Values for inf1 are taken from launching an inf1.xlarge and looking at /proc/cpuinfo
//
// In a few cases, AWS has upgraded the generation of CPU while keeping the same
// instance designation. Since it is possible to launch on the lower performance
// CPU, that one is used as the spec for the instance type.
//
// This table is provided as a best-effort to determine the number of CPU ticks
// available for use by Nomad tasks. If an instance type is missing, the fallback
// behavior is to use values from go-psutil, which is only capable of reading
// "current" CPU MHz.
var ec2ProcSpeedTable = map[string]ec2Specs{
// -- General Purpose --
// a1
"a1.medium": specs(2.3, 1, "AWS Graviton"),
"a1.large": specs(2.3, 2, "AWS Graviton"),
"a1.xlarge": specs(2.3, 4, "AWS Graviton"),
"a1.2xlarge": specs(2.3, 8, "AWS Graviton"),
"a1.4xlarge": specs(2.3, 16, "AWS Graviton"),
"a1.metal": specs(2.3, 16, "AWS Graviton"),
// t3
"t3.nano": specs(2.5, 2, "2.5 GHz Intel Scalable"),
"t3.micro": specs(2.5, 2, "2.5 GHz Intel Scalable"),
"t3.small": specs(2.5, 2, "2.5 GHz Intel Scalable"),
"t3.medium": specs(2.5, 2, "2.5 GHz Intel Scalable"),
"t3.large": specs(2.5, 2, "2.5 GHz Intel Scalable"),
"t3.xlarge": specs(2.5, 4, "2.5 GHz Intel Scalable"),
"t3.2xlarge": specs(2.5, 8, "2.5 GHz Intel Scalable"),
// t3a
"t3a.nano": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"t3a.micro": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"t3a.small": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"t3a.medium": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"t3a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"t3a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
"t3a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
// t2
"t2.nano": specs(3.3, 1, "3.3 GHz Intel Scalable"),
"t2.micro": specs(3.3, 1, "3.3 GHz Intel Scalable"),
"t2.small": specs(3.3, 1, "3.3 GHz Intel Scalable"),
"t2.medium": specs(3.3, 2, "3.3 GHz Intel Scalable"),
"t2.large": specs(3.0, 2, "3.0 GHz Intel Scalable"),
"t2.xlarge": specs(3.0, 4, "3.0 GHz Intel Scalable"),
"t2.2xlarge": specs(3.0, 8, "3.0 GHz Intel Scalable"),
// m6g
"m6g.medium": specs(2.3, 1, "AWS Graviton2 Neoverse"),
"m6g.large": specs(2.3, 2, "AWS Graviton2 Neoverse"),
"m6g.xlarge": specs(2.3, 4, "AWS Graviton2 Neoverse"),
"m6g.2xlarge": specs(2.3, 8, "AWS Graviton2 Neoverse"),
"m6g.4xlarge": specs(2.3, 16, "AWS Graviton2 Neoverse"),
"m6g.8xlarge": specs(2.3, 32, "AWS Graviton2 Neoverse"),
"m6g.12xlarge": specs(2.3, 48, "AWS Graviton2 Neoverse"),
"m6g.16xlarge": specs(2.3, 64, "AWS Graviton2 Neoverse"),
// m5, m5d
"m5.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"),
"m5.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"),
"m5.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"),
"m5.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"),
"m5.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"),
"m5.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"),
"m5.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"),
"m5.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
"m5.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
"m5d.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"),
"m5d.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"),
"m5d.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"),
"m5d.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"),
"m5d.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"),
"m5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"),
"m5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"),
"m5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
"m5d.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
// m5a, m5ad
"m5a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"m5a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
"m5a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
"m5a.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
"m5a.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
"m5a.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
"m5a.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
"m5a.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
"m5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
// m5n, m5dn
"m5n.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
"m5n.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
"m5n.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
"m5n.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
"m5n.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
"m5n.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
"m5n.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
"m5n.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
"m5dn.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
"m5dn.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
"m5dn.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
"m5dn.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
"m5dn.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
"m5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
"m5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
"m5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
// m4
"m4.large": specs(2.3, 2, "2.3 GHz Intel Xeon® E5-2686 v4"),
"m4.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon® E5-2686 v4"),
"m4.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon® E5-2686 v4"),
"m4.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon® E5-2686 v4"),
"m4.10xlarge": specs(2.3, 40, "2.3 GHz Intel Xeon® E5-2686 v4"),
"m4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon® E5-2686 v4"),
// -- Compute Optimized --
// c5, c5d
"c5.large": specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"),
"c5.xlarge": specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"),
"c5.2xlarge": specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 8000"),
"c5.4xlarge": specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"),
"c5.9xlarge": specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"),
"c5.12xlarge": specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"),
"c5.18xlarge": specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"),
"c5.24xlarge": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
"c5.metal": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
"c5d.large": specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"),
"c5d.xlarge": specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"),
"c5d.2xlarge": specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 8000"),
"c5d.4xlarge": specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"),
"c5d.9xlarge": specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"),
"c5d.12xlarge": specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"),
"c5d.18xlarge": specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"),
"c5d.24xlarge": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
"c5d.metal": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
// c5n
"c5n.large": specs(3.0, 2, "3.0 GHz Intel Xeon Platinum"),
"c5n.xlarge": specs(3.0, 4, "3.0 GHz Intel Xeon Platinum"),
"c5n.2xlarge": specs(3.0, 8, "3.0 GHz Intel Xeon Platinum"),
"c5n.4xlarge": specs(3.0, 16, "3.0 GHz Intel Xeon Platinum"),
"c5n.9xlarge": specs(3.0, 36, "3.0 GHz Intel Xeon Platinum"),
"c5n.18xlarge": specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"),
"c5n.metal": specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"),
// c4
"c4.large": specs(2.9, 2, "2.9 GHz Intel Xeon E5-2666 v3"),
"c4.xlarge": specs(2.9, 4, "2.9 GHz Intel Xeon E5-2666 v3"),
"c4.2xlarge": specs(2.9, 8, "2.9 GHz Intel Xeon E5-2666 v3"),
"c4.4xlarge": specs(2.9, 16, "2.9 GHz Intel Xeon E5-2666 v3"),
"c4.8xlarge": specs(2.9, 36, "2.9 GHz Intel Xeon E5-2666 v3"),
// -- Memory Optimized --
// r5, r5d
"r5.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
"r5.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
"r5d.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
// r5a, r5ad
"r5a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"r5a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
"r5a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
"r5a.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
"r5a.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
"r5a.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
"r5a.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
"r5a.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
"r5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
// r5n
"r5n.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
"r5n.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
"r5n.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
"r5n.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
"r5n.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
"r5n.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
"r5n.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
"r5n.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
"r5dn.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
"r5dn.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
"r5dn.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
"r5dn.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
"r5dn.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
"r5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
"r5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
"r5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
// r4
"r4.large": specs(2.3, 2, "2.3 GHz Intel Xeon E5-2686 v4"),
"r4.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
"r4.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"),
"r4.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"),
"r4.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
"r4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),
// x1e
"x1e.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1e.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1e.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1e.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1e.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1e.32xlarge": specs(2.3, 128, "2.3 GHz Intel Xeon E7-8880 v3"),
// x1
"x1.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),
"x1.32xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),
// high-memory
"u-6tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
"u-9tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
"u-12tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
"u-18tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"),
"u-24tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"),
// z1d
"z1d.large": specs(4.0, 2, "4.0 GHz Intel Xeon Scalable"),
"z1d.xlarge": specs(4.0, 4, "4.0 GHz Intel Xeon Scalable"),
"z1d.2xlarge": specs(4.0, 8, "4.0 GHz Intel Xeon Scalable"),
"z1d.3xlarge": specs(4.0, 12, "4.0 GHz Intel Xeon Scalable"),
"z1d.6xlarge": specs(4.0, 24, "4.0 GHz Intel Xeon Scalable"),
"z1d.12xlarge": specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"),
"z1d.metal": specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"),
// -- Accelerated Computing --
// p3, p3dn
"p3.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"),
"p3.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
"p3.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),
"p3dn.24xlarge": specs(2.5, 96, "2.5 GHz Intel Xeon P-8175M"),
// p2
"p2.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
"p2.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
"p2.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),
// inf1
"inf1.xlarge": specs(3.0, 4, "3.0 GHz Intel Xeon Platinum 8275CL"),
"inf1.2xlarge": specs(3.0, 8, "3.0 GHz Intel Xeon Platinum 8275CL"),
"inf1.6xlarge": specs(3.0, 24, "3.0 GHz Intel Xeon Platinum 8275CL"),
"inf1.24xlarge": specs(3.0, 96, "3.0 GHz Intel Xeon Platinum 8275CL"),
// g4dn
"g4dn.xlarge": specs(2.5, 4, "2.5 GHz Cascade Lake 24C"),
"g4dn.2xlarge": specs(2.5, 8, "2.5 GHz Cascade Lake 24C"),
"g4dn.4xlarge": specs(2.5, 16, "2.5 GHz Cascade Lake 24C"),
"g4dn.8xlarge": specs(2.5, 32, "2.5 GHz Cascade Lake 24C"),
"g4dn.16xlarge": specs(2.5, 64, "2.5 GHz Cascade Lake 24C"),
"g4dn.12xlarge": specs(2.5, 48, "2.5 GHz Cascade Lake 24C"),
"g4dn.metal": specs(2.5, 96, "2.5 GHz Cascade Lake 24C"),
// g3
"g3s.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
"g3s.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"),
"g3s.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
"g3s.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),
// f1
"f1.2xlarge": specs(2.3, 8, "Intel Xeon E5-2686 v4"),
"f1.4xlarge": specs(2.3, 16, "Intel Xeon E5-2686 v4"),
"f1.16xlarge": specs(2.3, 64, "Intel Xeon E5-2686 v4"),
// -- Storage Optimized --
// i3
"i3.large": specs(2.3, 2, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"),
"i3.metal": specs(2.3, 72, "2.3 GHz Intel Xeon E5 2686 v4"),
// i3en
"i3en.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
"i3en.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
"i3en.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
"i3en.3xlarge": specs(3.1, 12, "3.1 GHz Intel Xeon Scalable"),
"i3en.6xlarge": specs(3.1, 24, "3.1 GHz Intel Xeon Scalable"),
"i3en.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
"i3en.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
"i3en.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
// d2
"d2.xlarge": specs(2.4, 4, "2.4 GHz Intel Xeon E5-2676 v3"),
"d2.2xlarge": specs(2.4, 8, "2.4 GHz Intel Xeon E5-2676 v3"),
"d2.4xlarge": specs(2.4, 16, "2.4 GHz Intel Xeon E5-2676 v3"),
"d2.8xlarge": specs(2.4, 36, "2.4 GHz Intel Xeon E5-2676 v3"),
// h1
"h1.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"),
"h1.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"),
"h1.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"),
"h1.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"),
}
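
As a worked check of the `ticks()` computation above, using the `t3a.2xlarge` entry from the table (2.5 GHz, 8 vCPUs); this is a standalone sketch, not part of the diff.

```go
package main

import "fmt"

type ec2Specs struct {
	mhz   float64
	cores int
}

func (e ec2Specs) ticks() int { return int(e.mhz) * e.cores }

func main() {
	// t3a.2xlarge as listed above: specs(2.5, 8, ...) => mhz = 2500, cores = 8
	t3a := ec2Specs{mhz: 2.5 * 1000, cores: 8}
	fmt.Println(t3a.ticks()) // 20000, the cpu.totalcompute value asserted in the tests below
}
```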
// EnvAWSFingerprint is used to fingerprint AWS metadata
type EnvAWSFingerprint struct {
StaticFingerprinter
@ -128,25 +475,48 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F
response.AddAttribute(key, v)
}
// newNetwork is populated and added to the Nodes resources
var newNetwork *structs.NetworkResource
// accumulate resource information, then assign to response
var resources *structs.Resources
var nodeResources *structs.NodeResources
// copy over network specific information
if val, ok := response.Attributes["unique.platform.aws.local-ipv4"]; ok && val != "" {
response.AddAttribute("unique.network.ip-address", val)
newNetwork = &structs.NetworkResource{
Device: "eth0",
IP: val,
CIDR: val + "/32",
MBits: f.throughput(request, ec2meta, val),
}
response.NodeResources = &structs.NodeResources{
Networks: []*structs.NetworkResource{newNetwork},
nodeResources = new(structs.NodeResources)
nodeResources.Networks = []*structs.NetworkResource{
{
Device: "eth0",
IP: val,
CIDR: val + "/32",
MBits: f.throughput(request, ec2meta, val),
},
}
}
// copy over CPU speed information
if specs := f.lookupCPU(ec2meta); specs != nil {
response.AddAttribute("cpu.modelname", specs.model)
response.AddAttribute("cpu.frequency", fmt.Sprintf("%.0f", specs.mhz))
response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.cores))
f.logger.Debug("lookup ec2 cpu", "cores", specs.cores, "MHz", log.Fmt("%.0f", specs.mhz), "model", specs.model)
if ticks := specs.ticks(); request.Config.CpuCompute <= 0 {
response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))
f.logger.Debug("setting ec2 cpu ticks", "ticks", ticks)
resources = new(structs.Resources)
resources.CPU = ticks
if nodeResources == nil {
nodeResources = new(structs.NodeResources)
}
nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)}
}
} else {
f.logger.Warn("failed to find the cpu specification for this instance type")
}
response.Resources = resources
response.NodeResources = nodeResources
// populate Links
response.AddLink("aws.ec2", fmt.Sprintf("%s.%s",
response.Attributes["platform.aws.placement.availability-zone"],
@ -156,6 +526,28 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F
return nil
}
func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (string, error) {
response, err := ec2meta.GetMetadata("instance-type")
if err != nil {
return "", err
}
return strings.TrimSpace(response), nil
}
func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *ec2Specs {
instanceType, err := f.instanceType(ec2meta)
if err != nil {
f.logger.Warn("failed to read EC2 metadata instance-type", "error", err)
return nil
}
for iType, specs := range ec2ProcSpeedTable {
if strings.EqualFold(iType, instanceType) {
return &specs
}
}
return nil
}
func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2metadata.EC2Metadata, ip string) int {
throughput := request.Config.NetworkSpeed
if throughput != 0 {
@ -180,17 +572,15 @@ func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2
// EnvAWSFingerprint uses a lookup table to approximate network speeds
func (f *EnvAWSFingerprint) linkSpeed(ec2meta *ec2metadata.EC2Metadata) int {
resp, err := ec2meta.GetMetadata("instance-type")
instanceType, err := f.instanceType(ec2meta)
if err != nil {
f.logger.Error("error reading instance-type", "error", err)
return 0
}
key := strings.Trim(resp, "\n")
netSpeed := 0
for reg, speed := range ec2InstanceSpeedMap {
if reg.MatchString(key) {
for reg, speed := range ec2NetSpeedTable {
if reg.MatchString(instanceType) {
netSpeed = speed
break
}
@ -210,11 +600,11 @@ func ec2MetaClient(endpoint string, timeout time.Duration) (*ec2metadata.EC2Meta
c = c.WithEndpoint(endpoint)
}
session, err := session.NewSession(c)
sess, err := session.NewSession(c)
if err != nil {
return nil, err
}
return ec2metadata.New(session, c), nil
return ec2metadata.New(sess, c), nil
}
func isAWS(ec2meta *ec2metadata.EC2Metadata) bool {

View File

@ -202,6 +202,74 @@ func TestNetworkFingerprint_AWS_IncompleteImitation(t *testing.T) {
require.Nil(t, response.NodeResources)
}
func TestCPUFingerprint_AWS_InstanceFound(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
node := &structs.Node{Attributes: make(map[string]string)}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected)
require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"])
require.Equal(t, "2500", response.Attributes["cpu.frequency"])
require.Equal(t, "8", response.Attributes["cpu.numcores"])
require.Equal(t, "20000", response.Attributes["cpu.totalcompute"])
require.Equal(t, 20000, response.Resources.CPU)
require.Equal(t, int64(20000), response.NodeResources.Cpu.CpuShares)
}
func TestCPUFingerprint_AWS_OverrideCompute(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
node := &structs.Node{Attributes: make(map[string]string)}
request := &FingerprintRequest{Config: &config.Config{
CpuCompute: 99999,
}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected)
require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"])
require.Equal(t, "2500", response.Attributes["cpu.frequency"])
require.Equal(t, "8", response.Attributes["cpu.numcores"])
require.NotContains(t, response.Attributes, "cpu.totalcompute")
require.Nil(t, response.Resources) // defaults in cpu fingerprinter
require.Zero(t, response.NodeResources.Cpu) // defaults in cpu fingerprinter
}
func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, unknownInstanceType)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
node := &structs.Node{Attributes: make(map[string]string)}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected)
require.NotContains(t, response.Attributes, "cpu.modelname")
require.NotContains(t, response.Attributes, "cpu.frequency")
require.NotContains(t, response.Attributes, "cpu.numcores")
require.NotContains(t, response.Attributes, "cpu.totalcompute")
require.Nil(t, response.Resources)
require.Nil(t, response.NodeResources)
}
/// Utility functions for tests
func startFakeEC2Metadata(t *testing.T, endpoints []endpoint) (endpoint string, cleanup func()) {
@ -252,7 +320,7 @@ var awsStubs = []endpoint{
{
Uri: "/latest/meta-data/instance-type",
ContentType: "text/plain",
Body: "m3.2xlarge",
Body: "t3a.2xlarge",
},
{
Uri: "/latest/meta-data/local-hostname",
@ -276,6 +344,34 @@ var awsStubs = []endpoint{
},
}
var unknownInstanceType = []endpoint{
{
Uri: "/latest/meta-data/ami-id",
ContentType: "text/plain",
Body: "ami-1234",
},
{
Uri: "/latest/meta-data/hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/placement/availability-zone",
ContentType: "text/plain",
Body: "us-west-2a",
},
{
Uri: "/latest/meta-data/instance-id",
ContentType: "text/plain",
Body: "i-b3ba3875",
},
{
Uri: "/latest/meta-data/instance-type",
ContentType: "text/plain",
Body: "xyz123.uber",
},
}
// noNetworkAWSStubs mimics an EC2 instance but without local ip address
// may happen in environments with odd EC2 Metadata emulation
var noNetworkAWSStubs = []endpoint{

View File

@ -2,7 +2,6 @@ package csimanager
import (
"context"
"fmt"
"sync"
"testing"
"time"
@ -47,7 +46,6 @@ func TestInstanceManager_Shutdown(t *testing.T) {
im.shutdownCtxCancelFn = cancelFn
im.shutdownCh = make(chan struct{})
im.updater = func(_ string, info *structs.CSIInfo) {
fmt.Println(info)
lock.Lock()
defer lock.Unlock()
pluginHealth = info.Healthy

View File

@ -166,7 +166,7 @@ func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume,
// CSI NodeStageVolume errors for timeout, codes.Unavailable and
// codes.ResourceExhausted are retried; all other errors are fatal.
return v.plugin.NodeStageVolume(ctx,
vol.ID,
vol.RemoteID(),
publishContext,
pluginStagingPath,
capability,

View File

@ -1,6 +0,0 @@
#!/bin/bash
set -e
codecgen -d 102 -t codegen_generated -o structs.generated.go structs.go
sed -i'' -e 's|"github.com/ugorji/go/codec|"github.com/hashicorp/go-msgpack/codec|g' structs.generated.go

View File

@ -1,6 +1,6 @@
package structs
//go:generate ./generate.sh
//go:generate codecgen -c github.com/hashicorp/go-msgpack/codec -d 102 -t codegen_generated -o structs.generated.go structs.go
import (
"errors"

View File

@ -640,10 +640,12 @@ func (c *Command) Run(args []string) int {
logGate.Flush()
return 1
}
defer c.agent.Shutdown()
// Shutdown the HTTP server at the end
defer func() {
c.agent.Shutdown()
// Shutdown the http server at the end, to ease debugging if
// the agent takes long to shutdown
if c.httpServer != nil {
c.httpServer.Shutdown()
}
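
The ordering matters here: wrapping both calls in a single deferred function means the HTTP server is torn down only after `c.agent.Shutdown()` returns, so debug endpoints stay reachable during a slow agent shutdown. A minimal standalone sketch of that ordering, with hypothetical stand-in functions:

```go
package main

import "fmt"

func agentShutdown() { fmt.Println("agent shutdown complete") } // stand-in for c.agent.Shutdown()
func httpShutdown()  { fmt.Println("http server shutdown complete") } // stand-in for c.httpServer.Shutdown()

func main() {
	defer func() {
		agentShutdown() // may take a while; the HTTP server is still serving
		httpShutdown()  // runs last, once the agent has finished
	}()
	fmt.Println("agent running")
}
```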

View File

@ -146,6 +146,7 @@ func NewHTTPServer(agent *Agent, config *Config) (*HTTPServer, error) {
Addr: srv.Addr,
Handler: gzip(mux),
ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns),
ErrorLog: newHTTPServerLogger(srv.logger),
}
go func() {
@ -466,7 +467,11 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque
resp.WriteHeader(code)
resp.Write([]byte(errMsg))
s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
if isAPIClientError(code) {
s.logger.Debug("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
} else {
s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
}
return
}
@ -520,7 +525,11 @@ func (s *HTTPServer) wrapNonJSON(handler func(resp http.ResponseWriter, req *htt
code, errMsg := errCodeFromHandler(err)
resp.WriteHeader(code)
resp.Write([]byte(errMsg))
s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
if isAPIClientError(code) {
s.logger.Debug("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
} else {
s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
}
return
}
@ -532,6 +541,11 @@ func (s *HTTPServer) wrapNonJSON(handler func(resp http.ResponseWriter, req *htt
return f
}
// isAPIClientError returns true if the passed http code represents a client error
func isAPIClientError(code int) bool {
return 400 <= code && code <= 499
}
// decodeBody is used to decode a JSON request body
func decodeBody(req *http.Request, out interface{}) error {
dec := json.NewDecoder(req.Body)

View File

@ -0,0 +1,33 @@
package agent
import (
"bytes"
"log"
hclog "github.com/hashicorp/go-hclog"
)
func newHTTPServerLogger(logger hclog.Logger) *log.Logger {
return log.New(&httpServerLoggerAdapter{logger}, "", 0)
}
// a logger adapter that forwards http server logs as Trace level
// hclog log entries. Logs related to panics are forwarded with Error level.
//
// HTTP server logs are typically spurious as they represent HTTP
// client errors (e.g. TLS handshake failures).
type httpServerLoggerAdapter struct {
logger hclog.Logger
}
func (l *httpServerLoggerAdapter) Write(data []byte) (int, error) {
if bytes.Contains(data, []byte("panic")) {
str := string(bytes.TrimRight(data, " \t\n"))
l.logger.Error(str)
} else if l.logger.IsTrace() {
str := string(bytes.TrimRight(data, " \t\n"))
l.logger.Trace(str)
}
return len(data), nil
}

View File

@ -0,0 +1,48 @@
package agent
import (
"bytes"
"testing"
"github.com/hashicorp/go-hclog"
"github.com/stretchr/testify/require"
)
func TestHttpServerLoggerFilters_Level_Info(t *testing.T) {
var buf bytes.Buffer
hclogger := hclog.New(&hclog.LoggerOptions{
Name: "testlog",
Output: &buf,
Level: hclog.Info,
})
stdlogger := newHTTPServerLogger(hclogger)
// spurious logging would be filtered out
stdlogger.Printf("spurious logging: %v", "arg")
require.Empty(t, buf.String())
// panics are included
stdlogger.Printf("panic while processing: %v", "endpoint")
require.Contains(t, buf.String(), "[ERROR] testlog: panic while processing: endpoint")
}
func TestHttpServerLoggerFilters_Level_Trace(t *testing.T) {
var buf bytes.Buffer
hclogger := hclog.New(&hclog.LoggerOptions{
Name: "testlog",
Output: &buf,
Level: hclog.Trace,
})
stdlogger := newHTTPServerLogger(hclogger)
// spurious logging will be included as Trace level
stdlogger.Printf("spurious logging: %v", "arg")
require.Contains(t, buf.String(), "[TRACE] testlog: spurious logging: arg")
stdlogger.Printf("panic while processing: %v", "endpoint")
require.Contains(t, buf.String(), "[ERROR] testlog: panic while processing: endpoint")
}

View File

@ -1082,6 +1082,18 @@ func TestHTTPServer_Limits_OK(t *testing.T) {
}
}
func Test_IsAPIClientError(t *testing.T) {
trueCases := []int{400, 403, 404, 499}
for _, c := range trueCases {
require.Truef(t, isAPIClientError(c), "code: %v", c)
}
falseCases := []int{100, 300, 500, 501, 505}
for _, c := range falseCases {
require.Falsef(t, isAPIClientError(c), "code: %v", c)
}
}
func httpTest(t testing.TB, cb func(c *Config), f func(srv *TestAgent)) {
s := makeHTTPServer(t, cb)
defer s.Shutdown()

View File

@ -85,14 +85,12 @@ func (c *DeploymentStatusCommand) Run(args []string) int {
// Check that we got exactly one argument
args = flags.Args()
if l := len(args); l != 1 {
if l := len(args); l > 1 {
c.Ui.Error("This command takes one argument: <deployment id>")
c.Ui.Error(commandErrorText(c))
return 1
}
dID := args[0]
// Truncate the id unless full length is requested
length := shortId
if verbose {
@ -106,7 +104,20 @@ func (c *DeploymentStatusCommand) Run(args []string) int {
return 1
}
// List if no arguments are provided
if len(args) == 0 {
deploys, _, err := client.Deployments().List(nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error retrieving deployments: %s", err))
return 1
}
c.Ui.Output(formatDeployments(deploys, length))
return 0
}
// Do a prefix lookup
dID := args[0]
deploy, possible, err := getDeployment(client.Deployments(), dID)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error retrieving deployment: %s", err))

View File

@ -1,13 +1,13 @@
package command
import (
"strings"
"testing"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/mitchellh/cli"
"github.com/posener/complete"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDeploymentStatusCommand_Implements(t *testing.T) {
@ -21,20 +21,23 @@ func TestDeploymentStatusCommand_Fails(t *testing.T) {
cmd := &DeploymentStatusCommand{Meta: Meta{Ui: ui}}
// Fails on misuse
if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 {
t.Fatalf("expected exit code 1, got: %d", code)
}
if out := ui.ErrorWriter.String(); !strings.Contains(out, commandErrorText(cmd)) {
t.Fatalf("expected help output, got: %s", out)
}
code := cmd.Run([]string{"some", "bad", "args"})
require.Equal(t, 1, code)
out := ui.ErrorWriter.String()
require.Contains(t, out, commandErrorText(cmd))
ui.ErrorWriter.Reset()
if code := cmd.Run([]string{"-address=nope", "12"}); code != 1 {
t.Fatalf("expected exit code 1, got: %d", code)
}
if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error retrieving deployment") {
t.Fatalf("expected failed query error, got: %s", out)
}
code = cmd.Run([]string{"-address=nope", "12"})
require.Equal(t, 1, code)
out = ui.ErrorWriter.String()
require.Contains(t, out, "Error retrieving deployment")
ui.ErrorWriter.Reset()
code = cmd.Run([]string{"-address=nope"})
require.Equal(t, 1, code)
out = ui.ErrorWriter.String()
// "deployments" indicates that we attempted to list all deployments
require.Contains(t, out, "Error retrieving deployments")
ui.ErrorWriter.Reset()
}

View File

@ -17,7 +17,7 @@ const (
nomad job run -check-index %d %s
When running the job with the check-index flag, the job will only be run if the
server side version matches the job modify index returned. If the index has
job modify index given matches the server-side version. If the index has
changed, another user has modified the job and the plan's results are
potentially invalid.`

View File

@ -15,6 +15,7 @@
* [ ] Add structs/fields to `nomad/structs` package
* Validation happens in this package and must be implemented
* Implement other methods and tests from `api/` package
* Note that analogous struct field names should match those in the `api/` package
* [ ] Add conversion between `api/` and `nomad/structs` in `command/agent/job_endpoint.go`
* [ ] Add check for job diff in `nomad/structs/diff.go`
* Note that fields must be listed in alphabetical order in `FieldDiff` slices in `nomad/structs/diff_test.go`

demo/grpc-checks/.gitignore (new file, 1 line)
View File

@ -0,0 +1 @@
grpc-checks

View File

@ -0,0 +1,18 @@
FROM golang:alpine as builder
WORKDIR /build
ADD . /build
RUN apk add protoc && \
go get -u github.com/golang/protobuf/protoc-gen-go
RUN go version && \
go env && \
go generate && \
CGO_ENABLED=0 GOOS=linux go build
FROM alpine:latest
MAINTAINER nomadproject.io
WORKDIR /opt
COPY --from=builder /build/grpc-checks /opt
ENTRYPOINT ["/opt/grpc-checks"]

View File

@ -0,0 +1,38 @@
# grpc-checks
An example service that exposes a gRPC healthcheck endpoint
### generate protobuf
Note that main.go also includes this as a go:generate directive
so that running this by hand is not necessary
```bash
$ protoc -I ./health ./health/health.proto --go_out=plugins=grpc:health
```
### build & run example
Generate, compile, and run the example server.
```bash
go generate
go build
go run main.go
```
### publish
#### Testing locally
```bash
$ docker build -t hashicorpnomad/grpc-checks:test .
$ docker run --rm hashicorpnomad/grpc-checks:test
```
#### Upload to Docker Hub
```bash
# replace <version> with the next version number
docker login
$ docker build -t hashicorpnomad/grpc-checks:<version> .
$ docker push hashicorpnomad/grpc-checks:<version>
```
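
To verify the endpoint from another process, here is a minimal client sketch; it assumes the example server above is listening on its default port 3333 and is not part of this repository.

```go
package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	ghc "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	// Dial the example server started with `go run main.go`.
	conn, err := grpc.Dial("localhost:3333", grpc.WithInsecure())
	if err != nil {
		log.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// An empty Service name queries the overall server health.
	resp, err := ghc.NewHealthClient(conn).Check(ctx, &ghc.HealthCheckRequest{})
	if err != nil {
		log.Fatalf("health check: %v", err)
	}
	log.Printf("status: %s", resp.GetStatus()) // expect SERVING
}
```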

View File

@ -0,0 +1,31 @@
package example
import (
"context"
"log"
ghc "google.golang.org/grpc/health/grpc_health_v1"
)
// Server is a trivial gRPC server that implements the standard grpc.health.v1
// interface.
type Server struct {
}
func New() *Server {
return new(Server)
}
func (s *Server) Check(ctx context.Context, hcr *ghc.HealthCheckRequest) (*ghc.HealthCheckResponse, error) {
log.Printf("Check:%s (%s)", hcr.Service, hcr.String())
return &ghc.HealthCheckResponse{
Status: ghc.HealthCheckResponse_SERVING,
}, nil
}
func (s *Server) Watch(hcr *ghc.HealthCheckRequest, hws ghc.Health_WatchServer) error {
log.Printf("Watch:%s (%s)", hcr.Service, hcr.String())
return hws.Send(&ghc.HealthCheckResponse{
Status: ghc.HealthCheckResponse_SERVING,
})
}

demo/grpc-checks/go.mod (new file, 8 lines)
View File

@ -0,0 +1,8 @@
module github.com/hashicorp/nomad/demo/grpc-checks
go 1.14
require (
github.com/golang/protobuf v1.3.5
google.golang.org/grpc v1.28.1
)

demo/grpc-checks/go.sum (new file, 53 lines)
View File

@ -0,0 +1,53 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/hashicorp/nomad v0.11.1 h1:ow411q+bAduxC0X0V3NLx9slQzwG9wiB66yVzpQ0aEg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 h1:gSJIx1SDwno+2ElGhA4+qG2zF97qiUzTM+rQ0klBOcE=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.28.1 h1:C1QC6KzgSiLyBabDi87BbjaGreoRgGUF5nOyvfrAZ1k=
google.golang.org/grpc v1.28.1/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

demo/grpc-checks/main.go (new file, 40 lines)
View File

@ -0,0 +1,40 @@
package main
import (
"fmt"
"log"
"net"
"os"
"github.com/hashicorp/nomad/demo/grpc-checks/example"
"google.golang.org/grpc"
ghc "google.golang.org/grpc/health/grpc_health_v1"
)
func main() {
port := os.Getenv("GRPC_HC_PORT")
if port == "" {
port = "3333"
}
address := fmt.Sprintf(":%s", port)
log.Printf("creating tcp listener on %s", address)
listener, err := net.Listen("tcp", address)
if err != nil {
log.Printf("unable to create listener: %v", err)
os.Exit(1)
}
log.Printf("creating grpc server")
grpcServer := grpc.NewServer()
log.Printf("registering health server")
ghc.RegisterHealthServer(grpcServer, example.New())
log.Printf("listening ...")
if err := grpcServer.Serve(listener); err != nil {
log.Printf("unable to listen: %v", err)
os.Exit(1)
}
}

View File

@ -25,7 +25,7 @@ sudo docker --version
sudo apt-get install unzip curl vim -y
echo "Installing Nomad..."
NOMAD_VERSION=0.10.4
NOMAD_VERSION=0.11.0
cd /tmp/
curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
unzip nomad.zip

View File

@ -21,3 +21,16 @@ client {
ports {
http = 5656
}
# Because we will potentially have two clients talking to the same
# Docker daemon, we have to disable the dangling container cleanup,
# otherwise they will stop each other's work thinking it was orphaned.
plugin "docker" {
config {
gc {
dangling_containers {
enabled = false
}
}
}
}

View File

@ -21,3 +21,16 @@ client {
ports {
http = 5657
}
# Because we will potentially have two clients talking to the same
# Docker daemon, we have to disable the dangling container cleanup,
# otherwise they will stop each other's work thinking it was orphaned.
plugin "docker" {
config {
gc {
dangling_containers {
enabled = false
}
}
}
}

View File

@ -439,16 +439,21 @@ CREATE:
return container, nil
}
// Delete matching containers
err = client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
if err != nil {
d.logger.Error("failed to purge container", "container_id", container.ID)
return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
} else {
d.logger.Info("purged container", "container_id", container.ID)
// Purge conflicting container if found.
// If container is nil here, the conflicting container was
// deleted in our check here, so retry again.
if container != nil {
// Delete matching containers
err = client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
Force: true,
})
if err != nil {
d.logger.Error("failed to purge container", "container_id", container.ID)
return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
} else {
d.logger.Info("purged container", "container_id", container.ID)
}
}
if attempted < 5 {

View File

@ -1,4 +1,5 @@
NOMAD_SHA ?= $(shell git rev-parse HEAD)
PKG_PATH = $(shell pwd)/../../pkg/linux_amd64/nomad
dev-cluster:
terraform apply -auto-approve -var-file=terraform.tfvars.dev
@ -6,5 +7,11 @@ dev-cluster:
cd .. && NOMAD_E2E=1 go test -v . -nomad.sha=$(NOMAD_SHA) -provision.terraform ./provisioning.json -skipTests
terraform output message
dev-cluster-from-local:
terraform apply -auto-approve -var-file=terraform.tfvars.dev
terraform output provisioning | jq . > ../provisioning.json
cd .. && NOMAD_E2E=1 go test -v . -nomad.local_file=$(PKG_PATH) -provision.terraform ./provisioning.json -skipTests
terraform output message
clean:
terraform destroy -auto-approve

View File

@ -366,15 +366,16 @@ func parseScalingPolicy(out **api.ScalingPolicy, list *ast.ObjectList) error {
// If we have policy, then parse that
if o := listVal.Filter("policy"); len(o.Items) > 0 {
for _, o := range o.Elem().Items {
var m map[string]interface{}
if err := hcl.DecodeObject(&m, o.Val); err != nil {
return err
}
if err := mapstructure.WeakDecode(m, &result.Policy); err != nil {
return err
}
if len(o.Elem().Items) > 1 {
return fmt.Errorf("only one 'policy' block allowed per 'scaling' block")
}
p := o.Elem().Items[0]
var m map[string]interface{}
if err := hcl.DecodeObject(&m, p.Val); err != nil {
return err
}
if err := mapstructure.WeakDecode(m, &result.Policy); err != nil {
return err
}
}

View File

@ -281,7 +281,7 @@ func parseSidecarTask(item *ast.ObjectItem) (*api.SidecarTask, error) {
KillSignal: task.KillSignal,
}
// Parse ShutdownDelay separately to get pointer
// Parse ShutdownDelay separatly to get pointer
var m map[string]interface{}
if err := hcl.DecodeObject(&m, item.Val); err != nil {
return nil, err
@ -320,6 +320,24 @@ func parseProxy(o *ast.ObjectItem) (*api.ConsulProxy, error) {
}
var proxy api.ConsulProxy
var m map[string]interface{}
if err := hcl.DecodeObject(&m, o.Val); err != nil {
return nil, err
}
delete(m, "upstreams")
delete(m, "expose")
delete(m, "config")
dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
Result: &proxy,
})
if err != nil {
return nil, err
}
if err := dec.Decode(m); err != nil {
return nil, fmt.Errorf("proxy: %v", err)
}
var listVal *ast.ObjectList
if ot, ok := o.Val.(*ast.ObjectType); ok {

View File

@ -894,28 +894,6 @@ func TestParse(t *testing.T) {
},
false,
},
{
"service-connect-sidecar_task-name.hcl",
&api.Job{
ID: helper.StringToPtr("sidecar_task_name"),
Name: helper.StringToPtr("sidecar_task_name"),
Type: helper.StringToPtr("service"),
TaskGroups: []*api.TaskGroup{{
Name: helper.StringToPtr("group"),
Services: []*api.Service{{
Name: "example",
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{},
SidecarTask: &api.SidecarTask{
Name: "my-sidecar",
},
},
}},
}},
},
false,
},
{
"reschedule-job.hcl",
&api.Job{
@ -1051,6 +1029,7 @@ func TestParse(t *testing.T) {
SidecarService: &api.ConsulSidecarService{
Tags: []string{"side1", "side2"},
Proxy: &api.ConsulProxy{
LocalServicePort: 8080,
Upstreams: []*api.ConsulUpstream{
{
DestinationName: "other-service",
@ -1172,6 +1151,99 @@ func TestParse(t *testing.T) {
},
false,
},
{
"tg-service-connect-sidecar_task-name.hcl",
&api.Job{
ID: helper.StringToPtr("sidecar_task_name"),
Name: helper.StringToPtr("sidecar_task_name"),
Type: helper.StringToPtr("service"),
TaskGroups: []*api.TaskGroup{{
Name: helper.StringToPtr("group"),
Services: []*api.Service{{
Name: "example",
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{},
SidecarTask: &api.SidecarTask{
Name: "my-sidecar",
},
},
}},
}},
},
false,
},
{
"tg-service-connect-proxy.hcl",
&api.Job{
ID: helper.StringToPtr("service-connect-proxy"),
Name: helper.StringToPtr("service-connect-proxy"),
Type: helper.StringToPtr("service"),
TaskGroups: []*api.TaskGroup{{
Name: helper.StringToPtr("group"),
Services: []*api.Service{{
Name: "example",
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{
Proxy: &api.ConsulProxy{
LocalServiceAddress: "10.0.1.2",
LocalServicePort: 8080,
ExposeConfig: &api.ConsulExposeConfig{
Path: []*api.ConsulExposePath{{
Path: "/metrics",
Protocol: "http",
LocalPathPort: 9001,
ListenerPort: "metrics",
}, {
Path: "/health",
Protocol: "http",
LocalPathPort: 9002,
ListenerPort: "health",
}},
},
Upstreams: []*api.ConsulUpstream{{
DestinationName: "upstream1",
LocalBindPort: 2001,
}, {
DestinationName: "upstream2",
LocalBindPort: 2002,
}},
Config: map[string]interface{}{
"foo": "bar",
},
},
},
},
}},
}},
},
false,
},
{
"tg-service-connect-local-service.hcl",
&api.Job{
ID: helper.StringToPtr("connect-proxy-local-service"),
Name: helper.StringToPtr("connect-proxy-local-service"),
Type: helper.StringToPtr("service"),
TaskGroups: []*api.TaskGroup{{
Name: helper.StringToPtr("group"),
Services: []*api.Service{{
Name: "example",
Connect: &api.ConsulConnect{
Native: false,
SidecarService: &api.ConsulSidecarService{
Proxy: &api.ConsulProxy{
LocalServiceAddress: "10.0.1.2",
LocalServicePort: 9876,
},
},
},
}},
}},
},
false,
},
{
"tg-service-check-expose.hcl",
&api.Job{
@ -1238,6 +1310,32 @@ func TestParse(t *testing.T) {
},
false,
},
{
"tg-scaling-policy-minimal.hcl",
&api.Job{
ID: helper.StringToPtr("elastic"),
Name: helper.StringToPtr("elastic"),
TaskGroups: []*api.TaskGroup{
{
Name: helper.StringToPtr("group"),
Scaling: &api.ScalingPolicy{
Min: nil,
Max: 0,
Policy: nil,
Enabled: nil,
},
},
},
},
false,
},
{
"tg-scaling-policy-multi-policy.hcl",
nil,
true,
},
}
for _, tc := range cases {

View File

@ -0,0 +1,5 @@
job "elastic" {
group "group" {
scaling {}
}
}

View File

@ -0,0 +1,19 @@
job "elastic" {
group "group" {
scaling {
enabled = false
min = 5
max = 100
policy {
foo = "right"
b = true
}
policy {
foo = "wrong"
c = false
}
}
}
}

View File

@ -0,0 +1,18 @@
job "connect-proxy-local-service" {
type = "service"
group "group" {
service {
name = "example"
connect {
sidecar_service {
proxy {
local_service_port = 9876
local_service_address = "10.0.1.2"
}
}
}
}
}
}

View File

@ -0,0 +1,48 @@
job "service-connect-proxy" {
type = "service"
group "group" {
service {
name = "example"
connect {
sidecar_service {
proxy {
local_service_port = 8080
local_service_address = "10.0.1.2"
upstreams {
destination_name = "upstream1"
local_bind_port = 2001
}
upstreams {
destination_name = "upstream2"
local_bind_port = 2002
}
expose {
path {
path = "/metrics"
protocol = "http"
local_path_port = 9001
listener_port = "metrics"
}
path {
path = "/health"
protocol = "http"
local_path_port = 9002
listener_port = "health"
}
}
config {
foo = "bar"
}
}
}
}
}
}
}

View File

@ -4,12 +4,14 @@ job "sidecar_task_name" {
group "group" {
service {
name = "example"
connect {
sidecar_service {}
sidecar_service = {}
sidecar_task {
name = "my-sidecar"
}
}
}
}
}
}

View File

@ -8,9 +8,7 @@ import (
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
multierror "github.com/hashicorp/go-multierror"
version "github.com/hashicorp/go-version"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/scheduler"
@ -711,188 +709,30 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time,
return timeDiff > interval.Nanoseconds()
}
// TODO: we need a periodic trigger to iterate over all the volumes and split
// them up into separate work items, same as we do for jobs.
// csiVolumeClaimGC is used to garbage collect CSI volume claims
func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error {
c.logger.Trace("garbage collecting unclaimed CSI volume claims")
c.logger.Trace("garbage collecting unclaimed CSI volume claims", "eval.JobID", eval.JobID)
// Volume ID smuggled in with the eval's own JobID
evalVolID := strings.Split(eval.JobID, ":")
if len(evalVolID) != 3 {
// COMPAT(1.0): 0.11.0 shipped with 3 fields. tighten this check to len == 2
if len(evalVolID) < 2 {
c.logger.Error("volume gc called without volID")
return nil
}
volID := evalVolID[1]
runningAllocs := evalVolID[2] == "purge"
return volumeClaimReap(c.srv, volID, eval.Namespace,
c.srv.config.Region, eval.LeaderACL, runningAllocs)
}
func volumeClaimReap(srv RPCServer, volID, namespace, region, leaderACL string, runningAllocs bool) error {
ws := memdb.NewWatchSet()
vol, err := srv.State().CSIVolumeByID(ws, namespace, volID)
if err != nil {
return err
}
if vol == nil {
return nil
}
vol, err = srv.State().CSIVolumeDenormalize(ws, vol)
if err != nil {
return err
}
plug, err := srv.State().CSIPluginByID(ws, vol.PluginID)
if err != nil {
return err
}
gcClaims, nodeClaims := collectClaimsToGCImpl(vol, runningAllocs)
var result *multierror.Error
for _, claim := range gcClaims {
nodeClaims, err = volumeClaimReapImpl(srv,
&volumeClaimReapArgs{
vol: vol,
plug: plug,
allocID: claim.allocID,
nodeID: claim.nodeID,
mode: claim.mode,
namespace: namespace,
region: region,
leaderACL: leaderACL,
nodeClaims: nodeClaims,
},
)
if err != nil {
result = multierror.Append(result, err)
continue
}
}
return result.ErrorOrNil()
}
type gcClaimRequest struct {
allocID string
nodeID string
mode structs.CSIVolumeClaimMode
}
func collectClaimsToGCImpl(vol *structs.CSIVolume, runningAllocs bool) ([]gcClaimRequest, map[string]int) {
gcAllocs := []gcClaimRequest{}
nodeClaims := map[string]int{} // node IDs -> count
collectFunc := func(allocs map[string]*structs.Allocation,
mode structs.CSIVolumeClaimMode) {
for _, alloc := range allocs {
// we call denormalize on the volume above to populate
// Allocation pointers. But the alloc might have been
// garbage collected concurrently, so if the alloc is
// still nil we can safely skip it.
if alloc == nil {
continue
}
nodeClaims[alloc.NodeID]++
if runningAllocs || alloc.Terminated() {
gcAllocs = append(gcAllocs, gcClaimRequest{
allocID: alloc.ID,
nodeID: alloc.NodeID,
mode: mode,
})
}
}
}
collectFunc(vol.WriteAllocs, structs.CSIVolumeClaimWrite)
collectFunc(vol.ReadAllocs, structs.CSIVolumeClaimRead)
return gcAllocs, nodeClaims
}
type volumeClaimReapArgs struct {
vol *structs.CSIVolume
plug *structs.CSIPlugin
allocID string
nodeID string
mode structs.CSIVolumeClaimMode
region string
namespace string
leaderACL string
nodeClaims map[string]int // node IDs -> count
}
func volumeClaimReapImpl(srv RPCServer, args *volumeClaimReapArgs) (map[string]int, error) {
vol := args.vol
nodeID := args.nodeID
// (1) NodePublish / NodeUnstage must be completed before controller
// operations or releasing the claim.
nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
PluginID: args.plug.ID,
VolumeID: vol.ID,
ExternalID: vol.RemoteID(),
AllocID: args.allocID,
NodeID: nodeID,
AttachmentMode: vol.AttachmentMode,
AccessMode: vol.AccessMode,
ReadOnly: args.mode == structs.CSIVolumeClaimRead,
}
err := srv.RPC("ClientCSI.NodeDetachVolume", nReq,
&cstructs.ClientCSINodeDetachVolumeResponse{})
if err != nil {
return args.nodeClaims, err
}
args.nodeClaims[nodeID]--
// (2) we only emit the controller unpublish if no other allocs
// on the node need it, but we also only want to make this
// call at most once per node
if vol.ControllerRequired && args.nodeClaims[nodeID] < 1 {
// we need to get the CSI Node ID, which is not the same as
// the Nomad Node ID
ws := memdb.NewWatchSet()
targetNode, err := srv.State().NodeByID(ws, nodeID)
if err != nil {
return args.nodeClaims, err
}
if targetNode == nil {
return args.nodeClaims, fmt.Errorf("%s: %s",
structs.ErrUnknownNodePrefix, nodeID)
}
targetCSIInfo, ok := targetNode.CSINodePlugins[args.plug.ID]
if !ok {
return args.nodeClaims, fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
}
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
VolumeID: vol.RemoteID(),
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
}
cReq.PluginID = args.plug.ID
err = srv.RPC("ClientCSI.ControllerDetachVolume", cReq,
&cstructs.ClientCSIControllerDetachVolumeResponse{})
if err != nil {
return args.nodeClaims, err
}
}
// (3) release the claim from the state store, allowing it to be rescheduled
req := &structs.CSIVolumeClaimRequest{
VolumeID: vol.ID,
AllocationID: args.allocID,
Claim: structs.CSIVolumeClaimRelease,
WriteRequest: structs.WriteRequest{
Region: args.region,
Namespace: args.namespace,
AuthToken: args.leaderACL,
},
VolumeID: volID,
Claim: structs.CSIVolumeClaimRelease,
}
err = srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
if err != nil {
return args.nodeClaims, err
}
return args.nodeClaims, nil
req.Namespace = eval.Namespace
req.Region = c.srv.config.Region
err := c.srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
return err
}
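
The rewritten csiVolumeClaimGC no longer reaps claims inline; it only recovers the volume ID carried in the eval's JobID and issues a single CSIVolume.Claim release, leaving the detach work to the new volume watcher. A small sketch of just the ID extraction; the prefix value below is assumed for illustration.

package main

import (
	"fmt"
	"strings"
)

// coreJobCSIVolumeClaimGC stands in for structs.CoreJobCSIVolumeClaimGC,
// the prefix used when the eval is created in Job.Deregister and
// Node.UpdateAlloc.
const coreJobCSIVolumeClaimGC = "csi-volume-claim-gc"

// volumeIDFromEval pulls the volume ID smuggled into the eval's JobID.
// 0.11.0 emitted a third ":<flag>" field, so anything with at least two
// fields is accepted, matching the COMPAT note in csiVolumeClaimGC.
func volumeIDFromEval(evalJobID string) (string, error) {
	parts := strings.Split(evalJobID, ":")
	if len(parts) < 2 {
		return "", fmt.Errorf("volume gc called without volID")
	}
	return parts[1], nil
}

func main() {
	id, err := volumeIDFromEval(coreJobCSIVolumeClaimGC + ":vol-12345")
	fmt.Println(id, err) // vol-12345 <nil>
}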

View File

@ -6,10 +6,8 @@ import (
"time"
memdb "github.com/hashicorp/go-memdb"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"
@ -2195,270 +2193,3 @@ func TestAllocation_GCEligible(t *testing.T) {
alloc.ClientStatus = structs.AllocClientStatusComplete
require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
}
func TestCSI_GCVolumeClaims_Collection(t *testing.T) {
t.Parallel()
srv, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdownSrv()
testutil.WaitForLeader(t, srv.RPC)
state := srv.fsm.State()
ws := memdb.NewWatchSet()
index := uint64(100)
// Create a client node, plugin, and volume
node := mock.Node()
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
node.CSINodePlugins = map[string]*structs.CSIInfo{
"csi-plugin-example": {
PluginID: "csi-plugin-example",
Healthy: true,
RequiresControllerPlugin: true,
NodeInfo: &structs.CSINodeInfo{},
},
}
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
"csi-plugin-example": {
PluginID: "csi-plugin-example",
Healthy: true,
RequiresControllerPlugin: true,
ControllerInfo: &structs.CSIControllerInfo{
SupportsReadOnlyAttach: true,
SupportsAttachDetach: true,
SupportsListVolumes: true,
SupportsListVolumesAttachedNodes: false,
},
},
}
err := state.UpsertNode(99, node)
require.NoError(t, err)
volId0 := uuid.Generate()
ns := structs.DefaultNamespace
vols := []*structs.CSIVolume{{
ID: volId0,
Namespace: ns,
PluginID: "csi-plugin-example",
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
}}
err = state.CSIVolumeRegister(index, vols)
index++
require.NoError(t, err)
vol, err := state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.True(t, vol.ControllerRequired)
require.Len(t, vol.ReadAllocs, 0)
require.Len(t, vol.WriteAllocs, 0)
// Create a job with 2 allocations
job := mock.Job()
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
"_": {
Name: "someVolume",
Type: structs.VolumeTypeCSI,
Source: volId0,
ReadOnly: false,
},
}
err = state.UpsertJob(index, job)
index++
require.NoError(t, err)
alloc1 := mock.Alloc()
alloc1.JobID = job.ID
alloc1.NodeID = node.ID
err = state.UpsertJobSummary(index, mock.JobSummary(alloc1.JobID))
index++
require.NoError(t, err)
alloc1.TaskGroup = job.TaskGroups[0].Name
alloc2 := mock.Alloc()
alloc2.JobID = job.ID
alloc2.NodeID = node.ID
err = state.UpsertJobSummary(index, mock.JobSummary(alloc2.JobID))
index++
require.NoError(t, err)
alloc2.TaskGroup = job.TaskGroups[0].Name
err = state.UpsertAllocs(104, []*structs.Allocation{alloc1, alloc2})
require.NoError(t, err)
// Claim the volumes and verify the claims were set
err = state.CSIVolumeClaim(index, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
index++
require.NoError(t, err)
err = state.CSIVolumeClaim(index, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
index++
require.NoError(t, err)
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
require.Len(t, vol.ReadAllocs, 1)
require.Len(t, vol.WriteAllocs, 1)
// Update both allocs as failed/terminated
alloc1.ClientStatus = structs.AllocClientStatusFailed
alloc2.ClientStatus = structs.AllocClientStatusFailed
err = state.UpdateAllocsFromClient(index, []*structs.Allocation{alloc1, alloc2})
require.NoError(t, err)
vol, err = state.CSIVolumeDenormalize(ws, vol)
require.NoError(t, err)
gcClaims, nodeClaims := collectClaimsToGCImpl(vol, false)
require.Equal(t, nodeClaims[node.ID], 2)
require.Len(t, gcClaims, 2)
}
func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
t.Parallel()
require := require.New(t)
s, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
defer shutdownSrv()
testutil.WaitForLeader(t, s.RPC)
node := mock.Node()
plugin := mock.CSIPlugin()
vol := mock.CSIVolume(plugin)
alloc := mock.Alloc()
cases := []struct {
Name string
Claim gcClaimRequest
ClaimsCount map[string]int
ControllerRequired bool
ExpectedErr string
ExpectedCount int
ExpectedClaimsCount int
ExpectedNodeDetachVolumeCount int
ExpectedControllerDetachVolumeCount int
ExpectedVolumeClaimCount int
srv *MockRPCServer
}{
{
Name: "NodeDetachVolume fails",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: true,
ExpectedErr: "node plugin missing",
ExpectedClaimsCount: 1,
ExpectedNodeDetachVolumeCount: 1,
srv: &MockRPCServer{
state: s.State(),
nextCSINodeDetachVolumeError: fmt.Errorf("node plugin missing"),
},
},
{
Name: "ControllerDetachVolume no controllers",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: true,
ExpectedErr: fmt.Sprintf(
"Unknown node: %s", node.ID),
ExpectedClaimsCount: 0,
ExpectedNodeDetachVolumeCount: 1,
ExpectedControllerDetachVolumeCount: 0,
srv: &MockRPCServer{
state: s.State(),
},
},
{
Name: "ControllerDetachVolume node-only",
Claim: gcClaimRequest{
allocID: alloc.ID,
nodeID: node.ID,
mode: structs.CSIVolumeClaimRead,
},
ClaimsCount: map[string]int{node.ID: 1},
ControllerRequired: false,
ExpectedClaimsCount: 0,
ExpectedNodeDetachVolumeCount: 1,
ExpectedControllerDetachVolumeCount: 0,
ExpectedVolumeClaimCount: 1,
srv: &MockRPCServer{
state: s.State(),
},
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
vol.ControllerRequired = tc.ControllerRequired
nodeClaims, err := volumeClaimReapImpl(tc.srv, &volumeClaimReapArgs{
vol: vol,
plug: plugin,
allocID: tc.Claim.allocID,
nodeID: tc.Claim.nodeID,
mode: tc.Claim.mode,
region: "global",
namespace: "default",
leaderACL: "not-in-use",
nodeClaims: tc.ClaimsCount,
})
if tc.ExpectedErr != "" {
require.EqualError(err, tc.ExpectedErr)
} else {
require.NoError(err)
}
require.Equal(tc.ExpectedClaimsCount,
nodeClaims[tc.Claim.nodeID], "expected claims")
require.Equal(tc.ExpectedNodeDetachVolumeCount,
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
require.Equal(tc.ExpectedControllerDetachVolumeCount,
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
require.Equal(tc.ExpectedVolumeClaimCount,
tc.srv.countCSIVolumeClaim, "volume claim RPC count")
})
}
}
type MockRPCServer struct {
state *state.StateStore
// mock responses for ClientCSI.NodeDetachVolume
nextCSINodeDetachVolumeResponse *cstructs.ClientCSINodeDetachVolumeResponse
nextCSINodeDetachVolumeError error
countCSINodeDetachVolume int
// mock responses for ClientCSI.ControllerDetachVolume
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
nextCSIControllerDetachVolumeError error
countCSIControllerDetachVolume int
// mock responses for CSI.VolumeClaim
nextCSIVolumeClaimResponse *structs.CSIVolumeClaimResponse
nextCSIVolumeClaimError error
countCSIVolumeClaim int
}
func (srv *MockRPCServer) RPC(method string, args interface{}, reply interface{}) error {
switch method {
case "ClientCSI.NodeDetachVolume":
reply = srv.nextCSINodeDetachVolumeResponse
srv.countCSINodeDetachVolume++
return srv.nextCSINodeDetachVolumeError
case "ClientCSI.ControllerDetachVolume":
reply = srv.nextCSIControllerDetachVolumeResponse
srv.countCSIControllerDetachVolume++
return srv.nextCSIControllerDetachVolumeError
case "CSIVolume.Claim":
reply = srv.nextCSIVolumeClaimResponse
srv.countCSIVolumeClaim++
return srv.nextCSIVolumeClaimError
default:
return fmt.Errorf("unexpected method %q passed to mock", method)
}
}
func (srv *MockRPCServer) State() *state.StateStore { return srv.state }

View File

@ -348,15 +348,31 @@ func (v *CSIVolume) Claim(args *structs.CSIVolumeClaimRequest, reply *structs.CS
return structs.ErrPermissionDenied
}
// if this is a new claim, add a Volume and PublishContext from the
// controller (if any) to the reply
// COMPAT(1.0): the NodeID field was added after 0.11.0 and so we
// need to ensure it's been populated during upgrades from 0.11.0
// to later patch versions. Remove this block in 1.0
if args.Claim != structs.CSIVolumeClaimRelease && args.NodeID == "" {
state := v.srv.fsm.State()
ws := memdb.NewWatchSet()
alloc, err := state.AllocByID(ws, args.AllocationID)
if err != nil {
return err
}
if alloc == nil {
return fmt.Errorf("%s: %s",
structs.ErrUnknownAllocationPrefix, args.AllocationID)
}
args.NodeID = alloc.NodeID
}
if args.Claim != structs.CSIVolumeClaimRelease {
// if this is a new claim, add a Volume and PublishContext from the
// controller (if any) to the reply
err = v.controllerPublishVolume(args, reply)
if err != nil {
return fmt.Errorf("controller publish: %v", err)
}
}
resp, index, err := v.srv.raftApply(structs.CSIVolumeClaimRequestType, args)
if err != nil {
v.logger.Error("csi raft apply failed", "error", err, "method", "claim")
@ -400,6 +416,7 @@ func (v *CSIVolume) controllerPublishVolume(req *structs.CSIVolumeClaimRequest,
return nil
}
// get Nomad's ID for the client node (not the storage provider's ID)
targetNode, err := state.NodeByID(ws, alloc.NodeID)
if err != nil {
return err
@ -407,15 +424,19 @@ func (v *CSIVolume) controllerPublishVolume(req *structs.CSIVolumeClaimRequest,
if targetNode == nil {
return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, alloc.NodeID)
}
// get the storage provider's ID for the client node (not
// Nomad's ID for the node)
targetCSIInfo, ok := targetNode.CSINodePlugins[plug.ID]
if !ok {
return fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
}
externalNodeID := targetCSIInfo.NodeInfo.ID
method := "ClientCSI.ControllerAttachVolume"
cReq := &cstructs.ClientCSIControllerAttachVolumeRequest{
VolumeID: vol.RemoteID(),
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
ClientCSINodeID: externalNodeID,
AttachmentMode: vol.AttachmentMode,
AccessMode: vol.AccessMode,
ReadOnly: req.Claim == structs.CSIVolumeClaimRead,

View File

@ -201,11 +201,22 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)
index := uint64(1000)
state := srv.fsm.State()
codec := rpcClient(t, srv)
id0 := uuid.Generate()
alloc := mock.BatchAlloc()
// Create a client node and alloc
node := mock.Node()
alloc.NodeID = node.ID
summary := mock.JobSummary(alloc.JobID)
index++
require.NoError(t, state.UpsertJobSummary(index, summary))
index++
require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc}))
// Create an initial volume claim request; we expect it to fail
// because there's no such volume yet.
claimReq := &structs.CSIVolumeClaimRequest{
@ -222,8 +233,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
require.EqualError(t, err, fmt.Sprintf("controller publish: volume not found: %s", id0),
"expected 'volume not found' error because volume hasn't yet been created")
// Create a client node, plugin, alloc, and volume
node := mock.Node()
// Create a plugin and volume
node.CSINodePlugins = map[string]*structs.CSIInfo{
"minnie": {
PluginID: "minnie",
@ -231,7 +242,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
NodeInfo: &structs.CSINodeInfo{},
},
}
err = state.UpsertNode(1002, node)
index++
err = state.UpsertNode(index, node)
require.NoError(t, err)
vols := []*structs.CSIVolume{{
@ -244,7 +256,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
Segments: map[string]string{"foo": "bar"},
}},
}}
err = state.CSIVolumeRegister(1003, vols)
index++
err = state.CSIVolumeRegister(index, vols)
require.NoError(t, err)
// Verify that the volume exists, and is healthy
@ -263,12 +276,6 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
require.Len(t, volGetResp.Volume.ReadAllocs, 0)
require.Len(t, volGetResp.Volume.WriteAllocs, 0)
// Upsert the job and alloc
alloc.NodeID = node.ID
summary := mock.JobSummary(alloc.JobID)
require.NoError(t, state.UpsertJobSummary(1004, summary))
require.NoError(t, state.UpsertAllocs(1005, []*structs.Allocation{alloc}))
// Now our claim should succeed
err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp)
require.NoError(t, err)
@ -284,8 +291,10 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
alloc2 := mock.Alloc()
alloc2.JobID = uuid.Generate()
summary = mock.JobSummary(alloc2.JobID)
require.NoError(t, state.UpsertJobSummary(1005, summary))
require.NoError(t, state.UpsertAllocs(1006, []*structs.Allocation{alloc2}))
index++
require.NoError(t, state.UpsertJobSummary(index, summary))
index++
require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc2}))
claimReq.AllocationID = alloc2.ID
err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp)
require.EqualError(t, err, "volume max claim reached",

View File

@ -270,6 +270,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} {
return n.applyCSIVolumeDeregister(buf[1:], log.Index)
case structs.CSIVolumeClaimRequestType:
return n.applyCSIVolumeClaim(buf[1:], log.Index)
case structs.CSIVolumeClaimBatchRequestType:
return n.applyCSIVolumeBatchClaim(buf[1:], log.Index)
case structs.ScalingEventRegisterRequestType:
return n.applyUpsertScalingEvent(buf[1:], log.Index)
}
@ -1156,6 +1158,24 @@ func (n *nomadFSM) applyCSIVolumeDeregister(buf []byte, index uint64) interface{
return nil
}
func (n *nomadFSM) applyCSIVolumeBatchClaim(buf []byte, index uint64) interface{} {
var batch *structs.CSIVolumeClaimBatchRequest
if err := structs.Decode(buf, &batch); err != nil {
panic(fmt.Errorf("failed to decode request: %v", err))
}
defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_batch_claim"}, time.Now())
for _, req := range batch.Claims {
err := n.state.CSIVolumeClaim(index, req.RequestNamespace(),
req.VolumeID, req.ToClaim())
if err != nil {
n.logger.Error("CSIVolumeClaim for batch failed", "error", err)
return err // note: fails the remaining batch
}
}
return nil
}
func (n *nomadFSM) applyCSIVolumeClaim(buf []byte, index uint64) interface{} {
var req structs.CSIVolumeClaimRequest
if err := structs.Decode(buf, &req); err != nil {
@ -1163,26 +1183,10 @@ func (n *nomadFSM) applyCSIVolumeClaim(buf []byte, index uint64) interface{} {
}
defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_claim"}, time.Now())
ws := memdb.NewWatchSet()
alloc, err := n.state.AllocByID(ws, req.AllocationID)
if err != nil {
n.logger.Error("AllocByID failed", "error", err)
return err
}
if alloc == nil {
n.logger.Error("AllocByID failed to find alloc", "alloc_id", req.AllocationID)
if err != nil {
return err
}
return structs.ErrUnknownAllocationPrefix
}
if err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), req.VolumeID, alloc, req.Claim); err != nil {
if err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), req.VolumeID, req.ToClaim()); err != nil {
n.logger.Error("CSIVolumeClaim failed", "error", err)
return err
}
return nil
}
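
applyCSIVolumeBatchClaim applies each claim in order and aborts on the first failure, so the remaining claims in that batch are not applied. A minimal sketch of that contract, with `claim` and the `apply` callback as illustrative stand-ins for the real request type and state-store call.

package main

import "fmt"

// claim is a stand-in for structs.CSIVolumeClaimRequest.
type claim struct {
	Namespace string
	VolumeID  string
}

// applyBatch mirrors applyCSIVolumeBatchClaim: claims are applied in
// order and the first failure fails the remaining batch.
func applyBatch(claims []claim, apply func(claim) error) error {
	for _, c := range claims {
		if err := apply(c); err != nil {
			return err // note: fails the remaining batch
		}
	}
	return nil
}

func main() {
	err := applyBatch(
		[]claim{{"default", "vol-a"}, {"default", "vol-b"}},
		func(c claim) error {
			fmt.Println("claim applied for", c.VolumeID)
			return nil
		},
	)
	fmt.Println(err)
}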

View File

@ -1,11 +0,0 @@
package nomad
import "github.com/hashicorp/nomad/nomad/state"
// RPCServer is a minimal interface of the Server, intended as
// an aid for testing logic surrounding server-to-server or
// server-to-client RPC calls
type RPCServer interface {
RPC(method string, args interface{}, reply interface{}) error
State() *state.StateStore
}

View File

@ -737,19 +737,13 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD
for _, vol := range volumesToGC {
// we have to build this eval by hand rather than calling srv.CoreJob
// here because we need to use the volume's namespace
runningAllocs := ":ok"
if args.Purge {
runningAllocs = ":purge"
}
eval := &structs.Evaluation{
ID: uuid.Generate(),
Namespace: job.Namespace,
Priority: structs.CoreJobPriority,
Type: structs.JobTypeCore,
TriggeredBy: structs.EvalTriggerAllocStop,
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + vol.Source + runningAllocs,
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + vol.Source,
LeaderACL: j.srv.getLeaderAcl(),
Status: structs.EvalStatusPending,
CreateTime: now,
@ -1806,10 +1800,6 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
reply.JobScaleStatus = nil
return nil
}
deployment, err := state.LatestDeploymentByJobID(ws, args.RequestNamespace(), args.JobID)
if err != nil {
return err
}
events, eventsIndex, err := state.ScalingEventsByJob(ws, args.RequestNamespace(), args.JobID)
if err != nil {
@ -1819,6 +1809,13 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
events = make(map[string][]*structs.ScalingEvent)
}
var allocs []*structs.Allocation
var allocsIndex uint64
allocs, err = state.AllocsByJob(ws, job.Namespace, job.ID, false)
if err != nil {
return err
}
// Setup the output
reply.JobScaleStatus = &structs.JobScaleStatus{
JobID: job.ID,
@ -1832,24 +1829,45 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
tgScale := &structs.TaskGroupScaleStatus{
Desired: tg.Count,
}
if deployment != nil {
if ds, ok := deployment.TaskGroups[tg.Name]; ok {
tgScale.Placed = ds.PlacedAllocs
tgScale.Healthy = ds.HealthyAllocs
tgScale.Unhealthy = ds.UnhealthyAllocs
}
}
tgScale.Events = events[tg.Name]
reply.JobScaleStatus.TaskGroups[tg.Name] = tgScale
}
maxIndex := job.ModifyIndex
if deployment != nil && deployment.ModifyIndex > maxIndex {
maxIndex = deployment.ModifyIndex
for _, alloc := range allocs {
// TODO: ignore canaries until we figure out what we should do with canaries
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
continue
}
if alloc.TerminalStatus() {
continue
}
tgScale, ok := reply.JobScaleStatus.TaskGroups[alloc.TaskGroup]
if !ok || tgScale == nil {
continue
}
tgScale.Placed++
if alloc.ClientStatus == structs.AllocClientStatusRunning {
tgScale.Running++
}
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.HasHealth() {
if alloc.DeploymentStatus.IsHealthy() {
tgScale.Healthy++
} else if alloc.DeploymentStatus.IsUnhealthy() {
tgScale.Unhealthy++
}
}
if alloc.ModifyIndex > allocsIndex {
allocsIndex = alloc.ModifyIndex
}
}
maxIndex := job.ModifyIndex
if eventsIndex > maxIndex {
maxIndex = eventsIndex
}
if allocsIndex > maxIndex {
maxIndex = allocsIndex
}
reply.Index = maxIndex
// Set the query response
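
Scale status is now computed from live allocations rather than the latest deployment: canaries and terminal allocs are skipped, everything else counts as Placed, running allocs as Running, and recorded deployment health feeds Healthy/Unhealthy. A self-contained sketch of that accounting; `allocView` is an illustrative stand-in for structs.Allocation.

package main

import "fmt"

// allocView carries just the fields the new ScaleStatus loop inspects.
type allocView struct {
	TaskGroup string
	Terminal  bool
	Running   bool
	Canary    bool
	Healthy   *bool // nil means no deployment health recorded
}

type groupScale struct {
	Placed, Running, Healthy, Unhealthy int
}

// countAllocs mirrors the per-allocation accounting added to ScaleStatus.
// The real code only fills groups that already exist on the job; groups
// are created lazily here to keep the sketch short.
func countAllocs(allocs []allocView) map[string]*groupScale {
	out := map[string]*groupScale{}
	for _, a := range allocs {
		if a.Canary || a.Terminal {
			continue
		}
		g, ok := out[a.TaskGroup]
		if !ok {
			g = &groupScale{}
			out[a.TaskGroup] = g
		}
		g.Placed++
		if a.Running {
			g.Running++
		}
		if a.Healthy != nil {
			if *a.Healthy {
				g.Healthy++
			} else {
				g.Unhealthy++
			}
		}
	}
	return out
}

func main() {
	yes, no := true, false
	got := countAllocs([]allocView{
		{TaskGroup: "group", Running: true, Healthy: &yes},
		{TaskGroup: "group", Healthy: &no},
		{TaskGroup: "group", Running: true, Canary: true},
		{TaskGroup: "group", Running: true},
	})
	fmt.Printf("%+v\n", *got["group"]) // {Placed:3 Running:2 Healthy:1 Unhealthy:1}
}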

View File

@ -1,9 +1,11 @@
package nomad
import (
"fmt"
"strconv"
"strings"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/pkg/errors"
)
@ -197,6 +199,21 @@ func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *struct
return nil, nil
}
// If the check is exposable but doesn't have a port label set, build
// a port with a generated label, add it to the group's Dynamic ports
// and set the check port label to the generated label.
//
// This lets PortLabel be optional for any exposed check.
if check.PortLabel == "" {
port := structs.Port{
Label: fmt.Sprintf("svc_%s_ck_%s", s.Name, uuid.Generate()[:6]),
To: -1,
}
tg.Networks[0].DynamicPorts = append(tg.Networks[0].DynamicPorts, port)
check.PortLabel = port.Label
}
// Determine the local service port (i.e. what port the service is actually
// listening to inside the network namespace).
//
@ -216,9 +233,7 @@ func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *struct
}
// The Path, Protocol, and PortLabel are just copied over from the service
// check definition. It is required that the user configure their own port
// mapping for each check, including setting the 'to = -1' sentinel value
// enabling the network namespace pass-through.
// check definition.
return &structs.ConsulExposePath{
Path: check.Path,
Protocol: check.Protocol,
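
When an exposable check has no port label, the hook now invents a dynamic port (label `svc_<service>_ck_<short id>`, `to = -1` for the namespace pass-through) and points the check at it. A sketch of that step; the random suffix below stands in for `uuid.Generate()[:6]` and `port` for structs.Port.

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// port is a stand-in for structs.Port.
type port struct {
	Label string
	To    int
}

// ensureCheckPortLabel mirrors the new branch in exposePathForCheck: a
// check without a port label gets a generated dynamic port (To: -1 keeps
// the network namespace pass-through) and the check is pointed at it.
func ensureCheckPortLabel(serviceName, checkPortLabel string, dynamicPorts *[]port) (string, error) {
	if checkPortLabel != "" {
		return checkPortLabel, nil
	}
	buf := make([]byte, 3)
	if _, err := rand.Read(buf); err != nil {
		return "", err
	}
	p := port{
		Label: fmt.Sprintf("svc_%s_ck_%s", serviceName, hex.EncodeToString(buf)),
		To:    -1,
	}
	*dynamicPorts = append(*dynamicPorts, p)
	return p.Label, nil
}

func main() {
	var ports []port
	label, err := ensureCheckPortLabel("service1", "", &ports)
	fmt.Println(label, len(ports), err) // e.g. svc_service1_ck_a1b2c3 1 <nil>
}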

View File

@ -346,6 +346,36 @@ func TestJobExposeCheckHook_exposePathForCheck(t *testing.T) {
}, s, c)
require.EqualError(t, err, `unable to determine local service port for service check group1->service1->check1`)
})
t.Run("empty check port", func(t *testing.T) {
c := &structs.ServiceCheck{
Name: "check1",
Type: "http",
Path: "/health",
}
s := &structs.Service{
Name: "service1",
PortLabel: "9999",
Checks: []*structs.ServiceCheck{c},
}
tg := &structs.TaskGroup{
Name: "group1",
Services: []*structs.Service{s},
Networks: structs.Networks{{
Mode: "bridge",
DynamicPorts: []structs.Port{},
}},
}
ePath, err := exposePathForCheck(tg, s, c)
require.NoError(t, err)
require.Len(t, tg.Networks[0].DynamicPorts, 1)
require.Equal(t, &structs.ConsulExposePath{
Path: "/health",
Protocol: "",
LocalPathPort: 9999,
ListenerPort: tg.Networks[0].DynamicPorts[0].Label,
}, ePath)
})
}
func TestJobExposeCheckHook_containsExposePath(t *testing.T) {

View File

@ -5627,42 +5627,104 @@ func TestJobEndpoint_GetScaleStatus(t *testing.T) {
testutil.WaitForLeader(t, s1.RPC)
state := s1.fsm.State()
job := mock.Job()
jobV1 := mock.Job()
// check before job registration
// check before registration
// Fetch the scaling status
get := &structs.JobScaleStatusRequest{
JobID: job.ID,
JobID: jobV1.ID,
QueryOptions: structs.QueryOptions{
Region: "global",
Namespace: job.Namespace,
Namespace: jobV1.Namespace,
},
}
var resp2 structs.JobScaleStatusResponse
require.NoError(msgpackrpc.CallWithCodec(codec, "Job.ScaleStatus", get, &resp2))
require.Nil(resp2.JobScaleStatus)
// Create the register request
err := state.UpsertJob(1000, job)
require.Nil(err)
// stopped (previous version)
require.NoError(state.UpsertJob(1000, jobV1), "UpsertJob")
a0 := mock.Alloc()
a0.Job = jobV1
a0.Namespace = jobV1.Namespace
a0.JobID = jobV1.ID
a0.ClientStatus = structs.AllocClientStatusComplete
require.NoError(state.UpsertAllocs(1010, []*structs.Allocation{a0}), "UpsertAllocs")
jobV2 := jobV1.Copy()
require.NoError(state.UpsertJob(1100, jobV2), "UpsertJob")
a1 := mock.Alloc()
a1.Job = jobV2
a1.Namespace = jobV2.Namespace
a1.JobID = jobV2.ID
a1.ClientStatus = structs.AllocClientStatusRunning
// healthy
a1.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
}
a2 := mock.Alloc()
a2.Job = jobV2
a2.Namespace = jobV2.Namespace
a2.JobID = jobV2.ID
a2.ClientStatus = structs.AllocClientStatusPending
// unhealthy
a2.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(false),
}
a3 := mock.Alloc()
a3.Job = jobV2
a3.Namespace = jobV2.Namespace
a3.JobID = jobV2.ID
a3.ClientStatus = structs.AllocClientStatusRunning
// canary
a3.DeploymentStatus = &structs.AllocDeploymentStatus{
Healthy: helper.BoolToPtr(true),
Canary: true,
}
// no health
a4 := mock.Alloc()
a4.Job = jobV2
a4.Namespace = jobV2.Namespace
a4.JobID = jobV2.ID
a4.ClientStatus = structs.AllocClientStatusRunning
// upsert allocations
require.NoError(state.UpsertAllocs(1110, []*structs.Allocation{a1, a2, a3, a4}), "UpsertAllocs")
event := &structs.ScalingEvent{
Time: time.Now().Unix(),
Count: helper.Int64ToPtr(5),
Message: "message",
Error: false,
Meta: map[string]interface{}{
"a": "b",
},
EvalID: nil,
}
require.NoError(state.UpsertScalingEvent(1003, &structs.ScalingEventRequest{
Namespace: jobV2.Namespace,
JobID: jobV2.ID,
TaskGroup: jobV2.TaskGroups[0].Name,
ScalingEvent: event,
}), "UpsertScalingEvent")
// check after job registration
require.NoError(msgpackrpc.CallWithCodec(codec, "Job.ScaleStatus", get, &resp2))
require.NotNil(resp2.JobScaleStatus)
expectedStatus := structs.JobScaleStatus{
JobID: job.ID,
JobCreateIndex: job.CreateIndex,
JobModifyIndex: job.ModifyIndex,
JobStopped: job.Stop,
JobID: jobV2.ID,
JobCreateIndex: jobV2.CreateIndex,
JobModifyIndex: a1.CreateIndex,
JobStopped: jobV2.Stop,
TaskGroups: map[string]*structs.TaskGroupScaleStatus{
job.TaskGroups[0].Name: {
Desired: job.TaskGroups[0].Count,
Placed: 0,
Running: 0,
Healthy: 0,
Unhealthy: 0,
Events: nil,
jobV2.TaskGroups[0].Name: {
Desired: jobV2.TaskGroups[0].Count,
Placed: 3,
Running: 2,
Healthy: 1,
Unhealthy: 1,
Events: []*structs.ScalingEvent{event},
},
},
}

View File

@ -241,6 +241,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
// Enable the NodeDrainer
s.nodeDrainer.SetEnabled(true, s.State())
// Enable the volume watcher, since we are now the leader
s.volumeWatcher.SetEnabled(true, s.State())
// Restore the eval broker state
if err := s.restoreEvals(); err != nil {
return err
@ -870,6 +873,9 @@ func (s *Server) revokeLeadership() error {
// Disable the node drainer
s.nodeDrainer.SetEnabled(false, nil)
// Disable the volume watcher
s.volumeWatcher.SetEnabled(false, nil)
// Disable any enterprise systems required.
if err := s.revokeEnterpriseLeadership(); err != nil {
return err
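
The volume watcher follows the same leadership-gated lifecycle as the deployment watcher and node drainer: enabled when this server becomes leader and disabled when leadership is lost, so claim updates are only written to raft by the leader. A generic sketch of that enable/disable pattern, not the watcher's actual implementation; the real SetEnabled also receives the leader's state store.

package main

import (
	"context"
	"fmt"
	"sync"
)

// watcher sketches the leadership-gated lifecycle: SetEnabled(true) in
// establishLeadership starts the run loop, SetEnabled(false) in
// revokeLeadership cancels it.
type watcher struct {
	mu     sync.Mutex
	wg     sync.WaitGroup
	cancel context.CancelFunc
}

func (w *watcher) SetEnabled(enabled bool) {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.cancel != nil { // always stop any previous run loop
		w.cancel()
		w.cancel = nil
	}
	if enabled {
		ctx, cancel := context.WithCancel(context.Background())
		w.cancel = cancel
		w.wg.Add(1)
		go w.run(ctx)
	}
}

func (w *watcher) run(ctx context.Context) {
	defer w.wg.Done()
	<-ctx.Done() // placeholder for the watch loop
}

func main() {
	w := &watcher{}
	w.SetEnabled(true)  // establishLeadership
	w.SetEnabled(false) // revokeLeadership
	w.wg.Wait()
	fmt.Println("watcher stopped with loss of leadership")
}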

View File

@ -1313,6 +1313,9 @@ func CSIVolume(plugin *structs.CSIPlugin) *structs.CSIVolume {
MountOptions: &structs.CSIMountOptions{},
ReadAllocs: map[string]*structs.Allocation{},
WriteAllocs: map[string]*structs.Allocation{},
ReadClaims: map[string]*structs.CSIVolumeClaim{},
WriteClaims: map[string]*structs.CSIVolumeClaim{},
PastClaims: map[string]*structs.CSIVolumeClaim{},
PluginID: plugin.ID,
Provider: plugin.Provider,
ProviderVersion: plugin.Version,

View File

@ -1149,7 +1149,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
Priority: structs.CoreJobPriority,
Type: structs.JobTypeCore,
TriggeredBy: structs.EvalTriggerAllocStop,
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + volAndNamespace[0] + ":no",
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + volAndNamespace[0],
LeaderACL: n.srv.getLeaderAcl(),
Status: structs.EvalStatusPending,
CreateTime: now.UTC().UnixNano(),

View File

@ -2381,9 +2381,17 @@ func TestClientEndpoint_UpdateAlloc_UnclaimVolumes(t *testing.T) {
require.NoError(t, err)
// Claim the volumes and verify the claims were set
err = state.CSIVolumeClaim(105, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
err = state.CSIVolumeClaim(105, ns, volId0, &structs.CSIVolumeClaim{
AllocationID: alloc1.ID,
NodeID: alloc1.NodeID,
Mode: structs.CSIVolumeClaimWrite,
})
require.NoError(t, err)
err = state.CSIVolumeClaim(106, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
err = state.CSIVolumeClaim(106, ns, volId0, &structs.CSIVolumeClaim{
AllocationID: alloc2.ID,
NodeID: alloc2.NodeID,
Mode: structs.CSIVolumeClaimRead,
})
require.NoError(t, err)
vol, err = state.CSIVolumeByID(ws, ns, volId0)
require.NoError(t, err)
@ -2406,7 +2414,7 @@ func TestClientEndpoint_UpdateAlloc_UnclaimVolumes(t *testing.T) {
// Verify the eval for the claim GC was emitted
// Lookup the evaluations
eval, err := state.EvalsByJob(ws, job.Namespace, structs.CoreJobCSIVolumeClaimGC+":"+volId0+":no")
eval, err := state.EvalsByJob(ws, job.Namespace, structs.CoreJobCSIVolumeClaimGC+":"+volId0)
require.NotNil(t, eval)
require.Nil(t, err)
}

View File

@ -35,6 +35,7 @@ import (
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/nomad/volumewatcher"
"github.com/hashicorp/nomad/scheduler"
"github.com/hashicorp/raft"
raftboltdb "github.com/hashicorp/raft-boltdb"
@ -186,6 +187,9 @@ type Server struct {
// nodeDrainer is used to drain allocations from nodes.
nodeDrainer *drainer.NodeDrainer
// volumeWatcher is used to release volume claims
volumeWatcher *volumewatcher.Watcher
// evalBroker is used to manage the in-progress evaluations
// that are waiting to be brokered to a sub-scheduler
evalBroker *EvalBroker
@ -399,6 +403,12 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulACLs consu
return nil, fmt.Errorf("failed to create deployment watcher: %v", err)
}
// Setup the volume watcher
if err := s.setupVolumeWatcher(); err != nil {
s.logger.Error("failed to create volume watcher", "error", err)
return nil, fmt.Errorf("failed to create volume watcher: %v", err)
}
// Setup the node drainer.
s.setupNodeDrainer()
@ -993,6 +1003,27 @@ func (s *Server) setupDeploymentWatcher() error {
return nil
}
// setupVolumeWatcher creates a volume watcher that consumes the RPC
// endpoints for state information and makes transitions via Raft through a
// shim that provides the appropriate methods.
func (s *Server) setupVolumeWatcher() error {
// Create the raft shim type to restrict the set of raft methods that can be
// made
raftShim := &volumeWatcherRaftShim{
apply: s.raftApply,
}
// Create the volume watcher
s.volumeWatcher = volumewatcher.NewVolumesWatcher(
s.logger, raftShim,
s.staticEndpoints.ClientCSI,
volumewatcher.LimitStateQueriesPerSecond,
volumewatcher.CrossVolumeUpdateBatchDuration)
return nil
}
// setupNodeDrainer creates a node drainer which will be enabled when a server
// becomes a leader.
func (s *Server) setupNodeDrainer() {
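
The raft shim hands the volume watcher only the one write it needs, keeping the watcher decoupled from the server's full raft surface. A sketch of the shape of that shim; the method name shown is taken from the batcher below, while the string message type is illustrative (the real shim wraps s.raftApply with structs message types).

package main

import "fmt"

// volumeRaftEndpoints is the narrow surface the volume watcher needs.
type volumeRaftEndpoints interface {
	UpsertVolumeClaims(batch interface{}) (uint64, error)
}

// raftShim adapts a server-style apply function to that narrow interface
// so the watcher cannot issue arbitrary raft writes.
type raftShim struct {
	apply func(msgType string, req interface{}) (interface{}, uint64, error)
}

func (s *raftShim) UpsertVolumeClaims(batch interface{}) (uint64, error) {
	_, index, err := s.apply("CSIVolumeClaimBatchRequestType", batch)
	return index, err
}

func main() {
	shim := &raftShim{
		apply: func(msgType string, req interface{}) (interface{}, uint64, error) {
			fmt.Println("raft apply:", msgType)
			return nil, 7, nil
		},
	}
	var _ volumeRaftEndpoints = shim
	index, err := shim.UpsertVolumeClaims(struct{}{})
	fmt.Println(index, err) // 7 <nil>
}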

View File

@ -1187,15 +1187,14 @@ func (s *StateStore) deleteJobFromPlugin(index uint64, txn *memdb.Txn, job *stru
plugins := map[string]*structs.CSIPlugin{}
for _, a := range allocs {
tg := job.LookupTaskGroup(a.TaskGroup)
// if it's nil, we can just panic
tg := a.Job.LookupTaskGroup(a.TaskGroup)
for _, t := range tg.Tasks {
if t.CSIPluginConfig != nil {
plugAllocs = append(plugAllocs, &pair{
pluginID: t.CSIPluginConfig.ID,
alloc: a,
})
}
}
}
@ -1479,16 +1478,10 @@ func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn
return fmt.Errorf("index update failed: %v", err)
}
// Delete any job scaling policies
numDeletedScalingPolicies, err := txn.DeleteAll("scaling_policy", "target_prefix", namespace, jobID)
if err != nil {
// Delete any remaining job scaling policies
if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
return fmt.Errorf("deleting job scaling policies failed: %v", err)
}
if numDeletedScalingPolicies > 0 {
if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
return fmt.Errorf("index update failed: %v", err)
}
}
// Delete the scaling events
if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil {
@ -1507,6 +1500,20 @@ func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn
return nil
}
// deleteJobScalingPolicies deletes any scaling policies associated with the job
func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *memdb.Txn) error {
numDeletedScalingPolicies, err := txn.DeleteAll("scaling_policy", "target_prefix", job.Namespace, job.ID)
if err != nil {
return fmt.Errorf("deleting job scaling policies failed: %v", err)
}
if numDeletedScalingPolicies > 0 {
if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
return fmt.Errorf("index update failed: %v", err)
}
}
return nil
}
// deleteJobVersions deletes all versions of the given job.
func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
@ -2018,9 +2025,10 @@ func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace string)
}
// CSIVolumeClaim updates the volume's claim count and allocation list
func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *structs.Allocation, claim structs.CSIVolumeClaimMode) error {
func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error {
txn := s.db.Txn(true)
defer txn.Abort()
ws := memdb.NewWatchSet()
row, err := txn.First("csi_volumes", "id", namespace, id)
if err != nil {
@ -2035,7 +2043,21 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *s
return fmt.Errorf("volume row conversion error")
}
ws := memdb.NewWatchSet()
var alloc *structs.Allocation
if claim.Mode != structs.CSIVolumeClaimRelease {
alloc, err = s.AllocByID(ws, claim.AllocationID)
if err != nil {
s.logger.Error("AllocByID failed", "error", err)
return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
}
if alloc == nil {
s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID)
if err != nil {
return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
}
}
}
volume, err := s.CSIVolumeDenormalizePlugins(ws, orig.Copy())
if err != nil {
return err
@ -2046,9 +2068,14 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *s
return err
}
err = volume.Claim(claim, alloc)
if err != nil {
return err
// in the case of a job deregistration, there will be no allocation ID
// for the claim but we still want to write an updated index to the volume
// so that volume reaping is triggered
if claim.AllocationID != "" {
err = volume.Claim(claim, alloc)
if err != nil {
return err
}
}
volume.ModifyIndex = index
@ -2144,14 +2171,27 @@ func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs
return vol, nil
}
// csiVolumeDenormalizeAllocs returns a CSIVolume with allocations
// CSIVolumeDenormalize returns a CSIVolume with allocations
func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
for id := range vol.ReadAllocs {
a, err := s.AllocByID(ws, id)
if err != nil {
return nil, err
}
vol.ReadAllocs[id] = a
if a != nil {
vol.ReadAllocs[id] = a
// COMPAT(1.0): the CSIVolumeClaim fields were added
// after 0.11.1, so claims made before that may be
// missing this value. (same for WriteAlloc below)
if _, ok := vol.ReadClaims[id]; !ok {
vol.ReadClaims[id] = &structs.CSIVolumeClaim{
AllocationID: a.ID,
NodeID: a.NodeID,
Mode: structs.CSIVolumeClaimRead,
State: structs.CSIVolumeClaimStateTaken,
}
}
}
}
for id := range vol.WriteAllocs {
@ -2159,7 +2199,17 @@ func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVol
if err != nil {
return nil, err
}
vol.WriteAllocs[id] = a
if a != nil {
vol.WriteAllocs[id] = a
if _, ok := vol.WriteClaims[id]; !ok {
vol.WriteClaims[id] = &structs.CSIVolumeClaim{
AllocationID: a.ID,
NodeID: a.NodeID,
Mode: structs.CSIVolumeClaimWrite,
State: structs.CSIVolumeClaimStateTaken,
}
}
}
}
return vol, nil
@ -4244,6 +4294,13 @@ func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, tx
ws := memdb.NewWatchSet()
if job.Stop {
if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
return fmt.Errorf("deleting job scaling policies failed: %v", err)
}
return nil
}
scalingPolicies := job.GetScalingPolicies()
newTargets := map[string]struct{}{}
for _, p := range scalingPolicies {
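
CSIVolumeDenormalize now also backfills claim records for volumes written before the CSIVolumeClaim fields existed (0.11.1 and earlier), synthesizing a claim from whatever allocation is still attached. A reduced sketch of that COMPAT step, with stand-in types for the claim and allocation.

package main

import "fmt"

type claimMode int

const (
	claimRead claimMode = iota
	claimWrite
)

// volumeClaim and alloc stand in for structs.CSIVolumeClaim and
// structs.Allocation; only the fields touched by the backfill are kept.
type volumeClaim struct {
	AllocationID string
	NodeID       string
	Mode         claimMode
	Taken        bool
}

type alloc struct {
	ID, NodeID string
}

// backfillClaims mirrors the COMPAT(1.0) logic in CSIVolumeDenormalize:
// when a volume has an allocation but no matching claim record, a claim
// in the "taken" state is synthesized from the allocation.
func backfillClaims(allocs map[string]*alloc, claims map[string]*volumeClaim, mode claimMode) {
	for id, a := range allocs {
		if a == nil {
			continue // alloc was garbage collected concurrently
		}
		if _, ok := claims[id]; !ok {
			claims[id] = &volumeClaim{
				AllocationID: a.ID,
				NodeID:       a.NodeID,
				Mode:         mode,
				Taken:        true,
			}
		}
	}
}

func main() {
	readAllocs := map[string]*alloc{"a1": {ID: "a1", NodeID: "n1"}}
	readClaims := map[string]*volumeClaim{}
	backfillClaims(readAllocs, readClaims, claimRead)
	fmt.Printf("%+v\n", *readClaims["a1"])
}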

View File

@ -2941,18 +2941,33 @@ func TestStateStore_CSIVolume(t *testing.T) {
vs = slurp(iter)
require.Equal(t, 1, len(vs))
// Allocs
a0 := mock.Alloc()
a1 := mock.Alloc()
index++
err = state.UpsertAllocs(index, []*structs.Allocation{a0, a1})
require.NoError(t, err)
// Claims
a0 := &structs.Allocation{ID: uuid.Generate()}
a1 := &structs.Allocation{ID: uuid.Generate()}
r := structs.CSIVolumeClaimRead
w := structs.CSIVolumeClaimWrite
u := structs.CSIVolumeClaimRelease
claim0 := &structs.CSIVolumeClaim{
AllocationID: a0.ID,
NodeID: node.ID,
Mode: r,
}
claim1 := &structs.CSIVolumeClaim{
AllocationID: a1.ID,
NodeID: node.ID,
Mode: w,
}
index++
err = state.CSIVolumeClaim(index, ns, vol0, a0, r)
err = state.CSIVolumeClaim(index, ns, vol0, claim0)
require.NoError(t, err)
index++
err = state.CSIVolumeClaim(index, ns, vol0, a1, w)
err = state.CSIVolumeClaim(index, ns, vol0, claim1)
require.NoError(t, err)
ws = memdb.NewWatchSet()
@ -2961,7 +2976,8 @@ func TestStateStore_CSIVolume(t *testing.T) {
vs = slurp(iter)
require.False(t, vs[0].WriteFreeClaims())
err = state.CSIVolumeClaim(2, ns, vol0, a0, u)
claim0.Mode = u
err = state.CSIVolumeClaim(2, ns, vol0, claim0)
require.NoError(t, err)
ws = memdb.NewWatchSet()
iter, err = state.CSIVolumesByPluginID(ws, ns, "minnie")
@ -2980,10 +2996,13 @@ func TestStateStore_CSIVolume(t *testing.T) {
// release claims to unblock deregister
index++
err = state.CSIVolumeClaim(index, ns, vol0, a0, u)
claim0.State = structs.CSIVolumeClaimStateReadyToFree
err = state.CSIVolumeClaim(index, ns, vol0, claim0)
require.NoError(t, err)
index++
err = state.CSIVolumeClaim(index, ns, vol0, a1, u)
claim1.Mode = u
claim1.State = structs.CSIVolumeClaimStateReadyToFree
err = state.CSIVolumeClaim(index, ns, vol0, claim1)
require.NoError(t, err)
index++
@ -8427,7 +8446,96 @@ func TestStateStore_DeleteScalingPolicies(t *testing.T) {
require.False(watchFired(ws))
}
func TestStateStore_DeleteJob_ChildScalingPolicies(t *testing.T) {
func TestStateStore_StopJob_DeleteScalingPolicies(t *testing.T) {
t.Parallel()
require := require.New(t)
state := testStateStore(t)
job := mock.Job()
err := state.UpsertJob(1000, job)
require.NoError(err)
policy := mock.ScalingPolicy()
policy.Target[structs.ScalingTargetJob] = job.ID
err = state.UpsertScalingPolicies(1100, []*structs.ScalingPolicy{policy})
require.NoError(err)
// Ensure the scaling policy is present and start some watches
wsGet := memdb.NewWatchSet()
out, err := state.ScalingPolicyByTarget(wsGet, policy.Target)
require.NoError(err)
require.NotNil(out)
wsList := memdb.NewWatchSet()
_, err = state.ScalingPolicies(wsList)
require.NoError(err)
// Stop the job
job, err = state.JobByID(nil, job.Namespace, job.ID)
require.NoError(err)
job.Stop = true
err = state.UpsertJob(1200, job)
require.NoError(err)
// Ensure:
// * the scaling policy was deleted
// * the watches were fired
// * the table index was advanced
require.True(watchFired(wsGet))
require.True(watchFired(wsList))
out, err = state.ScalingPolicyByTarget(nil, policy.Target)
require.NoError(err)
require.Nil(out)
index, err := state.Index("scaling_policy")
require.GreaterOrEqual(index, uint64(1200))
}
func TestStateStore_UnstopJob_UpsertScalingPolicies(t *testing.T) {
t.Parallel()
require := require.New(t)
state := testStateStore(t)
job, policy := mock.JobWithScalingPolicy()
job.Stop = true
// establish watcher, verify there are no scaling policies yet
ws := memdb.NewWatchSet()
list, err := state.ScalingPolicies(ws)
require.NoError(err)
require.Nil(list.Next())
// upsert a stopped job, verify that we don't fire the watcher or add any scaling policies
err = state.UpsertJob(1000, job)
require.NoError(err)
require.False(watchFired(ws))
// stopped job should have no scaling policies, watcher doesn't fire
list, err = state.ScalingPolicies(ws)
require.NoError(err)
require.Nil(list.Next())
// Establish a new watcher
ws = memdb.NewWatchSet()
_, err = state.ScalingPolicies(ws)
require.NoError(err)
// Unstop this job, say you'll run it again...
job.Stop = false
err = state.UpsertJob(1100, job)
require.NoError(err)
// Ensure the scaling policy was added, watch was fired, index was advanced
require.True(watchFired(ws))
out, err := state.ScalingPolicyByTarget(nil, policy.Target)
require.NoError(err)
require.NotNil(out)
index, err := state.Index("scaling_policy")
require.GreaterOrEqual(index, uint64(1100))
}
func TestStateStore_DeleteJob_DeleteScalingPolicies(t *testing.T) {
t.Parallel()
require := require.New(t)

View File

@ -185,6 +185,22 @@ func (v *CSIMountOptions) GoString() string {
return v.String()
}
type CSIVolumeClaim struct {
AllocationID string
NodeID string
Mode CSIVolumeClaimMode
State CSIVolumeClaimState
}
type CSIVolumeClaimState int
const (
CSIVolumeClaimStateTaken CSIVolumeClaimState = iota
CSIVolumeClaimStateNodeDetached
CSIVolumeClaimStateControllerDetached
CSIVolumeClaimStateReadyToFree
)
// CSIVolume is the full representation of a CSI Volume
type CSIVolume struct {
// ID is a namespace unique URL safe identifier for the volume
@ -200,8 +216,12 @@ type CSIVolume struct {
MountOptions *CSIMountOptions
// Allocations, tracking claim status
ReadAllocs map[string]*Allocation
WriteAllocs map[string]*Allocation
ReadAllocs map[string]*Allocation // AllocID -> Allocation
WriteAllocs map[string]*Allocation // AllocID -> Allocation
ReadClaims map[string]*CSIVolumeClaim // AllocID -> claim
WriteClaims map[string]*CSIVolumeClaim // AllocID -> claim
PastClaims map[string]*CSIVolumeClaim // AllocID -> claim
// Schedulable is true if all the denormalized plugin health fields are true, and the
// volume has not been marked for garbage collection
@ -262,6 +282,10 @@ func (v *CSIVolume) newStructs() {
v.ReadAllocs = map[string]*Allocation{}
v.WriteAllocs = map[string]*Allocation{}
v.ReadClaims = map[string]*CSIVolumeClaim{}
v.WriteClaims = map[string]*CSIVolumeClaim{}
v.PastClaims = map[string]*CSIVolumeClaim{}
}
func (v *CSIVolume) RemoteID() string {
@ -350,27 +374,43 @@ func (v *CSIVolume) Copy() *CSIVolume {
out.WriteAllocs[k] = v
}
for k, v := range v.ReadClaims {
claim := *v
out.ReadClaims[k] = &claim
}
for k, v := range v.WriteClaims {
claim := *v
out.WriteClaims[k] = &claim
}
for k, v := range v.PastClaims {
claim := *v
out.PastClaims[k] = &claim
}
return out
}
// Claim updates the allocations and changes the volume state
func (v *CSIVolume) Claim(claim CSIVolumeClaimMode, alloc *Allocation) error {
switch claim {
func (v *CSIVolume) Claim(claim *CSIVolumeClaim, alloc *Allocation) error {
switch claim.Mode {
case CSIVolumeClaimRead:
return v.ClaimRead(alloc)
return v.ClaimRead(claim, alloc)
case CSIVolumeClaimWrite:
return v.ClaimWrite(alloc)
return v.ClaimWrite(claim, alloc)
case CSIVolumeClaimRelease:
return v.ClaimRelease(alloc)
return v.ClaimRelease(claim)
}
return nil
}
// ClaimRead marks an allocation as using a volume read-only
func (v *CSIVolume) ClaimRead(alloc *Allocation) error {
if _, ok := v.ReadAllocs[alloc.ID]; ok {
func (v *CSIVolume) ClaimRead(claim *CSIVolumeClaim, alloc *Allocation) error {
if _, ok := v.ReadAllocs[claim.AllocationID]; ok {
return nil
}
if alloc == nil {
return fmt.Errorf("allocation missing: %s", claim.AllocationID)
}
if !v.ReadSchedulable() {
return fmt.Errorf("unschedulable")
@ -378,16 +418,24 @@ func (v *CSIVolume) ClaimRead(alloc *Allocation) error {
// Allocations are copy on write, so we want to keep the id but don't need the
// pointer. We'll get it from the db in denormalize.
v.ReadAllocs[alloc.ID] = nil
delete(v.WriteAllocs, alloc.ID)
v.ReadAllocs[claim.AllocationID] = nil
delete(v.WriteAllocs, claim.AllocationID)
v.ReadClaims[claim.AllocationID] = claim
delete(v.WriteClaims, claim.AllocationID)
delete(v.PastClaims, claim.AllocationID)
return nil
}
// ClaimWrite marks an allocation as using a volume as a writer
func (v *CSIVolume) ClaimWrite(alloc *Allocation) error {
if _, ok := v.WriteAllocs[alloc.ID]; ok {
func (v *CSIVolume) ClaimWrite(claim *CSIVolumeClaim, alloc *Allocation) error {
if _, ok := v.WriteAllocs[claim.AllocationID]; ok {
return nil
}
if alloc == nil {
return fmt.Errorf("allocation missing: %s", claim.AllocationID)
}
if !v.WriteSchedulable() {
return fmt.Errorf("unschedulable")
@ -406,13 +454,26 @@ func (v *CSIVolume) ClaimWrite(alloc *Allocation) error {
// pointer. We'll get it from the db in denormalize.
v.WriteAllocs[alloc.ID] = nil
delete(v.ReadAllocs, alloc.ID)
v.WriteClaims[alloc.ID] = claim
delete(v.ReadClaims, alloc.ID)
delete(v.PastClaims, alloc.ID)
return nil
}
// ClaimRelease is called when the allocation has terminated and already stopped using the volume
func (v *CSIVolume) ClaimRelease(alloc *Allocation) error {
delete(v.ReadAllocs, alloc.ID)
delete(v.WriteAllocs, alloc.ID)
// ClaimRelease is called when the allocation has terminated and
// already stopped using the volume
func (v *CSIVolume) ClaimRelease(claim *CSIVolumeClaim) error {
if claim.State == CSIVolumeClaimStateReadyToFree {
delete(v.ReadAllocs, claim.AllocationID)
delete(v.WriteAllocs, claim.AllocationID)
delete(v.ReadClaims, claim.AllocationID)
delete(v.WriteClaims, claim.AllocationID)
delete(v.PastClaims, claim.AllocationID)
} else {
v.PastClaims[claim.AllocationID] = claim
}
return nil
}
@ -513,13 +574,28 @@ const (
CSIVolumeClaimRelease
)
type CSIVolumeClaimBatchRequest struct {
Claims []CSIVolumeClaimRequest
}
type CSIVolumeClaimRequest struct {
VolumeID string
AllocationID string
NodeID string
Claim CSIVolumeClaimMode
State CSIVolumeClaimState
WriteRequest
}
func (req *CSIVolumeClaimRequest) ToClaim() *CSIVolumeClaim {
return &CSIVolumeClaim{
AllocationID: req.AllocationID,
NodeID: req.NodeID,
Mode: req.Claim,
State: req.State,
}
}
type CSIVolumeClaimResponse struct {
// Opaque static publish properties of the volume. SP MAY use this
// field to ensure subsequent `NodeStageVolume` or `NodePublishVolume`
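
The new claim plumbing is a small state machine: a claim is Taken, then moves through NodeDetached and ControllerDetached to ReadyToFree, and ClaimRelease only drops the claim once it reaches that final state, parking it in PastClaims otherwise so the remaining detach steps can still be tracked. A sketch of the release step with stand-in types.

package main

import "fmt"

type claimState int

const (
	stateTaken claimState = iota
	stateNodeDetached
	stateControllerDetached
	stateReadyToFree
)

// volClaim is a stand-in for structs.CSIVolumeClaim.
type volClaim struct {
	AllocationID string
	State        claimState
}

// vol keeps only the claim maps relevant to release handling.
type vol struct {
	ReadClaims, WriteClaims, PastClaims map[string]*volClaim
}

// release mirrors CSIVolume.ClaimRelease: until the claim reaches
// ReadyToFree it is parked in PastClaims; only then is it dropped.
func (v *vol) release(c *volClaim) {
	if c.State == stateReadyToFree {
		delete(v.ReadClaims, c.AllocationID)
		delete(v.WriteClaims, c.AllocationID)
		delete(v.PastClaims, c.AllocationID)
		return
	}
	v.PastClaims[c.AllocationID] = c
}

func main() {
	v := &vol{
		ReadClaims:  map[string]*volClaim{},
		WriteClaims: map[string]*volClaim{"a1": {AllocationID: "a1"}},
		PastClaims:  map[string]*volClaim{},
	}
	c := &volClaim{AllocationID: "a1", State: stateTaken}
	v.release(c)
	fmt.Println(len(v.PastClaims)) // 1, detach steps still pending

	c.State = stateReadyToFree
	v.release(c)
	fmt.Println(len(v.PastClaims), len(v.WriteClaims)) // 0 0
}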

View File

@ -12,17 +12,28 @@ func TestCSIVolumeClaim(t *testing.T) {
vol.Schedulable = true
alloc := &Allocation{ID: "a1", Namespace: "n", JobID: "j"}
claim := &CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: "foo",
Mode: CSIVolumeClaimRead,
}
require.NoError(t, vol.ClaimRead(alloc))
require.NoError(t, vol.ClaimRead(claim, alloc))
require.True(t, vol.ReadSchedulable())
require.True(t, vol.WriteSchedulable())
require.NoError(t, vol.ClaimRead(alloc))
require.NoError(t, vol.ClaimRead(claim, alloc))
require.NoError(t, vol.ClaimWrite(alloc))
claim.Mode = CSIVolumeClaimWrite
require.NoError(t, vol.ClaimWrite(claim, alloc))
require.True(t, vol.ReadSchedulable())
require.False(t, vol.WriteFreeClaims())
vol.ClaimRelease(alloc)
vol.ClaimRelease(claim)
require.True(t, vol.ReadSchedulable())
require.False(t, vol.WriteFreeClaims())
claim.State = CSIVolumeClaimStateReadyToFree
vol.ClaimRelease(claim)
require.True(t, vol.ReadSchedulable())
require.True(t, vol.WriteFreeClaims())
}

View File

@ -2,5 +2,9 @@
set -e
FILES="$(ls ./*.go | grep -v -e _test.go -e .generated.go | tr '\n' ' ')"
codecgen -d 100 -t codegen_generated -o structs.generated.go ${FILES}
sed -i'' -e 's|"github.com/ugorji/go/codec|"github.com/hashicorp/go-msgpack/codec|g' structs.generated.go
codecgen \
-c github.com/hashicorp/go-msgpack/codec \
-d 100 \
-t codegen_generated \
-o structs.generated.go \
${FILES}

View File

@ -331,7 +331,7 @@ func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResour
// getDynamicPortsPrecise takes the nodes used port bitmap which may be nil if
// no ports have been allocated yet, the network ask and returns a set of unused
// ports to fullfil the ask's DynamicPorts or an error if it failed. An error
// ports to fulfil the ask's DynamicPorts or an error if it failed. An error
// means the ask can not be satisfied as the method does a precise search.
func getDynamicPortsPrecise(nodeUsed Bitmap, ask *NetworkResource) ([]int, error) {
// Create a copy of the used ports and apply the new reserves
@ -373,7 +373,7 @@ func getDynamicPortsPrecise(nodeUsed Bitmap, ask *NetworkResource) ([]int, error
// getDynamicPortsStochastic takes the nodes used port bitmap which may be nil if
// no ports have been allocated yet, the network ask and returns a set of unused
// ports to fullfil the ask's DynamicPorts or an error if it failed. An error
// ports to fulfil the ask's DynamicPorts or an error if it failed. An error
// does not mean the ask can not be satisfied as the method has a fixed amount
// of random probes and if these fail, the search is aborted.
func getDynamicPortsStochastic(nodeUsed Bitmap, ask *NetworkResource) ([]int, error) {

View File

@ -889,7 +889,9 @@ type ConsulProxy struct {
// Expose configures the consul proxy.expose stanza to "open up" endpoints
// used by task-group level service checks using HTTP or gRPC protocols.
Expose *ConsulExposeConfig
//
// Use json tag to match with field name in api/
Expose *ConsulExposeConfig `json:"ExposeConfig"`
// Config is a proxy configuration. It is opaque to Nomad and passed
// directly to Consul.
@ -905,7 +907,7 @@ func (p *ConsulProxy) Copy() *ConsulProxy {
newP := &ConsulProxy{
LocalServiceAddress: p.LocalServiceAddress,
LocalServicePort: p.LocalServicePort,
Expose: p.Expose,
Expose: p.Expose.Copy(),
}
if n := len(p.Upstreams); n > 0 {
@ -1009,7 +1011,8 @@ func (u *ConsulUpstream) Equals(o *ConsulUpstream) bool {
// ExposeConfig represents a Consul Connect expose jobspec stanza.
type ConsulExposeConfig struct {
Paths []ConsulExposePath
// Use json tag to match with field name in api/
Paths []ConsulExposePath `json:"Path"`
}
type ConsulExposePath struct {

View File

@ -90,6 +90,7 @@ const (
CSIVolumeRegisterRequestType
CSIVolumeDeregisterRequestType
CSIVolumeClaimRequestType
CSIVolumeClaimBatchRequestType
ScalingEventRegisterRequestType
)
@ -1708,7 +1709,7 @@ type Node struct {
// COMPAT: Remove in Nomad 0.9
// Drain is controlled by the servers, and not the client.
// If true, no jobs will be scheduled to this node, and existing
// allocations will be drained. Superceded by DrainStrategy in Nomad
// allocations will be drained. Superseded by DrainStrategy in Nomad
// 0.8 but kept for backward compat.
Drain bool

View File

@ -423,7 +423,7 @@ func TestVaultClient_ValidateRole_Deprecated_Success(t *testing.T) {
})
}
func TestVaultClient_ValidateRole_NonExistant(t *testing.T) {
func TestVaultClient_ValidateRole_NonExistent(t *testing.T) {
t.Parallel()
v := testutil.NewTestVault(t)
defer v.Stop()

View File

@ -0,0 +1,125 @@
package volumewatcher
import (
"context"
"time"
"github.com/hashicorp/nomad/nomad/structs"
)
// VolumeUpdateBatcher is used to batch the updates for volume claims
type VolumeUpdateBatcher struct {
// batch is the batching duration
batch time.Duration
// raft is used to actually commit the updates
raft VolumeRaftEndpoints
// workCh is used to pass claim updates to the daemon process
workCh chan *updateWrapper
// ctx is used to exit the daemon batcher
ctx context.Context
}
// NewVolumeUpdateBatcher returns a VolumeUpdateBatcher that uses the
// passed raft endpoints to create the updates to volume claims, and
// exits the batcher when the passed context is canceled.
func NewVolumeUpdateBatcher(batchDuration time.Duration, raft VolumeRaftEndpoints, ctx context.Context) *VolumeUpdateBatcher {
b := &VolumeUpdateBatcher{
batch: batchDuration,
raft: raft,
ctx: ctx,
workCh: make(chan *updateWrapper, 10),
}
go b.batcher()
return b
}
// CreateUpdate batches the volume claim update and returns a future
// that tracks the completion of the request.
func (b *VolumeUpdateBatcher) CreateUpdate(claims []structs.CSIVolumeClaimRequest) *BatchFuture {
wrapper := &updateWrapper{
claims: claims,
f: make(chan *BatchFuture, 1),
}
b.workCh <- wrapper
return <-wrapper.f
}
type updateWrapper struct {
claims []structs.CSIVolumeClaimRequest
f chan *BatchFuture
}
// batcher is the long lived batcher goroutine
func (b *VolumeUpdateBatcher) batcher() {
var timerCh <-chan time.Time
claims := make(map[string]structs.CSIVolumeClaimRequest)
future := NewBatchFuture()
for {
select {
case <-b.ctx.Done():
// note: we can't flush here because we're likely no
// longer the leader
return
case w := <-b.workCh:
if timerCh == nil {
timerCh = time.After(b.batch)
}
// de-dupe and store the claim update, and attach the future
for _, upd := range w.claims {
claims[upd.VolumeID+upd.RequestNamespace()] = upd
}
w.f <- future
case <-timerCh:
// Capture the future and create a new one
f := future
future = NewBatchFuture()
// Create the batch request
req := structs.CSIVolumeClaimBatchRequest{}
for _, claim := range claims {
req.Claims = append(req.Claims, claim)
}
// Upsert the claims in a goroutine
go f.Set(b.raft.UpsertVolumeClaims(&req))
// Reset the claims list and timer
claims = make(map[string]structs.CSIVolumeClaimRequest)
timerCh = nil
}
}
}
// BatchFuture is a future that can be used to retrieve the index for
// the update or any error in the update process
type BatchFuture struct {
index uint64
err error
waitCh chan struct{}
}
// NewBatchFuture returns a new BatchFuture
func NewBatchFuture() *BatchFuture {
return &BatchFuture{
waitCh: make(chan struct{}),
}
}
// Set sets the results of the future, unblocking any client.
func (f *BatchFuture) Set(index uint64, err error) {
f.index = index
f.err = err
close(f.waitCh)
}
// Results returns the creation index and any error.
func (f *BatchFuture) Results() (uint64, error) {
<-f.waitCh
return f.index, f.err
}
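As a usage note for the batcher above: a caller hands CreateUpdate a slice of claim requests and blocks on the returned future. The helper below is an illustrative sketch assumed to live in this package; only CreateUpdate and Results come from the code above.

// claimAndWait is illustrative (not part of the diff): it submits a slice of
// claim updates and waits for the raft index of the batch that carried them,
// or the error from the batched apply.
func claimAndWait(b *VolumeUpdateBatcher, claims []structs.CSIVolumeClaimRequest) (uint64, error) {
    future := b.CreateUpdate(claims) // may be coalesced with claims from other watchers
    return future.Results()          // blocks until the batch is applied
}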

View File

@ -0,0 +1,85 @@
package volumewatcher
import (
"context"
"fmt"
"sync"
"testing"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
// TestVolumeWatch_Batcher tests the update batching logic
func TestVolumeWatch_Batcher(t *testing.T) {
t.Parallel()
require := require.New(t)
ctx, exitFn := context.WithCancel(context.Background())
defer exitFn()
srv := &MockBatchingRPCServer{}
srv.state = state.TestStateStore(t)
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(CrossVolumeUpdateBatchDuration, srv, ctx)
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
// because we wait for the results to return from the batch for each
// Watcher.updateClaims, we can't test that we're batching except across
// multiple volume watchers. create 2 volumes and their watchers here.
alloc0 := mock.Alloc()
alloc0.ClientStatus = structs.AllocClientStatusComplete
vol0 := testVolume(nil, plugin, alloc0, node.ID)
w0 := &volumeWatcher{
v: vol0,
rpc: srv,
state: srv.State(),
updateClaims: srv.UpdateClaims,
logger: testlog.HCLogger(t),
}
alloc1 := mock.Alloc()
alloc1.ClientStatus = structs.AllocClientStatusComplete
vol1 := testVolume(nil, plugin, alloc1, node.ID)
w1 := &volumeWatcher{
v: vol1,
rpc: srv,
state: srv.State(),
updateClaims: srv.UpdateClaims,
logger: testlog.HCLogger(t),
}
srv.nextCSIControllerDetachError = fmt.Errorf("some controller plugin error")
var wg sync.WaitGroup
wg.Add(2)
go func() {
w0.volumeReapImpl(vol0)
wg.Done()
}()
go func() {
w1.volumeReapImpl(vol1)
wg.Done()
}()
wg.Wait()
require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol0.PastClaims[alloc0.ID].State)
require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol1.PastClaims[alloc1.ID].State)
require.Equal(2, srv.countCSINodeDetachVolume)
require.Equal(2, srv.countCSIControllerDetachVolume)
require.Equal(2, srv.countUpdateClaims)
// note: it's technically possible that the volumeReapImpl
// goroutines get de-scheduled and we don't write both updates in
// the same batch. but this seems really unlikely, so we're
// testing for both cases here so that if we start seeing a flake
// here in the future we have a clear cause for it.
require.GreaterOrEqual(srv.countUpsertVolumeClaims, 1)
require.Equal(1, srv.countUpsertVolumeClaims)
}

View File

@ -0,0 +1,28 @@
package volumewatcher
import (
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/structs"
)
// VolumeRaftEndpoints exposes a set of functions the volume watcher
// uses to apply data transforms via Raft.
type VolumeRaftEndpoints interface {
// UpsertVolumeClaims applies a batch of claims to raft
UpsertVolumeClaims(*structs.CSIVolumeClaimBatchRequest) (uint64, error)
}
// ClientRPC is a minimal interface of the Server, intended as an aid
// for testing logic surrounding server-to-server or server-to-client
// RPC calls and to avoid circular references between the nomad
// package and the volumewatcher
type ClientRPC interface {
ControllerDetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error
NodeDetachVolume(args *cstructs.ClientCSINodeDetachVolumeRequest, reply *cstructs.ClientCSINodeDetachVolumeResponse) error
}
// updateClaimsFn is the function used to update claims on behalf of a volume
// (used to wrap batch updates so that we can test
// volumeWatcher methods synchronously without batching)
type updateClaimsFn func(claims []structs.CSIVolumeClaimRequest) (uint64, error)
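To make the indirection described above concrete: both a batcher-backed function and a synchronous test stub satisfy updateClaimsFn. The names below are illustrative sketches assumed to live in this package, not code from the diff.

// batchedClaimUpdater wraps the batcher so production callers block on the
// coalesced raft apply.
func batchedClaimUpdater(b *VolumeUpdateBatcher) updateClaimsFn {
    return func(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
        return b.CreateUpdate(claims).Results()
    }
}

// synchronousClaimUpdater is the kind of stub a test might pass to bypass
// batching entirely.
var synchronousClaimUpdater updateClaimsFn = func(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
    return 0, nil
}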

View File

@ -0,0 +1,148 @@
package volumewatcher
import (
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
// Create a client node with plugin info
func testNode(node *structs.Node, plugin *structs.CSIPlugin, s *state.StateStore) *structs.Node {
if node != nil {
return node
}
node = mock.Node()
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
node.CSINodePlugins = map[string]*structs.CSIInfo{
plugin.ID: {
PluginID: plugin.ID,
Healthy: true,
RequiresControllerPlugin: plugin.ControllerRequired,
NodeInfo: &structs.CSINodeInfo{},
},
}
if plugin.ControllerRequired {
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
plugin.ID: {
PluginID: plugin.ID,
Healthy: true,
RequiresControllerPlugin: true,
ControllerInfo: &structs.CSIControllerInfo{
SupportsReadOnlyAttach: true,
SupportsAttachDetach: true,
SupportsListVolumes: true,
SupportsListVolumesAttachedNodes: false,
},
},
}
} else {
node.CSIControllerPlugins = map[string]*structs.CSIInfo{}
}
s.UpsertNode(99, node)
return node
}
// Create a test volume with claim info
func testVolume(vol *structs.CSIVolume, plugin *structs.CSIPlugin, alloc *structs.Allocation, nodeID string) *structs.CSIVolume {
if vol != nil {
return vol
}
vol = mock.CSIVolume(plugin)
vol.ControllerRequired = plugin.ControllerRequired
vol.ReadAllocs = map[string]*structs.Allocation{alloc.ID: alloc}
vol.ReadClaims = map[string]*structs.CSIVolumeClaim{
alloc.ID: {
AllocationID: alloc.ID,
NodeID: nodeID,
Mode: structs.CSIVolumeClaimRead,
State: structs.CSIVolumeClaimStateTaken,
},
}
return vol
}
// COMPAT(1.0): the claim fields were added after 0.11.1; this
// mock and the associated test cases can be removed for 1.0
func testOldVolume(vol *structs.CSIVolume, plugin *structs.CSIPlugin, alloc *structs.Allocation, nodeID string) *structs.CSIVolume {
if vol != nil {
return vol
}
vol = mock.CSIVolume(plugin)
vol.ControllerRequired = plugin.ControllerRequired
vol.ReadAllocs = map[string]*structs.Allocation{alloc.ID: alloc}
return vol
}
type MockRPCServer struct {
state *state.StateStore
// mock responses for ClientCSI.NodeDetachVolume
nextCSINodeDetachResponse *cstructs.ClientCSINodeDetachVolumeResponse
nextCSINodeDetachError error
countCSINodeDetachVolume int
// mock responses for ClientCSI.ControllerDetachVolume
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
nextCSIControllerDetachError error
countCSIControllerDetachVolume int
countUpdateClaims int
countUpsertVolumeClaims int
}
func (srv *MockRPCServer) ControllerDetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error {
reply = srv.nextCSIControllerDetachVolumeResponse
srv.countCSIControllerDetachVolume++
return srv.nextCSIControllerDetachError
}
func (srv *MockRPCServer) NodeDetachVolume(args *cstructs.ClientCSINodeDetachVolumeRequest, reply *cstructs.ClientCSINodeDetachVolumeResponse) error {
reply = srv.nextCSINodeDetachResponse
srv.countCSINodeDetachVolume++
return srv.nextCSINodeDetachError
}
func (srv *MockRPCServer) UpsertVolumeClaims(*structs.CSIVolumeClaimBatchRequest) (uint64, error) {
srv.countUpsertVolumeClaims++
return 0, nil
}
func (srv *MockRPCServer) State() *state.StateStore { return srv.state }
func (srv *MockRPCServer) UpdateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
srv.countUpdateClaims++
return 0, nil
}
type MockBatchingRPCServer struct {
MockRPCServer
volumeUpdateBatcher *VolumeUpdateBatcher
}
func (srv *MockBatchingRPCServer) UpdateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
srv.countUpdateClaims++
return srv.volumeUpdateBatcher.CreateUpdate(claims).Results()
}
type MockStatefulRPCServer struct {
MockRPCServer
volumeUpdateBatcher *VolumeUpdateBatcher
}
func (srv *MockStatefulRPCServer) UpsertVolumeClaims(batch *structs.CSIVolumeClaimBatchRequest) (uint64, error) {
srv.countUpsertVolumeClaims++
index, _ := srv.state.LatestIndex()
for _, req := range batch.Claims {
index++
err := srv.state.CSIVolumeClaim(index, req.RequestNamespace(),
req.VolumeID, req.ToClaim())
if err != nil {
return 0, err
}
}
return index, nil
}

View File

@ -0,0 +1,382 @@
package volumewatcher
import (
"context"
"fmt"
"sync"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
multierror "github.com/hashicorp/go-multierror"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
)
// volumeWatcher is used to watch a single volume and release its
// claims as the volume's allocations become terminal.
type volumeWatcher struct {
// v is the volume being watched
v *structs.CSIVolume
// state is the state that is watched for state changes.
state *state.StateStore
// updateClaims is the function used to apply claims to raft
updateClaims updateClaimsFn
// server interface for CSI client RPCs
rpc ClientRPC
logger log.Logger
shutdownCtx context.Context // parent context
ctx context.Context // own context
exitFn context.CancelFunc
// updateCh is triggered when there is an updated volume
updateCh chan *structs.CSIVolume
wLock sync.RWMutex
running bool
}
// newVolumeWatcher returns a volume watcher that is used to watch
// volumes
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {
w := &volumeWatcher{
updateCh: make(chan *structs.CSIVolume, 1),
updateClaims: parent.updateClaims,
v: vol,
state: parent.state,
rpc: parent.rpc,
logger: parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
shutdownCtx: parent.ctx,
}
// Start the long lived watcher that scans for allocation updates
w.Start()
return w
}
// Notify signals an update to the tracked volume.
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
if !vw.isRunning() {
vw.Start()
}
select {
case vw.updateCh <- v:
case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
case <-vw.ctx.Done(): // prevent deadlock if we stopped
}
}
func (vw *volumeWatcher) Start() {
vw.logger.Trace("starting watcher", "id", vw.v.ID, "namespace", vw.v.Namespace)
vw.wLock.Lock()
defer vw.wLock.Unlock()
vw.running = true
ctx, exitFn := context.WithCancel(vw.shutdownCtx)
vw.ctx = ctx
vw.exitFn = exitFn
go vw.watch()
}
// Stop stops watching the volume. This should be called whenever a
// volume's claims are fully reaped or the watcher is no longer needed.
func (vw *volumeWatcher) Stop() {
vw.logger.Trace("no more claims", "id", vw.v.ID, "namespace", vw.v.Namespace)
vw.exitFn()
}
func (vw *volumeWatcher) isRunning() bool {
vw.wLock.RLock()
defer vw.wLock.RUnlock()
select {
case <-vw.shutdownCtx.Done():
return false
case <-vw.ctx.Done():
return false
default:
return vw.running
}
}
// watch is the long-running function that watches for changes to a volume.
// Each pass steps the volume's claims through the various states of reaping
// until the volume has no more claims eligible to be reaped.
func (vw *volumeWatcher) watch() {
for {
select {
// TODO(tgross): currently server->client RPC have no cancellation
// context, so we can't stop the long-runner RPCs gracefully
case <-vw.shutdownCtx.Done():
return
case <-vw.ctx.Done():
return
case vol := <-vw.updateCh:
// while we won't make raft writes if we get a stale update,
// we can still fire extra CSI RPC calls if we don't check this
if vol == nil || vw.v == nil || vol.ModifyIndex >= vw.v.ModifyIndex {
vol = vw.getVolume(vol)
if vol == nil {
return
}
vw.volumeReap(vol)
}
}
}
}
// getVolume returns the tracked volume, fully populated with the current
// state
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
vw.wLock.RLock()
defer vw.wLock.RUnlock()
var err error
ws := memdb.NewWatchSet()
vol, err = vw.state.CSIVolumeDenormalizePlugins(ws, vol.Copy())
if err != nil {
vw.logger.Error("could not query plugins for volume", "error", err)
return nil
}
vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
if err != nil {
vw.logger.Error("could not query allocs for volume", "error", err)
return nil
}
vw.v = vol
return vol
}
// volumeReap collects errors for logging but doesn't return them
// to the main loop.
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
vw.logger.Trace("releasing unused volume claims", "id", vol.ID, "namespace", vol.Namespace)
err := vw.volumeReapImpl(vol)
if err != nil {
vw.logger.Error("error releasing volume claims", "error", err)
}
if vw.isUnclaimed(vol) {
vw.Stop()
}
}
func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
}
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
var result *multierror.Error
nodeClaims := map[string]int{} // node IDs -> count
jobs := map[string]bool{} // jobID -> stopped
// if a job is purged, the subsequent alloc updates can't
// trigger a GC job because there's no job for them to query.
// Job.Deregister will send a claim release on all claims
// but the allocs will not yet be terminated. save the status
// for each job so that we don't requery in this pass
checkStopped := func(jobID string) bool {
namespace := vw.v.Namespace
isStopped, ok := jobs[jobID]
if !ok {
ws := memdb.NewWatchSet()
job, err := vw.state.JobByID(ws, namespace, jobID)
if err != nil {
isStopped = true
}
if job == nil || job.Stopped() {
isStopped = true
}
jobs[jobID] = isStopped
}
return isStopped
}
collect := func(allocs map[string]*structs.Allocation,
claims map[string]*structs.CSIVolumeClaim) {
for allocID, alloc := range allocs {
if alloc == nil {
_, exists := vol.PastClaims[allocID]
if !exists {
vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
AllocationID: allocID,
State: structs.CSIVolumeClaimStateReadyToFree,
}
}
continue
}
nodeClaims[alloc.NodeID]++
if alloc.Terminated() || checkStopped(alloc.JobID) {
// don't overwrite the PastClaim if we've seen it before,
// so that we can track state between subsequent calls
_, exists := vol.PastClaims[allocID]
if !exists {
claim, ok := claims[allocID]
if !ok {
claim = &structs.CSIVolumeClaim{
AllocationID: allocID,
NodeID: alloc.NodeID,
}
}
claim.State = structs.CSIVolumeClaimStateTaken
vol.PastClaims[allocID] = claim
}
}
}
}
collect(vol.ReadAllocs, vol.ReadClaims)
collect(vol.WriteAllocs, vol.WriteClaims)
if len(vol.PastClaims) == 0 {
return nil
}
for _, claim := range vol.PastClaims {
var err error
// previous checkpoints may have set the past claim state already.
// in practice we should never see CSIVolumeClaimStateControllerDetached
// but having an option for the state makes it easy to add a checkpoint
// in a backwards compatible way if we need one later
switch claim.State {
case structs.CSIVolumeClaimStateNodeDetached:
goto NODE_DETACHED
case structs.CSIVolumeClaimStateControllerDetached:
goto RELEASE_CLAIM
case structs.CSIVolumeClaimStateReadyToFree:
goto RELEASE_CLAIM
}
err = vw.nodeDetach(vol, claim)
if err != nil {
result = multierror.Append(result, err)
break
}
NODE_DETACHED:
nodeClaims[claim.NodeID]--
err = vw.controllerDetach(vol, claim, nodeClaims)
if err != nil {
result = multierror.Append(result, err)
break
}
RELEASE_CLAIM:
err = vw.checkpoint(vol, claim)
if err != nil {
result = multierror.Append(result, err)
break
}
// the checkpoint deletes from the state store, but this operates
// on our local copy which aids in testing
delete(vol.PastClaims, claim.AllocationID)
}
return result.ErrorOrNil()
}
// nodeDetach makes the client NodeUnpublish / NodeUnstage RPCs, which
// must be completed before controller operations or releasing the claim.
func (vw *volumeWatcher) nodeDetach(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
vw.logger.Trace("detaching node", "id", vol.ID, "namespace", vol.Namespace)
nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
PluginID: vol.PluginID,
VolumeID: vol.ID,
ExternalID: vol.RemoteID(),
AllocID: claim.AllocationID,
NodeID: claim.NodeID,
AttachmentMode: vol.AttachmentMode,
AccessMode: vol.AccessMode,
ReadOnly: claim.Mode == structs.CSIVolumeClaimRead,
}
err := vw.rpc.NodeDetachVolume(nReq,
&cstructs.ClientCSINodeDetachVolumeResponse{})
if err != nil {
return fmt.Errorf("could not detach from node: %v", err)
}
claim.State = structs.CSIVolumeClaimStateNodeDetached
return vw.checkpoint(vol, claim)
}
// controllerDetach makes the client RPC to the controller to
// unpublish the volume if a controller is required and no other
// allocs on the node need it
func (vw *volumeWatcher) controllerDetach(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim, nodeClaims map[string]int) error {
if !vol.ControllerRequired || nodeClaims[claim.NodeID] > 1 {
claim.State = structs.CSIVolumeClaimStateReadyToFree
return nil
}
vw.logger.Trace("detaching controller", "id", vol.ID, "namespace", vol.Namespace)
// note: we need to get the CSI Node ID, which is not the same as
// the Nomad Node ID
ws := memdb.NewWatchSet()
targetNode, err := vw.state.NodeByID(ws, claim.NodeID)
if err != nil {
return err
}
if targetNode == nil {
return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, claim.NodeID)
}
targetCSIInfo, ok := targetNode.CSINodePlugins[vol.PluginID]
if !ok {
return fmt.Errorf("failed to find NodeInfo for node: %s", targetNode.ID)
}
plug, err := vw.state.CSIPluginByID(ws, vol.PluginID)
if err != nil {
return fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err)
}
if plug == nil {
return fmt.Errorf("plugin lookup error: %s missing plugin", vol.PluginID)
}
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
VolumeID: vol.RemoteID(),
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
}
cReq.PluginID = plug.ID
err = vw.rpc.ControllerDetachVolume(cReq,
&cstructs.ClientCSIControllerDetachVolumeResponse{})
if err != nil {
return fmt.Errorf("could not detach from controller: %v", err)
}
claim.State = structs.CSIVolumeClaimStateReadyToFree
return nil
}
func (vw *volumeWatcher) checkpoint(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
vw.logger.Trace("checkpointing claim", "id", vol.ID, "namespace", vol.Namespace)
req := structs.CSIVolumeClaimRequest{
VolumeID: vol.ID,
AllocationID: claim.AllocationID,
NodeID: claim.NodeID,
Claim: structs.CSIVolumeClaimRelease,
State: claim.State,
WriteRequest: structs.WriteRequest{
Namespace: vol.Namespace,
// Region: vol.Region, // TODO(tgross) should volumes have regions?
},
}
index, err := vw.updateClaims([]structs.CSIVolumeClaimRequest{req})
if err == nil && index != 0 {
vw.wLock.Lock()
defer vw.wLock.Unlock()
vw.v.ModifyIndex = index
}
if err != nil {
return fmt.Errorf("could not checkpoint claim release: %v", err)
}
return nil
}
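The goto-based loop in volumeReapImpl above encodes a small checkpointed state machine: each past claim moves through node detach, then controller detach, then claim release, and a state checkpointed by an earlier pass lets a later pass skip completed steps. The function below is only an illustrative restatement of that per-claim flow under the same types and methods; it is not the implementation and the name is made up.

func reapOneClaim(vw *volumeWatcher, vol *structs.CSIVolume, claim *structs.CSIVolumeClaim, nodeClaims map[string]int) error {
    switch claim.State {
    case structs.CSIVolumeClaimStateNodeDetached:
        // node work already checkpointed; continue with controller detach
    case structs.CSIVolumeClaimStateControllerDetached,
        structs.CSIVolumeClaimStateReadyToFree:
        // only the final claim release remains
        return vw.checkpoint(vol, claim)
    default:
        if err := vw.nodeDetach(vol, claim); err != nil {
            return err
        }
    }
    nodeClaims[claim.NodeID]--
    if err := vw.controllerDetach(vol, claim, nodeClaims); err != nil {
        return err
    }
    return vw.checkpoint(vol, claim)
}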

View File

@ -0,0 +1,294 @@
package volumewatcher
import (
"context"
"fmt"
"testing"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
// TestVolumeWatch_OneReap tests one pass through the reaper
func TestVolumeWatch_OneReap(t *testing.T) {
t.Parallel()
require := require.New(t)
cases := []struct {
Name string
Volume *structs.CSIVolume
Node *structs.Node
ControllerRequired bool
ExpectedErr string
ExpectedClaimsCount int
ExpectedNodeDetachCount int
ExpectedControllerDetachCount int
ExpectedUpdateClaimsCount int
srv *MockRPCServer
}{
{
Name: "No terminal allocs",
Volume: mock.CSIVolume(mock.CSIPlugin()),
ControllerRequired: true,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSINodeDetachError: fmt.Errorf("should never see this"),
},
},
{
Name: "NodeDetachVolume fails",
ControllerRequired: true,
ExpectedErr: "some node plugin error",
ExpectedNodeDetachCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSINodeDetachError: fmt.Errorf("some node plugin error"),
},
},
{
Name: "NodeDetachVolume node-only happy path",
ControllerRequired: false,
ExpectedNodeDetachCount: 1,
ExpectedUpdateClaimsCount: 2,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
{
Name: "ControllerDetachVolume no controllers available",
Node: mock.Node(),
ControllerRequired: true,
ExpectedErr: "Unknown node",
ExpectedNodeDetachCount: 1,
ExpectedUpdateClaimsCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
{
Name: "ControllerDetachVolume controller error",
ControllerRequired: true,
ExpectedErr: "some controller error",
ExpectedNodeDetachCount: 1,
ExpectedControllerDetachCount: 1,
ExpectedUpdateClaimsCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSIControllerDetachError: fmt.Errorf("some controller error"),
},
},
{
Name: "ControllerDetachVolume happy path",
ControllerRequired: true,
ExpectedNodeDetachCount: 1,
ExpectedControllerDetachCount: 1,
ExpectedUpdateClaimsCount: 2,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
plugin := mock.CSIPlugin()
plugin.ControllerRequired = tc.ControllerRequired
node := testNode(tc.Node, plugin, tc.srv.State())
alloc := mock.Alloc()
alloc.NodeID = node.ID
alloc.ClientStatus = structs.AllocClientStatusComplete
vol := testVolume(tc.Volume, plugin, alloc, node.ID)
ctx, exitFn := context.WithCancel(context.Background())
w := &volumeWatcher{
v: vol,
rpc: tc.srv,
state: tc.srv.State(),
updateClaims: tc.srv.UpdateClaims,
ctx: ctx,
exitFn: exitFn,
logger: testlog.HCLogger(t),
}
err := w.volumeReapImpl(vol)
if tc.ExpectedErr != "" {
require.Error(err, fmt.Sprintf("expected: %q", tc.ExpectedErr))
require.Contains(err.Error(), tc.ExpectedErr)
} else {
require.NoError(err)
}
require.Equal(tc.ExpectedNodeDetachCount,
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
require.Equal(tc.ExpectedControllerDetachCount,
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
require.Equal(tc.ExpectedUpdateClaimsCount,
tc.srv.countUpdateClaims, "update claims count")
})
}
}
// TestVolumeWatch_OldVolume_OneReap tests one pass through the reaper
// COMPAT(1.0): the claim fields were added after 0.11.1; this test
// can be removed for 1.0
func TestVolumeWatch_OldVolume_OneReap(t *testing.T) {
t.Parallel()
require := require.New(t)
cases := []struct {
Name string
Volume *structs.CSIVolume
Node *structs.Node
ControllerRequired bool
ExpectedErr string
ExpectedClaimsCount int
ExpectedNodeDetachCount int
ExpectedControllerDetachCount int
ExpectedUpdateClaimsCount int
srv *MockRPCServer
}{
{
Name: "No terminal allocs",
Volume: mock.CSIVolume(mock.CSIPlugin()),
ControllerRequired: true,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSINodeDetachError: fmt.Errorf("should never see this"),
},
},
{
Name: "NodeDetachVolume fails",
ControllerRequired: true,
ExpectedErr: "some node plugin error",
ExpectedNodeDetachCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSINodeDetachError: fmt.Errorf("some node plugin error"),
},
},
{
Name: "NodeDetachVolume node-only happy path",
ControllerRequired: false,
ExpectedNodeDetachCount: 1,
ExpectedUpdateClaimsCount: 2,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
{
Name: "ControllerDetachVolume no controllers available",
Node: mock.Node(),
ControllerRequired: true,
ExpectedErr: "Unknown node",
ExpectedNodeDetachCount: 1,
ExpectedUpdateClaimsCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
{
Name: "ControllerDetachVolume controller error",
ControllerRequired: true,
ExpectedErr: "some controller error",
ExpectedNodeDetachCount: 1,
ExpectedControllerDetachCount: 1,
ExpectedUpdateClaimsCount: 1,
srv: &MockRPCServer{
state: state.TestStateStore(t),
nextCSIControllerDetachError: fmt.Errorf("some controller error"),
},
},
{
Name: "ControllerDetachVolume happy path",
ControllerRequired: true,
ExpectedNodeDetachCount: 1,
ExpectedControllerDetachCount: 1,
ExpectedUpdateClaimsCount: 2,
srv: &MockRPCServer{
state: state.TestStateStore(t),
},
},
}
for _, tc := range cases {
t.Run(tc.Name, func(t *testing.T) {
plugin := mock.CSIPlugin()
plugin.ControllerRequired = tc.ControllerRequired
node := testNode(tc.Node, plugin, tc.srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
alloc.NodeID = node.ID
vol := testOldVolume(tc.Volume, plugin, alloc, node.ID)
ctx, exitFn := context.WithCancel(context.Background())
w := &volumeWatcher{
v: vol,
rpc: tc.srv,
state: tc.srv.State(),
updateClaims: tc.srv.UpdateClaims,
ctx: ctx,
exitFn: exitFn,
logger: testlog.HCLogger(t),
}
err := w.volumeReapImpl(vol)
if tc.ExpectedErr != "" {
require.Error(err, fmt.Sprintf("expected: %q", tc.ExpectedErr))
require.Contains(err.Error(), tc.ExpectedErr)
} else {
require.NoError(err)
}
require.Equal(tc.ExpectedNodeDetachCount,
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
require.Equal(tc.ExpectedControllerDetachCount,
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
require.Equal(tc.ExpectedUpdateClaimsCount,
tc.srv.countUpdateClaims, "update claims count")
})
}
}
// TestVolumeWatch_ReapStates tests multiple passes through the reaper,
// updating state after each one
func TestVolumeWatch_ReapStates(t *testing.T) {
t.Parallel()
require := require.New(t)
srv := &MockRPCServer{state: state.TestStateStore(t)}
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
vol := testVolume(nil, plugin, alloc, node.ID)
w := &volumeWatcher{
v: vol,
rpc: srv,
state: srv.State(),
updateClaims: srv.UpdateClaims,
logger: testlog.HCLogger(t),
}
srv.nextCSINodeDetachError = fmt.Errorf("some node plugin error")
err := w.volumeReapImpl(vol)
require.Error(err)
require.Equal(structs.CSIVolumeClaimStateTaken, vol.PastClaims[alloc.ID].State)
require.Equal(1, srv.countCSINodeDetachVolume)
require.Equal(0, srv.countCSIControllerDetachVolume)
require.Equal(0, srv.countUpdateClaims)
srv.nextCSINodeDetachError = nil
srv.nextCSIControllerDetachError = fmt.Errorf("some controller plugin error")
err = w.volumeReapImpl(vol)
require.Error(err)
require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol.PastClaims[alloc.ID].State)
require.Equal(1, srv.countUpdateClaims)
srv.nextCSIControllerDetachError = nil
err = w.volumeReapImpl(vol)
require.NoError(err)
require.Equal(0, len(vol.PastClaims))
require.Equal(2, srv.countUpdateClaims)
}

View File

@ -0,0 +1,232 @@
package volumewatcher
import (
"context"
"sync"
"time"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"golang.org/x/time/rate"
)
const (
// LimitStateQueriesPerSecond is the number of state queries allowed per
// second
LimitStateQueriesPerSecond = 100.0
// CrossVolumeUpdateBatchDuration is the duration in which volume
// claim updates are batched across all volume watchers before
// being committed to Raft.
CrossVolumeUpdateBatchDuration = 250 * time.Millisecond
)
// Watcher is used to watch volumes and their allocations created
// by the scheduler and release volume claims as those allocations
// become terminal.
type Watcher struct {
enabled bool
logger log.Logger
// queryLimiter is used to limit the rate of blocking queries
queryLimiter *rate.Limiter
// updateBatchDuration is the duration in which volume
// claim updates are batched across all volume watchers
// before being committed to Raft.
updateBatchDuration time.Duration
// raft contains the set of Raft endpoints that can be used by the
// volumes watcher
raft VolumeRaftEndpoints
// rpc contains the set of Server methods that can be used by
// the volumes watcher for RPC
rpc ClientRPC
// state is the state that is watched for state changes.
state *state.StateStore
// watchers is the set of active watchers, one per volume
watchers map[string]*volumeWatcher
// volumeUpdateBatcher is used to batch volume claim updates
volumeUpdateBatcher *VolumeUpdateBatcher
// ctx and exitFn are used to cancel the watcher
ctx context.Context
exitFn context.CancelFunc
wlock sync.RWMutex
}
// NewVolumesWatcher returns a volumes watcher that is used to watch
// volumes and release their claims as needed.
func NewVolumesWatcher(logger log.Logger,
raft VolumeRaftEndpoints, rpc ClientRPC, stateQueriesPerSecond float64,
updateBatchDuration time.Duration) *Watcher {
// the leader step-down calls SetEnabled(false) which is what
// cancels this context, rather than passing in its own shutdown
// context
ctx, exitFn := context.WithCancel(context.Background())
return &Watcher{
raft: raft,
rpc: rpc,
queryLimiter: rate.NewLimiter(rate.Limit(stateQueriesPerSecond), 100),
updateBatchDuration: updateBatchDuration,
logger: logger.Named("volumes_watcher"),
ctx: ctx,
exitFn: exitFn,
}
}
// SetEnabled is used to control if the watcher is enabled. The
// watcher should only be enabled on the active leader. When being
// enabled the state is passed in as it is no longer valid once a
// leader election has taken place.
func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) {
w.wlock.Lock()
defer w.wlock.Unlock()
wasEnabled := w.enabled
w.enabled = enabled
if state != nil {
w.state = state
}
// Flush the state to create the necessary objects
w.flush()
// If we are starting now, launch the watch daemon
if enabled && !wasEnabled {
go w.watchVolumes(w.ctx)
}
}
// flush is used to clear the state of the watcher
func (w *Watcher) flush() {
// Stop all the watchers and clear it
for _, watcher := range w.watchers {
watcher.Stop()
}
// Kill everything associated with the watcher
if w.exitFn != nil {
w.exitFn()
}
w.watchers = make(map[string]*volumeWatcher, 32)
w.ctx, w.exitFn = context.WithCancel(context.Background())
w.volumeUpdateBatcher = NewVolumeUpdateBatcher(w.updateBatchDuration, w.raft, w.ctx)
}
// watchVolumes is the long-lived goroutine that watches for volumes to
// add and remove watchers on.
func (w *Watcher) watchVolumes(ctx context.Context) {
vIndex := uint64(1)
for {
volumes, idx, err := w.getVolumes(ctx, vIndex)
if err != nil {
if err == context.Canceled {
return
}
w.logger.Error("failed to retrieve volumes", "error", err)
}
vIndex = idx // last-seen index
for _, v := range volumes {
if err := w.add(v); err != nil {
w.logger.Error("failed to track volume", "volume_id", v.ID, "error", err)
}
}
}
}
// getVolumes retrieves all volumes blocking at the given index.
func (w *Watcher) getVolumes(ctx context.Context, minIndex uint64) ([]*structs.CSIVolume, uint64, error) {
resp, index, err := w.state.BlockingQuery(w.getVolumesImpl, minIndex, ctx)
if err != nil {
return nil, 0, err
}
return resp.([]*structs.CSIVolume), index, nil
}
// getVolumesImpl retrieves all volumes from the passed state store.
func (w *Watcher) getVolumesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
iter, err := state.CSIVolumes(ws)
if err != nil {
return nil, 0, err
}
var volumes []*structs.CSIVolume
for {
raw := iter.Next()
if raw == nil {
break
}
volume := raw.(*structs.CSIVolume)
volumes = append(volumes, volume)
}
// Use the last index that affected the volume table
index, err := state.Index("csi_volumes")
if err != nil {
return nil, 0, err
}
return volumes, index, nil
}
// add adds a volume to the watch list
func (w *Watcher) add(d *structs.CSIVolume) error {
w.wlock.Lock()
defer w.wlock.Unlock()
_, err := w.addLocked(d)
return err
}
// addLocked adds a volume to the watch list and should only be called when
// locked. Creating the volumeWatcher starts a goroutine to .watch() it
func (w *Watcher) addLocked(v *structs.CSIVolume) (*volumeWatcher, error) {
// Not enabled so no-op
if !w.enabled {
return nil, nil
}
// Already watched so trigger an update for the volume
if watcher, ok := w.watchers[v.ID+v.Namespace]; ok {
watcher.Notify(v)
return nil, nil
}
watcher := newVolumeWatcher(w, v)
w.watchers[v.ID+v.Namespace] = watcher
return watcher, nil
}
// TODO: this is currently dead code; we'll call a public remove
// method on the Watcher once we have a periodic GC job
// removeLocked stops watching a volume and should only be called when locked.
func (w *Watcher) removeLocked(volID, namespace string) {
if !w.enabled {
return
}
if watcher, ok := w.watchers[volID+namespace]; ok {
watcher.Stop()
delete(w.watchers, volID+namespace)
}
}
// updateClaims sends the claims to the batch updater and waits for
// the results
func (w *Watcher) updateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
return w.volumeUpdateBatcher.CreateUpdate(claims).Results()
}
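Per the SetEnabled comment above, the watcher is only active on the leader and gets a fresh state store on every step-up. The sketch below shows that toggling pattern; it is assumed to live in this package and the function name is made up.

// onLeadershipChange is illustrative only: the state store is handed in again
// on step-up because the previous one is no longer valid after an election;
// step-down tears down per-volume watchers and the claim batcher via flush().
func onLeadershipChange(w *Watcher, isLeader bool, store *state.StateStore) {
    if isLeader {
        w.SetEnabled(true, store)
        return
    }
    w.SetEnabled(false, nil)
}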

View File

@ -0,0 +1,311 @@
package volumewatcher
import (
"context"
"testing"
"time"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
)
// TestVolumeWatch_EnableDisable tests the watcher registration logic that needs
// to happen during leader step-up/step-down
func TestVolumeWatch_EnableDisable(t *testing.T) {
t.Parallel()
require := require.New(t)
srv := &MockRPCServer{}
srv.state = state.TestStateStore(t)
index := uint64(100)
watcher := NewVolumesWatcher(testlog.HCLogger(t),
srv, srv,
LimitStateQueriesPerSecond,
CrossVolumeUpdateBatchDuration)
watcher.SetEnabled(true, srv.State())
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
vol := testVolume(nil, plugin, alloc, node.ID)
index++
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
require.NoError(err)
claim := &structs.CSIVolumeClaim{Mode: structs.CSIVolumeClaimRelease}
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
require.Eventually(func() bool {
return 1 == len(watcher.watchers)
}, time.Second, 10*time.Millisecond)
watcher.SetEnabled(false, srv.State())
require.Equal(0, len(watcher.watchers))
}
// TestVolumeWatch_Checkpoint tests the checkpointing of progress across
// leader step-up/step-down
func TestVolumeWatch_Checkpoint(t *testing.T) {
t.Parallel()
require := require.New(t)
srv := &MockRPCServer{}
srv.state = state.TestStateStore(t)
index := uint64(100)
watcher := NewVolumesWatcher(testlog.HCLogger(t),
srv, srv,
LimitStateQueriesPerSecond,
CrossVolumeUpdateBatchDuration)
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
vol := testVolume(nil, plugin, alloc, node.ID)
watcher.SetEnabled(true, srv.State())
index++
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
require.NoError(err)
// we should get or start up a watcher when we get an update for
// the volume from the state store
require.Eventually(func() bool {
return 1 == len(watcher.watchers)
}, time.Second, 10*time.Millisecond)
// step-down (this is sync, but step-up is async)
watcher.SetEnabled(false, srv.State())
require.Equal(0, len(watcher.watchers))
// step-up again
watcher.SetEnabled(true, srv.State())
require.Eventually(func() bool {
return 1 == len(watcher.watchers)
}, time.Second, 10*time.Millisecond)
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
}
// TestVolumeWatch_StartStop tests the start and stop of the watcher when
// it receives notifications and has completed its work
func TestVolumeWatch_StartStop(t *testing.T) {
t.Parallel()
require := require.New(t)
ctx, exitFn := context.WithCancel(context.Background())
defer exitFn()
srv := &MockStatefulRPCServer{}
srv.state = state.TestStateStore(t)
index := uint64(100)
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(
CrossVolumeUpdateBatchDuration, srv, ctx)
watcher := NewVolumesWatcher(testlog.HCLogger(t),
srv, srv,
LimitStateQueriesPerSecond,
CrossVolumeUpdateBatchDuration)
watcher.SetEnabled(true, srv.State())
require.Equal(0, len(watcher.watchers))
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc2 := mock.Alloc()
alloc2.Job = alloc.Job
alloc2.ClientStatus = structs.AllocClientStatusRunning
index++
err := srv.State().UpsertJob(index, alloc.Job)
require.NoError(err)
index++
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc, alloc2})
require.NoError(err)
// register a volume
vol := testVolume(nil, plugin, alloc, node.ID)
index++
err = srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
require.NoError(err)
// assert we get a running watcher
require.Eventually(func() bool {
return 1 == len(watcher.watchers)
}, time.Second, 10*time.Millisecond)
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
// claim the volume for both allocs
claim := &structs.CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: node.ID,
Mode: structs.CSIVolumeClaimRead,
}
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
claim.AllocationID = alloc2.ID
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
// reap the volume and assert nothing has happened
claim = &structs.CSIVolumeClaim{
AllocationID: alloc.ID,
NodeID: node.ID,
Mode: structs.CSIVolumeClaimRelease,
}
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
// alloc becomes terminal
alloc.ClientStatus = structs.AllocClientStatusComplete
index++
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc})
require.NoError(err)
index++
claim.State = structs.CSIVolumeClaimStateReadyToFree
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
// 1 claim has been released but watcher is still running
require.Eventually(func() bool {
ws := memdb.NewWatchSet()
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
return len(vol.ReadAllocs) == 1 && len(vol.PastClaims) == 0
}, time.Second*2, 10*time.Millisecond)
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
// the watcher will have incremented the index so we need to make sure
// our inserts will trigger new events
index, _ = srv.State().LatestIndex()
// remaining alloc's job is stopped (alloc is not marked terminal)
alloc2.Job.Stop = true
index++
err = srv.State().UpsertJob(index, alloc2.Job)
require.NoError(err)
// job deregistration writes a claim with no allocations or nodes
claim = &structs.CSIVolumeClaim{
Mode: structs.CSIVolumeClaimRelease,
}
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
require.NoError(err)
// all claims have been released and watcher is stopped
require.Eventually(func() bool {
ws := memdb.NewWatchSet()
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
return len(vol.ReadAllocs) == 1 && len(vol.PastClaims) == 0
}, time.Second*2, 10*time.Millisecond)
require.Eventually(func() bool {
return !watcher.watchers[vol.ID+vol.Namespace].isRunning()
}, time.Second*1, 10*time.Millisecond)
// the watcher will have incremented the index so we need to make sure
// our inserts will trigger new events
index, _ = srv.State().LatestIndex()
// create a new claim
alloc3 := mock.Alloc()
alloc3.ClientStatus = structs.AllocClientStatusRunning
index++
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc3})
require.NoError(err)
claim3 := &structs.CSIVolumeClaim{
AllocationID: alloc3.ID,
NodeID: node.ID,
Mode: structs.CSIVolumeClaimRelease,
}
index++
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim3)
require.NoError(err)
// a stopped watcher should restore itself on notification
require.Eventually(func() bool {
return watcher.watchers[vol.ID+vol.Namespace].isRunning()
}, time.Second*1, 10*time.Millisecond)
}
// TestVolumeWatch_RegisterDeregister tests the start and stop of
// watchers around registration
func TestVolumeWatch_RegisterDeregister(t *testing.T) {
t.Parallel()
require := require.New(t)
ctx, exitFn := context.WithCancel(context.Background())
defer exitFn()
srv := &MockStatefulRPCServer{}
srv.state = state.TestStateStore(t)
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(
CrossVolumeUpdateBatchDuration, srv, ctx)
index := uint64(100)
watcher := NewVolumesWatcher(testlog.HCLogger(t),
srv, srv,
LimitStateQueriesPerSecond,
CrossVolumeUpdateBatchDuration)
watcher.SetEnabled(true, srv.State())
require.Equal(0, len(watcher.watchers))
plugin := mock.CSIPlugin()
node := testNode(nil, plugin, srv.State())
alloc := mock.Alloc()
alloc.ClientStatus = structs.AllocClientStatusComplete
// register a volume
vol := testVolume(nil, plugin, alloc, node.ID)
index++
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
require.NoError(err)
require.Eventually(func() bool {
return 1 == len(watcher.watchers)
}, time.Second, 10*time.Millisecond)
// reap the volume and assert we've cleaned up
w := watcher.watchers[vol.ID+vol.Namespace]
w.Notify(vol)
require.Eventually(func() bool {
ws := memdb.NewWatchSet()
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
return len(vol.ReadAllocs) == 0 && len(vol.PastClaims) == 0
}, time.Second*2, 10*time.Millisecond)
require.Eventually(func() bool {
return !watcher.watchers[vol.ID+vol.Namespace].isRunning()
}, time.Second*1, 10*time.Millisecond)
require.Equal(1, srv.countCSINodeDetachVolume, "node detach RPC count")
require.Equal(1, srv.countCSIControllerDetachVolume, "controller detach RPC count")
require.Equal(2, srv.countUpsertVolumeClaims, "upsert claims count")
// deregistering the volume doesn't cause an update that triggers
// a watcher; we'll clean up this watcher in a GC later
err = srv.State().CSIVolumeDeregister(index, vol.Namespace, []string{vol.ID})
require.NoError(err)
require.Equal(1, len(watcher.watchers))
require.False(watcher.watchers[vol.ID+vol.Namespace].isRunning())
}

View File

@ -0,0 +1,31 @@
package nomad
import (
"github.com/hashicorp/nomad/nomad/structs"
)
// volumeWatcherRaftShim is the shim that provides the state watching
// methods. These should be set by the server and passed to the volume
// watcher.
type volumeWatcherRaftShim struct {
// apply is used to apply a message to Raft
apply raftApplyFn
}
// convertApplyErrors parses the results of a raftApply and returns the index at
// which it was applied and any error that occurred. Raft Apply returns two
// separate errors: Raft library errors and user-returned errors from the FSM.
// This helper joins them by inspecting the applyResponse for an error.
func (shim *volumeWatcherRaftShim) convertApplyErrors(applyResp interface{}, index uint64, err error) (uint64, error) {
if applyResp != nil {
if fsmErr, ok := applyResp.(error); ok && fsmErr != nil {
return index, fsmErr
}
}
return index, err
}
func (shim *volumeWatcherRaftShim) UpsertVolumeClaims(req *structs.CSIVolumeClaimBatchRequest) (uint64, error) {
fsmErrIntf, index, raftErr := shim.apply(structs.CSIVolumeClaimBatchRequestType, req)
return shim.convertApplyErrors(fsmErrIntf, index, raftErr)
}
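For orientation, the shim above is the concrete VolumeRaftEndpoints the server hands to the volume watcher. The wiring sketch below is illustrative only: it assumes hclog is imported as log and the volumewatcher package is imported, the helper name is made up, and the actual server setup is not shown in this diff.

// newVolumesWatcherForServer wraps the server's raft apply function in the
// shim and passes it to the watcher along with a client RPC handle.
func newVolumesWatcherForServer(logger log.Logger, apply raftApplyFn, rpc volumewatcher.ClientRPC) *volumewatcher.Watcher {
    shim := &volumeWatcherRaftShim{apply: apply}
    return volumewatcher.NewVolumesWatcher(logger, shim, rpc,
        volumewatcher.LimitStateQueriesPerSecond,
        volumewatcher.CrossVolumeUpdateBatchDuration)
}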

View File

@ -82,6 +82,7 @@ type client struct {
identityClient csipbv1.IdentityClient
controllerClient CSIControllerClient
nodeClient CSINodeClient
logger hclog.Logger
}
func (c *client) Close() error {
@ -106,6 +107,7 @@ func NewClient(addr string, logger hclog.Logger) (CSIPlugin, error) {
identityClient: csipbv1.NewIdentityClient(conn),
controllerClient: csipbv1.NewControllerClient(conn),
nodeClient: csipbv1.NewNodeClient(conn),
logger: logger,
}, nil
}
@ -318,17 +320,50 @@ func (c *client) ControllerValidateCapabilities(ctx context.Context, volumeID st
return err
}
if resp.Confirmed == nil {
if resp.Message != "" {
return fmt.Errorf("Volume validation failed, message: %s", resp.Message)
}
if resp.Message != "" {
// this should only ever be set if Confirmed isn't set, but
// it's not a validation failure.
c.logger.Debug(resp.Message)
}
return fmt.Errorf("Volume validation failed")
// The protobuf accessors below safely handle nil pointers.
// The CSI spec says we can only assert the plugin has
// confirmed the volume capabilities, not that it hasn't
// confirmed them, so if the field is nil we have to assume
// the volume is ok.
confirmedCaps := resp.GetConfirmed().GetVolumeCapabilities()
if confirmedCaps != nil {
for _, requestedCap := range req.VolumeCapabilities {
if !compareCapabilities(requestedCap, confirmedCaps) {
return fmt.Errorf("volume capability validation failed: missing %v", req)
}
}
}
return nil
}
// compareCapabilities returns true if the 'got' capabilities contains
// the 'expected' capability
func compareCapabilities(expected *csipbv1.VolumeCapability, got []*csipbv1.VolumeCapability) bool {
for _, cap := range got {
if expected.GetAccessMode().GetMode() != cap.GetAccessMode().GetMode() {
continue
}
// AccessType Block is an empty struct even if set, so the
// only way to test for it is to check that the AccessType
// isn't Mount.
if expected.GetMount() == nil && cap.GetMount() != nil {
continue
}
if expected.GetMount() != cap.GetMount() {
continue
}
return true
}
return false
}
//
// Node Endpoints
//

View File

@ -8,6 +8,7 @@ import (
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/golang/protobuf/ptypes/wrappers"
"github.com/hashicorp/nomad/nomad/structs"
fake "github.com/hashicorp/nomad/plugins/csi/testing"
"github.com/stretchr/testify/require"
)
@ -473,6 +474,95 @@ func TestClient_RPC_ControllerUnpublishVolume(t *testing.T) {
}
}
func TestClient_RPC_ControllerValidateVolume(t *testing.T) {
cases := []struct {
Name string
ResponseErr error
Response *csipbv1.ValidateVolumeCapabilitiesResponse
ExpectedErr error
}{
{
Name: "handles underlying grpc errors",
ResponseErr: fmt.Errorf("some grpc error"),
ExpectedErr: fmt.Errorf("some grpc error"),
},
{
Name: "handles empty success",
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{},
ResponseErr: nil,
ExpectedErr: nil,
},
{
Name: "handles validate success",
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{
Confirmed: &csipbv1.ValidateVolumeCapabilitiesResponse_Confirmed{
VolumeContext: map[string]string{},
VolumeCapabilities: []*csipbv1.VolumeCapability{
{
AccessType: &csipbv1.VolumeCapability_Block{
Block: &csipbv1.VolumeCapability_BlockVolume{},
},
AccessMode: &csipbv1.VolumeCapability_AccessMode{
Mode: csipbv1.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
},
},
},
},
},
ResponseErr: nil,
ExpectedErr: nil,
},
{
Name: "handles validation failure",
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{
Confirmed: &csipbv1.ValidateVolumeCapabilitiesResponse_Confirmed{
VolumeContext: map[string]string{},
VolumeCapabilities: []*csipbv1.VolumeCapability{
{
AccessType: &csipbv1.VolumeCapability_Block{
Block: &csipbv1.VolumeCapability_BlockVolume{},
},
AccessMode: &csipbv1.VolumeCapability_AccessMode{
Mode: csipbv1.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
},
},
},
},
},
ResponseErr: nil,
ExpectedErr: fmt.Errorf("volume capability validation failed"),
},
}
for _, c := range cases {
t.Run(c.Name, func(t *testing.T) {
_, cc, _, client := newTestClient()
defer client.Close()
requestedCaps := &VolumeCapability{
AccessType: VolumeAccessTypeBlock,
AccessMode: VolumeAccessModeMultiNodeMultiWriter,
MountVolume: &structs.CSIMountOptions{ // should be ignored
FSType: "ext4",
MountFlags: []string{"noatime", "errors=remount-ro"},
},
}
cc.NextValidateVolumeCapabilitiesResponse = c.Response
cc.NextErr = c.ResponseErr
err := client.ControllerValidateCapabilities(
context.TODO(), "volumeID", requestedCaps)
if c.ExpectedErr != nil {
require.Error(t, c.ExpectedErr, err, c.Name)
} else {
require.NoError(t, err, c.Name)
}
})
}
}
func TestClient_RPC_NodeStageVolume(t *testing.T) {
cases := []struct {
Name string

View File

@ -44,10 +44,11 @@ func (f *IdentityClient) Probe(ctx context.Context, in *csipbv1.ProbeRequest, op
// ControllerClient is a CSI controller client used for testing
type ControllerClient struct {
NextErr error
NextCapabilitiesResponse *csipbv1.ControllerGetCapabilitiesResponse
NextPublishVolumeResponse *csipbv1.ControllerPublishVolumeResponse
NextUnpublishVolumeResponse *csipbv1.ControllerUnpublishVolumeResponse
NextErr error
NextCapabilitiesResponse *csipbv1.ControllerGetCapabilitiesResponse
NextPublishVolumeResponse *csipbv1.ControllerPublishVolumeResponse
NextUnpublishVolumeResponse *csipbv1.ControllerUnpublishVolumeResponse
NextValidateVolumeCapabilitiesResponse *csipbv1.ValidateVolumeCapabilitiesResponse
}
// NewControllerClient returns a new ControllerClient
@ -60,6 +61,7 @@ func (f *ControllerClient) Reset() {
f.NextCapabilitiesResponse = nil
f.NextPublishVolumeResponse = nil
f.NextUnpublishVolumeResponse = nil
f.NextValidateVolumeCapabilitiesResponse = nil
}
func (c *ControllerClient) ControllerGetCapabilities(ctx context.Context, in *csipbv1.ControllerGetCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ControllerGetCapabilitiesResponse, error) {
@ -75,7 +77,7 @@ func (c *ControllerClient) ControllerUnpublishVolume(ctx context.Context, in *cs
}
func (c *ControllerClient) ValidateVolumeCapabilities(ctx context.Context, in *csipbv1.ValidateVolumeCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ValidateVolumeCapabilitiesResponse, error) {
panic("not implemented") // TODO: Implement
return c.NextValidateVolumeCapabilitiesResponse, c.NextErr
}
// NodeClient is a CSI Node client used for testing

View File

@ -426,7 +426,7 @@ var xxx_messageInfo_FingerprintRequest proto.InternalMessageInfo
type FingerprintResponse struct {
// Attributes are key/value pairs that annotate the nomad client and can be
// used in scheduling contraints and affinities.
// used in scheduling constraints and affinities.
Attributes map[string]*proto1.Attribute `protobuf:"bytes,1,rep,name=attributes,proto3" json:"attributes,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
// Health is used to determine the state of the health the driver is in.
// Health can be one of the following states:

View File

@ -109,7 +109,7 @@ message FingerprintResponse {
// Attributes are key/value pairs that annotate the nomad client and can be
// used in scheduling contraints and affinities.
// used in scheduling constraints and affinities.
map<string, hashicorp.nomad.plugins.shared.structs.Attribute> attributes = 1;
enum HealthState {

View File

@ -78,7 +78,7 @@ func (h *DriverHarness) Kill() {
// MkAllocDir creates a temporary directory and allocdir structure.
// If enableLogs is set to true a logmon instance will be started to write logs
// to the LogDir of the task
// A cleanup func is returned and should be defered so as to not leak dirs
// A cleanup func is returned and should be deferred so as to not leak dirs
// between tests.
func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func() {
dir, err := ioutil.TempDir("", "nomad_driver_harness-")

View File

@ -2072,6 +2072,15 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))
require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))
taskName := job.TaskGroups[0].Tasks[0].Name
adr := structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{uuid.Generate()},
}
// Create allocs that are part of the old deployment
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
@ -2082,6 +2091,7 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
alloc.AllocatedResources.Tasks[taskName].Devices = []*structs.AllocatedDeviceResource{&adr}
allocs = append(allocs, alloc)
}
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
@ -2155,13 +2165,16 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)
// Verify the network did not change
// Verify the allocated networks and devices did not change
rp := structs.Port{Label: "admin", Value: 5000}
for _, alloc := range out {
for _, resources := range alloc.TaskResources {
for _, resources := range alloc.AllocatedResources.Tasks {
if resources.Networks[0].ReservedPorts[0] != rp {
t.Fatalf("bad: %#v", alloc)
}
if len(resources.Devices) == 0 || !reflect.DeepEqual(resources.Devices[0], &adr) {
t.Fatalf("bad: devices have changed: %#v", alloc)
}
}
}

View File

@ -614,22 +614,25 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
continue
}
// Restore the network offers from the existing allocation.
// Restore the network and device offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in taskUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
var networks structs.Networks
var devices []*structs.AllocatedDeviceResource
if update.Alloc.AllocatedResources != nil {
if tr, ok := update.Alloc.AllocatedResources.Tasks[task]; ok {
networks = tr.Networks
devices = tr.Devices
}
} else if tr, ok := update.Alloc.TaskResources[task]; ok {
networks = tr.Networks
}
// Add thhe networks back
// Add the networks and devices back
resources.Networks = networks
resources.Devices = devices
}
// Create a shallow copy
@ -892,15 +895,17 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
return false, true, nil
}
// Restore the network offers from the existing allocation.
// Restore the network and device offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in taskUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
var networks structs.Networks
var devices []*structs.AllocatedDeviceResource
if existing.AllocatedResources != nil {
if tr, ok := existing.AllocatedResources.Tasks[task]; ok {
networks = tr.Networks
devices = tr.Devices
}
} else if tr, ok := existing.TaskResources[task]; ok {
networks = tr.Networks
@ -908,6 +913,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
// Add the networks back
resources.Networks = networks
resources.Devices = devices
}
// Create a shallow copy

View File

@ -87,7 +87,7 @@ compile
EOF
echo '=======>>>> Retreiving mac compiled binaries'
echo '=======>>>> Retrieving mac compiled binaries'
rsync -avz --ignore-existing ${remote_macos_host}:"${REPO_REMOTE_PATH}/pkg/" "${REPO}/pkg"
ssh ${remote_macos_host} rm -rf "${TMP_WORKSPACE}"

View File

@ -5,5 +5,6 @@
Setting `disableAnalytics` to true will prevent any data from being sent.
*/
"disableAnalytics": false
"disableAnalytics": false,
"proxy": "http://127.0.0.1:4646"
}

View File

@ -15,22 +15,11 @@ export default Component.extend({
},
generateUrl() {
let urlSegments = {
job: this.job.get('name'),
};
if (this.taskGroup) {
urlSegments.taskGroup = this.taskGroup.get('name');
}
if (this.task) {
urlSegments.task = this.task.get('name');
}
if (this.allocation) {
urlSegments.allocation = this.allocation.get('shortId');
}
return generateExecUrl(this.router, urlSegments);
return generateExecUrl(this.router, {
job: this.job,
taskGroup: this.taskGroup,
task: this.task,
allocation: this.allocation,
});
},
});

View File

@ -70,9 +70,9 @@ export default Component.extend({
openInNewWindow(job, taskGroup, task) {
let url = generateExecUrl(this.router, {
job: job.name,
taskGroup: taskGroup.name,
task: task.name,
job,
taskGroup,
task,
});
openExecUrl(url);

View File

@ -0,0 +1,18 @@
import Component from '@ember/component';
import { computed } from '@ember/object';
export default Component.extend({
tagName: '',
activeClass: computed('taskState.state', function() {
if (this.taskState && this.taskState.state === 'running') {
return 'is-active';
}
}),
finishedClass: computed('taskState.finishedAt', function() {
if (this.taskState && this.taskState.finishedAt) {
return 'is-finished';
}
}),
});

View File

@ -0,0 +1,61 @@
import Component from '@ember/component';
import { computed } from '@ember/object';
import { sort } from '@ember/object/computed';
export default Component.extend({
tagName: '',
tasks: null,
taskStates: null,
lifecyclePhases: computed('tasks.@each.lifecycle', 'taskStates.@each.state', function() {
const tasksOrStates = this.taskStates || this.tasks;
const lifecycles = {
prestarts: [],
sidecars: [],
mains: [],
};
tasksOrStates.forEach(taskOrState => {
const task = taskOrState.task || taskOrState;
lifecycles[`${task.lifecycleName}s`].push(taskOrState);
});
const phases = [];
if (lifecycles.prestarts.length || lifecycles.sidecars.length) {
phases.push({
name: 'Prestart',
isActive: lifecycles.prestarts.some(state => state.state === 'running'),
});
}
if (lifecycles.sidecars.length || lifecycles.mains.length) {
phases.push({
name: 'Main',
isActive: lifecycles.mains.some(state => state.state === 'running'),
});
}
return phases;
}),
sortedLifecycleTaskStates: sort('taskStates', function(a, b) {
return getTaskSortPrefix(a.task).localeCompare(getTaskSortPrefix(b.task));
}),
sortedLifecycleTasks: sort('tasks', function(a, b) {
return getTaskSortPrefix(a).localeCompare(getTaskSortPrefix(b));
}),
});
const lifecycleNameSortPrefix = {
prestart: 0,
sidecar: 1,
main: 2,
};
function getTaskSortPrefix(task) {
// Prestarts first, then sidecars, then mains
return `${lifecycleNameSortPrefix[task.lifecycleName]}-${task.name}`;
}

View File

@ -5,6 +5,12 @@ import RSVP from 'rsvp';
import { logger } from 'nomad-ui/utils/classes/log';
import timeout from 'nomad-ui/utils/timeout';
class MockAbortController {
abort() {
/* noop */
}
}
export default Component.extend({
token: service(),
@ -45,12 +51,25 @@ export default Component.extend({
logger: logger('logUrl', 'logParams', function logFetch() {
// If the log request can't settle in one second, the client
// must be unavailable and the server should be used instead
// AbortControllers don't exist in IE11, so provide a mock if it doesn't exist
const aborter = window.AbortController ? new AbortController() : new MockAbortController();
const timing = this.useServer ? this.serverTimeout : this.clientTimeout;
// Capture the state of useServer at logger create time to avoid a race
// between the stdout logger and stderr logger running at once.
const useServer = this.useServer;
return url =>
RSVP.race([this.token.authorizedRequest(url), timeout(timing)]).then(
response => response,
RSVP.race([
this.token.authorizedRequest(url, { signal: aborter.signal }),
timeout(timing),
]).then(
response => {
return response;
},
error => {
if (this.useServer) {
aborter.abort();
if (useServer) {
this.set('noConnection', true);
} else {
this.send('failoverToServer');
@ -62,6 +81,7 @@ export default Component.extend({
actions: {
setMode(mode) {
if (this.mode === mode) return;
this.logger.stop();
this.set('mode', mode);
},
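
The streaming-file change above does two things: it captures `useServer` when the logger is created so the stdout and stderr loggers do not race on a shared flag, and it aborts the in-flight client request once the component fails over to the server. For readers more at home in Go than Ember, a rough analogue of the same idea using context cancellation (assumed names, not Nomad code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// fetchLogs simulates a client log request that may hang until the
// context is cancelled.
func fetchLogs(ctx context.Context, url string) (string, error) {
	select {
	case <-time.After(2 * time.Second):
		return "log data from " + url, nil
	case <-ctx.Done():
		return "", ctx.Err()
	}
}

// newLogger captures useServer once, when the logger is created, so two
// loggers running at once (stdout and stderr) do not race on a shared flag.
// The context timeout plays the role of the RSVP.race with timeout(): if the
// request cannot settle in time it is abandoned and the caller falls back.
func newLogger(useServer bool, timeout time.Duration) func(url string) (string, error) {
	return func(url string) (string, error) {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel() // aborts the in-flight request, like aborter.abort()

		out, err := fetchLogs(ctx, url)
		if err != nil {
			if useServer {
				return "", fmt.Errorf("no connection: %w", err)
			}
			return "", fmt.Errorf("failing over to server: %w", err)
		}
		return out, nil
	}
}

func main() {
	logFetch := newLogger(false, time.Second)
	if _, err := logFetch("http://127.0.0.1:4646/v1/client/fs/logs/example"); err != nil {
		fmt.Println(err)
	}
}
```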

View File

@ -5,6 +5,15 @@ import { alias } from '@ember/object/computed';
import { task } from 'ember-concurrency';
export default Controller.extend({
otherTaskStates: computed('model.task.taskGroup.tasks.@each.name', function() {
const taskName = this.model.task.name;
return this.model.allocation.states.rejectBy('name', taskName);
}),
prestartTaskStates: computed('otherTaskStates.@each.lifecycle', function() {
return this.otherTaskStates.filterBy('task.lifecycle');
}),
network: alias('model.resources.networks.firstObject'),
ports: computed('network.reservedPorts.[]', 'network.dynamicPorts.[]', function() {
return (this.get('network.reservedPorts') || [])

View File

@ -0,0 +1,10 @@
import attr from 'ember-data/attr';
import Fragment from 'ember-data-model-fragments/fragment';
import { fragmentOwner } from 'ember-data-model-fragments/attributes';
export default Fragment.extend({
task: fragmentOwner(),
hook: attr('string'),
sidecar: attr('boolean'),
});

View File

@ -1,6 +1,7 @@
import attr from 'ember-data/attr';
import Fragment from 'ember-data-model-fragments/fragment';
import { fragmentArray, fragmentOwner } from 'ember-data-model-fragments/attributes';
import { fragment, fragmentArray, fragmentOwner } from 'ember-data-model-fragments/attributes';
import { computed } from '@ember/object';
export default Fragment.extend({
taskGroup: fragmentOwner(),
@ -9,6 +10,14 @@ export default Fragment.extend({
driver: attr('string'),
kind: attr('string'),
lifecycle: fragment('lifecycle'),
lifecycleName: computed('lifecycle', 'lifecycle.sidecar', function() {
if (this.lifecycle && this.lifecycle.sidecar) return 'sidecar';
if (this.lifecycle && this.lifecycle.hook === 'prestart') return 'prestart';
return 'main';
}),
reservedMemory: attr('number'),
reservedCPU: attr('number'),
reservedDisk: attr('number'),

View File

@ -72,7 +72,8 @@ export default Service.extend({
// This authorizedRawRequest is necessary in order to fetch data
// with the guarantee of a token but without the automatic region
// param since the region cannot be known at this point.
authorizedRawRequest(url, options = { credentials: 'include' }) {
authorizedRawRequest(url, options = {}) {
const credentials = 'include';
const headers = {};
const token = this.secret;
@ -80,7 +81,7 @@ export default Service.extend({
headers['X-Nomad-Token'] = token;
}
return fetch(url, assign(options, { headers }));
return fetch(url, assign(options, { headers, credentials }));
},
authorizedRequest(url, options) {

View File

@ -8,13 +8,15 @@
@import './components/ember-power-select';
@import './components/empty-message';
@import './components/error-container';
@import './components/exec';
@import './components/exec-button';
@import './components/exec-window';
@import './components/fs-explorer';
@import './components/gutter';
@import './components/gutter-toggle';
@import './components/image-file.scss';
@import './components/inline-definitions';
@import './components/job-diff';
@import './components/lifecycle-chart';
@import './components/loading-spinner';
@import './components/metrics';
@import './components/node-status-light';

View File

@ -0,0 +1,16 @@
.exec-button {
color: $ui-gray-800;
border-color: $ui-gray-300;
span {
color: $ui-gray-800;
}
.icon:first-child:not(:last-child) {
width: 0.9rem;
height: 0.9rem;
margin-left: 0;
margin-right: 0.5em;
fill: currentColor;
}
}

View File

@ -0,0 +1,152 @@
.exec-window {
display: flex;
position: absolute;
left: 0;
right: 0;
top: 3.5rem; // nav.navbar.is-popup height
bottom: 0;
.terminal-container {
flex-grow: 1;
background: black;
padding: 16px;
height: 100%;
position: relative;
color: white;
.terminal {
height: 100%;
.xterm .xterm-viewport {
overflow-y: auto;
}
}
}
&.loading {
justify-content: center;
align-items: center;
background: black;
height: 100%;
}
.task-group-tree {
background-color: $ui-gray-900;
color: white;
padding: 16px;
width: 200px;
flex-shrink: 0;
overflow-y: auto;
.title {
text-transform: uppercase;
color: $grey-lighter;
font-size: 11px;
}
.icon {
color: $ui-gray-500;
}
.toggle-button {
position: relative;
background: transparent;
border: 0;
color: white;
font-size: inherit;
line-height: 1.5;
width: 100%;
text-align: left;
overflow-wrap: break-word;
padding: 6px 0 5px 17px;
.icon {
position: absolute;
left: 0;
padding: 3px 3px 0 0;
margin-left: -3px;
}
// Adapted from fs-explorer
&.is-loading::after {
animation: spinAround 750ms infinite linear;
border: 2px solid $grey-light;
border-radius: 290486px;
border-right-color: transparent;
border-top-color: transparent;
opacity: 0.3;
content: '';
display: inline-block;
height: 1em;
width: 1em;
margin-left: 0.5em;
}
}
.task-list {
.task-item {
padding: 0 8px 0 19px;
color: white;
text-decoration: none;
display: flex;
align-items: center;
justify-content: space-between;
.border-and-label {
display: flex;
align-items: center;
height: 100%;
width: 100%;
position: relative;
}
.border {
position: absolute;
border-left: 1px solid $ui-gray-700;
height: 100%;
}
.is-active {
position: absolute;
top: 7.5px;
left: -9.75px;
stroke: $ui-gray-900;
stroke-width: 5px;
fill: white;
}
.task-label {
padding: 6px 0 5px 13px;
overflow-wrap: break-word;
width: 100%;
}
.icon {
visibility: hidden;
width: 16px;
flex-shrink: 0;
}
&:hover .icon.show-on-hover {
visibility: visible;
}
}
}
.toggle-button,
.task-item {
font-weight: 500;
&:hover {
background-color: $ui-gray-800;
border-radius: 4px;
.is-active {
stroke: $ui-gray-800;
}
}
}
}
}

View File

@ -1,169 +0,0 @@
.tree-and-terminal {
display: flex;
position: absolute;
left: 0;
right: 0;
top: 3.5rem; // nav.navbar.is-popup height
bottom: 0;
.terminal-container {
flex-grow: 1;
background: black;
padding: 16px;
height: 100%;
position: relative;
color: white;
.terminal {
height: 100%;
.xterm .xterm-viewport {
overflow-y: auto;
}
}
}
&.loading {
justify-content: center;
align-items: center;
background: black;
height: 100%;
}
}
.task-group-tree {
background-color: $ui-gray-900;
color: white;
padding: 16px;
width: 200px;
flex-shrink: 0;
overflow-y: auto;
.title {
text-transform: uppercase;
color: $grey-lighter;
font-size: 11px;
}
.icon {
color: $ui-gray-500;
}
.toggle-button {
position: relative;
background: transparent;
border: 0;
color: white;
font-size: inherit;
line-height: 1.5;
width: 100%;
text-align: left;
overflow-wrap: break-word;
padding: 6px 0 5px 17px;
.icon {
position: absolute;
left: 0;
padding: 3px 3px 0 0;
margin-left: -3px;
}
// Adapted from fs-explorer
&.is-loading::after {
animation: spinAround 750ms infinite linear;
border: 2px solid $grey-light;
border-radius: 290486px;
border-right-color: transparent;
border-top-color: transparent;
opacity: 0.3;
content: '';
display: inline-block;
height: 1em;
width: 1em;
margin-left: 0.5em;
}
}
.task-list {
.task-item {
padding: 0 8px 0 19px;
color: white;
text-decoration: none;
display: flex;
align-items: center;
justify-content: space-between;
.border-and-label {
display: flex;
align-items: center;
height: 100%;
width: 100%;
position: relative;
}
.border {
position: absolute;
border-left: 1px solid $ui-gray-700;
height: 100%;
}
.is-active {
position: absolute;
top: 7.5px;
left: -9.75px;
stroke: $ui-gray-900;
stroke-width: 5px;
fill: white;
}
.task-label {
padding: 6px 0 5px 13px;
overflow-wrap: break-word;
width: 100%;
}
.icon {
visibility: hidden;
width: 16px;
flex-shrink: 0;
}
&:hover .icon.show-on-hover {
visibility: visible;
}
}
}
.toggle-button,
.task-item {
font-weight: 500;
&:hover {
background-color: $ui-gray-800;
border-radius: 4px;
.is-active {
stroke: $ui-gray-800;
}
}
}
}
.exec-button {
color: $ui-gray-800;
border-color: $ui-gray-300;
span {
color: $ui-gray-800;
}
.icon:first-child:not(:last-child) {
width: 0.9rem;
height: 0.9rem;
margin-left: 0;
margin-right: 0.5em;
fill: currentColor;
}
}

View File

@ -0,0 +1,123 @@
.lifecycle-chart {
padding-top: 2rem;
position: relative;
.lifecycle-phases {
position: absolute;
top: 1.5em;
bottom: 1.5em;
right: 1.5em;
left: 1.5em;
.divider {
position: absolute;
left: 25%;
height: 100%;
stroke: $ui-gray-200;
stroke-width: 3px;
stroke-dasharray: 1, 7;
stroke-dashoffset: 1;
stroke-linecap: square;
}
}
.lifecycle-phase {
position: absolute;
bottom: 0;
top: 0;
border-top: 2px solid transparent;
.name {
padding: 0.5rem 0.9rem;
font-size: $size-7;
font-weight: $weight-semibold;
color: $ui-gray-500;
}
&.is-active {
background: $white-bis;
border-top: 2px solid $vagrant-blue;
.name {
color: $vagrant-blue;
}
}
&.prestart {
left: 0;
right: 75%;
}
&.main {
left: 25%;
right: 0;
}
}
.lifecycle-chart-rows {
margin-top: 2.5em;
}
.lifecycle-chart-row {
position: relative;
.task {
margin: 0.55em 0.9em;
padding: 0.3em 0.55em;
border: 1px solid $grey-blue;
border-radius: $radius;
background: white;
.name {
font-weight: $weight-semibold;
a {
color: inherit;
text-decoration: none;
}
}
&:hover {
.name a {
text-decoration: underline;
}
}
.lifecycle {
font-size: $size-7;
color: $ui-gray-400;
}
}
&.is-active {
.task {
border-color: $nomad-green;
background: lighten($nomad-green, 50%);
.lifecycle {
color: $ui-gray-500;
}
}
}
&.is-finished {
.task {
color: $ui-gray-400;
}
}
&.main {
margin-left: 25%;
}
&.prestart {
margin-right: 75%;
}
&:last-child .task {
margin-bottom: 0.9em;
}
}
}

View File

@ -1,4 +1,6 @@
$ui-gray-200: #dce0e6;
$ui-gray-300: #bac1cc;
$ui-gray-400: #8e96a3;
$ui-gray-500: #6f7682;
$ui-gray-700: #525761;
$ui-gray-800: #373a42;

Some files were not shown because too many files have changed in this diff.