Merge branch 'master' into b-reserved-scoring
commit c901d0e7dd
CHANGELOG.md | 27

@@ -1,9 +1,30 @@
-## 0.11.1 (Unreleased)
+## 0.11.2 (Unreleased)

FEATURES:

* **Task dependencies UI**: task lifecycle charts and details

BUG FIXES:

* api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
* core: job scale status endpoint was returning incorrect counts [[GH-7789](https://github.com/hashicorp/nomad/issues/7789)]
* core: Fixed a bug where scores for allocations were biased toward nodes with resource reservations [[GH-7730](https://github.com/hashicorp/nomad/issues/7730)]
* jobspec: autoscaling policy block should return a parsing error when multiple `policy` blocks are provided [[GH-7716](https://github.com/hashicorp/nomad/issues/7716)]
* ui: Fixed a bug where exec popup had incorrect URL for jobs where name ≠ id [[GH-7814](https://github.com/hashicorp/nomad/issues/7814)]

## 0.11.1 (April 22, 2020)

BUG FIXES:

* core: Fixed a bug that only ran a task `shutdown_delay` if the task had a registered service [[GH-7663](https://github.com/hashicorp/nomad/issues/7663)]
* core: Fixed a bug where scores for allocations were biased toward nodes with resource reservations [[GH-7730](https://github.com/hashicorp/nomad/issues/7730)]
* core: Fixed a panic when garbage collecting a job with allocations spanning multiple versions [[GH-7758](https://github.com/hashicorp/nomad/issues/7758)]
* agent: Fixed a bug where http server logs did not honor json log formatting, and reduced http server logging level to Trace [[GH-7748](https://github.com/hashicorp/nomad/issues/7748)]
* connect: Fixed bugs where some connect parameters would be ignored [[GH-7690](https://github.com/hashicorp/nomad/pull/7690)] [[GH-7684](https://github.com/hashicorp/nomad/pull/7684)]
* connect: Fixed a bug where an absent connect sidecar_service stanza would trigger panic [[GH-7683](https://github.com/hashicorp/nomad/pull/7683)]
* connect: Fixed a bug where some connect proxy fields would be dropped from 'job inspect' output [[GH-7397](https://github.com/hashicorp/nomad/issues/7397)]
* csi: Fixed a panic when claiming a volume for an allocation that was already garbage collected [[GH-7760](https://github.com/hashicorp/nomad/issues/7760)]
* csi: Fixed a bug where CSI plugins with `NODE_STAGE_VOLUME` capabilities were receiving an incorrect volume ID [[GH-7754](https://github.com/hashicorp/nomad/issues/7754)]
* driver/docker: Fixed a bug where retrying failed docker creation may in rare cases trigger a panic [[GH-7749](https://github.com/hashicorp/nomad/issues/7749)]
* scheduler: Fixed a bug in managing allocated devices for a job allocation in in-place update scenarios [[GH-7762](https://github.com/hashicorp/nomad/issues/7762)]
* vault: Upgrade http2 library to fix Vault API calls that fail with `http2: no cached connection was available` [[GH-7673](https://github.com/hashicorp/nomad/issues/7673)]

## 0.11.0 (April 8, 2020)

@@ -62,7 +83,7 @@ BUG FIXES:

SECURITY:

-* server: Override content-type headers for unsafe content. CVE-TBD [[GH-7468](https://github.com/hashicorp/nomad/issues/7468)]
+* server: Override content-type headers for unsafe content. CVE-2020-10944 [[GH-7468](https://github.com/hashicorp/nomad/issues/7468)]

## 0.10.4 (February 19, 2020)
GNUmakefile | 16

@@ -175,11 +175,7 @@ deps: ## Install build and development dependencies
    GO111MODULE=on go get -u gotest.tools/gotestsum
    GO111MODULE=on go get -u github.com/fatih/hclfmt
    GO111MODULE=on go get -u github.com/golang/protobuf/protoc-gen-go@v1.3.4
-   # The tag here must correspond to codec version nomad uses, e.g. v1.1.5.
-   # Though, v1.1.5 codecgen has a bug in code generator, so using a specific sha
-   # here instead.
-   GO111MODULE=on go get -u github.com/hashicorp/go-msgpack/codec/codecgen@f51b5189210768cf0d476580cf287620374d4f02
+   GO111MODULE=on go get -u github.com/hashicorp/go-msgpack/codec/codecgen@v1.1.5

.PHONY: lint-deps
lint-deps: ## Install linter dependencies

@@ -200,11 +196,15 @@ check: ## Lint the source code
    @golangci-lint run -j 1

    @echo "==> Spell checking website..."
-   @misspell -error -source=text website/source/
+   @misspell -error -source=text website/pages/

    @echo "==> Check proto files are in-sync..."
    @$(MAKE) proto
-   @if (git status | grep -q .pb.go); then echo the following proto files are out of sync; git status |grep .pb.go; exit 1; fi
+   @if (git status -s | grep -q .pb.go); then echo the following proto files are out of sync; git status -s | grep .pb.go; exit 1; fi

    @echo "==> Check format of jobspecs and HCL files..."
    @$(MAKE) hclfmt
    @if (git status -s | grep -q -e '\.hcl$$' -e '\.nomad$$'); then echo the following HCL files are out of sync; git status -s | grep -e '\.hcl$$' -e '\.nomad$$'; exit 1; fi

    @echo "==> Check API package is isolated from rest"
    @if go list --test -f '{{ join .Deps "\n" }}' ./api | grep github.com/hashicorp/nomad/ | grep -v -e /vendor/ -e /nomad/api/ -e nomad/api.test; then echo " /api package depends the ^^ above internal nomad packages. Remove such dependency"; exit 1; fi

@@ -229,7 +229,7 @@ generate-structs: ## Update generated code
.PHONY: proto
proto:
    @echo "--> Generating proto bindings..."
-   @for file in $$(git ls-files "*.proto" | grep -v "vendor\/.*.proto"); do \
+   @for file in $$(git ls-files "*.proto" | grep -E -v -- "vendor\/.*.proto|demo\/.*.proto"); do \
        protoc -I . -I ../../.. --go_out=plugins=grpc:. $$file; \
    done
@@ -10,7 +10,7 @@ import (
const (
    // The following levels are the only valid values for the `policy = "read"` stanza.
    // When policies are merged together, the most privilege is granted, except for deny
-   // which always takes precedence and supercedes.
+   // which always takes precedence and supersedes.
    PolicyDeny = "deny"
    PolicyRead = "read"
    PolicyList = "list"
@@ -45,7 +45,7 @@ type Jobs struct {
    client *Client
}

-// JobsParseRequest is used for arguments of the /vi/jobs/parse endpoint
+// JobsParseRequest is used for arguments of the /v1/jobs/parse endpoint
type JobsParseRequest struct {
    // JobHCL is an hcl jobspec
    JobHCL string

@@ -60,7 +60,7 @@ func (c *Client) Jobs() *Jobs {
    return &Jobs{client: c}
}

-// Parse is used to convert the HCL repesentation of a Job to JSON server side.
+// ParseHCL is used to convert the HCL representation of a Job to JSON server side.
// To parse the HCL client side see package github.com/hashicorp/nomad/jobspec
func (j *Jobs) ParseHCL(jobHCL string, canonicalize bool) (*Job, error) {
    var job Job
@@ -125,7 +125,7 @@ func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents
    a.ar.allocBroadcaster.Send(calloc)
}

-// initRunnerHooks intializes the runners hooks.
+// initRunnerHooks initializes the runners hooks.
func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
    hookLogger := ar.logger.Named("runner_hook")
@@ -104,6 +104,7 @@ func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error)
    req := &structs.CSIVolumeClaimRequest{
        VolumeID:     pair.request.Source,
        AllocationID: c.alloc.ID,
+       NodeID:       c.alloc.NodeID,
        Claim:        claimType,
    }
    req.Region = c.alloc.Job.Region
@@ -143,12 +143,12 @@ func (c *CSI) ControllerDetachVolume(req *structs.ClientCSIControllerDetachVolum
    csiReq := req.ToCSIRequest()

    // Submit the request for a volume to the CSI Plugin.
-   ctx, cancelFn := context.WithTimeout(context.Background(), 30*time.Second)
+   ctx, cancelFn := c.requestContext()
    defer cancelFn()
    // CSI ControllerUnpublishVolume errors for timeout, codes.Unavailable and
    // codes.ResourceExhausted are retried; all other errors are fatal.
    _, err = plugin.ControllerUnpublishVolume(ctx, csiReq,
-       grpc_retry.WithPerRetryTimeout(10*time.Second),
+       grpc_retry.WithPerRetryTimeout(CSIPluginRequestTimeout),
        grpc_retry.WithMax(3),
        grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)))
    if err != nil {
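Editorial aside: the `grpc_retry` options in the hunk above are the per-call retry knobs of a gRPC retry helper; the import is not visible in this hunk, so assume here that it is the `grpc_retry` package from github.com/grpc-ecosystem/go-grpc-middleware/retry. A minimal sketch (not the commit's code) of the same retry policy installed once at dial time via an interceptor instead of per call:

```go
package main

import (
	"log"
	"time"

	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
)

func main() {
	// Mirrors the comment in the hunk above: only timeouts, codes.Unavailable and
	// codes.ResourceExhausted are retried; everything else fails fast.
	conn, err := grpc.Dial("localhost:50051", // hypothetical plugin address
		grpc.WithInsecure(),
		grpc.WithUnaryInterceptor(grpc_retry.UnaryClientInterceptor(
			grpc_retry.WithCodes(codes.DeadlineExceeded, codes.Unavailable, codes.ResourceExhausted),
			grpc_retry.WithMax(3),
			grpc_retry.WithPerRetryTimeout(10*time.Second),
			grpc_retry.WithBackoff(grpc_retry.BackoffExponential(100*time.Millisecond)),
		)),
	)
	if err != nil {
		log.Fatalf("dial failed: %v", err)
	}
	defer conn.Close()
}
```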
@@ -21,14 +21,14 @@ import (

const (
    // AwsMetadataTimeout is the timeout used when contacting the AWS metadata
-   // service
+   // services.
    AwsMetadataTimeout = 2 * time.Second
)

// map of instance type to approximate speed, in Mbits/s
// Estimates from http://stackoverflow.com/a/35806587
// This data is meant for a loose approximation
-var ec2InstanceSpeedMap = map[*regexp.Regexp]int{
+var ec2NetSpeedTable = map[*regexp.Regexp]int{
    regexp.MustCompile("t2.nano"):  30,
    regexp.MustCompile("t2.micro"): 70,
    regexp.MustCompile("t2.small"): 125,
@@ -46,6 +46,353 @@ var ec2InstanceSpeedMap = map[*regexp.Regexp]int{
    regexp.MustCompile(`.*\.32xlarge`): 10000,
}

type ec2Specs struct {
    mhz   float64
    cores int
    model string
}

func (e ec2Specs) ticks() int {
    return int(e.mhz) * e.cores
}

func specs(ghz float64, vCores int, model string) ec2Specs {
    return ec2Specs{
        mhz:   ghz * 1000,
        cores: vCores,
        model: model,
    }
}

// Map of instance type to documented CPU speed.
//
// Most values are taken from https://aws.amazon.com/ec2/instance-types/.
// Values for a1 & m6g (Graviton) are taken from https://en.wikichip.org/wiki/annapurna_labs/alpine/al73400
// Values for inf1 are taken from launching an inf1.xlarge and looking at /proc/cpuinfo
//
// In a few cases, AWS has upgraded the generation of CPU while keeping the same
// instance designation. Since it is possible to launch on the lower performance
// CPU, that one is used as the spec for the instance type.
//
// This table is provided as a best-effort to determine the number of CPU ticks
// available for use by Nomad tasks. If an instance type is missing, the fallback
// behavior is to use values from go-psutil, which is only capable of reading
// "current" CPU MHz.
var ec2ProcSpeedTable = map[string]ec2Specs{
    // -- General Purpose --

    // a1
    "a1.medium":  specs(2.3, 1, "AWS Graviton"),
    "a1.large":   specs(2.3, 2, "AWS Graviton"),
    "a1.xlarge":  specs(2.3, 4, "AWS Graviton"),
    "a1.2xlarge": specs(2.3, 8, "AWS Graviton"),
    "a1.4xlarge": specs(2.3, 16, "AWS Graviton"),
    "a1.metal":   specs(2.3, 16, "AWS Graviton"),

    // t3
    "t3.nano":    specs(2.5, 2, "2.5 GHz Intel Scalable"),
    "t3.micro":   specs(2.5, 2, "2.5 GHz Intel Scalable"),
    "t3.small":   specs(2.5, 2, "2.5 GHz Intel Scalable"),
    "t3.medium":  specs(2.5, 2, "2.5 GHz Intel Scalable"),
    "t3.large":   specs(2.5, 2, "2.5 GHz Intel Scalable"),
    "t3.xlarge":  specs(2.5, 4, "2.5 GHz Intel Scalable"),
    "t3.2xlarge": specs(2.5, 8, "2.5 GHz Intel Scalable"),

    // t3a
    "t3a.nano":    specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.micro":   specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.small":   specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.medium":  specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.large":   specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.xlarge":  specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
    "t3a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),

    // t2
    "t2.nano":    specs(3.3, 1, "3.3 GHz Intel Scalable"),
    "t2.micro":   specs(3.3, 1, "3.3 GHz Intel Scalable"),
    "t2.small":   specs(3.3, 1, "3.3 GHz Intel Scalable"),
    "t2.medium":  specs(3.3, 2, "3.3 GHz Intel Scalable"),
    "t2.large":   specs(3.0, 2, "3.0 GHz Intel Scalable"),
    "t2.xlarge":  specs(3.0, 4, "3.0 GHz Intel Scalable"),
    "t2.2xlarge": specs(3.0, 8, "3.0 GHz Intel Scalable"),

    // m6g
    "m6g.medium":   specs(2.3, 1, "AWS Graviton2 Neoverse"),
    "m6g.large":    specs(2.3, 2, "AWS Graviton2 Neoverse"),
    "m6g.xlarge":   specs(2.3, 4, "AWS Graviton2 Neoverse"),
    "m6g.2xlarge":  specs(2.3, 8, "AWS Graviton2 Neoverse"),
    "m6g.4xlarge":  specs(2.3, 16, "AWS Graviton2 Neoverse"),
    "m6g.8xlarge":  specs(2.3, 32, "AWS Graviton2 Neoverse"),
    "m6g.12xlarge": specs(2.3, 48, "AWS Graviton2 Neoverse"),
    "m6g.16xlarge": specs(2.3, 64, "AWS Graviton2 Neoverse"),

    // m5, m5d
    "m5.large":     specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"),
    "m5.xlarge":    specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"),
    "m5.2xlarge":   specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"),
    "m5.4xlarge":   specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"),
    "m5.8xlarge":   specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"),
    "m5.12xlarge":  specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"),
    "m5.16xlarge":  specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"),
    "m5.24xlarge":  specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
    "m5.metal":     specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
    "m5d.large":    specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"),
    "m5d.xlarge":   specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"),
    "m5d.2xlarge":  specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"),
    "m5d.4xlarge":  specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"),
    "m5d.8xlarge":  specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"),
    "m5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"),
    "m5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"),
    "m5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),
    "m5d.metal":    specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"),

    // m5a, m5ad
    "m5a.large":     specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.xlarge":    specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.2xlarge":   specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.4xlarge":   specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.8xlarge":   specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.12xlarge":  specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.16xlarge":  specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
    "m5a.24xlarge":  specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.large":    specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.xlarge":   specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.2xlarge":  specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.4xlarge":  specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
    "m5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),

    // m5n, m5dn
    "m5n.large":     specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
    "m5n.xlarge":    specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
    "m5n.2xlarge":   specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
    "m5n.4xlarge":   specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
    "m5n.8xlarge":   specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
    "m5n.12xlarge":  specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
    "m5n.16xlarge":  specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
    "m5n.24xlarge":  specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.large":    specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.xlarge":   specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.2xlarge":  specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.4xlarge":  specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.8xlarge":  specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
    "m5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),

    // m4
    "m4.large":    specs(2.3, 2, "2.3 GHz Intel Xeon® E5-2686 v4"),
    "m4.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon® E5-2686 v4"),
    "m4.2xlarge":  specs(2.3, 8, "2.3 GHz Intel Xeon® E5-2686 v4"),
    "m4.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon® E5-2686 v4"),
    "m4.10xlarge": specs(2.3, 40, "2.3 GHz Intel Xeon® E5-2686 v4"),
    "m4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon® E5-2686 v4"),

    // -- Compute Optimized --

    // c5, c5d
    "c5.large":     specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5.xlarge":    specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5.2xlarge":   specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5.4xlarge":   specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5.9xlarge":   specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5.12xlarge":  specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"),
    "c5.18xlarge":  specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"),
    "c5.24xlarge":  specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
    "c5.metal":     specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
    "c5d.large":    specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5d.xlarge":   specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5d.2xlarge":  specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5d.4xlarge":  specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5d.9xlarge":  specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"),
    "c5d.12xlarge": specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"),
    "c5d.18xlarge": specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"),
    "c5d.24xlarge": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),
    "c5d.metal":    specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"),

    // c5n
    "c5n.large":    specs(3.0, 2, "3.0 GHz Intel Xeon Platinum"),
    "c5n.xlarge":   specs(3.0, 4, "3.0 GHz Intel Xeon Platinum"),
    "c5n.2xlarge":  specs(3.0, 8, "3.0 GHz Intel Xeon Platinum"),
    "c5n.4xlarge":  specs(3.0, 16, "3.0 GHz Intel Xeon Platinum"),
    "c5n.9xlarge":  specs(3.0, 36, "3.0 GHz Intel Xeon Platinum"),
    "c5n.18xlarge": specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"),
    "c5n.metal":    specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"),

    // c4
    "c4.large":   specs(2.9, 2, "2.9 GHz Intel Xeon E5-2666 v3"),
    "c4.xlarge":  specs(2.9, 4, "2.9 GHz Intel Xeon E5-2666 v3"),
    "c4.2xlarge": specs(2.9, 8, "2.9 GHz Intel Xeon E5-2666 v3"),
    "c4.4xlarge": specs(2.9, 16, "2.9 GHz Intel Xeon E5-2666 v3"),
    "c4.8xlarge": specs(2.9, 36, "2.9 GHz Intel Xeon E5-2666 v3"),

    // -- Memory Optimized --

    // r5, r5d
    "r5.large":     specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.xlarge":    specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.2xlarge":   specs(3.1, 8, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.4xlarge":   specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.8xlarge":   specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.12xlarge":  specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.16xlarge":  specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.24xlarge":  specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5.metal":     specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.large":    specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.xlarge":   specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.2xlarge":  specs(3.1, 8, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.4xlarge":  specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.8xlarge":  specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),
    "r5d.metal":    specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"),

    // r5a, r5ad
    "r5a.large":     specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.xlarge":    specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.2xlarge":   specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.4xlarge":   specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.8xlarge":   specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.12xlarge":  specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.16xlarge":  specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
    "r5a.24xlarge":  specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.large":    specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.xlarge":   specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.2xlarge":  specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.4xlarge":  specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.8xlarge":  specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"),
    "r5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"),

    // r5n
    "r5n.large":     specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
    "r5n.xlarge":    specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
    "r5n.2xlarge":   specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
    "r5n.4xlarge":   specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
    "r5n.8xlarge":   specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
    "r5n.12xlarge":  specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
    "r5n.16xlarge":  specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
    "r5n.24xlarge":  specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.large":    specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.xlarge":   specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.2xlarge":  specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.4xlarge":  specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.8xlarge":  specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"),
    "r5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),

    // r4
    "r4.large":    specs(2.3, 2, "2.3 GHz Intel Xeon E5-2686 v4"),
    "r4.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
    "r4.2xlarge":  specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"),
    "r4.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"),
    "r4.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
    "r4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),

    // x1e
    "x1e.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1e.2xlarge":  specs(2.3, 8, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1e.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1e.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1e.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1e.32xlarge": specs(2.3, 128, "2.3 GHz Intel Xeon E7-8880 v3"),

    // x1
    "x1.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),
    "x1.32xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"),

    // high-memory
    "u-6tb1.metal":  specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
    "u-9tb1.metal":  specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
    "u-12tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"),
    "u-18tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"),
    "u-24tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"),

    // z1d
    "z1d.large":    specs(4.0, 2, "4.0 GHz Intel Xeon Scalable"),
    "z1d.xlarge":   specs(4.0, 4, "4.0 GHz Intel Xeon Scalable"),
    "z1d.2xlarge":  specs(4.0, 8, "4.0 GHz Intel Xeon Scalable"),
    "z1d.3xlarge":  specs(4.0, 12, "4.0 GHz Intel Xeon Scalable"),
    "z1d.6xlarge":  specs(4.0, 24, "4.0 GHz Intel Xeon Scalable"),
    "z1d.12xlarge": specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"),
    "z1d.metal":    specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"),

    // -- Accelerated Computing --

    // p3, p3dn
    "p3.2xlarge":    specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"),
    "p3.8xlarge":    specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
    "p3.16xlarge":   specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),
    "p3dn.24xlarge": specs(2.5, 96, "2.5 GHz Intel Xeon P-8175M"),

    // p2
    "p2.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
    "p2.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
    "p2.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),

    // inf1
    "inf1.xlarge":   specs(3.0, 4, "3.0 GHz Intel Xeon Platinum 8275CL"),
    "inf1.2xlarge":  specs(3.0, 8, "3.0 GHz Intel Xeon Platinum 8275CL"),
    "inf1.6xlarge":  specs(3.0, 24, "3.0 GHz Intel Xeon Platinum 8275CL"),
    "inf1.24xlarge": specs(3.0, 96, "3.0 GHz Intel Xeon Platinum 8275CL"),

    // g4dn
    "g4dn.xlarge":   specs(2.5, 4, "2.5 GHz Cascade Lake 24C"),
    "g4dn.2xlarge":  specs(2.5, 8, "2.5 GHz Cascade Lake 24C"),
    "g4dn.4xlarge":  specs(2.5, 16, "2.5 GHz Cascade Lake 24C"),
    "g4dn.8xlarge":  specs(2.5, 32, "2.5 GHz Cascade Lake 24C"),
    "g4dn.16xlarge": specs(2.5, 64, "2.5 GHz Cascade Lake 24C"),
    "g4dn.12xlarge": specs(2.5, 48, "2.5 GHz Cascade Lake 24C"),
    "g4dn.metal":    specs(2.5, 96, "2.5 GHz Cascade Lake 24C"),

    // g3
    "g3s.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"),
    "g3s.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"),
    "g3s.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"),
    "g3s.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"),

    // f1
    "f1.2xlarge":  specs(2.3, 8, "Intel Xeon E5-2686 v4"),
    "f1.4xlarge":  specs(2.3, 16, "Intel Xeon E5-2686 v4"),
    "f1.16xlarge": specs(2.3, 64, "Intel Xeon E5-2686 v4"),

    // -- Storage Optimized --

    // i3
    "i3.large":    specs(2.3, 2, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.xlarge":   specs(2.3, 4, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.2xlarge":  specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"),
    "i3.metal":    specs(2.3, 72, "2.3 GHz Intel Xeon E5 2686 v4"),

    // i3en
    "i3en.large":    specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"),
    "i3en.xlarge":   specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"),
    "i3en.2xlarge":  specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"),
    "i3en.3xlarge":  specs(3.1, 12, "3.1 GHz Intel Xeon Scalable"),
    "i3en.6xlarge":  specs(3.1, 24, "3.1 GHz Intel Xeon Scalable"),
    "i3en.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"),
    "i3en.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),
    "i3en.metal":    specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"),

    // d2
    "d2.xlarge":  specs(2.4, 4, "2.4 GHz Intel Xeon E5-2676 v3"),
    "d2.2xlarge": specs(2.4, 8, "2.4 GHz Intel Xeon E5-2676 v3"),
    "d2.4xlarge": specs(2.4, 16, "2.4 GHz Intel Xeon E5-2676 v3"),
    "d2.8xlarge": specs(2.4, 36, "2.4 GHz Intel Xeon E5-2676 v3"),

    // h1
    "h1.2xlarge":  specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"),
    "h1.4xlarge":  specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"),
    "h1.8xlarge":  specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"),
    "h1.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"),
}

// EnvAWSFingerprint is used to fingerprint AWS metadata
type EnvAWSFingerprint struct {
    StaticFingerprinter
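Editorial aside: as the comment in the hunk above spells out, the fingerprinter turns a documented clock speed and vCPU count into a tick count (MHz × cores). A small sketch of that arithmetic for one table entry, using the types copied from the hunk (the 20000 result matches the `cpu.totalcompute` value asserted in the test hunk further down):

```go
package main

import "fmt"

// ec2Specs and ticks() mirror the definitions in the hunk above.
type ec2Specs struct {
	mhz   float64
	cores int
	model string
}

func (e ec2Specs) ticks() int {
	return int(e.mhz) * e.cores
}

func main() {
	// "t3a.2xlarge": specs(2.5, 8, ...) => 2.5 GHz * 1000 * 8 vCPUs = 20000 ticks.
	t3a2xlarge := ec2Specs{mhz: 2.5 * 1000, cores: 8, model: "2.5 GHz AMD EPYC 7000 series"}
	fmt.Println(t3a2xlarge.ticks()) // 20000
}
```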
@@ -128,25 +475,48 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F
        response.AddAttribute(key, v)
    }

-   // newNetwork is populated and added to the Nodes resources
-   var newNetwork *structs.NetworkResource
+   // accumulate resource information, then assign to response
+   var resources *structs.Resources
+   var nodeResources *structs.NodeResources

    // copy over network specific information
    if val, ok := response.Attributes["unique.platform.aws.local-ipv4"]; ok && val != "" {
        response.AddAttribute("unique.network.ip-address", val)

-       newNetwork = &structs.NetworkResource{
-           Device: "eth0",
-           IP:     val,
-           CIDR:   val + "/32",
-           MBits:  f.throughput(request, ec2meta, val),
-       }
-       response.NodeResources = &structs.NodeResources{
-           Networks: []*structs.NetworkResource{newNetwork},
+       nodeResources = new(structs.NodeResources)
+       nodeResources.Networks = []*structs.NetworkResource{
+           {
+               Device: "eth0",
+               IP:     val,
+               CIDR:   val + "/32",
+               MBits:  f.throughput(request, ec2meta, val),
+           },
        }
    }

+   // copy over CPU speed information
+   if specs := f.lookupCPU(ec2meta); specs != nil {
+       response.AddAttribute("cpu.modelname", specs.model)
+       response.AddAttribute("cpu.frequency", fmt.Sprintf("%.0f", specs.mhz))
+       response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.cores))
+       f.logger.Debug("lookup ec2 cpu", "cores", specs.cores, "MHz", log.Fmt("%.0f", specs.mhz), "model", specs.model)
+
+       if ticks := specs.ticks(); request.Config.CpuCompute <= 0 {
+           response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))
+           f.logger.Debug("setting ec2 cpu ticks", "ticks", ticks)
+           resources = new(structs.Resources)
+           resources.CPU = ticks
+           if nodeResources == nil {
+               nodeResources = new(structs.NodeResources)
+           }
+           nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)}
+       }
+   } else {
+       f.logger.Warn("failed to find the cpu specification for this instance type")
+   }
+
+   response.Resources = resources
+   response.NodeResources = nodeResources

    // populate Links
    response.AddLink("aws.ec2", fmt.Sprintf("%s.%s",
        response.Attributes["platform.aws.placement.availability-zone"],
@@ -156,6 +526,28 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F
    return nil
}

+func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (string, error) {
+   response, err := ec2meta.GetMetadata("instance-type")
+   if err != nil {
+       return "", err
+   }
+   return strings.TrimSpace(response), nil
+}
+
+func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *ec2Specs {
+   instanceType, err := f.instanceType(ec2meta)
+   if err != nil {
+       f.logger.Warn("failed to read EC2 metadata instance-type", "error", err)
+       return nil
+   }
+   for iType, specs := range ec2ProcSpeedTable {
+       if strings.EqualFold(iType, instanceType) {
+           return &specs
+       }
+   }
+   return nil
+}
+
func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2metadata.EC2Metadata, ip string) int {
    throughput := request.Config.NetworkSpeed
    if throughput != 0 {
@@ -180,17 +572,15 @@ func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2

// EnvAWSFingerprint uses lookup table to approximate network speeds
func (f *EnvAWSFingerprint) linkSpeed(ec2meta *ec2metadata.EC2Metadata) int {

-   resp, err := ec2meta.GetMetadata("instance-type")
+   instanceType, err := f.instanceType(ec2meta)
    if err != nil {
        f.logger.Error("error reading instance-type", "error", err)
        return 0
    }

-   key := strings.Trim(resp, "\n")
    netSpeed := 0
-   for reg, speed := range ec2InstanceSpeedMap {
-       if reg.MatchString(key) {
+   for reg, speed := range ec2NetSpeedTable {
+       if reg.MatchString(instanceType) {
            netSpeed = speed
            break
        }
@@ -210,11 +600,11 @@ func ec2MetaClient(endpoint string, timeout time.Duration) (*ec2metadata.EC2Meta
        c = c.WithEndpoint(endpoint)
    }

-   session, err := session.NewSession(c)
+   sess, err := session.NewSession(c)
    if err != nil {
        return nil, err
    }
-   return ec2metadata.New(session, c), nil
+   return ec2metadata.New(sess, c), nil
}

func isAWS(ec2meta *ec2metadata.EC2Metadata) bool {
@@ -202,6 +202,74 @@ func TestNetworkFingerprint_AWS_IncompleteImitation(t *testing.T) {
    require.Nil(t, response.NodeResources)
}

func TestCPUFingerprint_AWS_InstanceFound(t *testing.T) {
    endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
    defer cleanup()

    f := NewEnvAWSFingerprint(testlog.HCLogger(t))
    f.(*EnvAWSFingerprint).endpoint = endpoint

    node := &structs.Node{Attributes: make(map[string]string)}

    request := &FingerprintRequest{Config: &config.Config{}, Node: node}
    var response FingerprintResponse
    err := f.Fingerprint(request, &response)
    require.NoError(t, err)
    require.True(t, response.Detected)
    require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"])
    require.Equal(t, "2500", response.Attributes["cpu.frequency"])
    require.Equal(t, "8", response.Attributes["cpu.numcores"])
    require.Equal(t, "20000", response.Attributes["cpu.totalcompute"])
    require.Equal(t, 20000, response.Resources.CPU)
    require.Equal(t, int64(20000), response.NodeResources.Cpu.CpuShares)
}

func TestCPUFingerprint_AWS_OverrideCompute(t *testing.T) {
    endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
    defer cleanup()

    f := NewEnvAWSFingerprint(testlog.HCLogger(t))
    f.(*EnvAWSFingerprint).endpoint = endpoint

    node := &structs.Node{Attributes: make(map[string]string)}

    request := &FingerprintRequest{Config: &config.Config{
        CpuCompute: 99999,
    }, Node: node}
    var response FingerprintResponse
    err := f.Fingerprint(request, &response)
    require.NoError(t, err)
    require.True(t, response.Detected)
    require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"])
    require.Equal(t, "2500", response.Attributes["cpu.frequency"])
    require.Equal(t, "8", response.Attributes["cpu.numcores"])
    require.NotContains(t, response.Attributes, "cpu.totalcompute")
    require.Nil(t, response.Resources)          // defaults in cpu fingerprinter
    require.Zero(t, response.NodeResources.Cpu) // defaults in cpu fingerprinter
}

func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) {
    endpoint, cleanup := startFakeEC2Metadata(t, unknownInstanceType)
    defer cleanup()

    f := NewEnvAWSFingerprint(testlog.HCLogger(t))
    f.(*EnvAWSFingerprint).endpoint = endpoint

    node := &structs.Node{Attributes: make(map[string]string)}

    request := &FingerprintRequest{Config: &config.Config{}, Node: node}
    var response FingerprintResponse
    err := f.Fingerprint(request, &response)
    require.NoError(t, err)
    require.True(t, response.Detected)
    require.NotContains(t, response.Attributes, "cpu.modelname")
    require.NotContains(t, response.Attributes, "cpu.frequency")
    require.NotContains(t, response.Attributes, "cpu.numcores")
    require.NotContains(t, response.Attributes, "cpu.totalcompute")
    require.Nil(t, response.Resources)
    require.Nil(t, response.NodeResources)
}

/// Utility functions for tests

func startFakeEC2Metadata(t *testing.T, endpoints []endpoint) (endpoint string, cleanup func()) {
@@ -252,7 +320,7 @@ var awsStubs = []endpoint{
    {
        Uri:         "/latest/meta-data/instance-type",
        ContentType: "text/plain",
-       Body:        "m3.2xlarge",
+       Body:        "t3a.2xlarge",
    },
    {
        Uri: "/latest/meta-data/local-hostname",
@@ -276,6 +344,34 @@ var awsStubs = []endpoint{
    },
}

var unknownInstanceType = []endpoint{
    {
        Uri:         "/latest/meta-data/ami-id",
        ContentType: "text/plain",
        Body:        "ami-1234",
    },
    {
        Uri:         "/latest/meta-data/hostname",
        ContentType: "text/plain",
        Body:        "ip-10-0-0-207.us-west-2.compute.internal",
    },
    {
        Uri:         "/latest/meta-data/placement/availability-zone",
        ContentType: "text/plain",
        Body:        "us-west-2a",
    },
    {
        Uri:         "/latest/meta-data/instance-id",
        ContentType: "text/plain",
        Body:        "i-b3ba3875",
    },
    {
        Uri:         "/latest/meta-data/instance-type",
        ContentType: "text/plain",
        Body:        "xyz123.uber",
    },
}

// noNetworkAWSStubs mimics an EC2 instance but without local ip address
// may happen in environments with odd EC2 Metadata emulation
var noNetworkAWSStubs = []endpoint{
@@ -2,7 +2,6 @@ package csimanager

import (
    "context"
-   "fmt"
    "sync"
    "testing"
    "time"

@@ -47,7 +46,6 @@ func TestInstanceManager_Shutdown(t *testing.T) {
    im.shutdownCtxCancelFn = cancelFn
    im.shutdownCh = make(chan struct{})
    im.updater = func(_ string, info *structs.CSIInfo) {
-       fmt.Println(info)
        lock.Lock()
        defer lock.Unlock()
        pluginHealth = info.Healthy
@@ -166,7 +166,7 @@ func (v *volumeManager) stageVolume(ctx context.Context, vol *structs.CSIVolume,
    // CSI NodeStageVolume errors for timeout, codes.Unavailable and
    // codes.ResourceExhausted are retried; all other errors are fatal.
    return v.plugin.NodeStageVolume(ctx,
-       vol.ID,
+       vol.RemoteID(),
        publishContext,
        pluginStagingPath,
        capability,
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-set -e
-
-codecgen -d 102 -t codegen_generated -o structs.generated.go structs.go
-sed -i'' -e 's|"github.com/ugorji/go/codec|"github.com/hashicorp/go-msgpack/codec|g' structs.generated.go
@@ -1,6 +1,6 @@
package structs

-//go:generate ./generate.sh
+//go:generate codecgen -c github.com/hashicorp/go-msgpack/codec -d 102 -t codegen_generated -o structs.generated.go structs.go

import (
    "errors"
@@ -640,10 +640,12 @@ func (c *Command) Run(args []string) int {
        logGate.Flush()
        return 1
    }
-   defer c.agent.Shutdown()
-
-   // Shutdown the HTTP server at the end
    defer func() {
+       c.agent.Shutdown()
+
+       // Shutdown the http server at the end, to ease debugging if
+       // the agent takes long to shutdown
        if c.httpServer != nil {
            c.httpServer.Shutdown()
        }
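Editorial aside: the reordering above is about defer semantics — deferred calls run last-in, first-out, so two separate defers would stop the HTTP server before the agent. A small illustration of the difference (the names are generic, not Nomad's):

```go
package main

import "fmt"

// separateDefers mirrors the old arrangement: the HTTP server defer is
// registered last, so LIFO ordering tears it down before the agent.
func separateDefers() {
	defer fmt.Println("agent shutdown")       // runs second
	defer fmt.Println("http server shutdown") // runs first
}

// combinedDefer mirrors the new arrangement: one closure fixes the order
// explicitly, keeping the HTTP server alive until the agent has shut down.
func combinedDefer() {
	defer func() {
		fmt.Println("agent shutdown")
		fmt.Println("http server shutdown")
	}()
}

func main() {
	separateDefers()
	combinedDefer()
}
```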
@@ -146,6 +146,7 @@ func NewHTTPServer(agent *Agent, config *Config) (*HTTPServer, error) {
        Addr:      srv.Addr,
        Handler:   gzip(mux),
        ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns),
+       ErrorLog:  newHTTPServerLogger(srv.logger),
    }

    go func() {

@@ -466,7 +467,11 @@ func (s *HTTPServer) wrap(handler func(resp http.ResponseWriter, req *http.Reque

            resp.WriteHeader(code)
            resp.Write([]byte(errMsg))
-           s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           if isAPIClientError(code) {
+               s.logger.Debug("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           } else {
+               s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           }
            return
        }

@@ -520,7 +525,11 @@ func (s *HTTPServer) wrapNonJSON(handler func(resp http.ResponseWriter, req *htt
            code, errMsg := errCodeFromHandler(err)
            resp.WriteHeader(code)
            resp.Write([]byte(errMsg))
-           s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           if isAPIClientError(code) {
+               s.logger.Debug("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           } else {
+               s.logger.Error("request failed", "method", req.Method, "path", reqURL, "error", err, "code", code)
+           }
            return
        }

@@ -532,6 +541,11 @@ func (s *HTTPServer) wrapNonJSON(handler func(resp http.ResponseWriter, req *htt
    return f
}

+// isAPIClientError returns true if the passed http code represents a client error
+func isAPIClientError(code int) bool {
+   return 400 <= code && code <= 499
+}
+
// decodeBody is used to decode a JSON request body
func decodeBody(req *http.Request, out interface{}) error {
    dec := json.NewDecoder(req.Body)
@@ -0,0 +1,33 @@
package agent

import (
    "bytes"
    "log"

    hclog "github.com/hashicorp/go-hclog"
)

func newHTTPServerLogger(logger hclog.Logger) *log.Logger {
    return log.New(&httpServerLoggerAdapter{logger}, "", 0)
}

// a logger adapter that forwards http server logs as a Trace level
// hclog log entries. Logs related to panics are forwarded with Error level.
//
// HTTP server logs are typically spurious as they represent HTTP
// client errors (e.g. TLS handshake failures).
type httpServerLoggerAdapter struct {
    logger hclog.Logger
}

func (l *httpServerLoggerAdapter) Write(data []byte) (int, error) {
    if bytes.Contains(data, []byte("panic")) {
        str := string(bytes.TrimRight(data, " \t\n"))
        l.logger.Error(str)
    } else if l.logger.IsTrace() {
        str := string(bytes.TrimRight(data, " \t\n"))
        l.logger.Trace(str)
    }

    return len(data), nil
}
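Editorial aside: the adapter above exists because net/http only accepts a standard-library *log.Logger for ErrorLog, so the hclog logger has to be bridged. A minimal sketch of that wiring; for brevity it uses go-hclog's built-in StandardLogger shim rather than the commit's adapter, which differs by filtering ordinary messages to Trace and promoting panic messages to Error:

```go
package main

import (
	"log"
	"net/http"

	hclog "github.com/hashicorp/go-hclog"
)

// stdErrorLog bridges an hclog.Logger to the *log.Logger that http.Server expects.
func stdErrorLog(l hclog.Logger) *log.Logger {
	return l.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true})
}

func main() {
	logger := hclog.New(&hclog.LoggerOptions{Name: "http"})
	srv := &http.Server{
		Addr:     ":8080", // hypothetical address
		ErrorLog: stdErrorLog(logger),
	}
	log.Fatal(srv.ListenAndServe())
}
```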
@@ -0,0 +1,48 @@
package agent

import (
    "bytes"
    "testing"

    "github.com/hashicorp/go-hclog"
    "github.com/stretchr/testify/require"
)

func TestHttpServerLoggerFilters_Level_Info(t *testing.T) {
    var buf bytes.Buffer
    hclogger := hclog.New(&hclog.LoggerOptions{
        Name:   "testlog",
        Output: &buf,
        Level:  hclog.Info,
    })

    stdlogger := newHTTPServerLogger(hclogger)

    // spurious logging would be filtered out
    stdlogger.Printf("spurious logging: %v", "arg")
    require.Empty(t, buf.String())

    // panics are included
    stdlogger.Printf("panic while processing: %v", "endpoint")
    require.Contains(t, buf.String(), "[ERROR] testlog: panic while processing: endpoint")
}

func TestHttpServerLoggerFilters_Level_Trace(t *testing.T) {
    var buf bytes.Buffer
    hclogger := hclog.New(&hclog.LoggerOptions{
        Name:   "testlog",
        Output: &buf,
        Level:  hclog.Trace,
    })

    stdlogger := newHTTPServerLogger(hclogger)

    // spurious logging will be included as Trace level
    stdlogger.Printf("spurious logging: %v", "arg")
    require.Contains(t, buf.String(), "[TRACE] testlog: spurious logging: arg")

    stdlogger.Printf("panic while processing: %v", "endpoint")
    require.Contains(t, buf.String(), "[ERROR] testlog: panic while processing: endpoint")
}
@@ -1082,6 +1082,18 @@ func TestHTTPServer_Limits_OK(t *testing.T) {
    }
}

func Test_IsAPIClientError(t *testing.T) {
    trueCases := []int{400, 403, 404, 499}
    for _, c := range trueCases {
        require.Truef(t, isAPIClientError(c), "code: %v", c)
    }

    falseCases := []int{100, 300, 500, 501, 505}
    for _, c := range falseCases {
        require.Falsef(t, isAPIClientError(c), "code: %v", c)
    }
}

func httpTest(t testing.TB, cb func(c *Config), f func(srv *TestAgent)) {
    s := makeHTTPServer(t, cb)
    defer s.Shutdown()
@@ -85,14 +85,12 @@ func (c *DeploymentStatusCommand) Run(args []string) int {

    // Check that we got exactly one argument
    args = flags.Args()
-   if l := len(args); l != 1 {
+   if l := len(args); l > 1 {
        c.Ui.Error("This command takes one argument: <deployment id>")
        c.Ui.Error(commandErrorText(c))
        return 1
    }

-   dID := args[0]
-
    // Truncate the id unless full length is requested
    length := shortId
    if verbose {

@@ -106,7 +104,20 @@ func (c *DeploymentStatusCommand) Run(args []string) int {
        return 1
    }

+   // List if no arguments are provided
+   if len(args) == 0 {
+       deploys, _, err := client.Deployments().List(nil)
+       if err != nil {
+           c.Ui.Error(fmt.Sprintf("Error retrieving deployments: %s", err))
+           return 1
+       }
+
+       c.Ui.Output(formatDeployments(deploys, length))
+       return 0
+   }
+
+   // Do a prefix lookup
+   dID := args[0]
    deploy, possible, err := getDeployment(client.Deployments(), dID)
    if err != nil {
        c.Ui.Error(fmt.Sprintf("Error retrieving deployment: %s", err))
@@ -1,13 +1,13 @@
package command

import (
-   "strings"
    "testing"

    "github.com/hashicorp/nomad/nomad/mock"
    "github.com/mitchellh/cli"
    "github.com/posener/complete"
    "github.com/stretchr/testify/assert"
+   "github.com/stretchr/testify/require"
)

func TestDeploymentStatusCommand_Implements(t *testing.T) {

@@ -21,20 +21,23 @@ func TestDeploymentStatusCommand_Fails(t *testing.T) {
    cmd := &DeploymentStatusCommand{Meta: Meta{Ui: ui}}

    // Fails on misuse
-   if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 {
-       t.Fatalf("expected exit code 1, got: %d", code)
-   }
-   if out := ui.ErrorWriter.String(); !strings.Contains(out, commandErrorText(cmd)) {
-       t.Fatalf("expected help output, got: %s", out)
-   }
+   code := cmd.Run([]string{"some", "bad", "args"})
+   require.Equal(t, 1, code)
+   out := ui.ErrorWriter.String()
+   require.Contains(t, out, commandErrorText(cmd))
    ui.ErrorWriter.Reset()

-   if code := cmd.Run([]string{"-address=nope", "12"}); code != 1 {
-       t.Fatalf("expected exit code 1, got: %d", code)
-   }
-   if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error retrieving deployment") {
-       t.Fatalf("expected failed query error, got: %s", out)
-   }
+   code = cmd.Run([]string{"-address=nope", "12"})
+   require.Equal(t, 1, code)
+   out = ui.ErrorWriter.String()
+   require.Contains(t, out, "Error retrieving deployment")
    ui.ErrorWriter.Reset()
+
+   code = cmd.Run([]string{"-address=nope"})
+   require.Equal(t, 1, code)
+   out = ui.ErrorWriter.String()
+   // "deployments" indicates that we attempted to list all deployments
+   require.Contains(t, out, "Error retrieving deployments")
+   ui.ErrorWriter.Reset()
}
@@ -17,7 +17,7 @@ const (
nomad job run -check-index %d %s

When running the job with the check-index flag, the job will only be run if the
-server side version matches the job modify index returned. If the index has
+job modify index given matches the server-side version. If the index has
changed, another user has modified the job and the plan's results are
potentially invalid.`
@@ -15,6 +15,7 @@
* [ ] Add structs/fields to `nomad/structs` package
  * Validation happens in this package and must be implemented
  * Implement other methods and tests from `api/` package
  * Note that analogous struct field names should match with `api/` package
* [ ] Add conversion between `api/` and `nomad/structs` in `command/agent/job_endpoint.go`
* [ ] Add check for job diff in `nomad/structs/diff.go`
  * Note that fields must be listed in alphabetical order in `FieldDiff` slices in `nomad/structs/diff_test.go`
@@ -0,0 +1 @@
grpc-checks
@@ -0,0 +1,18 @@
FROM golang:alpine as builder
WORKDIR /build
ADD . /build
RUN apk add protoc && \
    go get -u github.com/golang/protobuf/protoc-gen-go
RUN go version && \
    go env && \
    go generate && \
    CGO_ENABLED=0 GOOS=linux go build

FROM alpine:latest
MAINTAINER nomadproject.io

WORKDIR /opt
COPY --from=builder /build/grpc-checks /opt

ENTRYPOINT ["/opt/grpc-checks"]
@@ -0,0 +1,38 @@
# grpc-checks

An example service that exposes a gRPC healthcheck endpoint

### generate protobuf

Note that main.go also includes this as a go:generate directive
so that running this by hand is not necessary

```bash
$ protoc -I ./health ./health/health.proto --go_out=plugins=grpc:health
```

### build & run example

Generate, compile, and run the example server.

```bash
go generate
go build
go run main.go
```

### publish

#### Testing locally
```bash
$ docker build -t hashicorpnomad/grpc-checks:test .
$ docker run --rm hashicorpnomad/grpc-checks:test
```

#### Upload to Docker Hub
```bash
# replace <version> with the next version number
docker login
$ docker build -t hashicorpnomad/grpc-checks:<version> .
$ docker push hashicorpnomad/grpc-checks:<version>
```
@@ -0,0 +1,31 @@
package example

import (
    "context"
    "log"

    ghc "google.golang.org/grpc/health/grpc_health_v1"
)

// Server is a trivial gRPC server that implements the standard grpc.health.v1
// interface.
type Server struct {
}

func New() *Server {
    return new(Server)
}

func (s *Server) Check(ctx context.Context, hcr *ghc.HealthCheckRequest) (*ghc.HealthCheckResponse, error) {
    log.Printf("Check:%s (%s)", hcr.Service, hcr.String())
    return &ghc.HealthCheckResponse{
        Status: ghc.HealthCheckResponse_SERVING,
    }, nil
}

func (s *Server) Watch(hcr *ghc.HealthCheckRequest, hws ghc.Health_WatchServer) error {
    log.Printf("Watch:%s (%s)", hcr.Service, hcr.String())
    return hws.Send(&ghc.HealthCheckResponse{
        Status: ghc.HealthCheckResponse_SERVING,
    })
}
@@ -0,0 +1,8 @@
module github.com/hashicorp/nomad/demo/grpc-checks

go 1.14

require (
    github.com/golang/protobuf v1.3.5
    google.golang.org/grpc v1.28.1
)
@@ -0,0 +1,53 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/hashicorp/nomad v0.11.1 h1:ow411q+bAduxC0X0V3NLx9slQzwG9wiB66yVzpQ0aEg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 h1:gSJIx1SDwno+2ElGhA4+qG2zF97qiUzTM+rQ0klBOcE=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.28.1 h1:C1QC6KzgSiLyBabDi87BbjaGreoRgGUF5nOyvfrAZ1k=
google.golang.org/grpc v1.28.1/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@ -0,0 +1,40 @@
package main

import (
	"fmt"
	"log"
	"net"
	"os"

	"github.com/hashicorp/nomad/demo/grpc-checks/example"
	"google.golang.org/grpc"
	ghc "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {

	port := os.Getenv("GRPC_HC_PORT")
	if port == "" {
		port = "3333"
	}
	address := fmt.Sprintf(":%s", port)

	log.Printf("creating tcp listener on %s", address)
	listener, err := net.Listen("tcp", address)
	if err != nil {
		log.Printf("unable to create listener: %v", err)
		os.Exit(1)
	}

	log.Printf("creating grpc server")
	grpcServer := grpc.NewServer()

	log.Printf("registering health server")
	ghc.RegisterHealthServer(grpcServer, example.New())

	log.Printf("listening ...")
	if err := grpcServer.Serve(listener); err != nil {
		log.Printf("unable to listen: %v", err)
		os.Exit(1)
	}
}
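The demo above registers the stock gRPC health service, so any client speaking the grpc.health.v1 protocol can query it. Below is a minimal sketch of such a client, assuming the server is running locally on its default port 3333; the client is illustrative only and not part of this commit.

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	ghc "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	// Dial the demo server on its default port (3333 unless GRPC_HC_PORT is set).
	conn, err := grpc.Dial("localhost:3333", grpc.WithInsecure())
	if err != nil {
		log.Fatalf("unable to dial: %v", err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	// An empty service name asks about the overall health of the server.
	resp, err := ghc.NewHealthClient(conn).Check(ctx, &ghc.HealthCheckRequest{Service: ""})
	if err != nil {
		log.Fatalf("health check failed: %v", err)
	}
	log.Printf("status: %s", resp.GetStatus())
}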
@ -25,7 +25,7 @@ sudo docker --version
sudo apt-get install unzip curl vim -y

echo "Installing Nomad..."
NOMAD_VERSION=0.10.4
NOMAD_VERSION=0.11.0
cd /tmp/
curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
unzip nomad.zip
@ -21,3 +21,16 @@ client {
ports {
  http = 5656
}

# Because we will potentially have two clients talking to the same
# Docker daemon, we have to disable the dangling container cleanup,
# otherwise they will stop each other's work thinking it was orphaned.
plugin "docker" {
  config {
    gc {
      dangling_containers {
        enabled = false
      }
    }
  }
}
@ -21,3 +21,16 @@ client {
ports {
  http = 5657
}

# Because we will potentially have two clients talking to the same
# Docker daemon, we have to disable the dangling container cleanup,
# otherwise they will stop each other's work thinking it was orphaned.
plugin "docker" {
  config {
    gc {
      dangling_containers {
        enabled = false
      }
    }
  }
}
@ -439,16 +439,21 @@ CREATE:
		return container, nil
	}

	// Delete matching containers
	err = client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	if err != nil {
		d.logger.Error("failed to purge container", "container_id", container.ID)
		return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
	} else {
		d.logger.Info("purged container", "container_id", container.ID)
	// Purge conflicting container if found.
	// If container is nil here, the conflicting container was
	// deleted in our check here, so retry again.
	if container != nil {
		// Delete matching containers
		err = client.RemoveContainer(docker.RemoveContainerOptions{
			ID:    container.ID,
			Force: true,
		})
		if err != nil {
			d.logger.Error("failed to purge container", "container_id", container.ID)
			return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
		} else {
			d.logger.Info("purged container", "container_id", container.ID)
		}
	}

	if attempted < 5 {
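The hunk above changes the purge path so the conflicting container is only removed when it still exists, after which creation is retried. The standalone sketch below restates that flow against go-dockerclient; it is a simplified illustration rather than the driver's actual code, and the name-filter lookup and the five-attempt bound are assumptions carried over from the hunk.

package main

import (
	"fmt"

	docker "github.com/fsouza/go-dockerclient"
)

// createWithPurgeRetry retries container creation, force-removing any container
// that already holds the requested name between attempts.
func createWithPurgeRetry(client *docker.Client, opts docker.CreateContainerOptions) (*docker.Container, error) {
	var lastErr error
	for attempted := 0; attempted < 5; attempted++ {
		container, err := client.CreateContainer(opts)
		if err == nil {
			return container, nil
		}
		lastErr = err

		// Find a conflicting container by name; it may have been deleted by a
		// concurrent cleanup, in which case we simply retry the creation.
		found, listErr := client.ListContainers(docker.ListContainersOptions{
			All:     true,
			Filters: map[string][]string{"name": {opts.Name}},
		})
		if listErr != nil {
			return nil, listErr
		}
		if len(found) > 0 {
			if rmErr := client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    found[0].ID,
				Force: true,
			}); rmErr != nil {
				return nil, fmt.Errorf("failed to purge container %s: %v", found[0].ID, rmErr)
			}
		}
	}
	return nil, lastErr
}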
@ -1,4 +1,5 @@
NOMAD_SHA ?= $(shell git rev-parse HEAD)
PKG_PATH = $(shell pwd)/../../pkg/linux_amd64/nomad

dev-cluster:
	terraform apply -auto-approve -var-file=terraform.tfvars.dev
@ -6,5 +7,11 @@ dev-cluster:
	cd .. && NOMAD_E2E=1 go test -v . -nomad.sha=$(NOMAD_SHA) -provision.terraform ./provisioning.json -skipTests
	terraform output message

dev-cluster-from-local:
	terraform apply -auto-approve -var-file=terraform.tfvars.dev
	terraform output provisioning | jq . > ../provisioning.json
	cd .. && NOMAD_E2E=1 go test -v . -nomad.local_file=$(PKG_PATH) -provision.terraform ./provisioning.json -skipTests
	terraform output message

clean:
	terraform destroy -auto-approve
@ -366,15 +366,16 @@ func parseScalingPolicy(out **api.ScalingPolicy, list *ast.ObjectList) error {

	// If we have policy, then parse that
	if o := listVal.Filter("policy"); len(o.Items) > 0 {
		for _, o := range o.Elem().Items {
			var m map[string]interface{}
			if err := hcl.DecodeObject(&m, o.Val); err != nil {
				return err
			}

			if err := mapstructure.WeakDecode(m, &result.Policy); err != nil {
				return err
			}
		if len(o.Elem().Items) > 1 {
			return fmt.Errorf("only one 'policy' block allowed per 'scaling' block")
		}
		p := o.Elem().Items[0]
		var m map[string]interface{}
		if err := hcl.DecodeObject(&m, p.Val); err != nil {
			return err
		}
		if err := mapstructure.WeakDecode(m, &result.Policy); err != nil {
			return err
		}
	}
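With the stricter check above, a jobspec that nests two policy blocks inside one scaling block is rejected at parse time instead of silently keeping the last block. A quick way to observe this from the public jobspec package, as a hedged sketch (the HCL mirrors the multi-policy test fixture added later in this diff):

package main

import (
	"fmt"
	"strings"

	"github.com/hashicorp/nomad/jobspec"
)

func main() {
	hclSrc := `
job "elastic" {
  group "group" {
    scaling {
      policy {
        foo = "right"
      }
      policy {
        foo = "wrong"
      }
    }
  }
}`
	// Parse should now return an error for the duplicate policy block.
	_, err := jobspec.Parse(strings.NewReader(hclSrc))
	fmt.Println(err)
}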
@ -281,7 +281,7 @@ func parseSidecarTask(item *ast.ObjectItem) (*api.SidecarTask, error) {
		KillSignal: task.KillSignal,
	}

	// Parse ShutdownDelay separately to get pointer
	// Parse ShutdownDelay separatly to get pointer
	var m map[string]interface{}
	if err := hcl.DecodeObject(&m, item.Val); err != nil {
		return nil, err
@ -320,6 +320,24 @@ func parseProxy(o *ast.ObjectItem) (*api.ConsulProxy, error) {
	}

	var proxy api.ConsulProxy
	var m map[string]interface{}
	if err := hcl.DecodeObject(&m, o.Val); err != nil {
		return nil, err
	}

	delete(m, "upstreams")
	delete(m, "expose")
	delete(m, "config")

	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
		Result: &proxy,
	})
	if err != nil {
		return nil, err
	}
	if err := dec.Decode(m); err != nil {
		return nil, fmt.Errorf("proxy: %v", err)
	}

	var listVal *ast.ObjectList
	if ot, ok := o.Val.(*ast.ObjectType); ok {
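The delete-then-decode step above exists because upstreams, expose, and config have their own parsers; stripping them from the decoded map keeps mapstructure from tripping over the nested blocks while it fills in the scalar proxy fields. A standalone sketch of the same pattern, using an illustrative struct rather than Nomad's api.ConsulProxy:

package main

import (
	"fmt"

	"github.com/mitchellh/mapstructure"
)

type proxyConfig struct {
	LocalServiceAddress string `mapstructure:"local_service_address"`
	LocalServicePort    int    `mapstructure:"local_service_port"`
}

func main() {
	// Pretend this map came from hcl.DecodeObject on a proxy block.
	m := map[string]interface{}{
		"local_service_address": "10.0.1.2",
		"local_service_port":    8080,
		"upstreams":             []interface{}{ /* parsed separately */ },
	}

	// Remove nested blocks that have their own parsers before decoding the
	// scalar fields into the target struct.
	delete(m, "upstreams")

	var p proxyConfig
	dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{Result: &p})
	if err != nil {
		panic(err)
	}
	if err := dec.Decode(m); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", p)
}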
@ -894,28 +894,6 @@ func TestParse(t *testing.T) {
|
|||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"service-connect-sidecar_task-name.hcl",
|
||||
&api.Job{
|
||||
ID: helper.StringToPtr("sidecar_task_name"),
|
||||
Name: helper.StringToPtr("sidecar_task_name"),
|
||||
Type: helper.StringToPtr("service"),
|
||||
TaskGroups: []*api.TaskGroup{{
|
||||
Name: helper.StringToPtr("group"),
|
||||
Services: []*api.Service{{
|
||||
Name: "example",
|
||||
Connect: &api.ConsulConnect{
|
||||
Native: false,
|
||||
SidecarService: &api.ConsulSidecarService{},
|
||||
SidecarTask: &api.SidecarTask{
|
||||
Name: "my-sidecar",
|
||||
},
|
||||
},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"reschedule-job.hcl",
|
||||
&api.Job{
|
||||
|
@ -1051,6 +1029,7 @@ func TestParse(t *testing.T) {
|
|||
SidecarService: &api.ConsulSidecarService{
|
||||
Tags: []string{"side1", "side2"},
|
||||
Proxy: &api.ConsulProxy{
|
||||
LocalServicePort: 8080,
|
||||
Upstreams: []*api.ConsulUpstream{
|
||||
{
|
||||
DestinationName: "other-service",
|
||||
|
@ -1172,6 +1151,99 @@ func TestParse(t *testing.T) {
|
|||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"tg-service-connect-sidecar_task-name.hcl",
|
||||
&api.Job{
|
||||
ID: helper.StringToPtr("sidecar_task_name"),
|
||||
Name: helper.StringToPtr("sidecar_task_name"),
|
||||
Type: helper.StringToPtr("service"),
|
||||
TaskGroups: []*api.TaskGroup{{
|
||||
Name: helper.StringToPtr("group"),
|
||||
Services: []*api.Service{{
|
||||
Name: "example",
|
||||
Connect: &api.ConsulConnect{
|
||||
Native: false,
|
||||
SidecarService: &api.ConsulSidecarService{},
|
||||
SidecarTask: &api.SidecarTask{
|
||||
Name: "my-sidecar",
|
||||
},
|
||||
},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"tg-service-connect-proxy.hcl",
|
||||
&api.Job{
|
||||
ID: helper.StringToPtr("service-connect-proxy"),
|
||||
Name: helper.StringToPtr("service-connect-proxy"),
|
||||
Type: helper.StringToPtr("service"),
|
||||
TaskGroups: []*api.TaskGroup{{
|
||||
Name: helper.StringToPtr("group"),
|
||||
Services: []*api.Service{{
|
||||
Name: "example",
|
||||
Connect: &api.ConsulConnect{
|
||||
Native: false,
|
||||
SidecarService: &api.ConsulSidecarService{
|
||||
Proxy: &api.ConsulProxy{
|
||||
LocalServiceAddress: "10.0.1.2",
|
||||
LocalServicePort: 8080,
|
||||
ExposeConfig: &api.ConsulExposeConfig{
|
||||
Path: []*api.ConsulExposePath{{
|
||||
Path: "/metrics",
|
||||
Protocol: "http",
|
||||
LocalPathPort: 9001,
|
||||
ListenerPort: "metrics",
|
||||
}, {
|
||||
Path: "/health",
|
||||
Protocol: "http",
|
||||
LocalPathPort: 9002,
|
||||
ListenerPort: "health",
|
||||
}},
|
||||
},
|
||||
Upstreams: []*api.ConsulUpstream{{
|
||||
DestinationName: "upstream1",
|
||||
LocalBindPort: 2001,
|
||||
}, {
|
||||
DestinationName: "upstream2",
|
||||
LocalBindPort: 2002,
|
||||
}},
|
||||
Config: map[string]interface{}{
|
||||
"foo": "bar",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"tg-service-connect-local-service.hcl",
|
||||
&api.Job{
|
||||
ID: helper.StringToPtr("connect-proxy-local-service"),
|
||||
Name: helper.StringToPtr("connect-proxy-local-service"),
|
||||
Type: helper.StringToPtr("service"),
|
||||
TaskGroups: []*api.TaskGroup{{
|
||||
Name: helper.StringToPtr("group"),
|
||||
Services: []*api.Service{{
|
||||
Name: "example",
|
||||
Connect: &api.ConsulConnect{
|
||||
Native: false,
|
||||
SidecarService: &api.ConsulSidecarService{
|
||||
Proxy: &api.ConsulProxy{
|
||||
LocalServiceAddress: "10.0.1.2",
|
||||
LocalServicePort: 9876,
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"tg-service-check-expose.hcl",
|
||||
&api.Job{
|
||||
|
@ -1238,6 +1310,32 @@ func TestParse(t *testing.T) {
|
|||
},
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"tg-scaling-policy-minimal.hcl",
|
||||
&api.Job{
|
||||
ID: helper.StringToPtr("elastic"),
|
||||
Name: helper.StringToPtr("elastic"),
|
||||
TaskGroups: []*api.TaskGroup{
|
||||
{
|
||||
Name: helper.StringToPtr("group"),
|
||||
Scaling: &api.ScalingPolicy{
|
||||
Min: nil,
|
||||
Max: 0,
|
||||
Policy: nil,
|
||||
Enabled: nil,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
false,
|
||||
},
|
||||
|
||||
{
|
||||
"tg-scaling-policy-multi-policy.hcl",
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
|
|
|
@ -0,0 +1,5 @@
job "elastic" {
  group "group" {
    scaling {}
  }
}
@ -0,0 +1,19 @@
job "elastic" {
  group "group" {
    scaling {
      enabled = false
      min = 5
      max = 100

      policy {
        foo = "right"
        b = true
      }

      policy {
        foo = "wrong"
        c = false
      }
    }
  }
}
@ -0,0 +1,18 @@
job "connect-proxy-local-service" {
  type = "service"

  group "group" {
    service {
      name = "example"

      connect {
        sidecar_service {
          proxy {
            local_service_port = 9876
            local_service_address = "10.0.1.2"
          }
        }
      }
    }
  }
}
@ -0,0 +1,48 @@
job "service-connect-proxy" {
  type = "service"

  group "group" {
    service {
      name = "example"

      connect {
        sidecar_service {
          proxy {
            local_service_port = 8080
            local_service_address = "10.0.1.2"

            upstreams {
              destination_name = "upstream1"
              local_bind_port = 2001
            }

            upstreams {
              destination_name = "upstream2"
              local_bind_port = 2002
            }

            expose {
              path {
                path = "/metrics"
                protocol = "http"
                local_path_port = 9001
                listener_port = "metrics"
              }

              path {
                path = "/health"
                protocol = "http"
                local_path_port = 9002
                listener_port = "health"
              }
            }

            config {
              foo = "bar"
            }
          }
        }
      }
    }
  }
}
@ -4,12 +4,14 @@ job "sidecar_task_name" {
  group "group" {
    service {
      name = "example"

      connect {
        sidecar_service {}
        sidecar_service = {}

        sidecar_task {
          name = "my-sidecar"
        }
      }
    }
  }
}
}
@ -8,9 +8,7 @@ import (
|
|||
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
multierror "github.com/hashicorp/go-multierror"
|
||||
version "github.com/hashicorp/go-version"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/scheduler"
|
||||
|
@ -711,188 +709,30 @@ func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time,
|
|||
return timeDiff > interval.Nanoseconds()
|
||||
}
|
||||
|
||||
// TODO: we need a periodic trigger to iterate over all the volumes and split
|
||||
// them up into separate work items, same as we do for jobs.
|
||||
|
||||
// csiVolumeClaimGC is used to garbage collect CSI volume claims
|
||||
func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error {
|
||||
c.logger.Trace("garbage collecting unclaimed CSI volume claims")
|
||||
c.logger.Trace("garbage collecting unclaimed CSI volume claims", "eval.JobID", eval.JobID)
|
||||
|
||||
// Volume ID smuggled in with the eval's own JobID
|
||||
evalVolID := strings.Split(eval.JobID, ":")
|
||||
if len(evalVolID) != 3 {
|
||||
|
||||
// COMPAT(1.0): 0.11.0 shipped with 3 fields. tighten this check to len == 2
|
||||
if len(evalVolID) < 2 {
|
||||
c.logger.Error("volume gc called without volID")
|
||||
return nil
|
||||
}
|
||||
|
||||
volID := evalVolID[1]
|
||||
runningAllocs := evalVolID[2] == "purge"
|
||||
return volumeClaimReap(c.srv, volID, eval.Namespace,
|
||||
c.srv.config.Region, eval.LeaderACL, runningAllocs)
|
||||
}
|
||||
|
||||
func volumeClaimReap(srv RPCServer, volID, namespace, region, leaderACL string, runningAllocs bool) error {
|
||||
|
||||
ws := memdb.NewWatchSet()
|
||||
|
||||
vol, err := srv.State().CSIVolumeByID(ws, namespace, volID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if vol == nil {
|
||||
return nil
|
||||
}
|
||||
vol, err = srv.State().CSIVolumeDenormalize(ws, vol)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
plug, err := srv.State().CSIPluginByID(ws, vol.PluginID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
gcClaims, nodeClaims := collectClaimsToGCImpl(vol, runningAllocs)
|
||||
|
||||
var result *multierror.Error
|
||||
for _, claim := range gcClaims {
|
||||
nodeClaims, err = volumeClaimReapImpl(srv,
|
||||
&volumeClaimReapArgs{
|
||||
vol: vol,
|
||||
plug: plug,
|
||||
allocID: claim.allocID,
|
||||
nodeID: claim.nodeID,
|
||||
mode: claim.mode,
|
||||
namespace: namespace,
|
||||
region: region,
|
||||
leaderACL: leaderACL,
|
||||
nodeClaims: nodeClaims,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
return result.ErrorOrNil()
|
||||
|
||||
}
|
||||
|
||||
type gcClaimRequest struct {
|
||||
allocID string
|
||||
nodeID string
|
||||
mode structs.CSIVolumeClaimMode
|
||||
}
|
||||
|
||||
func collectClaimsToGCImpl(vol *structs.CSIVolume, runningAllocs bool) ([]gcClaimRequest, map[string]int) {
|
||||
gcAllocs := []gcClaimRequest{}
|
||||
nodeClaims := map[string]int{} // node IDs -> count
|
||||
|
||||
collectFunc := func(allocs map[string]*structs.Allocation,
|
||||
mode structs.CSIVolumeClaimMode) {
|
||||
for _, alloc := range allocs {
|
||||
// we call denormalize on the volume above to populate
|
||||
// Allocation pointers. But the alloc might have been
|
||||
// garbage collected concurrently, so if the alloc is
|
||||
// still nil we can safely skip it.
|
||||
if alloc == nil {
|
||||
continue
|
||||
}
|
||||
nodeClaims[alloc.NodeID]++
|
||||
if runningAllocs || alloc.Terminated() {
|
||||
gcAllocs = append(gcAllocs, gcClaimRequest{
|
||||
allocID: alloc.ID,
|
||||
nodeID: alloc.NodeID,
|
||||
mode: mode,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collectFunc(vol.WriteAllocs, structs.CSIVolumeClaimWrite)
|
||||
collectFunc(vol.ReadAllocs, structs.CSIVolumeClaimRead)
|
||||
return gcAllocs, nodeClaims
|
||||
}
|
||||
|
||||
type volumeClaimReapArgs struct {
|
||||
vol *structs.CSIVolume
|
||||
plug *structs.CSIPlugin
|
||||
allocID string
|
||||
nodeID string
|
||||
mode structs.CSIVolumeClaimMode
|
||||
region string
|
||||
namespace string
|
||||
leaderACL string
|
||||
nodeClaims map[string]int // node IDs -> count
|
||||
}
|
||||
|
||||
func volumeClaimReapImpl(srv RPCServer, args *volumeClaimReapArgs) (map[string]int, error) {
|
||||
vol := args.vol
|
||||
nodeID := args.nodeID
|
||||
|
||||
// (1) NodePublish / NodeUnstage must be completed before controller
|
||||
// operations or releasing the claim.
|
||||
nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: args.plug.ID,
|
||||
VolumeID: vol.ID,
|
||||
ExternalID: vol.RemoteID(),
|
||||
AllocID: args.allocID,
|
||||
NodeID: nodeID,
|
||||
AttachmentMode: vol.AttachmentMode,
|
||||
AccessMode: vol.AccessMode,
|
||||
ReadOnly: args.mode == structs.CSIVolumeClaimRead,
|
||||
}
|
||||
err := srv.RPC("ClientCSI.NodeDetachVolume", nReq,
|
||||
&cstructs.ClientCSINodeDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
args.nodeClaims[nodeID]--
|
||||
|
||||
// (2) we only emit the controller unpublish if no other allocs
|
||||
// on the node need it, but we also only want to make this
|
||||
// call at most once per node
|
||||
if vol.ControllerRequired && args.nodeClaims[nodeID] < 1 {
|
||||
|
||||
// we need to get the CSI Node ID, which is not the same as
|
||||
// the Nomad Node ID
|
||||
ws := memdb.NewWatchSet()
|
||||
targetNode, err := srv.State().NodeByID(ws, nodeID)
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
if targetNode == nil {
|
||||
return args.nodeClaims, fmt.Errorf("%s: %s",
|
||||
structs.ErrUnknownNodePrefix, nodeID)
|
||||
}
|
||||
targetCSIInfo, ok := targetNode.CSINodePlugins[args.plug.ID]
|
||||
if !ok {
|
||||
return args.nodeClaims, fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
|
||||
}
|
||||
|
||||
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
|
||||
VolumeID: vol.RemoteID(),
|
||||
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
|
||||
}
|
||||
cReq.PluginID = args.plug.ID
|
||||
err = srv.RPC("ClientCSI.ControllerDetachVolume", cReq,
|
||||
&cstructs.ClientCSIControllerDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
}
|
||||
|
||||
// (3) release the claim from the state store, allowing it to be rescheduled
|
||||
req := &structs.CSIVolumeClaimRequest{
|
||||
VolumeID: vol.ID,
|
||||
AllocationID: args.allocID,
|
||||
Claim: structs.CSIVolumeClaimRelease,
|
||||
WriteRequest: structs.WriteRequest{
|
||||
Region: args.region,
|
||||
Namespace: args.namespace,
|
||||
AuthToken: args.leaderACL,
|
||||
},
|
||||
VolumeID: volID,
|
||||
Claim: structs.CSIVolumeClaimRelease,
|
||||
}
|
||||
err = srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
|
||||
if err != nil {
|
||||
return args.nodeClaims, err
|
||||
}
|
||||
return args.nodeClaims, nil
|
||||
req.Namespace = eval.Namespace
|
||||
req.Region = c.srv.config.Region
|
||||
|
||||
err := c.srv.RPC("CSIVolume.Claim", req, &structs.CSIVolumeClaimResponse{})
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -6,10 +6,8 @@ import (
|
|||
"time"
|
||||
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/testutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
@ -2195,270 +2193,3 @@ func TestAllocation_GCEligible(t *testing.T) {
|
|||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
|
||||
}
|
||||
|
||||
func TestCSI_GCVolumeClaims_Collection(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
|
||||
defer shutdownSrv()
|
||||
testutil.WaitForLeader(t, srv.RPC)
|
||||
|
||||
state := srv.fsm.State()
|
||||
ws := memdb.NewWatchSet()
|
||||
index := uint64(100)
|
||||
|
||||
// Create a client node, plugin, and volume
|
||||
node := mock.Node()
|
||||
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
|
||||
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
||||
"csi-plugin-example": {
|
||||
PluginID: "csi-plugin-example",
|
||||
Healthy: true,
|
||||
RequiresControllerPlugin: true,
|
||||
NodeInfo: &structs.CSINodeInfo{},
|
||||
},
|
||||
}
|
||||
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
|
||||
"csi-plugin-example": {
|
||||
PluginID: "csi-plugin-example",
|
||||
Healthy: true,
|
||||
RequiresControllerPlugin: true,
|
||||
ControllerInfo: &structs.CSIControllerInfo{
|
||||
SupportsReadOnlyAttach: true,
|
||||
SupportsAttachDetach: true,
|
||||
SupportsListVolumes: true,
|
||||
SupportsListVolumesAttachedNodes: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
err := state.UpsertNode(99, node)
|
||||
require.NoError(t, err)
|
||||
volId0 := uuid.Generate()
|
||||
ns := structs.DefaultNamespace
|
||||
vols := []*structs.CSIVolume{{
|
||||
ID: volId0,
|
||||
Namespace: ns,
|
||||
PluginID: "csi-plugin-example",
|
||||
AccessMode: structs.CSIVolumeAccessModeMultiNodeSingleWriter,
|
||||
AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
|
||||
}}
|
||||
|
||||
err = state.CSIVolumeRegister(index, vols)
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
vol, err := state.CSIVolumeByID(ws, ns, volId0)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.True(t, vol.ControllerRequired)
|
||||
require.Len(t, vol.ReadAllocs, 0)
|
||||
require.Len(t, vol.WriteAllocs, 0)
|
||||
|
||||
// Create a job with 2 allocations
|
||||
job := mock.Job()
|
||||
job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
|
||||
"_": {
|
||||
Name: "someVolume",
|
||||
Type: structs.VolumeTypeCSI,
|
||||
Source: volId0,
|
||||
ReadOnly: false,
|
||||
},
|
||||
}
|
||||
err = state.UpsertJob(index, job)
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
|
||||
alloc1 := mock.Alloc()
|
||||
alloc1.JobID = job.ID
|
||||
alloc1.NodeID = node.ID
|
||||
err = state.UpsertJobSummary(index, mock.JobSummary(alloc1.JobID))
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
alloc1.TaskGroup = job.TaskGroups[0].Name
|
||||
|
||||
alloc2 := mock.Alloc()
|
||||
alloc2.JobID = job.ID
|
||||
alloc2.NodeID = node.ID
|
||||
err = state.UpsertJobSummary(index, mock.JobSummary(alloc2.JobID))
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
alloc2.TaskGroup = job.TaskGroups[0].Name
|
||||
|
||||
err = state.UpsertAllocs(104, []*structs.Allocation{alloc1, alloc2})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Claim the volumes and verify the claims were set
|
||||
err = state.CSIVolumeClaim(index, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
err = state.CSIVolumeClaim(index, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
|
||||
index++
|
||||
require.NoError(t, err)
|
||||
vol, err = state.CSIVolumeByID(ws, ns, volId0)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, vol.ReadAllocs, 1)
|
||||
require.Len(t, vol.WriteAllocs, 1)
|
||||
|
||||
// Update both allocs as failed/terminated
|
||||
alloc1.ClientStatus = structs.AllocClientStatusFailed
|
||||
alloc2.ClientStatus = structs.AllocClientStatusFailed
|
||||
err = state.UpdateAllocsFromClient(index, []*structs.Allocation{alloc1, alloc2})
|
||||
require.NoError(t, err)
|
||||
|
||||
vol, err = state.CSIVolumeDenormalize(ws, vol)
|
||||
require.NoError(t, err)
|
||||
|
||||
gcClaims, nodeClaims := collectClaimsToGCImpl(vol, false)
|
||||
require.Equal(t, nodeClaims[node.ID], 2)
|
||||
require.Len(t, gcClaims, 2)
|
||||
}
|
||||
|
||||
func TestCSI_GCVolumeClaims_Reap(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
s, shutdownSrv := TestServer(t, func(c *Config) { c.NumSchedulers = 0 })
|
||||
defer shutdownSrv()
|
||||
testutil.WaitForLeader(t, s.RPC)
|
||||
|
||||
node := mock.Node()
|
||||
plugin := mock.CSIPlugin()
|
||||
vol := mock.CSIVolume(plugin)
|
||||
alloc := mock.Alloc()
|
||||
|
||||
cases := []struct {
|
||||
Name string
|
||||
Claim gcClaimRequest
|
||||
ClaimsCount map[string]int
|
||||
ControllerRequired bool
|
||||
ExpectedErr string
|
||||
ExpectedCount int
|
||||
ExpectedClaimsCount int
|
||||
ExpectedNodeDetachVolumeCount int
|
||||
ExpectedControllerDetachVolumeCount int
|
||||
ExpectedVolumeClaimCount int
|
||||
srv *MockRPCServer
|
||||
}{
|
||||
{
|
||||
Name: "NodeDetachVolume fails",
|
||||
Claim: gcClaimRequest{
|
||||
allocID: alloc.ID,
|
||||
nodeID: node.ID,
|
||||
mode: structs.CSIVolumeClaimRead,
|
||||
},
|
||||
ClaimsCount: map[string]int{node.ID: 1},
|
||||
ControllerRequired: true,
|
||||
ExpectedErr: "node plugin missing",
|
||||
ExpectedClaimsCount: 1,
|
||||
ExpectedNodeDetachVolumeCount: 1,
|
||||
srv: &MockRPCServer{
|
||||
state: s.State(),
|
||||
nextCSINodeDetachVolumeError: fmt.Errorf("node plugin missing"),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ControllerDetachVolume no controllers",
|
||||
Claim: gcClaimRequest{
|
||||
allocID: alloc.ID,
|
||||
nodeID: node.ID,
|
||||
mode: structs.CSIVolumeClaimRead,
|
||||
},
|
||||
ClaimsCount: map[string]int{node.ID: 1},
|
||||
ControllerRequired: true,
|
||||
ExpectedErr: fmt.Sprintf(
|
||||
"Unknown node: %s", node.ID),
|
||||
ExpectedClaimsCount: 0,
|
||||
ExpectedNodeDetachVolumeCount: 1,
|
||||
ExpectedControllerDetachVolumeCount: 0,
|
||||
srv: &MockRPCServer{
|
||||
state: s.State(),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ControllerDetachVolume node-only",
|
||||
Claim: gcClaimRequest{
|
||||
allocID: alloc.ID,
|
||||
nodeID: node.ID,
|
||||
mode: structs.CSIVolumeClaimRead,
|
||||
},
|
||||
ClaimsCount: map[string]int{node.ID: 1},
|
||||
ControllerRequired: false,
|
||||
ExpectedClaimsCount: 0,
|
||||
ExpectedNodeDetachVolumeCount: 1,
|
||||
ExpectedControllerDetachVolumeCount: 0,
|
||||
ExpectedVolumeClaimCount: 1,
|
||||
srv: &MockRPCServer{
|
||||
state: s.State(),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
vol.ControllerRequired = tc.ControllerRequired
|
||||
nodeClaims, err := volumeClaimReapImpl(tc.srv, &volumeClaimReapArgs{
|
||||
vol: vol,
|
||||
plug: plugin,
|
||||
allocID: tc.Claim.allocID,
|
||||
nodeID: tc.Claim.nodeID,
|
||||
mode: tc.Claim.mode,
|
||||
region: "global",
|
||||
namespace: "default",
|
||||
leaderACL: "not-in-use",
|
||||
nodeClaims: tc.ClaimsCount,
|
||||
})
|
||||
if tc.ExpectedErr != "" {
|
||||
require.EqualError(err, tc.ExpectedErr)
|
||||
} else {
|
||||
require.NoError(err)
|
||||
}
|
||||
require.Equal(tc.ExpectedClaimsCount,
|
||||
nodeClaims[tc.Claim.nodeID], "expected claims")
|
||||
require.Equal(tc.ExpectedNodeDetachVolumeCount,
|
||||
tc.srv.countCSINodeDetachVolume, "node detach RPC count")
|
||||
require.Equal(tc.ExpectedControllerDetachVolumeCount,
|
||||
tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
|
||||
require.Equal(tc.ExpectedVolumeClaimCount,
|
||||
tc.srv.countCSIVolumeClaim, "volume claim RPC count")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type MockRPCServer struct {
|
||||
state *state.StateStore
|
||||
|
||||
// mock responses for ClientCSI.NodeDetachVolume
|
||||
nextCSINodeDetachVolumeResponse *cstructs.ClientCSINodeDetachVolumeResponse
|
||||
nextCSINodeDetachVolumeError error
|
||||
countCSINodeDetachVolume int
|
||||
|
||||
// mock responses for ClientCSI.ControllerDetachVolume
|
||||
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
|
||||
nextCSIControllerDetachVolumeError error
|
||||
countCSIControllerDetachVolume int
|
||||
|
||||
// mock responses for CSI.VolumeClaim
|
||||
nextCSIVolumeClaimResponse *structs.CSIVolumeClaimResponse
|
||||
nextCSIVolumeClaimError error
|
||||
countCSIVolumeClaim int
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) RPC(method string, args interface{}, reply interface{}) error {
|
||||
switch method {
|
||||
case "ClientCSI.NodeDetachVolume":
|
||||
reply = srv.nextCSINodeDetachVolumeResponse
|
||||
srv.countCSINodeDetachVolume++
|
||||
return srv.nextCSINodeDetachVolumeError
|
||||
case "ClientCSI.ControllerDetachVolume":
|
||||
reply = srv.nextCSIControllerDetachVolumeResponse
|
||||
srv.countCSIControllerDetachVolume++
|
||||
return srv.nextCSIControllerDetachVolumeError
|
||||
case "CSIVolume.Claim":
|
||||
reply = srv.nextCSIVolumeClaimResponse
|
||||
srv.countCSIVolumeClaim++
|
||||
return srv.nextCSIVolumeClaimError
|
||||
default:
|
||||
return fmt.Errorf("unexpected method %q passed to mock", method)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) State() *state.StateStore { return srv.state }
|
||||
|
|
|
@ -348,15 +348,31 @@ func (v *CSIVolume) Claim(args *structs.CSIVolumeClaimRequest, reply *structs.CS
|
|||
return structs.ErrPermissionDenied
|
||||
}
|
||||
|
||||
// if this is a new claim, add a Volume and PublishContext from the
|
||||
// controller (if any) to the reply
|
||||
// COMPAT(1.0): the NodeID field was added after 0.11.0 and so we
|
||||
// need to ensure it's been populated during upgrades from 0.11.0
|
||||
// to later patch versions. Remove this block in 1.0
|
||||
if args.Claim != structs.CSIVolumeClaimRelease && args.NodeID == "" {
|
||||
state := v.srv.fsm.State()
|
||||
ws := memdb.NewWatchSet()
|
||||
alloc, err := state.AllocByID(ws, args.AllocationID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if alloc == nil {
|
||||
return fmt.Errorf("%s: %s",
|
||||
structs.ErrUnknownAllocationPrefix, args.AllocationID)
|
||||
}
|
||||
args.NodeID = alloc.NodeID
|
||||
}
|
||||
|
||||
if args.Claim != structs.CSIVolumeClaimRelease {
|
||||
// if this is a new claim, add a Volume and PublishContext from the
|
||||
// controller (if any) to the reply
|
||||
err = v.controllerPublishVolume(args, reply)
|
||||
if err != nil {
|
||||
return fmt.Errorf("controller publish: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
resp, index, err := v.srv.raftApply(structs.CSIVolumeClaimRequestType, args)
|
||||
if err != nil {
|
||||
v.logger.Error("csi raft apply failed", "error", err, "method", "claim")
|
||||
|
@ -400,6 +416,7 @@ func (v *CSIVolume) controllerPublishVolume(req *structs.CSIVolumeClaimRequest,
|
|||
return nil
|
||||
}
|
||||
|
||||
// get Nomad's ID for the client node (not the storage provider's ID)
|
||||
targetNode, err := state.NodeByID(ws, alloc.NodeID)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -407,15 +424,19 @@ func (v *CSIVolume) controllerPublishVolume(req *structs.CSIVolumeClaimRequest,
|
|||
if targetNode == nil {
|
||||
return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, alloc.NodeID)
|
||||
}
|
||||
|
||||
// get the storage provider's ID for the client node (not
|
||||
// Nomad's ID for the node)
|
||||
targetCSIInfo, ok := targetNode.CSINodePlugins[plug.ID]
|
||||
if !ok {
|
||||
return fmt.Errorf("Failed to find NodeInfo for node: %s", targetNode.ID)
|
||||
}
|
||||
externalNodeID := targetCSIInfo.NodeInfo.ID
|
||||
|
||||
method := "ClientCSI.ControllerAttachVolume"
|
||||
cReq := &cstructs.ClientCSIControllerAttachVolumeRequest{
|
||||
VolumeID: vol.RemoteID(),
|
||||
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
|
||||
ClientCSINodeID: externalNodeID,
|
||||
AttachmentMode: vol.AttachmentMode,
|
||||
AccessMode: vol.AccessMode,
|
||||
ReadOnly: req.Claim == structs.CSIVolumeClaimRead,
|
||||
|
|
|
@ -201,11 +201,22 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
defer shutdown()
|
||||
testutil.WaitForLeader(t, srv.RPC)
|
||||
|
||||
index := uint64(1000)
|
||||
|
||||
state := srv.fsm.State()
|
||||
codec := rpcClient(t, srv)
|
||||
id0 := uuid.Generate()
|
||||
alloc := mock.BatchAlloc()
|
||||
|
||||
// Create a client node and alloc
|
||||
node := mock.Node()
|
||||
alloc.NodeID = node.ID
|
||||
summary := mock.JobSummary(alloc.JobID)
|
||||
index++
|
||||
require.NoError(t, state.UpsertJobSummary(index, summary))
|
||||
index++
|
||||
require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc}))
|
||||
|
||||
// Create an initial volume claim request; we expect it to fail
|
||||
// because there's no such volume yet.
|
||||
claimReq := &structs.CSIVolumeClaimRequest{
|
||||
|
@ -222,8 +233,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
require.EqualError(t, err, fmt.Sprintf("controller publish: volume not found: %s", id0),
|
||||
"expected 'volume not found' error because volume hasn't yet been created")
|
||||
|
||||
// Create a client node, plugin, alloc, and volume
|
||||
node := mock.Node()
|
||||
// Create a plugin and volume
|
||||
|
||||
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
||||
"minnie": {
|
||||
PluginID: "minnie",
|
||||
|
@ -231,7 +242,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
NodeInfo: &structs.CSINodeInfo{},
|
||||
},
|
||||
}
|
||||
err = state.UpsertNode(1002, node)
|
||||
index++
|
||||
err = state.UpsertNode(index, node)
|
||||
require.NoError(t, err)
|
||||
|
||||
vols := []*structs.CSIVolume{{
|
||||
|
@ -244,7 +256,8 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
Segments: map[string]string{"foo": "bar"},
|
||||
}},
|
||||
}}
|
||||
err = state.CSIVolumeRegister(1003, vols)
|
||||
index++
|
||||
err = state.CSIVolumeRegister(index, vols)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify that the volume exists, and is healthy
|
||||
|
@ -263,12 +276,6 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
require.Len(t, volGetResp.Volume.ReadAllocs, 0)
|
||||
require.Len(t, volGetResp.Volume.WriteAllocs, 0)
|
||||
|
||||
// Upsert the job and alloc
|
||||
alloc.NodeID = node.ID
|
||||
summary := mock.JobSummary(alloc.JobID)
|
||||
require.NoError(t, state.UpsertJobSummary(1004, summary))
|
||||
require.NoError(t, state.UpsertAllocs(1005, []*structs.Allocation{alloc}))
|
||||
|
||||
// Now our claim should succeed
|
||||
err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp)
|
||||
require.NoError(t, err)
|
||||
|
@ -284,8 +291,10 @@ func TestCSIVolumeEndpoint_Claim(t *testing.T) {
|
|||
alloc2 := mock.Alloc()
|
||||
alloc2.JobID = uuid.Generate()
|
||||
summary = mock.JobSummary(alloc2.JobID)
|
||||
require.NoError(t, state.UpsertJobSummary(1005, summary))
|
||||
require.NoError(t, state.UpsertAllocs(1006, []*structs.Allocation{alloc2}))
|
||||
index++
|
||||
require.NoError(t, state.UpsertJobSummary(index, summary))
|
||||
index++
|
||||
require.NoError(t, state.UpsertAllocs(index, []*structs.Allocation{alloc2}))
|
||||
claimReq.AllocationID = alloc2.ID
|
||||
err = msgpackrpc.CallWithCodec(codec, "CSIVolume.Claim", claimReq, claimResp)
|
||||
require.EqualError(t, err, "volume max claim reached",
|
||||
|
|
38
nomad/fsm.go
38
nomad/fsm.go
|
@ -270,6 +270,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} {
|
|||
return n.applyCSIVolumeDeregister(buf[1:], log.Index)
|
||||
case structs.CSIVolumeClaimRequestType:
|
||||
return n.applyCSIVolumeClaim(buf[1:], log.Index)
|
||||
case structs.CSIVolumeClaimBatchRequestType:
|
||||
return n.applyCSIVolumeBatchClaim(buf[1:], log.Index)
|
||||
case structs.ScalingEventRegisterRequestType:
|
||||
return n.applyUpsertScalingEvent(buf[1:], log.Index)
|
||||
}
|
||||
|
@ -1156,6 +1158,24 @@ func (n *nomadFSM) applyCSIVolumeDeregister(buf []byte, index uint64) interface{
|
|||
return nil
|
||||
}
|
||||
|
||||
func (n *nomadFSM) applyCSIVolumeBatchClaim(buf []byte, index uint64) interface{} {
|
||||
var batch *structs.CSIVolumeClaimBatchRequest
|
||||
if err := structs.Decode(buf, &batch); err != nil {
|
||||
panic(fmt.Errorf("failed to decode request: %v", err))
|
||||
}
|
||||
defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_batch_claim"}, time.Now())
|
||||
|
||||
for _, req := range batch.Claims {
|
||||
err := n.state.CSIVolumeClaim(index, req.RequestNamespace(),
|
||||
req.VolumeID, req.ToClaim())
|
||||
if err != nil {
|
||||
n.logger.Error("CSIVolumeClaim for batch failed", "error", err)
|
||||
return err // note: fails the remaining batch
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *nomadFSM) applyCSIVolumeClaim(buf []byte, index uint64) interface{} {
|
||||
var req structs.CSIVolumeClaimRequest
|
||||
if err := structs.Decode(buf, &req); err != nil {
|
||||
|
@ -1163,26 +1183,10 @@ func (n *nomadFSM) applyCSIVolumeClaim(buf []byte, index uint64) interface{} {
|
|||
}
|
||||
defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_csi_volume_claim"}, time.Now())
|
||||
|
||||
ws := memdb.NewWatchSet()
|
||||
alloc, err := n.state.AllocByID(ws, req.AllocationID)
|
||||
if err != nil {
|
||||
n.logger.Error("AllocByID failed", "error", err)
|
||||
return err
|
||||
}
|
||||
if alloc == nil {
|
||||
n.logger.Error("AllocByID failed to find alloc", "alloc_id", req.AllocationID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return structs.ErrUnknownAllocationPrefix
|
||||
}
|
||||
|
||||
if err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), req.VolumeID, alloc, req.Claim); err != nil {
|
||||
if err := n.state.CSIVolumeClaim(index, req.RequestNamespace(), req.VolumeID, req.ToClaim()); err != nil {
|
||||
n.logger.Error("CSIVolumeClaim failed", "error", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
package nomad
|
||||
|
||||
import "github.com/hashicorp/nomad/nomad/state"
|
||||
|
||||
// RPCServer is a minimal interface of the Server, intended as
|
||||
// an aid for testing logic surrounding server-to-server or
|
||||
// server-to-client RPC calls
|
||||
type RPCServer interface {
|
||||
RPC(method string, args interface{}, reply interface{}) error
|
||||
State() *state.StateStore
|
||||
}
|
|
@ -737,19 +737,13 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD
|
|||
for _, vol := range volumesToGC {
|
||||
// we have to build this eval by hand rather than calling srv.CoreJob
|
||||
// here because we need to use the volume's namespace
|
||||
|
||||
runningAllocs := ":ok"
|
||||
if args.Purge {
|
||||
runningAllocs = ":purge"
|
||||
}
|
||||
|
||||
eval := &structs.Evaluation{
|
||||
ID: uuid.Generate(),
|
||||
Namespace: job.Namespace,
|
||||
Priority: structs.CoreJobPriority,
|
||||
Type: structs.JobTypeCore,
|
||||
TriggeredBy: structs.EvalTriggerAllocStop,
|
||||
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + vol.Source + runningAllocs,
|
||||
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + vol.Source,
|
||||
LeaderACL: j.srv.getLeaderAcl(),
|
||||
Status: structs.EvalStatusPending,
|
||||
CreateTime: now,
|
||||
|
@ -1806,10 +1800,6 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
|
|||
reply.JobScaleStatus = nil
|
||||
return nil
|
||||
}
|
||||
deployment, err := state.LatestDeploymentByJobID(ws, args.RequestNamespace(), args.JobID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
events, eventsIndex, err := state.ScalingEventsByJob(ws, args.RequestNamespace(), args.JobID)
|
||||
if err != nil {
|
||||
|
@ -1819,6 +1809,13 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
|
|||
events = make(map[string][]*structs.ScalingEvent)
|
||||
}
|
||||
|
||||
var allocs []*structs.Allocation
|
||||
var allocsIndex uint64
|
||||
allocs, err = state.AllocsByJob(ws, job.Namespace, job.ID, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Setup the output
|
||||
reply.JobScaleStatus = &structs.JobScaleStatus{
|
||||
JobID: job.ID,
|
||||
|
@ -1832,24 +1829,45 @@ func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
|
|||
tgScale := &structs.TaskGroupScaleStatus{
|
||||
Desired: tg.Count,
|
||||
}
|
||||
if deployment != nil {
|
||||
if ds, ok := deployment.TaskGroups[tg.Name]; ok {
|
||||
tgScale.Placed = ds.PlacedAllocs
|
||||
tgScale.Healthy = ds.HealthyAllocs
|
||||
tgScale.Unhealthy = ds.UnhealthyAllocs
|
||||
}
|
||||
}
|
||||
tgScale.Events = events[tg.Name]
|
||||
reply.JobScaleStatus.TaskGroups[tg.Name] = tgScale
|
||||
}
|
||||
|
||||
maxIndex := job.ModifyIndex
|
||||
if deployment != nil && deployment.ModifyIndex > maxIndex {
|
||||
maxIndex = deployment.ModifyIndex
|
||||
for _, alloc := range allocs {
|
||||
// TODO: ignore canaries until we figure out what we should do with canaries
|
||||
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
|
||||
continue
|
||||
}
|
||||
if alloc.TerminalStatus() {
|
||||
continue
|
||||
}
|
||||
tgScale, ok := reply.JobScaleStatus.TaskGroups[alloc.TaskGroup]
|
||||
if !ok || tgScale == nil {
|
||||
continue
|
||||
}
|
||||
tgScale.Placed++
|
||||
if alloc.ClientStatus == structs.AllocClientStatusRunning {
|
||||
tgScale.Running++
|
||||
}
|
||||
if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.HasHealth() {
|
||||
if alloc.DeploymentStatus.IsHealthy() {
|
||||
tgScale.Healthy++
|
||||
} else if alloc.DeploymentStatus.IsUnhealthy() {
|
||||
tgScale.Unhealthy++
|
||||
}
|
||||
}
|
||||
if alloc.ModifyIndex > allocsIndex {
|
||||
allocsIndex = alloc.ModifyIndex
|
||||
}
|
||||
}
|
||||
|
||||
maxIndex := job.ModifyIndex
|
||||
if eventsIndex > maxIndex {
|
||||
maxIndex = eventsIndex
|
||||
}
|
||||
if allocsIndex > maxIndex {
|
||||
maxIndex = allocsIndex
|
||||
}
|
||||
reply.Index = maxIndex
|
||||
|
||||
// Set the query response
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
package nomad
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/nomad/helper/uuid"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
@ -197,6 +199,21 @@ func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *struct
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// If the check is exposable but doesn't have a port label set build
|
||||
// a port with a generated label, add it to the group's Dynamic ports
|
||||
// and set the check port label to the generated label.
|
||||
//
|
||||
// This lets PortLabel be optional for any exposed check.
|
||||
if check.PortLabel == "" {
|
||||
port := structs.Port{
|
||||
Label: fmt.Sprintf("svc_%s_ck_%s", s.Name, uuid.Generate()[:6]),
|
||||
To: -1,
|
||||
}
|
||||
|
||||
tg.Networks[0].DynamicPorts = append(tg.Networks[0].DynamicPorts, port)
|
||||
check.PortLabel = port.Label
|
||||
}
|
||||
|
||||
// Determine the local service port (i.e. what port the service is actually
|
||||
// listening to inside the network namespace).
|
||||
//
|
||||
|
@ -216,9 +233,7 @@ func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *struct
|
|||
}
|
||||
|
||||
// The Path, Protocol, and PortLabel are just copied over from the service
|
||||
// check definition. It is required that the user configure their own port
|
||||
// mapping for each check, including setting the 'to = -1' sentinel value
|
||||
// enabling the network namespace pass-through.
|
||||
// check definition.
|
||||
return &structs.ConsulExposePath{
|
||||
Path: check.Path,
|
||||
Protocol: check.Protocol,
|
||||
|
|
|
@ -346,6 +346,36 @@ func TestJobExposeCheckHook_exposePathForCheck(t *testing.T) {
|
|||
}, s, c)
|
||||
require.EqualError(t, err, `unable to determine local service port for service check group1->service1->check1`)
|
||||
})
|
||||
|
||||
t.Run("empty check port", func(t *testing.T) {
|
||||
c := &structs.ServiceCheck{
|
||||
Name: "check1",
|
||||
Type: "http",
|
||||
Path: "/health",
|
||||
}
|
||||
s := &structs.Service{
|
||||
Name: "service1",
|
||||
PortLabel: "9999",
|
||||
Checks: []*structs.ServiceCheck{c},
|
||||
}
|
||||
tg := &structs.TaskGroup{
|
||||
Name: "group1",
|
||||
Services: []*structs.Service{s},
|
||||
Networks: structs.Networks{{
|
||||
Mode: "bridge",
|
||||
DynamicPorts: []structs.Port{},
|
||||
}},
|
||||
}
|
||||
ePath, err := exposePathForCheck(tg, s, c)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, tg.Networks[0].DynamicPorts, 1)
|
||||
require.Equal(t, &structs.ConsulExposePath{
|
||||
Path: "/health",
|
||||
Protocol: "",
|
||||
LocalPathPort: 9999,
|
||||
ListenerPort: tg.Networks[0].DynamicPorts[0].Label,
|
||||
}, ePath)
|
||||
})
|
||||
}
|
||||
|
||||
func TestJobExposeCheckHook_containsExposePath(t *testing.T) {
|
||||
|
|
|
@ -5627,42 +5627,104 @@ func TestJobEndpoint_GetScaleStatus(t *testing.T) {
|
|||
testutil.WaitForLeader(t, s1.RPC)
|
||||
state := s1.fsm.State()
|
||||
|
||||
job := mock.Job()
|
||||
jobV1 := mock.Job()
|
||||
|
||||
// check before job registration
|
||||
// check before registration
|
||||
// Fetch the scaling status
|
||||
get := &structs.JobScaleStatusRequest{
|
||||
JobID: job.ID,
|
||||
JobID: jobV1.ID,
|
||||
QueryOptions: structs.QueryOptions{
|
||||
Region: "global",
|
||||
Namespace: job.Namespace,
|
||||
Namespace: jobV1.Namespace,
|
||||
},
|
||||
}
|
||||
var resp2 structs.JobScaleStatusResponse
|
||||
require.NoError(msgpackrpc.CallWithCodec(codec, "Job.ScaleStatus", get, &resp2))
|
||||
require.Nil(resp2.JobScaleStatus)
|
||||
|
||||
// Create the register request
|
||||
err := state.UpsertJob(1000, job)
|
||||
require.Nil(err)
|
||||
// stopped (previous version)
|
||||
require.NoError(state.UpsertJob(1000, jobV1), "UpsertJob")
|
||||
a0 := mock.Alloc()
|
||||
a0.Job = jobV1
|
||||
a0.Namespace = jobV1.Namespace
|
||||
a0.JobID = jobV1.ID
|
||||
a0.ClientStatus = structs.AllocClientStatusComplete
|
||||
require.NoError(state.UpsertAllocs(1010, []*structs.Allocation{a0}), "UpsertAllocs")
|
||||
|
||||
jobV2 := jobV1.Copy()
|
||||
require.NoError(state.UpsertJob(1100, jobV2), "UpsertJob")
|
||||
a1 := mock.Alloc()
|
||||
a1.Job = jobV2
|
||||
a1.Namespace = jobV2.Namespace
|
||||
a1.JobID = jobV2.ID
|
||||
a1.ClientStatus = structs.AllocClientStatusRunning
|
||||
// healthy
|
||||
a1.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Healthy: helper.BoolToPtr(true),
|
||||
}
|
||||
a2 := mock.Alloc()
|
||||
a2.Job = jobV2
|
||||
a2.Namespace = jobV2.Namespace
|
||||
a2.JobID = jobV2.ID
|
||||
a2.ClientStatus = structs.AllocClientStatusPending
|
||||
// unhealthy
|
||||
a2.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Healthy: helper.BoolToPtr(false),
|
||||
}
|
||||
a3 := mock.Alloc()
|
||||
a3.Job = jobV2
|
||||
a3.Namespace = jobV2.Namespace
|
||||
a3.JobID = jobV2.ID
|
||||
a3.ClientStatus = structs.AllocClientStatusRunning
|
||||
// canary
|
||||
a3.DeploymentStatus = &structs.AllocDeploymentStatus{
|
||||
Healthy: helper.BoolToPtr(true),
|
||||
Canary: true,
|
||||
}
|
||||
// no health
|
||||
a4 := mock.Alloc()
|
||||
a4.Job = jobV2
|
||||
a4.Namespace = jobV2.Namespace
|
||||
a4.JobID = jobV2.ID
|
||||
a4.ClientStatus = structs.AllocClientStatusRunning
|
||||
// upsert allocations
|
||||
require.NoError(state.UpsertAllocs(1110, []*structs.Allocation{a1, a2, a3, a4}), "UpsertAllocs")
|
||||
|
||||
event := &structs.ScalingEvent{
|
||||
Time: time.Now().Unix(),
|
||||
Count: helper.Int64ToPtr(5),
|
||||
Message: "message",
|
||||
Error: false,
|
||||
Meta: map[string]interface{}{
|
||||
"a": "b",
|
||||
},
|
||||
EvalID: nil,
|
||||
}
|
||||
|
||||
require.NoError(state.UpsertScalingEvent(1003, &structs.ScalingEventRequest{
|
||||
Namespace: jobV2.Namespace,
|
||||
JobID: jobV2.ID,
|
||||
TaskGroup: jobV2.TaskGroups[0].Name,
|
||||
ScalingEvent: event,
|
||||
}), "UpsertScalingEvent")
|
||||
|
||||
// check after job registration
|
||||
require.NoError(msgpackrpc.CallWithCodec(codec, "Job.ScaleStatus", get, &resp2))
|
||||
require.NotNil(resp2.JobScaleStatus)
|
||||
|
||||
expectedStatus := structs.JobScaleStatus{
|
||||
JobID: job.ID,
|
||||
JobCreateIndex: job.CreateIndex,
|
||||
JobModifyIndex: job.ModifyIndex,
|
||||
JobStopped: job.Stop,
|
||||
JobID: jobV2.ID,
|
||||
JobCreateIndex: jobV2.CreateIndex,
|
||||
JobModifyIndex: a1.CreateIndex,
|
||||
JobStopped: jobV2.Stop,
|
||||
TaskGroups: map[string]*structs.TaskGroupScaleStatus{
|
||||
job.TaskGroups[0].Name: {
|
||||
Desired: job.TaskGroups[0].Count,
|
||||
Placed: 0,
|
||||
Running: 0,
|
||||
Healthy: 0,
|
||||
Unhealthy: 0,
|
||||
Events: nil,
|
||||
jobV2.TaskGroups[0].Name: {
|
||||
Desired: jobV2.TaskGroups[0].Count,
|
||||
Placed: 3,
|
||||
Running: 2,
|
||||
Healthy: 1,
|
||||
Unhealthy: 1,
|
||||
Events: []*structs.ScalingEvent{event},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -241,6 +241,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error {
|
|||
// Enable the NodeDrainer
|
||||
s.nodeDrainer.SetEnabled(true, s.State())
|
||||
|
||||
// Enable the volume watcher, since we are now the leader
|
||||
s.volumeWatcher.SetEnabled(true, s.State())
|
||||
|
||||
// Restore the eval broker state
|
||||
if err := s.restoreEvals(); err != nil {
|
||||
return err
|
||||
|
@ -870,6 +873,9 @@ func (s *Server) revokeLeadership() error {
|
|||
// Disable the node drainer
|
||||
s.nodeDrainer.SetEnabled(false, nil)
|
||||
|
||||
// Disable the volume watcher
|
||||
s.volumeWatcher.SetEnabled(false, nil)
|
||||
|
||||
// Disable any enterprise systems required.
|
||||
if err := s.revokeEnterpriseLeadership(); err != nil {
|
||||
return err
|
||||
|
|
|
@ -1313,6 +1313,9 @@ func CSIVolume(plugin *structs.CSIPlugin) *structs.CSIVolume {
|
|||
MountOptions: &structs.CSIMountOptions{},
|
||||
ReadAllocs: map[string]*structs.Allocation{},
|
||||
WriteAllocs: map[string]*structs.Allocation{},
|
||||
ReadClaims: map[string]*structs.CSIVolumeClaim{},
|
||||
WriteClaims: map[string]*structs.CSIVolumeClaim{},
|
||||
PastClaims: map[string]*structs.CSIVolumeClaim{},
|
||||
PluginID: plugin.ID,
|
||||
Provider: plugin.Provider,
|
||||
ProviderVersion: plugin.Version,
|
||||
|
|
|
@ -1149,7 +1149,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
|
|||
Priority: structs.CoreJobPriority,
|
||||
Type: structs.JobTypeCore,
|
||||
TriggeredBy: structs.EvalTriggerAllocStop,
|
||||
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + volAndNamespace[0] + ":no",
|
||||
JobID: structs.CoreJobCSIVolumeClaimGC + ":" + volAndNamespace[0],
|
||||
LeaderACL: n.srv.getLeaderAcl(),
|
||||
Status: structs.EvalStatusPending,
|
||||
CreateTime: now.UTC().UnixNano(),
|
||||
|
|
|
@ -2381,9 +2381,17 @@ func TestClientEndpoint_UpdateAlloc_UnclaimVolumes(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
|
||||
// Claim the volumes and verify the claims were set
|
||||
err = state.CSIVolumeClaim(105, ns, volId0, alloc1, structs.CSIVolumeClaimWrite)
|
||||
err = state.CSIVolumeClaim(105, ns, volId0, &structs.CSIVolumeClaim{
|
||||
AllocationID: alloc1.ID,
|
||||
NodeID: alloc1.NodeID,
|
||||
Mode: structs.CSIVolumeClaimWrite,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
err = state.CSIVolumeClaim(106, ns, volId0, alloc2, structs.CSIVolumeClaimRead)
|
||||
err = state.CSIVolumeClaim(106, ns, volId0, &structs.CSIVolumeClaim{
|
||||
AllocationID: alloc2.ID,
|
||||
NodeID: alloc2.NodeID,
|
||||
Mode: structs.CSIVolumeClaimRead,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
vol, err = state.CSIVolumeByID(ws, ns, volId0)
|
||||
require.NoError(t, err)
|
||||
|
@ -2406,7 +2414,7 @@ func TestClientEndpoint_UpdateAlloc_UnclaimVolumes(t *testing.T) {
|
|||
|
||||
// Verify the eval for the claim GC was emitted
|
||||
// Lookup the evaluations
|
||||
eval, err := state.EvalsByJob(ws, job.Namespace, structs.CoreJobCSIVolumeClaimGC+":"+volId0+":no")
|
||||
eval, err := state.EvalsByJob(ws, job.Namespace, structs.CoreJobCSIVolumeClaimGC+":"+volId0)
|
||||
require.NotNil(t, eval)
|
||||
require.Nil(t, err)
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ import (
|
|||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/hashicorp/nomad/nomad/structs/config"
|
||||
"github.com/hashicorp/nomad/nomad/volumewatcher"
|
||||
"github.com/hashicorp/nomad/scheduler"
|
||||
"github.com/hashicorp/raft"
|
||||
raftboltdb "github.com/hashicorp/raft-boltdb"
|
||||
|
@ -186,6 +187,9 @@ type Server struct {
|
|||
// nodeDrainer is used to drain allocations from nodes.
|
||||
nodeDrainer *drainer.NodeDrainer
|
||||
|
||||
// volumeWatcher is used to release volume claims
|
||||
volumeWatcher *volumewatcher.Watcher
|
||||
|
||||
// evalBroker is used to manage the in-progress evaluations
|
||||
// that are waiting to be brokered to a sub-scheduler
|
||||
evalBroker *EvalBroker
|
||||
|
@ -399,6 +403,12 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulACLs consu
|
|||
return nil, fmt.Errorf("failed to create deployment watcher: %v", err)
|
||||
}
|
||||
|
||||
// Setup the volume watcher
|
||||
if err := s.setupVolumeWatcher(); err != nil {
|
||||
s.logger.Error("failed to create volume watcher", "error", err)
|
||||
return nil, fmt.Errorf("failed to create volume watcher: %v", err)
|
||||
}
|
||||
|
||||
// Setup the node drainer.
|
||||
s.setupNodeDrainer()
|
||||
|
||||
|
@ -993,6 +1003,27 @@ func (s *Server) setupDeploymentWatcher() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// setupVolumeWatcher creates a volume watcher that consumes the RPC
|
||||
// endpoints for state information and makes transitions via Raft through a
|
||||
// shim that provides the appropriate methods.
|
||||
func (s *Server) setupVolumeWatcher() error {
|
||||
|
||||
// Create the raft shim type to restrict the set of raft methods that can be
|
||||
// made
|
||||
raftShim := &volumeWatcherRaftShim{
|
||||
apply: s.raftApply,
|
||||
}
|
||||
|
||||
// Create the volume watcher
|
||||
s.volumeWatcher = volumewatcher.NewVolumesWatcher(
|
||||
s.logger, raftShim,
|
||||
s.staticEndpoints.ClientCSI,
|
||||
volumewatcher.LimitStateQueriesPerSecond,
|
||||
volumewatcher.CrossVolumeUpdateBatchDuration)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupNodeDrainer creates a node drainer which will be enabled when a server
|
||||
// becomes a leader.
|
||||
func (s *Server) setupNodeDrainer() {
|
||||
|
|
|
@ -1187,15 +1187,14 @@ func (s *StateStore) deleteJobFromPlugin(index uint64, txn *memdb.Txn, job *stru
|
|||
plugins := map[string]*structs.CSIPlugin{}
|
||||
|
||||
for _, a := range allocs {
|
||||
tg := job.LookupTaskGroup(a.TaskGroup)
|
||||
// if it's nil, we can just panic
|
||||
tg := a.Job.LookupTaskGroup(a.TaskGroup)
|
||||
for _, t := range tg.Tasks {
|
||||
if t.CSIPluginConfig != nil {
|
||||
plugAllocs = append(plugAllocs, &pair{
|
||||
pluginID: t.CSIPluginConfig.ID,
|
||||
alloc: a,
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1479,16 +1478,10 @@ func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn
|
|||
return fmt.Errorf("index update failed: %v", err)
|
||||
}
|
||||
|
||||
// Delete any job scaling policies
|
||||
numDeletedScalingPolicies, err := txn.DeleteAll("scaling_policy", "target_prefix", namespace, jobID)
|
||||
if err != nil {
|
||||
// Delete any remaining job scaling policies
|
||||
if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
|
||||
return fmt.Errorf("deleting job scaling policies failed: %v", err)
|
||||
}
|
||||
if numDeletedScalingPolicies > 0 {
|
||||
if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
|
||||
return fmt.Errorf("index update failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete the scaling events
|
||||
if _, err = txn.DeleteAll("scaling_event", "id", namespace, jobID); err != nil {
@ -1507,6 +1500,20 @@ func (s *StateStore) DeleteJobTxn(index uint64, namespace, jobID string, txn Txn
	return nil
}

// deleteJobScalingPolicies deletes any scaling policies associated with the job
func (s *StateStore) deleteJobScalingPolicies(index uint64, job *structs.Job, txn *memdb.Txn) error {
	numDeletedScalingPolicies, err := txn.DeleteAll("scaling_policy", "target_prefix", job.Namespace, job.ID)
	if err != nil {
		return fmt.Errorf("deleting job scaling policies failed: %v", err)
	}
	if numDeletedScalingPolicies > 0 {
		if err := txn.Insert("index", &IndexEntry{"scaling_policy", index}); err != nil {
			return fmt.Errorf("index update failed: %v", err)
		}
	}
	return nil
}

// deleteJobVersions deletes all versions of the given job.
func (s *StateStore) deleteJobVersions(index uint64, job *structs.Job, txn *memdb.Txn) error {
	iter, err := txn.Get("job_version", "id_prefix", job.Namespace, job.ID)
@ -2018,9 +2025,10 @@ func (s *StateStore) CSIVolumesByNamespace(ws memdb.WatchSet, namespace string)
|
|||
}
|
||||
|
||||
// CSIVolumeClaim updates the volume's claim count and allocation list
|
||||
func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *structs.Allocation, claim structs.CSIVolumeClaimMode) error {
|
||||
func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *structs.CSIVolumeClaim) error {
|
||||
txn := s.db.Txn(true)
|
||||
defer txn.Abort()
|
||||
ws := memdb.NewWatchSet()
|
||||
|
||||
row, err := txn.First("csi_volumes", "id", namespace, id)
|
||||
if err != nil {
@ -2035,7 +2043,21 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *s
|
|||
return fmt.Errorf("volume row conversion error")
|
||||
}
|
||||
|
||||
ws := memdb.NewWatchSet()
|
||||
var alloc *structs.Allocation
|
||||
if claim.Mode != structs.CSIVolumeClaimRelease {
|
||||
alloc, err = s.AllocByID(ws, claim.AllocationID)
|
||||
if err != nil {
|
||||
s.logger.Error("AllocByID failed", "error", err)
|
||||
return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
|
||||
}
|
||||
		if alloc == nil {
			s.logger.Error("AllocByID failed to find alloc", "alloc_id", claim.AllocationID)
			return fmt.Errorf(structs.ErrUnknownAllocationPrefix)
		}
}
|
||||
|
||||
volume, err := s.CSIVolumeDenormalizePlugins(ws, orig.Copy())
|
||||
if err != nil {
|
||||
return err
@ -2046,9 +2068,14 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, alloc *s
|
|||
return err
|
||||
}
|
||||
|
||||
err = volume.Claim(claim, alloc)
|
||||
if err != nil {
|
||||
return err
|
||||
// in the case of a job deregistration, there will be no allocation ID
|
||||
// for the claim but we still want to write an updated index to the volume
|
||||
// so that volume reaping is triggered
|
||||
if claim.AllocationID != "" {
|
||||
err = volume.Claim(claim, alloc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
volume.ModifyIndex = index
|
||||
|
@ -2144,14 +2171,27 @@ func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs
|
|||
return vol, nil
|
||||
}
|
||||
|
||||
// csiVolumeDenormalizeAllocs returns a CSIVolume with allocations
|
||||
// CSIVolumeDenormalize returns a CSIVolume with allocations
|
||||
func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) {
|
||||
for id := range vol.ReadAllocs {
|
||||
a, err := s.AllocByID(ws, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
vol.ReadAllocs[id] = a
|
||||
if a != nil {
|
||||
vol.ReadAllocs[id] = a
|
||||
// COMPAT(1.0): the CSIVolumeClaim fields were added
|
||||
// after 0.11.1, so claims made before that may be
|
||||
// missing this value. (same for WriteAlloc below)
|
||||
if _, ok := vol.ReadClaims[id]; !ok {
|
||||
vol.ReadClaims[id] = &structs.CSIVolumeClaim{
|
||||
AllocationID: a.ID,
|
||||
NodeID: a.NodeID,
|
||||
Mode: structs.CSIVolumeClaimRead,
|
||||
State: structs.CSIVolumeClaimStateTaken,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for id := range vol.WriteAllocs {
|
||||
|
@ -2159,7 +2199,17 @@ func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVol
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
vol.WriteAllocs[id] = a
|
||||
if a != nil {
|
||||
vol.WriteAllocs[id] = a
|
||||
if _, ok := vol.WriteClaims[id]; !ok {
|
||||
vol.WriteClaims[id] = &structs.CSIVolumeClaim{
|
||||
AllocationID: a.ID,
|
||||
NodeID: a.NodeID,
|
||||
Mode: structs.CSIVolumeClaimWrite,
|
||||
State: structs.CSIVolumeClaimStateTaken,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vol, nil
@ -4244,6 +4294,13 @@ func (s *StateStore) updateJobScalingPolicies(index uint64, job *structs.Job, tx
|
|||
|
||||
ws := memdb.NewWatchSet()
|
||||
|
||||
if job.Stop {
|
||||
if err := s.deleteJobScalingPolicies(index, job, txn); err != nil {
|
||||
return fmt.Errorf("deleting job scaling policies failed: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
scalingPolicies := job.GetScalingPolicies()
|
||||
newTargets := map[string]struct{}{}
|
||||
for _, p := range scalingPolicies {
@ -2941,18 +2941,33 @@ func TestStateStore_CSIVolume(t *testing.T) {
|
|||
vs = slurp(iter)
|
||||
require.Equal(t, 1, len(vs))
|
||||
|
||||
// Allocs
|
||||
a0 := mock.Alloc()
|
||||
a1 := mock.Alloc()
|
||||
index++
|
||||
err = state.UpsertAllocs(index, []*structs.Allocation{a0, a1})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Claims
|
||||
a0 := &structs.Allocation{ID: uuid.Generate()}
|
||||
a1 := &structs.Allocation{ID: uuid.Generate()}
|
||||
r := structs.CSIVolumeClaimRead
|
||||
w := structs.CSIVolumeClaimWrite
|
||||
u := structs.CSIVolumeClaimRelease
|
||||
claim0 := &structs.CSIVolumeClaim{
|
||||
AllocationID: a0.ID,
|
||||
NodeID: node.ID,
|
||||
Mode: r,
|
||||
}
|
||||
claim1 := &structs.CSIVolumeClaim{
|
||||
AllocationID: a1.ID,
|
||||
NodeID: node.ID,
|
||||
Mode: w,
|
||||
}
|
||||
|
||||
index++
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, a0, r)
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, claim0)
|
||||
require.NoError(t, err)
|
||||
index++
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, a1, w)
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, claim1)
|
||||
require.NoError(t, err)
|
||||
|
||||
ws = memdb.NewWatchSet()
|
||||
|
@ -2961,7 +2976,8 @@ func TestStateStore_CSIVolume(t *testing.T) {
|
|||
vs = slurp(iter)
|
||||
require.False(t, vs[0].WriteFreeClaims())
|
||||
|
||||
err = state.CSIVolumeClaim(2, ns, vol0, a0, u)
|
||||
claim0.Mode = u
|
||||
err = state.CSIVolumeClaim(2, ns, vol0, claim0)
|
||||
require.NoError(t, err)
|
||||
ws = memdb.NewWatchSet()
|
||||
iter, err = state.CSIVolumesByPluginID(ws, ns, "minnie")
|
||||
|
@ -2980,10 +2996,13 @@ func TestStateStore_CSIVolume(t *testing.T) {
|
|||
|
||||
// release claims to unblock deregister
|
||||
index++
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, a0, u)
|
||||
claim0.State = structs.CSIVolumeClaimStateReadyToFree
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, claim0)
|
||||
require.NoError(t, err)
|
||||
index++
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, a1, u)
|
||||
claim1.Mode = u
|
||||
claim1.State = structs.CSIVolumeClaimStateReadyToFree
|
||||
err = state.CSIVolumeClaim(index, ns, vol0, claim1)
|
||||
require.NoError(t, err)
|
||||
|
||||
index++
@ -8427,7 +8446,96 @@ func TestStateStore_DeleteScalingPolicies(t *testing.T) {
|
|||
require.False(watchFired(ws))
|
||||
}
|
||||
|
||||
func TestStateStore_DeleteJob_ChildScalingPolicies(t *testing.T) {
|
||||
func TestStateStore_StopJob_DeleteScalingPolicies(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
require := require.New(t)
|
||||
|
||||
state := testStateStore(t)
|
||||
|
||||
job := mock.Job()
|
||||
|
||||
err := state.UpsertJob(1000, job)
|
||||
require.NoError(err)
|
||||
|
||||
policy := mock.ScalingPolicy()
|
||||
policy.Target[structs.ScalingTargetJob] = job.ID
|
||||
err = state.UpsertScalingPolicies(1100, []*structs.ScalingPolicy{policy})
|
||||
require.NoError(err)
|
||||
|
||||
// Ensure the scaling policy is present and start some watches
|
||||
wsGet := memdb.NewWatchSet()
|
||||
out, err := state.ScalingPolicyByTarget(wsGet, policy.Target)
|
||||
require.NoError(err)
|
||||
require.NotNil(out)
|
||||
wsList := memdb.NewWatchSet()
|
||||
_, err = state.ScalingPolicies(wsList)
|
||||
require.NoError(err)
|
||||
|
||||
// Stop the job
|
||||
job, err = state.JobByID(nil, job.Namespace, job.ID)
|
||||
require.NoError(err)
|
||||
job.Stop = true
|
||||
err = state.UpsertJob(1200, job)
|
||||
require.NoError(err)
|
||||
|
||||
// Ensure:
|
||||
// * the scaling policy was deleted
|
||||
// * the watches were fired
|
||||
// * the table index was advanced
|
||||
require.True(watchFired(wsGet))
|
||||
require.True(watchFired(wsList))
|
||||
out, err = state.ScalingPolicyByTarget(nil, policy.Target)
|
||||
require.NoError(err)
|
||||
require.Nil(out)
|
||||
index, err := state.Index("scaling_policy")
|
||||
require.GreaterOrEqual(index, uint64(1200))
|
||||
}
|
||||
|
||||
func TestStateStore_UnstopJob_UpsertScalingPolicies(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
require := require.New(t)
|
||||
|
||||
state := testStateStore(t)
|
||||
|
||||
job, policy := mock.JobWithScalingPolicy()
|
||||
job.Stop = true
|
||||
|
||||
// establish watcher, verify there are no scaling policies yet
|
||||
ws := memdb.NewWatchSet()
|
||||
list, err := state.ScalingPolicies(ws)
|
||||
require.NoError(err)
|
||||
require.Nil(list.Next())
|
||||
|
||||
// upsert a stopped job, verify that we don't fire the watcher or add any scaling policies
|
||||
err = state.UpsertJob(1000, job)
|
||||
require.NoError(err)
|
||||
require.False(watchFired(ws))
|
||||
// stopped job should have no scaling policies, watcher doesn't fire
|
||||
list, err = state.ScalingPolicies(ws)
|
||||
require.NoError(err)
|
||||
require.Nil(list.Next())
|
||||
|
||||
// Establish a new watcher
|
||||
ws = memdb.NewWatchSet()
|
||||
_, err = state.ScalingPolicies(ws)
|
||||
require.NoError(err)
|
||||
// Unstop this job, say you'll run it again...
|
||||
job.Stop = false
|
||||
err = state.UpsertJob(1100, job)
|
||||
require.NoError(err)
|
||||
|
||||
// Ensure the scaling policy was added, watch was fired, index was advanced
|
||||
require.True(watchFired(ws))
|
||||
out, err := state.ScalingPolicyByTarget(nil, policy.Target)
|
||||
require.NoError(err)
|
||||
require.NotNil(out)
|
||||
index, err := state.Index("scaling_policy")
|
||||
require.GreaterOrEqual(index, uint64(1100))
|
||||
}
|
||||
|
||||
func TestStateStore_DeleteJob_DeleteScalingPolicies(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
require := require.New(t)
@ -185,6 +185,22 @@ func (v *CSIMountOptions) GoString() string {
	return v.String()
}

type CSIVolumeClaim struct {
	AllocationID string
	NodeID       string
	Mode         CSIVolumeClaimMode
	State        CSIVolumeClaimState
}

type CSIVolumeClaimState int

const (
	CSIVolumeClaimStateTaken CSIVolumeClaimState = iota
	CSIVolumeClaimStateNodeDetached
	CSIVolumeClaimStateControllerDetached
	CSIVolumeClaimStateReadyToFree
)

// CSIVolume is the full representation of a CSI Volume
type CSIVolume struct {
	// ID is a namespace unique URL safe identifier for the volume
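The four claim states above are the checkpoints the volume watcher walks through while reaping a claim: Taken, then NodeDetached once the node plugin has unpublished, then ControllerDetached (in practice the watcher jumps straight to ReadyToFree when no controller detach is needed), and finally ReadyToFree. A small illustrative sketch of that ordering follows; the constants are copied from the hunk above, while the nextState helper is not part of the change and simplifies the controller step.

package main

import "fmt"

type CSIVolumeClaimState int

const (
	CSIVolumeClaimStateTaken CSIVolumeClaimState = iota
	CSIVolumeClaimStateNodeDetached
	CSIVolumeClaimStateControllerDetached
	CSIVolumeClaimStateReadyToFree
)

// nextState is an illustrative helper showing the order in which the
// volume watcher advances a claim during reaping.
func nextState(s CSIVolumeClaimState) CSIVolumeClaimState {
	switch s {
	case CSIVolumeClaimStateTaken:
		return CSIVolumeClaimStateNodeDetached
	case CSIVolumeClaimStateNodeDetached:
		return CSIVolumeClaimStateControllerDetached
	default:
		return CSIVolumeClaimStateReadyToFree
	}
}

func main() {
	s := CSIVolumeClaimStateTaken
	for s != CSIVolumeClaimStateReadyToFree {
		fmt.Println(s, "->", nextState(s))
		s = nextState(s)
	}
}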
@ -200,8 +216,12 @@ type CSIVolume struct {
|
|||
MountOptions *CSIMountOptions
|
||||
|
||||
// Allocations, tracking claim status
|
||||
ReadAllocs map[string]*Allocation
|
||||
WriteAllocs map[string]*Allocation
|
||||
ReadAllocs map[string]*Allocation // AllocID -> Allocation
|
||||
WriteAllocs map[string]*Allocation // AllocID -> Allocation
|
||||
|
||||
ReadClaims map[string]*CSIVolumeClaim // AllocID -> claim
|
||||
WriteClaims map[string]*CSIVolumeClaim // AllocID -> claim
|
||||
PastClaims map[string]*CSIVolumeClaim // AllocID -> claim
|
||||
|
||||
// Schedulable is true if all the denormalized plugin health fields are true, and the
|
||||
// volume has not been marked for garbage collection
|
||||
|
@ -262,6 +282,10 @@ func (v *CSIVolume) newStructs() {
|
|||
|
||||
v.ReadAllocs = map[string]*Allocation{}
|
||||
v.WriteAllocs = map[string]*Allocation{}
|
||||
|
||||
v.ReadClaims = map[string]*CSIVolumeClaim{}
|
||||
v.WriteClaims = map[string]*CSIVolumeClaim{}
|
||||
v.PastClaims = map[string]*CSIVolumeClaim{}
|
||||
}
|
||||
|
||||
func (v *CSIVolume) RemoteID() string {
|
||||
|
@ -350,27 +374,43 @@ func (v *CSIVolume) Copy() *CSIVolume {
|
|||
out.WriteAllocs[k] = v
|
||||
}
|
||||
|
||||
for k, v := range v.ReadClaims {
|
||||
claim := *v
|
||||
out.ReadClaims[k] = &claim
|
||||
}
|
||||
for k, v := range v.WriteClaims {
|
||||
claim := *v
|
||||
out.WriteClaims[k] = &claim
|
||||
}
|
||||
for k, v := range v.PastClaims {
|
||||
claim := *v
|
||||
out.PastClaims[k] = &claim
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// Claim updates the allocations and changes the volume state
|
||||
func (v *CSIVolume) Claim(claim CSIVolumeClaimMode, alloc *Allocation) error {
|
||||
switch claim {
|
||||
func (v *CSIVolume) Claim(claim *CSIVolumeClaim, alloc *Allocation) error {
|
||||
switch claim.Mode {
|
||||
case CSIVolumeClaimRead:
|
||||
return v.ClaimRead(alloc)
|
||||
return v.ClaimRead(claim, alloc)
|
||||
case CSIVolumeClaimWrite:
|
||||
return v.ClaimWrite(alloc)
|
||||
return v.ClaimWrite(claim, alloc)
|
||||
case CSIVolumeClaimRelease:
|
||||
return v.ClaimRelease(alloc)
|
||||
return v.ClaimRelease(claim)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClaimRead marks an allocation as using a volume read-only
|
||||
func (v *CSIVolume) ClaimRead(alloc *Allocation) error {
|
||||
if _, ok := v.ReadAllocs[alloc.ID]; ok {
|
||||
func (v *CSIVolume) ClaimRead(claim *CSIVolumeClaim, alloc *Allocation) error {
|
||||
if _, ok := v.ReadAllocs[claim.AllocationID]; ok {
|
||||
return nil
|
||||
}
|
||||
if alloc == nil {
|
||||
return fmt.Errorf("allocation missing: %s", claim.AllocationID)
|
||||
}
|
||||
|
||||
if !v.ReadSchedulable() {
|
||||
return fmt.Errorf("unschedulable")
|
||||
|
@ -378,16 +418,24 @@ func (v *CSIVolume) ClaimRead(alloc *Allocation) error {
|
|||
|
||||
// Allocations are copy on write, so we want to keep the id but don't need the
|
||||
// pointer. We'll get it from the db in denormalize.
|
||||
v.ReadAllocs[alloc.ID] = nil
|
||||
delete(v.WriteAllocs, alloc.ID)
|
||||
v.ReadAllocs[claim.AllocationID] = nil
|
||||
delete(v.WriteAllocs, claim.AllocationID)
|
||||
|
||||
v.ReadClaims[claim.AllocationID] = claim
|
||||
delete(v.WriteClaims, claim.AllocationID)
|
||||
delete(v.PastClaims, claim.AllocationID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClaimWrite marks an allocation as using a volume as a writer
|
||||
func (v *CSIVolume) ClaimWrite(alloc *Allocation) error {
|
||||
if _, ok := v.WriteAllocs[alloc.ID]; ok {
|
||||
func (v *CSIVolume) ClaimWrite(claim *CSIVolumeClaim, alloc *Allocation) error {
|
||||
if _, ok := v.WriteAllocs[claim.AllocationID]; ok {
|
||||
return nil
|
||||
}
|
||||
if alloc == nil {
|
||||
return fmt.Errorf("allocation missing: %s", claim.AllocationID)
|
||||
}
|
||||
|
||||
if !v.WriteSchedulable() {
|
||||
return fmt.Errorf("unschedulable")
|
||||
|
@ -406,13 +454,26 @@ func (v *CSIVolume) ClaimWrite(alloc *Allocation) error {
|
|||
// pointer. We'll get it from the db in denormalize.
|
||||
v.WriteAllocs[alloc.ID] = nil
|
||||
delete(v.ReadAllocs, alloc.ID)
|
||||
|
||||
v.WriteClaims[alloc.ID] = claim
|
||||
delete(v.ReadClaims, alloc.ID)
|
||||
delete(v.PastClaims, alloc.ID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClaimRelease is called when the allocation has terminated and already stopped using the volume
|
||||
func (v *CSIVolume) ClaimRelease(alloc *Allocation) error {
|
||||
delete(v.ReadAllocs, alloc.ID)
|
||||
delete(v.WriteAllocs, alloc.ID)
|
||||
// ClaimRelease is called when the allocation has terminated and
|
||||
// already stopped using the volume
|
||||
func (v *CSIVolume) ClaimRelease(claim *CSIVolumeClaim) error {
|
||||
if claim.State == CSIVolumeClaimStateReadyToFree {
|
||||
delete(v.ReadAllocs, claim.AllocationID)
|
||||
delete(v.WriteAllocs, claim.AllocationID)
|
||||
delete(v.ReadClaims, claim.AllocationID)
|
||||
delete(v.WriteClaims, claim.AllocationID)
|
||||
delete(v.PastClaims, claim.AllocationID)
|
||||
} else {
|
||||
v.PastClaims[claim.AllocationID] = claim
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -513,13 +574,28 @@ const (
|
|||
CSIVolumeClaimRelease
|
||||
)
|
||||
|
||||
type CSIVolumeClaimBatchRequest struct {
|
||||
Claims []CSIVolumeClaimRequest
|
||||
}
|
||||
|
||||
type CSIVolumeClaimRequest struct {
|
||||
VolumeID string
|
||||
AllocationID string
|
||||
NodeID string
|
||||
Claim CSIVolumeClaimMode
|
||||
State CSIVolumeClaimState
|
||||
WriteRequest
|
||||
}
|
||||
|
||||
func (req *CSIVolumeClaimRequest) ToClaim() *CSIVolumeClaim {
|
||||
return &CSIVolumeClaim{
|
||||
AllocationID: req.AllocationID,
|
||||
NodeID: req.NodeID,
|
||||
Mode: req.Claim,
|
||||
State: req.State,
|
||||
}
|
||||
}
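ToClaim exists because the claim travels twice: once inside the RPC/raft request and once as the value stored on the volume. A self-contained sketch of that one-to-one mapping, using local stand-in types (Claim, ClaimRequest) rather than the real structs so it runs on its own:

package main

import "fmt"

// Local stand-ins so the sketch compiles on its own; the real types live in
// nomad/structs as shown in the hunk above.
type ClaimMode int
type ClaimState int

type Claim struct {
	AllocationID string
	NodeID       string
	Mode         ClaimMode
	State        ClaimState
}

type ClaimRequest struct {
	VolumeID     string
	AllocationID string
	NodeID       string
	Claim        ClaimMode
	State        ClaimState
}

// toClaim mirrors CSIVolumeClaimRequest.ToClaim: the request carries exactly
// the fields the state store needs, so the mapping is one-to-one.
func (req *ClaimRequest) toClaim() *Claim {
	return &Claim{
		AllocationID: req.AllocationID,
		NodeID:       req.NodeID,
		Mode:         req.Claim,
		State:        req.State,
	}
}

func main() {
	req := &ClaimRequest{VolumeID: "vol0", AllocationID: "alloc0", NodeID: "node0"}
	fmt.Printf("%s -> %+v\n", req.VolumeID, req.toClaim())
}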
|
||||
|
||||
type CSIVolumeClaimResponse struct {
|
||||
// Opaque static publish properties of the volume. SP MAY use this
|
||||
// field to ensure subsequent `NodeStageVolume` or `NodePublishVolume`
|
||||
|
|
|
@ -12,17 +12,28 @@ func TestCSIVolumeClaim(t *testing.T) {
|
|||
vol.Schedulable = true
|
||||
|
||||
alloc := &Allocation{ID: "a1", Namespace: "n", JobID: "j"}
|
||||
claim := &CSIVolumeClaim{
|
||||
AllocationID: alloc.ID,
|
||||
NodeID: "foo",
|
||||
Mode: CSIVolumeClaimRead,
|
||||
}
|
||||
|
||||
require.NoError(t, vol.ClaimRead(alloc))
|
||||
require.NoError(t, vol.ClaimRead(claim, alloc))
|
||||
require.True(t, vol.ReadSchedulable())
|
||||
require.True(t, vol.WriteSchedulable())
|
||||
require.NoError(t, vol.ClaimRead(alloc))
|
||||
require.NoError(t, vol.ClaimRead(claim, alloc))
|
||||
|
||||
require.NoError(t, vol.ClaimWrite(alloc))
|
||||
claim.Mode = CSIVolumeClaimWrite
|
||||
require.NoError(t, vol.ClaimWrite(claim, alloc))
|
||||
require.True(t, vol.ReadSchedulable())
|
||||
require.False(t, vol.WriteFreeClaims())
|
||||
|
||||
vol.ClaimRelease(alloc)
|
||||
vol.ClaimRelease(claim)
|
||||
require.True(t, vol.ReadSchedulable())
|
||||
require.False(t, vol.WriteFreeClaims())
|
||||
|
||||
claim.State = CSIVolumeClaimStateReadyToFree
|
||||
vol.ClaimRelease(claim)
|
||||
require.True(t, vol.ReadSchedulable())
|
||||
require.True(t, vol.WriteFreeClaims())
|
||||
}
@ -2,5 +2,9 @@
|
|||
set -e
|
||||
|
||||
FILES="$(ls ./*.go | grep -v -e _test.go -e .generated.go | tr '\n' ' ')"
|
||||
codecgen -d 100 -t codegen_generated -o structs.generated.go ${FILES}
|
||||
sed -i'' -e 's|"github.com/ugorji/go/codec|"github.com/hashicorp/go-msgpack/codec|g' structs.generated.go
|
||||
codecgen \
|
||||
-c github.com/hashicorp/go-msgpack/codec \
|
||||
-d 100 \
|
||||
-t codegen_generated \
|
||||
-o structs.generated.go \
|
||||
${FILES}
@ -331,7 +331,7 @@ func (idx *NetworkIndex) AssignNetwork(ask *NetworkResource) (out *NetworkResour
|
|||
|
||||
// getDynamicPortsPrecise takes the nodes used port bitmap which may be nil if
|
||||
// no ports have been allocated yet, the network ask and returns a set of unused
|
||||
// ports to fullfil the ask's DynamicPorts or an error if it failed. An error
|
||||
// ports to fulfil the ask's DynamicPorts or an error if it failed. An error
|
||||
// means the ask can not be satisfied as the method does a precise search.
|
||||
func getDynamicPortsPrecise(nodeUsed Bitmap, ask *NetworkResource) ([]int, error) {
|
||||
// Create a copy of the used ports and apply the new reserves
|
||||
|
@ -373,7 +373,7 @@ func getDynamicPortsPrecise(nodeUsed Bitmap, ask *NetworkResource) ([]int, error
|
|||
|
||||
// getDynamicPortsStochastic takes the nodes used port bitmap which may be nil if
|
||||
// no ports have been allocated yet, the network ask and returns a set of unused
|
||||
// ports to fullfil the ask's DynamicPorts or an error if it failed. An error
|
||||
// ports to fulfil the ask's DynamicPorts or an error if it failed. An error
|
||||
// does not mean the ask can not be satisfied as the method has a fixed amount
|
||||
// of random probes and if these fail, the search is aborted.
|
||||
func getDynamicPortsStochastic(nodeUsed Bitmap, ask *NetworkResource) ([]int, error) {
|
||||
|
|
|
@ -889,7 +889,9 @@ type ConsulProxy struct {
|
|||
|
||||
// Expose configures the consul proxy.expose stanza to "open up" endpoints
|
||||
// used by task-group level service checks using HTTP or gRPC protocols.
|
||||
Expose *ConsulExposeConfig
|
||||
//
|
||||
// Use json tag to match with field name in api/
|
||||
Expose *ConsulExposeConfig `json:"ExposeConfig"`
|
||||
|
||||
// Config is a proxy configuration. It is opaque to Nomad and passed
|
||||
// directly to Consul.
|
||||
|
@ -905,7 +907,7 @@ func (p *ConsulProxy) Copy() *ConsulProxy {
|
|||
newP := &ConsulProxy{
|
||||
LocalServiceAddress: p.LocalServiceAddress,
|
||||
LocalServicePort: p.LocalServicePort,
|
||||
Expose: p.Expose,
|
||||
Expose: p.Expose.Copy(),
|
||||
}
|
||||
|
||||
if n := len(p.Upstreams); n > 0 {
|
||||
|
@ -1009,7 +1011,8 @@ func (u *ConsulUpstream) Equals(o *ConsulUpstream) bool {
|
|||
|
||||
// ExposeConfig represents a Consul Connect expose jobspec stanza.
|
||||
type ConsulExposeConfig struct {
|
||||
Paths []ConsulExposePath
|
||||
// Use json tag to match with field name in api/
|
||||
Paths []ConsulExposePath `json:"Path"`
|
||||
}
|
||||
|
||||
type ConsulExposePath struct {
|
||||
|
|
|
@ -90,6 +90,7 @@ const (
|
|||
CSIVolumeRegisterRequestType
|
||||
CSIVolumeDeregisterRequestType
|
||||
CSIVolumeClaimRequestType
|
||||
CSIVolumeClaimBatchRequestType
|
||||
ScalingEventRegisterRequestType
|
||||
)
|
||||
|
||||
|
@ -1708,7 +1709,7 @@ type Node struct {
|
|||
// COMPAT: Remove in Nomad 0.9
|
||||
// Drain is controlled by the servers, and not the client.
|
||||
// If true, no jobs will be scheduled to this node, and existing
|
||||
// allocations will be drained. Superceded by DrainStrategy in Nomad
|
||||
// allocations will be drained. Superseded by DrainStrategy in Nomad
|
||||
// 0.8 but kept for backward compat.
|
||||
Drain bool
|
||||
|
||||
|
|
|
@ -423,7 +423,7 @@ func TestVaultClient_ValidateRole_Deprecated_Success(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestVaultClient_ValidateRole_NonExistant(t *testing.T) {
|
||||
func TestVaultClient_ValidateRole_NonExistent(t *testing.T) {
|
||||
t.Parallel()
|
||||
v := testutil.NewTestVault(t)
|
||||
defer v.Stop()
@ -0,0 +1,125 @@
|
|||
package volumewatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// VolumeUpdateBatcher is used to batch the updates for volume claims
|
||||
type VolumeUpdateBatcher struct {
|
||||
// batch is the batching duration
|
||||
batch time.Duration
|
||||
|
||||
// raft is used to actually commit the updates
|
||||
raft VolumeRaftEndpoints
|
||||
|
||||
// workCh is used to pass claim update requests to the batcher goroutine
|
||||
workCh chan *updateWrapper
|
||||
|
||||
// ctx is used to exit the daemon batcher
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
// NewVolumeUpdateBatcher returns a VolumeUpdateBatcher that uses the
|
||||
// passed raft endpoints to create the updates to volume claims, and
|
||||
// exits the batcher when the passed exit channel is closed.
|
||||
func NewVolumeUpdateBatcher(batchDuration time.Duration, raft VolumeRaftEndpoints, ctx context.Context) *VolumeUpdateBatcher {
|
||||
b := &VolumeUpdateBatcher{
|
||||
batch: batchDuration,
|
||||
raft: raft,
|
||||
ctx: ctx,
|
||||
workCh: make(chan *updateWrapper, 10),
|
||||
}
|
||||
|
||||
go b.batcher()
|
||||
return b
|
||||
}
|
||||
|
||||
// CreateUpdate batches the volume claim update and returns a future
|
||||
// that tracks the completion of the request.
|
||||
func (b *VolumeUpdateBatcher) CreateUpdate(claims []structs.CSIVolumeClaimRequest) *BatchFuture {
|
||||
wrapper := &updateWrapper{
|
||||
claims: claims,
|
||||
f: make(chan *BatchFuture, 1),
|
||||
}
|
||||
|
||||
b.workCh <- wrapper
|
||||
return <-wrapper.f
|
||||
}
|
||||
|
||||
type updateWrapper struct {
|
||||
claims []structs.CSIVolumeClaimRequest
|
||||
f chan *BatchFuture
|
||||
}
|
||||
|
||||
// batcher is the long lived batcher goroutine
|
||||
func (b *VolumeUpdateBatcher) batcher() {
|
||||
var timerCh <-chan time.Time
|
||||
claims := make(map[string]structs.CSIVolumeClaimRequest)
|
||||
future := NewBatchFuture()
|
||||
for {
|
||||
select {
|
||||
case <-b.ctx.Done():
|
||||
// note: we can't flush here because we're likely no
|
||||
// longer the leader
|
||||
return
|
||||
case w := <-b.workCh:
|
||||
if timerCh == nil {
|
||||
timerCh = time.After(b.batch)
|
||||
}
|
||||
|
||||
// de-dupe and store the claim update, and attach the future
|
||||
for _, upd := range w.claims {
|
||||
claims[upd.VolumeID+upd.RequestNamespace()] = upd
|
||||
}
|
||||
w.f <- future
|
||||
case <-timerCh:
|
||||
// Capture the future and create a new one
|
||||
f := future
|
||||
future = NewBatchFuture()
|
||||
|
||||
// Create the batch request
|
||||
req := structs.CSIVolumeClaimBatchRequest{}
|
||||
for _, claim := range claims {
|
||||
req.Claims = append(req.Claims, claim)
|
||||
}
|
||||
|
||||
// Upsert the claims in a goroutine
|
||||
go f.Set(b.raft.UpsertVolumeClaims(&req))
|
||||
|
||||
// Reset the claims list and timer
|
||||
claims = make(map[string]structs.CSIVolumeClaimRequest)
|
||||
timerCh = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BatchFuture is a future that can be used to retrieve the index for
|
||||
// the update or any error in the update process
|
||||
type BatchFuture struct {
|
||||
index uint64
|
||||
err error
|
||||
waitCh chan struct{}
|
||||
}
|
||||
|
||||
// NewBatchFuture returns a new BatchFuture
|
||||
func NewBatchFuture() *BatchFuture {
|
||||
return &BatchFuture{
|
||||
waitCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Set sets the results of the future, unblocking any client.
|
||||
func (f *BatchFuture) Set(index uint64, err error) {
|
||||
f.index = index
|
||||
f.err = err
|
||||
close(f.waitCh)
|
||||
}
|
||||
|
||||
// Results returns the creation index and any error.
|
||||
func (f *BatchFuture) Results() (uint64, error) {
|
||||
<-f.waitCh
|
||||
return f.index, f.err
|
||||
}
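The BatchFuture above is what lets many watchers share one raft apply: every CreateUpdate call that lands in the same batch window receives the same future, and Results blocks until Set is called once for the whole batch. A runnable sketch of that same Set/Results handshake in isolation (the goroutine stands in for the batched raft apply):

package main

import (
	"fmt"
	"time"
)

// future mirrors the BatchFuture pattern above: Set closes waitCh to
// unblock any caller waiting in Results.
type future struct {
	index  uint64
	err    error
	waitCh chan struct{}
}

func newFuture() *future { return &future{waitCh: make(chan struct{})} }

func (f *future) Set(index uint64, err error) {
	f.index = index
	f.err = err
	close(f.waitCh)
}

func (f *future) Results() (uint64, error) {
	<-f.waitCh
	return f.index, f.err
}

func main() {
	f := newFuture()
	go func() {
		time.Sleep(10 * time.Millisecond) // simulate the batched raft apply
		f.Set(7, nil)
	}()
	idx, err := f.Results()
	fmt.Println(idx, err)
}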
@ -0,0 +1,85 @@
package volumewatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/helper/testlog"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestVolumeWatch_Batcher tests the update batching logic
|
||||
func TestVolumeWatch_Batcher(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
ctx, exitFn := context.WithCancel(context.Background())
|
||||
defer exitFn()
|
||||
|
||||
srv := &MockBatchingRPCServer{}
|
||||
srv.state = state.TestStateStore(t)
|
||||
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(CrossVolumeUpdateBatchDuration, srv, ctx)
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
node := testNode(nil, plugin, srv.State())
|
||||
|
||||
// because we wait for the results to return from the batch for each
|
||||
// Watcher.updateClaims, we can't test that we're batching except across
|
||||
// multiple volume watchers. create 2 volumes and their watchers here.
|
||||
alloc0 := mock.Alloc()
|
||||
alloc0.ClientStatus = structs.AllocClientStatusComplete
|
||||
vol0 := testVolume(nil, plugin, alloc0, node.ID)
|
||||
w0 := &volumeWatcher{
|
||||
v: vol0,
|
||||
rpc: srv,
|
||||
state: srv.State(),
|
||||
updateClaims: srv.UpdateClaims,
|
||||
logger: testlog.HCLogger(t),
|
||||
}
|
||||
|
||||
alloc1 := mock.Alloc()
|
||||
alloc1.ClientStatus = structs.AllocClientStatusComplete
|
||||
vol1 := testVolume(nil, plugin, alloc1, node.ID)
|
||||
w1 := &volumeWatcher{
|
||||
v: vol1,
|
||||
rpc: srv,
|
||||
state: srv.State(),
|
||||
updateClaims: srv.UpdateClaims,
|
||||
logger: testlog.HCLogger(t),
|
||||
}
|
||||
|
||||
srv.nextCSIControllerDetachError = fmt.Errorf("some controller plugin error")
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(2)
|
||||
|
||||
go func() {
|
||||
w0.volumeReapImpl(vol0)
|
||||
wg.Done()
|
||||
}()
|
||||
go func() {
|
||||
w1.volumeReapImpl(vol1)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
|
||||
require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol0.PastClaims[alloc0.ID].State)
|
||||
require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol1.PastClaims[alloc1.ID].State)
|
||||
require.Equal(2, srv.countCSINodeDetachVolume)
|
||||
require.Equal(2, srv.countCSIControllerDetachVolume)
|
||||
require.Equal(2, srv.countUpdateClaims)
|
||||
|
||||
// note: it's technically possible that the volumeReapImpl
|
||||
// goroutines get de-scheduled and we don't write both updates in
|
||||
// the same batch. but this seems really unlikely, so we're
|
||||
// testing for both cases here so that if we start seeing a flake
|
||||
// here in the future we have a clear cause for it.
|
||||
require.GreaterOrEqual(srv.countUpsertVolumeClaims, 1)
|
||||
require.Equal(1, srv.countUpsertVolumeClaims)
|
||||
}
@ -0,0 +1,28 @@
package volumewatcher

import (
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

// VolumeRaftEndpoints is the set of functions the volume watcher uses
// to apply data transforms via Raft.
type VolumeRaftEndpoints interface {

	// UpsertVolumeClaims applies a batch of claims to raft
	UpsertVolumeClaims(*structs.CSIVolumeClaimBatchRequest) (uint64, error)
}

// ClientRPC is a minimal interface of the Server, intended as an aid
// for testing logic surrounding server-to-server or server-to-client
// RPC calls and to avoid circular references between the nomad
// package and the volumewatcher
type ClientRPC interface {
	ControllerDetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error
	NodeDetachVolume(args *cstructs.ClientCSINodeDetachVolumeRequest, reply *cstructs.ClientCSINodeDetachVolumeResponse) error
}

// updateClaimsFn is the function used to update claims on behalf of a volume
// (used to wrap batch updates so that we can test
// volumeWatcher methods synchronously without batching)
type updateClaimsFn func(claims []structs.CSIVolumeClaimRequest) (uint64, error)
@ -0,0 +1,148 @@
|
|||
package volumewatcher
|
||||
|
||||
import (
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// Create a client node with plugin info
|
||||
func testNode(node *structs.Node, plugin *structs.CSIPlugin, s *state.StateStore) *structs.Node {
|
||||
if node != nil {
|
||||
return node
|
||||
}
|
||||
node = mock.Node()
|
||||
node.Attributes["nomad.version"] = "0.11.0" // client RPCs not supported on early version
|
||||
node.CSINodePlugins = map[string]*structs.CSIInfo{
|
||||
plugin.ID: {
|
||||
PluginID: plugin.ID,
|
||||
Healthy: true,
|
||||
RequiresControllerPlugin: plugin.ControllerRequired,
|
||||
NodeInfo: &structs.CSINodeInfo{},
|
||||
},
|
||||
}
|
||||
if plugin.ControllerRequired {
|
||||
node.CSIControllerPlugins = map[string]*structs.CSIInfo{
|
||||
plugin.ID: {
|
||||
PluginID: plugin.ID,
|
||||
Healthy: true,
|
||||
RequiresControllerPlugin: true,
|
||||
ControllerInfo: &structs.CSIControllerInfo{
|
||||
SupportsReadOnlyAttach: true,
|
||||
SupportsAttachDetach: true,
|
||||
SupportsListVolumes: true,
|
||||
SupportsListVolumesAttachedNodes: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
} else {
|
||||
node.CSIControllerPlugins = map[string]*structs.CSIInfo{}
|
||||
}
|
||||
s.UpsertNode(99, node)
|
||||
return node
|
||||
}
|
||||
|
||||
// Create a test volume with claim info
|
||||
func testVolume(vol *structs.CSIVolume, plugin *structs.CSIPlugin, alloc *structs.Allocation, nodeID string) *structs.CSIVolume {
|
||||
if vol != nil {
|
||||
return vol
|
||||
}
|
||||
vol = mock.CSIVolume(plugin)
|
||||
vol.ControllerRequired = plugin.ControllerRequired
|
||||
|
||||
vol.ReadAllocs = map[string]*structs.Allocation{alloc.ID: alloc}
|
||||
vol.ReadClaims = map[string]*structs.CSIVolumeClaim{
|
||||
alloc.ID: {
|
||||
AllocationID: alloc.ID,
|
||||
NodeID: nodeID,
|
||||
Mode: structs.CSIVolumeClaimRead,
|
||||
State: structs.CSIVolumeClaimStateTaken,
|
||||
},
|
||||
}
|
||||
return vol
|
||||
}
|
||||
|
||||
// COMPAT(1.0): the claim fields were added after 0.11.1; this
|
||||
// mock and the associated test cases can be removed for 1.0
|
||||
func testOldVolume(vol *structs.CSIVolume, plugin *structs.CSIPlugin, alloc *structs.Allocation, nodeID string) *structs.CSIVolume {
|
||||
if vol != nil {
|
||||
return vol
|
||||
}
|
||||
vol = mock.CSIVolume(plugin)
|
||||
vol.ControllerRequired = plugin.ControllerRequired
|
||||
|
||||
vol.ReadAllocs = map[string]*structs.Allocation{alloc.ID: alloc}
|
||||
return vol
|
||||
}
|
||||
|
||||
type MockRPCServer struct {
|
||||
state *state.StateStore
|
||||
|
||||
// mock responses for ClientCSI.NodeDetachVolume
|
||||
nextCSINodeDetachResponse *cstructs.ClientCSINodeDetachVolumeResponse
|
||||
nextCSINodeDetachError error
|
||||
countCSINodeDetachVolume int
|
||||
|
||||
// mock responses for ClientCSI.ControllerDetachVolume
|
||||
nextCSIControllerDetachVolumeResponse *cstructs.ClientCSIControllerDetachVolumeResponse
|
||||
nextCSIControllerDetachError error
|
||||
countCSIControllerDetachVolume int
|
||||
|
||||
countUpdateClaims int
|
||||
countUpsertVolumeClaims int
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) ControllerDetachVolume(args *cstructs.ClientCSIControllerDetachVolumeRequest, reply *cstructs.ClientCSIControllerDetachVolumeResponse) error {
|
||||
reply = srv.nextCSIControllerDetachVolumeResponse
|
||||
srv.countCSIControllerDetachVolume++
|
||||
return srv.nextCSIControllerDetachError
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) NodeDetachVolume(args *cstructs.ClientCSINodeDetachVolumeRequest, reply *cstructs.ClientCSINodeDetachVolumeResponse) error {
|
||||
reply = srv.nextCSINodeDetachResponse
|
||||
srv.countCSINodeDetachVolume++
|
||||
return srv.nextCSINodeDetachError
|
||||
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) UpsertVolumeClaims(*structs.CSIVolumeClaimBatchRequest) (uint64, error) {
|
||||
srv.countUpsertVolumeClaims++
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (srv *MockRPCServer) State() *state.StateStore { return srv.state }
|
||||
|
||||
func (srv *MockRPCServer) UpdateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
|
||||
srv.countUpdateClaims++
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
type MockBatchingRPCServer struct {
|
||||
MockRPCServer
|
||||
volumeUpdateBatcher *VolumeUpdateBatcher
|
||||
}
|
||||
|
||||
func (srv *MockBatchingRPCServer) UpdateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
|
||||
srv.countUpdateClaims++
|
||||
return srv.volumeUpdateBatcher.CreateUpdate(claims).Results()
|
||||
}
|
||||
|
||||
type MockStatefulRPCServer struct {
|
||||
MockRPCServer
|
||||
volumeUpdateBatcher *VolumeUpdateBatcher
|
||||
}
|
||||
|
||||
func (srv *MockStatefulRPCServer) UpsertVolumeClaims(batch *structs.CSIVolumeClaimBatchRequest) (uint64, error) {
|
||||
srv.countUpsertVolumeClaims++
|
||||
index, _ := srv.state.LatestIndex()
|
||||
for _, req := range batch.Claims {
|
||||
index++
|
||||
err := srv.state.CSIVolumeClaim(index, req.RequestNamespace(),
|
||||
req.VolumeID, req.ToClaim())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
return index, nil
|
||||
}
@ -0,0 +1,382 @@
package volumewatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
log "github.com/hashicorp/go-hclog"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
multierror "github.com/hashicorp/go-multierror"
|
||||
cstructs "github.com/hashicorp/nomad/client/structs"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
)
|
||||
|
||||
// volumeWatcher is used to watch a single volume and release its
// claims once the claiming allocations are terminal
|
||||
type volumeWatcher struct {
|
||||
// v is the volume being watched
|
||||
v *structs.CSIVolume
|
||||
|
||||
// state is the state that is watched for state changes.
|
||||
state *state.StateStore
|
||||
|
||||
// updateClaims is the function used to apply claims to raft
|
||||
updateClaims updateClaimsFn
|
||||
|
||||
// server interface for CSI client RPCs
|
||||
rpc ClientRPC
|
||||
|
||||
logger log.Logger
|
||||
shutdownCtx context.Context // parent context
|
||||
ctx context.Context // own context
|
||||
exitFn context.CancelFunc
|
||||
|
||||
// updateCh is triggered when there is an updated volume
|
||||
updateCh chan *structs.CSIVolume
|
||||
|
||||
wLock sync.RWMutex
|
||||
running bool
|
||||
}
|
||||
|
||||
// newVolumeWatcher returns a volume watcher that is used to watch
|
||||
// volumes
|
||||
func newVolumeWatcher(parent *Watcher, vol *structs.CSIVolume) *volumeWatcher {
|
||||
|
||||
w := &volumeWatcher{
|
||||
updateCh: make(chan *structs.CSIVolume, 1),
|
||||
updateClaims: parent.updateClaims,
|
||||
v: vol,
|
||||
state: parent.state,
|
||||
rpc: parent.rpc,
|
||||
logger: parent.logger.With("volume_id", vol.ID, "namespace", vol.Namespace),
|
||||
shutdownCtx: parent.ctx,
|
||||
}
|
||||
|
||||
// Start the long lived watcher that scans for allocation updates
|
||||
w.Start()
|
||||
return w
|
||||
}
|
||||
|
||||
// Notify signals an update to the tracked volume.
|
||||
func (vw *volumeWatcher) Notify(v *structs.CSIVolume) {
|
||||
if !vw.isRunning() {
|
||||
vw.Start()
|
||||
}
|
||||
select {
|
||||
case vw.updateCh <- v:
|
||||
case <-vw.shutdownCtx.Done(): // prevent deadlock if we stopped
|
||||
case <-vw.ctx.Done(): // prevent deadlock if we stopped
|
||||
}
|
||||
}
|
||||
|
||||
func (vw *volumeWatcher) Start() {
|
||||
vw.logger.Trace("starting watcher", "id", vw.v.ID, "namespace", vw.v.Namespace)
|
||||
vw.wLock.Lock()
|
||||
defer vw.wLock.Unlock()
|
||||
vw.running = true
|
||||
ctx, exitFn := context.WithCancel(vw.shutdownCtx)
|
||||
vw.ctx = ctx
|
||||
vw.exitFn = exitFn
|
||||
go vw.watch()
|
||||
}
|
||||
|
||||
// Stop stops watching the volume. This should be called whenever a
|
||||
// volume's claims are fully reaped or the watcher is no longer needed.
|
||||
func (vw *volumeWatcher) Stop() {
|
||||
vw.logger.Trace("no more claims", "id", vw.v.ID, "namespace", vw.v.Namespace)
|
||||
vw.exitFn()
|
||||
}
|
||||
|
||||
func (vw *volumeWatcher) isRunning() bool {
|
||||
vw.wLock.RLock()
|
||||
defer vw.wLock.RUnlock()
|
||||
select {
|
||||
case <-vw.shutdownCtx.Done():
|
||||
return false
|
||||
case <-vw.ctx.Done():
|
||||
return false
|
||||
default:
|
||||
return vw.running
|
||||
}
|
||||
}
|
||||
|
||||
// watch is the long-running function that watches for changes to a volume.
|
||||
// Each pass steps the volume's claims through the various states of reaping
|
||||
// until the volume has no more claims eligible to be reaped.
|
||||
func (vw *volumeWatcher) watch() {
|
||||
for {
|
||||
select {
|
||||
// TODO(tgross): currently server->client RPCs have no cancellation
// context, so we can't stop the long-running RPCs gracefully
|
||||
case <-vw.shutdownCtx.Done():
|
||||
return
|
||||
case <-vw.ctx.Done():
|
||||
return
|
||||
case vol := <-vw.updateCh:
|
||||
// while we won't make raft writes if we get a stale update,
|
||||
// we can still fire extra CSI RPC calls if we don't check this
|
||||
if vol == nil || vw.v == nil || vol.ModifyIndex >= vw.v.ModifyIndex {
|
||||
vol = vw.getVolume(vol)
|
||||
if vol == nil {
|
||||
return
|
||||
}
|
||||
vw.volumeReap(vol)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
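The watch loop above combines a parent shutdown context, a per-watcher context, and a buffered update channel so that Notify can never block a caller once the watcher stops. A self-contained sketch of that pattern, with plain strings standing in for *structs.CSIVolume:

package main

import (
	"context"
	"fmt"
	"time"
)

// watcher mirrors the pattern above: a buffered channel of size 1 carries
// the latest update, and two contexts (parent shutdown and per-watcher)
// both unblock Notify so it cannot deadlock after Stop.
type watcher struct {
	updateCh    chan string
	shutdownCtx context.Context
	ctx         context.Context
}

func (w *watcher) Notify(v string) {
	select {
	case w.updateCh <- v:
	case <-w.shutdownCtx.Done():
	case <-w.ctx.Done():
	}
}

func (w *watcher) watch() {
	for {
		select {
		case <-w.shutdownCtx.Done():
			return
		case <-w.ctx.Done():
			return
		case v := <-w.updateCh:
			fmt.Println("got update:", v)
		}
	}
}

func main() {
	parent, cancel := context.WithCancel(context.Background())
	defer cancel()
	own, stop := context.WithCancel(parent)
	w := &watcher{updateCh: make(chan string, 1), shutdownCtx: parent, ctx: own}
	go w.watch()
	w.Notify("vol0 updated")
	time.Sleep(10 * time.Millisecond)
	stop()
}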
|
||||
|
||||
// getVolume returns the tracked volume, fully populated with the current
|
||||
// state
|
||||
func (vw *volumeWatcher) getVolume(vol *structs.CSIVolume) *structs.CSIVolume {
|
||||
vw.wLock.RLock()
|
||||
defer vw.wLock.RUnlock()
|
||||
|
||||
var err error
|
||||
ws := memdb.NewWatchSet()
|
||||
|
||||
vol, err = vw.state.CSIVolumeDenormalizePlugins(ws, vol.Copy())
|
||||
if err != nil {
|
||||
vw.logger.Error("could not query plugins for volume", "error", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
vol, err = vw.state.CSIVolumeDenormalize(ws, vol)
|
||||
if err != nil {
|
||||
vw.logger.Error("could not query allocs for volume", "error", err)
|
||||
return nil
|
||||
}
|
||||
vw.v = vol
|
||||
return vol
|
||||
}
|
||||
|
||||
// volumeReap collects errors for logging but doesn't return them
|
||||
// to the main loop.
|
||||
func (vw *volumeWatcher) volumeReap(vol *structs.CSIVolume) {
|
||||
vw.logger.Trace("releasing unused volume claims", "id", vol.ID, "namespace", vol.Namespace)
|
||||
err := vw.volumeReapImpl(vol)
|
||||
if err != nil {
|
||||
vw.logger.Error("error releasing volume claims", "error", err)
|
||||
}
|
||||
if vw.isUnclaimed(vol) {
|
||||
vw.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool {
|
||||
return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0
|
||||
}
|
||||
|
||||
func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error {
|
||||
var result *multierror.Error
|
||||
nodeClaims := map[string]int{} // node IDs -> count
|
||||
jobs := map[string]bool{} // jobID -> stopped
|
||||
|
||||
// if a job is purged, the subsequent alloc updates can't
|
||||
// trigger a GC job because there's no job for them to query.
|
||||
// Job.Deregister will send a claim release on all claims
|
||||
// but the allocs will not yet be terminated. save the status
|
||||
// for each job so that we don't requery in this pass
|
||||
checkStopped := func(jobID string) bool {
|
||||
namespace := vw.v.Namespace
|
||||
isStopped, ok := jobs[jobID]
|
||||
if !ok {
|
||||
ws := memdb.NewWatchSet()
|
||||
job, err := vw.state.JobByID(ws, namespace, jobID)
|
||||
if err != nil {
|
||||
isStopped = true
|
||||
}
|
||||
if job == nil || job.Stopped() {
|
||||
isStopped = true
|
||||
}
|
||||
jobs[jobID] = isStopped
|
||||
}
|
||||
return isStopped
|
||||
}
|
||||
|
||||
collect := func(allocs map[string]*structs.Allocation,
|
||||
claims map[string]*structs.CSIVolumeClaim) {
|
||||
|
||||
for allocID, alloc := range allocs {
|
||||
|
||||
if alloc == nil {
|
||||
_, exists := vol.PastClaims[allocID]
|
||||
if !exists {
|
||||
vol.PastClaims[allocID] = &structs.CSIVolumeClaim{
|
||||
AllocationID: allocID,
|
||||
State: structs.CSIVolumeClaimStateReadyToFree,
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
nodeClaims[alloc.NodeID]++
|
||||
|
||||
if alloc.Terminated() || checkStopped(alloc.JobID) {
|
||||
// don't overwrite the PastClaim if we've seen it before,
|
||||
// so that we can track state between subsequent calls
|
||||
_, exists := vol.PastClaims[allocID]
|
||||
if !exists {
|
||||
claim, ok := claims[allocID]
|
||||
if !ok {
|
||||
claim = &structs.CSIVolumeClaim{
|
||||
AllocationID: allocID,
|
||||
NodeID: alloc.NodeID,
|
||||
}
|
||||
}
|
||||
claim.State = structs.CSIVolumeClaimStateTaken
|
||||
vol.PastClaims[allocID] = claim
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collect(vol.ReadAllocs, vol.ReadClaims)
|
||||
collect(vol.WriteAllocs, vol.WriteClaims)
|
||||
|
||||
if len(vol.PastClaims) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, claim := range vol.PastClaims {
|
||||
|
||||
var err error
|
||||
|
||||
// previous checkpoints may have set the past claim state already.
|
||||
// in practice we should never see CSIVolumeClaimStateControllerDetached
|
||||
// but having an option for the state makes it easy to add a checkpoint
|
||||
// in a backwards compatible way if we need one later
|
||||
switch claim.State {
|
||||
case structs.CSIVolumeClaimStateNodeDetached:
|
||||
goto NODE_DETACHED
|
||||
case structs.CSIVolumeClaimStateControllerDetached:
|
||||
goto RELEASE_CLAIM
|
||||
case structs.CSIVolumeClaimStateReadyToFree:
|
||||
goto RELEASE_CLAIM
|
||||
}
|
||||
|
||||
err = vw.nodeDetach(vol, claim)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
break
|
||||
}
|
||||
|
||||
NODE_DETACHED:
|
||||
nodeClaims[claim.NodeID]--
|
||||
err = vw.controllerDetach(vol, claim, nodeClaims)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
break
|
||||
}
|
||||
|
||||
RELEASE_CLAIM:
|
||||
err = vw.checkpoint(vol, claim)
|
||||
if err != nil {
|
||||
result = multierror.Append(result, err)
|
||||
break
|
||||
}
|
||||
// the checkpoint deletes from the state store, but this operates
|
||||
// on our local copy which aids in testing
|
||||
delete(vol.PastClaims, claim.AllocationID)
|
||||
}
|
||||
|
||||
return result.ErrorOrNil()
|
||||
|
||||
}
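volumeReapImpl uses the claim's checkpointed state to decide where to resume: Taken starts at the node detach, NodeDetached skips straight to the controller detach, and ControllerDetached or ReadyToFree go directly to releasing the claim. The same resume logic can be read without the goto labels; a hedged, self-contained sketch (the three step functions are placeholders for the RPC calls made above):

package main

import "fmt"

type claimState int

const (
	stateTaken claimState = iota
	stateNodeDetached
	stateControllerDetached
	stateReadyToFree
)

// reapOne is an illustrative, goto-free rendering of the checkpointed
// sequence in volumeReapImpl: each step is skipped when a previous pass
// already recorded that it completed.
func reapOne(start claimState, nodeDetach, controllerDetach, release func() error) error {
	if start < stateNodeDetached {
		if err := nodeDetach(); err != nil {
			return err
		}
	}
	if start < stateControllerDetached {
		if err := controllerDetach(); err != nil {
			return err
		}
	}
	return release()
}

func main() {
	ok := func() error { return nil }
	fmt.Println(reapOne(stateNodeDetached, ok, ok, ok))
}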
|
||||
|
||||
// nodeDetach makes the client NodePublish / NodeUnstage RPCs, which
|
||||
// must be completed before controller operations or releasing the claim.
|
||||
func (vw *volumeWatcher) nodeDetach(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
|
||||
vw.logger.Trace("detaching node", "id", vol.ID, "namespace", vol.Namespace)
|
||||
nReq := &cstructs.ClientCSINodeDetachVolumeRequest{
|
||||
PluginID: vol.PluginID,
|
||||
VolumeID: vol.ID,
|
||||
ExternalID: vol.RemoteID(),
|
||||
AllocID: claim.AllocationID,
|
||||
NodeID: claim.NodeID,
|
||||
AttachmentMode: vol.AttachmentMode,
|
||||
AccessMode: vol.AccessMode,
|
||||
ReadOnly: claim.Mode == structs.CSIVolumeClaimRead,
|
||||
}
|
||||
|
||||
err := vw.rpc.NodeDetachVolume(nReq,
|
||||
&cstructs.ClientCSINodeDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not detach from node: %v", err)
|
||||
}
|
||||
claim.State = structs.CSIVolumeClaimStateNodeDetached
|
||||
return vw.checkpoint(vol, claim)
|
||||
}
|
||||
|
||||
// controllerDetach makes the client RPC to the controller to
|
||||
// unpublish the volume if a controller is required and no other
|
||||
// allocs on the node need it
|
||||
func (vw *volumeWatcher) controllerDetach(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim, nodeClaims map[string]int) error {
|
||||
if !vol.ControllerRequired || nodeClaims[claim.NodeID] > 1 {
|
||||
claim.State = structs.CSIVolumeClaimStateReadyToFree
|
||||
return nil
|
||||
}
|
||||
vw.logger.Trace("detaching controller", "id", vol.ID, "namespace", vol.Namespace)
|
||||
// note: we need to get the CSI Node ID, which is not the same as
|
||||
// the Nomad Node ID
|
||||
ws := memdb.NewWatchSet()
|
||||
targetNode, err := vw.state.NodeByID(ws, claim.NodeID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetNode == nil {
|
||||
return fmt.Errorf("%s: %s", structs.ErrUnknownNodePrefix, claim.NodeID)
|
||||
}
|
||||
targetCSIInfo, ok := targetNode.CSINodePlugins[vol.PluginID]
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to find NodeInfo for node: %s", targetNode.ID)
|
||||
}
|
||||
|
||||
plug, err := vw.state.CSIPluginByID(ws, vol.PluginID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("plugin lookup error: %s %v", vol.PluginID, err)
|
||||
}
|
||||
if plug == nil {
|
||||
return fmt.Errorf("plugin lookup error: %s missing plugin", vol.PluginID)
|
||||
}
|
||||
|
||||
cReq := &cstructs.ClientCSIControllerDetachVolumeRequest{
|
||||
VolumeID: vol.RemoteID(),
|
||||
ClientCSINodeID: targetCSIInfo.NodeInfo.ID,
|
||||
}
|
||||
cReq.PluginID = plug.ID
|
||||
err = vw.rpc.ControllerDetachVolume(cReq,
|
||||
&cstructs.ClientCSIControllerDetachVolumeResponse{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not detach from controller: %v", err)
|
||||
}
|
||||
claim.State = structs.CSIVolumeClaimStateReadyToFree
|
||||
return nil
|
||||
}
|
||||
|
||||
func (vw *volumeWatcher) checkpoint(vol *structs.CSIVolume, claim *structs.CSIVolumeClaim) error {
|
||||
vw.logger.Trace("checkpointing claim", "id", vol.ID, "namespace", vol.Namespace)
|
||||
req := structs.CSIVolumeClaimRequest{
|
||||
VolumeID: vol.ID,
|
||||
AllocationID: claim.AllocationID,
|
||||
NodeID: claim.NodeID,
|
||||
Claim: structs.CSIVolumeClaimRelease,
|
||||
State: claim.State,
|
||||
WriteRequest: structs.WriteRequest{
|
||||
Namespace: vol.Namespace,
|
||||
// Region: vol.Region, // TODO(tgross) should volumes have regions?
|
||||
},
|
||||
}
|
||||
index, err := vw.updateClaims([]structs.CSIVolumeClaimRequest{req})
|
||||
if err == nil && index != 0 {
|
||||
vw.wLock.Lock()
|
||||
defer vw.wLock.Unlock()
|
||||
vw.v.ModifyIndex = index
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not checkpoint claim release: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
@ -0,0 +1,294 @@
package volumewatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/nomad/helper/testlog"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestVolumeWatch_OneReap tests one pass through the reaper
|
||||
func TestVolumeWatch_OneReap(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
cases := []struct {
|
||||
Name string
|
||||
Volume *structs.CSIVolume
|
||||
Node *structs.Node
|
||||
ControllerRequired bool
|
||||
ExpectedErr string
|
||||
ExpectedClaimsCount int
|
||||
ExpectedNodeDetachCount int
|
||||
ExpectedControllerDetachCount int
|
||||
ExpectedUpdateClaimsCount int
|
||||
srv *MockRPCServer
|
||||
}{
|
||||
{
|
||||
Name: "No terminal allocs",
|
||||
Volume: mock.CSIVolume(mock.CSIPlugin()),
|
||||
ControllerRequired: true,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
nextCSINodeDetachError: fmt.Errorf("should never see this"),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "NodeDetachVolume fails",
|
||||
ControllerRequired: true,
|
||||
ExpectedErr: "some node plugin error",
|
||||
ExpectedNodeDetachCount: 1,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
nextCSINodeDetachError: fmt.Errorf("some node plugin error"),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "NodeDetachVolume node-only happy path",
|
||||
ControllerRequired: false,
|
||||
ExpectedNodeDetachCount: 1,
|
||||
ExpectedUpdateClaimsCount: 2,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ControllerDetachVolume no controllers available",
|
||||
Node: mock.Node(),
|
||||
ControllerRequired: true,
|
||||
ExpectedErr: "Unknown node",
|
||||
ExpectedNodeDetachCount: 1,
|
||||
ExpectedUpdateClaimsCount: 1,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ControllerDetachVolume controller error",
|
||||
ControllerRequired: true,
|
||||
ExpectedErr: "some controller error",
|
||||
ExpectedNodeDetachCount: 1,
|
||||
ExpectedControllerDetachCount: 1,
|
||||
ExpectedUpdateClaimsCount: 1,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
nextCSIControllerDetachError: fmt.Errorf("some controller error"),
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ControllerDetachVolume happy path",
|
||||
ControllerRequired: true,
|
||||
ExpectedNodeDetachCount: 1,
|
||||
ExpectedControllerDetachCount: 1,
|
||||
ExpectedUpdateClaimsCount: 2,
|
||||
srv: &MockRPCServer{
|
||||
state: state.TestStateStore(t),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
plugin.ControllerRequired = tc.ControllerRequired
|
||||
node := testNode(tc.Node, plugin, tc.srv.State())
|
||||
alloc := mock.Alloc()
|
||||
alloc.NodeID = node.ID
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
			vol := testVolume(tc.Volume, plugin, alloc, node.ID)
			ctx, exitFn := context.WithCancel(context.Background())
			w := &volumeWatcher{
				v:            vol,
				rpc:          tc.srv,
				state:        tc.srv.State(),
				updateClaims: tc.srv.UpdateClaims,
				ctx:          ctx,
				exitFn:       exitFn,
				logger:       testlog.HCLogger(t),
			}

			err := w.volumeReapImpl(vol)
			if tc.ExpectedErr != "" {
				require.Error(err, fmt.Sprintf("expected: %q", tc.ExpectedErr))
				require.Contains(err.Error(), tc.ExpectedErr)
			} else {
				require.NoError(err)
			}
			require.Equal(tc.ExpectedNodeDetachCount,
				tc.srv.countCSINodeDetachVolume, "node detach RPC count")
			require.Equal(tc.ExpectedControllerDetachCount,
				tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
			require.Equal(tc.ExpectedUpdateClaimsCount,
				tc.srv.countUpdateClaims, "update claims count")
		})
	}
}

// TestVolumeWatch_OldVolume_OneReap tests one pass through the reaper
// COMPAT(1.0): the claim fields were added after 0.11.1; this test
// can be removed for 1.0
func TestVolumeWatch_OldVolume_OneReap(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	cases := []struct {
		Name                          string
		Volume                        *structs.CSIVolume
		Node                          *structs.Node
		ControllerRequired            bool
		ExpectedErr                   string
		ExpectedClaimsCount           int
		ExpectedNodeDetachCount       int
		ExpectedControllerDetachCount int
		ExpectedUpdateClaimsCount     int
		srv                           *MockRPCServer
	}{
		{
			Name:               "No terminal allocs",
			Volume:             mock.CSIVolume(mock.CSIPlugin()),
			ControllerRequired: true,
			srv: &MockRPCServer{
				state:                  state.TestStateStore(t),
				nextCSINodeDetachError: fmt.Errorf("should never see this"),
			},
		},
		{
			Name:                    "NodeDetachVolume fails",
			ControllerRequired:      true,
			ExpectedErr:             "some node plugin error",
			ExpectedNodeDetachCount: 1,
			srv: &MockRPCServer{
				state:                  state.TestStateStore(t),
				nextCSINodeDetachError: fmt.Errorf("some node plugin error"),
			},
		},
		{
			Name:                      "NodeDetachVolume node-only happy path",
			ControllerRequired:        false,
			ExpectedNodeDetachCount:   1,
			ExpectedUpdateClaimsCount: 2,
			srv: &MockRPCServer{
				state: state.TestStateStore(t),
			},
		},
		{
			Name:                      "ControllerDetachVolume no controllers available",
			Node:                      mock.Node(),
			ControllerRequired:        true,
			ExpectedErr:               "Unknown node",
			ExpectedNodeDetachCount:   1,
			ExpectedUpdateClaimsCount: 1,
			srv: &MockRPCServer{
				state: state.TestStateStore(t),
			},
		},
		{
			Name:                          "ControllerDetachVolume controller error",
			ControllerRequired:            true,
			ExpectedErr:                   "some controller error",
			ExpectedNodeDetachCount:       1,
			ExpectedControllerDetachCount: 1,
			ExpectedUpdateClaimsCount:     1,
			srv: &MockRPCServer{
				state:                        state.TestStateStore(t),
				nextCSIControllerDetachError: fmt.Errorf("some controller error"),
			},
		},
		{
			Name:                          "ControllerDetachVolume happy path",
			ControllerRequired:            true,
			ExpectedNodeDetachCount:       1,
			ExpectedControllerDetachCount: 1,
			ExpectedUpdateClaimsCount:     2,
			srv: &MockRPCServer{
				state: state.TestStateStore(t),
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.Name, func(t *testing.T) {

			plugin := mock.CSIPlugin()
			plugin.ControllerRequired = tc.ControllerRequired
			node := testNode(tc.Node, plugin, tc.srv.State())
			alloc := mock.Alloc()
			alloc.ClientStatus = structs.AllocClientStatusComplete
			alloc.NodeID = node.ID
			vol := testOldVolume(tc.Volume, plugin, alloc, node.ID)
			ctx, exitFn := context.WithCancel(context.Background())
			w := &volumeWatcher{
				v:            vol,
				rpc:          tc.srv,
				state:        tc.srv.State(),
				updateClaims: tc.srv.UpdateClaims,
				ctx:          ctx,
				exitFn:       exitFn,
				logger:       testlog.HCLogger(t),
			}

			err := w.volumeReapImpl(vol)
			if tc.ExpectedErr != "" {
				require.Error(err, fmt.Sprintf("expected: %q", tc.ExpectedErr))
				require.Contains(err.Error(), tc.ExpectedErr)
			} else {
				require.NoError(err)
			}
			require.Equal(tc.ExpectedNodeDetachCount,
				tc.srv.countCSINodeDetachVolume, "node detach RPC count")
			require.Equal(tc.ExpectedControllerDetachCount,
				tc.srv.countCSIControllerDetachVolume, "controller detach RPC count")
			require.Equal(tc.ExpectedUpdateClaimsCount,
				tc.srv.countUpdateClaims, "update claims count")
		})
	}
}

// TestVolumeWatch_ReapStates tests multiple passes through the reaper,
// updating state after each one
func TestVolumeWatch_ReapStates(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	srv := &MockRPCServer{state: state.TestStateStore(t)}
	plugin := mock.CSIPlugin()
	node := testNode(nil, plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete
	vol := testVolume(nil, plugin, alloc, node.ID)

	w := &volumeWatcher{
		v:            vol,
		rpc:          srv,
		state:        srv.State(),
		updateClaims: srv.UpdateClaims,
		logger:       testlog.HCLogger(t),
	}

	srv.nextCSINodeDetachError = fmt.Errorf("some node plugin error")
	err := w.volumeReapImpl(vol)
	require.Error(err)
	require.Equal(structs.CSIVolumeClaimStateTaken, vol.PastClaims[alloc.ID].State)
	require.Equal(1, srv.countCSINodeDetachVolume)
	require.Equal(0, srv.countCSIControllerDetachVolume)
	require.Equal(0, srv.countUpdateClaims)

	srv.nextCSINodeDetachError = nil
	srv.nextCSIControllerDetachError = fmt.Errorf("some controller plugin error")
	err = w.volumeReapImpl(vol)
	require.Error(err)
	require.Equal(structs.CSIVolumeClaimStateNodeDetached, vol.PastClaims[alloc.ID].State)
	require.Equal(1, srv.countUpdateClaims)

	srv.nextCSIControllerDetachError = nil
	err = w.volumeReapImpl(vol)
	require.NoError(err)
	require.Equal(0, len(vol.PastClaims))
	require.Equal(2, srv.countUpdateClaims)
}
|
|
@ -0,0 +1,232 @@
|
|||
package volumewatcher

import (
	"context"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"golang.org/x/time/rate"
)

const (
	// LimitStateQueriesPerSecond is the number of state queries allowed per
	// second
	LimitStateQueriesPerSecond = 100.0

	// CrossVolumeUpdateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers before
	// being committed to Raft.
	CrossVolumeUpdateBatchDuration = 250 * time.Millisecond
)

// Watcher is used to watch volumes and their allocations created
// by the scheduler and trigger the scheduler when allocation health
// transitions.
type Watcher struct {
	enabled bool
	logger  log.Logger

	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// updateBatchDuration is the duration in which volume
	// claim updates are batched across all volume watchers
	// before being committed to Raft.
	updateBatchDuration time.Duration

	// raft contains the set of Raft endpoints that can be used by the
	// volumes watcher
	raft VolumeRaftEndpoints

	// rpc contains the set of Server methods that can be used by
	// the volumes watcher for RPC
	rpc ClientRPC

	// state is the state that is watched for state changes.
	state *state.StateStore

	// watchers is the set of active watchers, one per volume
	watchers map[string]*volumeWatcher

	// volumeUpdateBatcher is used to batch volume claim updates
	volumeUpdateBatcher *VolumeUpdateBatcher

	// ctx and exitFn are used to cancel the watcher
	ctx    context.Context
	exitFn context.CancelFunc

	wlock sync.RWMutex
}

// NewVolumesWatcher returns a volumes watcher that is used to watch
// volumes and trigger the scheduler as needed.
func NewVolumesWatcher(logger log.Logger,
	raft VolumeRaftEndpoints, rpc ClientRPC, stateQueriesPerSecond float64,
	updateBatchDuration time.Duration) *Watcher {

	// the leader step-down calls SetEnabled(false) which is what
	// cancels this context, rather than passing in its own shutdown
	// context
	ctx, exitFn := context.WithCancel(context.Background())

	return &Watcher{
		raft:                raft,
		rpc:                 rpc,
		queryLimiter:        rate.NewLimiter(rate.Limit(stateQueriesPerSecond), 100),
		updateBatchDuration: updateBatchDuration,
		logger:              logger.Named("volumes_watcher"),
		ctx:                 ctx,
		exitFn:              exitFn,
	}
}

// SetEnabled is used to control if the watcher is enabled. The
// watcher should only be enabled on the active leader. When being
// enabled the state is passed in as it is no longer valid once a
// leader election has taken place.
func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) {
	w.wlock.Lock()
	defer w.wlock.Unlock()

	wasEnabled := w.enabled
	w.enabled = enabled

	if state != nil {
		w.state = state
	}

	// Flush the state to create the necessary objects
	w.flush()

	// If we are starting now, launch the watch daemon
	if enabled && !wasEnabled {
		go w.watchVolumes(w.ctx)
	}
}

// flush is used to clear the state of the watcher
func (w *Watcher) flush() {
	// Stop all the watchers and clear it
	for _, watcher := range w.watchers {
		watcher.Stop()
	}

	// Kill everything associated with the watcher
	if w.exitFn != nil {
		w.exitFn()
	}

	w.watchers = make(map[string]*volumeWatcher, 32)
	w.ctx, w.exitFn = context.WithCancel(context.Background())
	w.volumeUpdateBatcher = NewVolumeUpdateBatcher(w.updateBatchDuration, w.raft, w.ctx)
}

// watchVolumes is the long lived go-routine that watches for volumes to
// add and remove watchers on.
func (w *Watcher) watchVolumes(ctx context.Context) {
	vIndex := uint64(1)
	for {
		volumes, idx, err := w.getVolumes(ctx, vIndex)
		if err != nil {
			if err == context.Canceled {
				return
			}
			w.logger.Error("failed to retrieve volumes", "error", err)
		}

		vIndex = idx // last-seen index
		for _, v := range volumes {
			if err := w.add(v); err != nil {
				w.logger.Error("failed to track volume", "volume_id", v.ID, "error", err)
			}
		}
	}
}

// getVolumes retrieves all volumes blocking at the given index.
func (w *Watcher) getVolumes(ctx context.Context, minIndex uint64) ([]*structs.CSIVolume, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getVolumesImpl, minIndex, ctx)
	if err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.CSIVolume), index, nil
}

// getVolumesImpl retrieves all volumes from the passed state store.
func (w *Watcher) getVolumesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {

	iter, err := state.CSIVolumes(ws)
	if err != nil {
		return nil, 0, err
	}

	var volumes []*structs.CSIVolume
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		volume := raw.(*structs.CSIVolume)
		volumes = append(volumes, volume)
	}

	// Use the last index that affected the volume table
	index, err := state.Index("csi_volumes")
	if err != nil {
		return nil, 0, err
	}

	return volumes, index, nil
}

// add adds a volume to the watch list
func (w *Watcher) add(d *structs.CSIVolume) error {
	w.wlock.Lock()
	defer w.wlock.Unlock()
	_, err := w.addLocked(d)
	return err
}

// addLocked adds a volume to the watch list and should only be called when
// locked. Creating the volumeWatcher starts a go routine to .watch() it
func (w *Watcher) addLocked(v *structs.CSIVolume) (*volumeWatcher, error) {
	// Not enabled so no-op
	if !w.enabled {
		return nil, nil
	}

	// Already watched so trigger an update for the volume
	if watcher, ok := w.watchers[v.ID+v.Namespace]; ok {
		watcher.Notify(v)
		return nil, nil
	}

	watcher := newVolumeWatcher(w, v)
	w.watchers[v.ID+v.Namespace] = watcher
	return watcher, nil
}

// TODO: this is currently dead code; we'll call a public remove
// method on the Watcher once we have a periodic GC job
// removeLocked stops watching a volume and should only be called when locked.
func (w *Watcher) removeLocked(volID, namespace string) {
	if !w.enabled {
		return
	}
	if watcher, ok := w.watchers[volID+namespace]; ok {
		watcher.Stop()
		delete(w.watchers, volID+namespace)
	}
}

// updateClaims sends the claims to the batch updater and waits for
// the results
func (w *Watcher) updateClaims(claims []structs.CSIVolumeClaimRequest) (uint64, error) {
	return w.volumeUpdateBatcher.CreateUpdate(claims).Results()
}
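
A minimal sketch of how a server might drive this watcher across leader transitions, assuming hypothetical raftShim and clientRPC values that satisfy the VolumeRaftEndpoints and ClientRPC interfaces, and a leaderCh leadership signal; none of these names are introduced by this change.

// Sketch only: leaderCh, raftShim, and clientRPC are assumed stand-ins,
// not identifiers defined in this diff.
func runVolumesWatcher(logger log.Logger, store *state.StateStore,
	raftShim VolumeRaftEndpoints, clientRPC ClientRPC, leaderCh <-chan bool) {

	w := NewVolumesWatcher(logger, raftShim, clientRPC,
		LimitStateQueriesPerSecond, CrossVolumeUpdateBatchDuration)

	for isLeader := range leaderCh {
		// Enabling starts the watchVolumes goroutine; disabling flushes
		// every per-volume watcher and cancels their shared context.
		w.SetEnabled(isLeader, store)
	}
}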
|
|
@ -0,0 +1,311 @@
|
|||
package volumewatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
"github.com/hashicorp/nomad/helper/testlog"
|
||||
"github.com/hashicorp/nomad/nomad/mock"
|
||||
"github.com/hashicorp/nomad/nomad/state"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestVolumeWatch_EnableDisable tests the watcher registration logic that needs
|
||||
// to happen during leader step-up/step-down
|
||||
func TestVolumeWatch_EnableDisable(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
srv := &MockRPCServer{}
|
||||
srv.state = state.TestStateStore(t)
|
||||
index := uint64(100)
|
||||
|
||||
watcher := NewVolumesWatcher(testlog.HCLogger(t),
|
||||
srv, srv,
|
||||
LimitStateQueriesPerSecond,
|
||||
CrossVolumeUpdateBatchDuration)
|
||||
|
||||
watcher.SetEnabled(true, srv.State())
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
node := testNode(nil, plugin, srv.State())
|
||||
alloc := mock.Alloc()
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
vol := testVolume(nil, plugin, alloc, node.ID)
|
||||
|
||||
index++
|
||||
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
|
||||
require.NoError(err)
|
||||
|
||||
claim := &structs.CSIVolumeClaim{Mode: structs.CSIVolumeClaimRelease}
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
require.Eventually(func() bool {
|
||||
return 1 == len(watcher.watchers)
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
watcher.SetEnabled(false, srv.State())
|
||||
require.Equal(0, len(watcher.watchers))
|
||||
}
|
||||
|
||||
// TestVolumeWatch_Checkpoint tests the checkpointing of progress across
|
||||
// leader step-up/step-down
|
||||
func TestVolumeWatch_Checkpoint(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
srv := &MockRPCServer{}
|
||||
srv.state = state.TestStateStore(t)
|
||||
index := uint64(100)
|
||||
|
||||
watcher := NewVolumesWatcher(testlog.HCLogger(t),
|
||||
srv, srv,
|
||||
LimitStateQueriesPerSecond,
|
||||
CrossVolumeUpdateBatchDuration)
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
node := testNode(nil, plugin, srv.State())
|
||||
alloc := mock.Alloc()
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
vol := testVolume(nil, plugin, alloc, node.ID)
|
||||
|
||||
watcher.SetEnabled(true, srv.State())
|
||||
|
||||
index++
|
||||
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
|
||||
require.NoError(err)
|
||||
|
||||
// we should get or start up a watcher when we get an update for
|
||||
// the volume from the state store
|
||||
require.Eventually(func() bool {
|
||||
return 1 == len(watcher.watchers)
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
// step-down (this is sync, but step-up is async)
|
||||
watcher.SetEnabled(false, srv.State())
|
||||
require.Equal(0, len(watcher.watchers))
|
||||
|
||||
// step-up again
|
||||
watcher.SetEnabled(true, srv.State())
|
||||
require.Eventually(func() bool {
|
||||
return 1 == len(watcher.watchers)
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
|
||||
}
|
||||
|
||||
// TestVolumeWatch_StartStop tests the start and stop of the watcher when
|
||||
// it receives notifications and has completed its work
|
||||
func TestVolumeWatch_StartStop(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
ctx, exitFn := context.WithCancel(context.Background())
|
||||
defer exitFn()
|
||||
|
||||
srv := &MockStatefulRPCServer{}
|
||||
srv.state = state.TestStateStore(t)
|
||||
index := uint64(100)
|
||||
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(
|
||||
CrossVolumeUpdateBatchDuration, srv, ctx)
|
||||
|
||||
watcher := NewVolumesWatcher(testlog.HCLogger(t),
|
||||
srv, srv,
|
||||
LimitStateQueriesPerSecond,
|
||||
CrossVolumeUpdateBatchDuration)
|
||||
|
||||
watcher.SetEnabled(true, srv.State())
|
||||
require.Equal(0, len(watcher.watchers))
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
node := testNode(nil, plugin, srv.State())
|
||||
alloc := mock.Alloc()
|
||||
alloc.ClientStatus = structs.AllocClientStatusRunning
|
||||
alloc2 := mock.Alloc()
|
||||
alloc2.Job = alloc.Job
|
||||
alloc2.ClientStatus = structs.AllocClientStatusRunning
|
||||
index++
|
||||
err := srv.State().UpsertJob(index, alloc.Job)
|
||||
require.NoError(err)
|
||||
index++
|
||||
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc, alloc2})
|
||||
require.NoError(err)
|
||||
|
||||
// register a volume
|
||||
vol := testVolume(nil, plugin, alloc, node.ID)
|
||||
index++
|
||||
err = srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
|
||||
require.NoError(err)
|
||||
|
||||
// assert we get a running watcher
|
||||
require.Eventually(func() bool {
|
||||
return 1 == len(watcher.watchers)
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
|
||||
|
||||
// claim the volume for both allocs
|
||||
claim := &structs.CSIVolumeClaim{
|
||||
AllocationID: alloc.ID,
|
||||
NodeID: node.ID,
|
||||
Mode: structs.CSIVolumeClaimRead,
|
||||
}
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
claim.AllocationID = alloc2.ID
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
|
||||
// reap the volume and assert nothing has happened
|
||||
claim = &structs.CSIVolumeClaim{
|
||||
AllocationID: alloc.ID,
|
||||
NodeID: node.ID,
|
||||
Mode: structs.CSIVolumeClaimRelease,
|
||||
}
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
|
||||
|
||||
// alloc becomes terminal
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
index++
|
||||
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc})
|
||||
require.NoError(err)
|
||||
index++
|
||||
claim.State = structs.CSIVolumeClaimStateReadyToFree
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
|
||||
// 1 claim has been released but watcher is still running
|
||||
require.Eventually(func() bool {
|
||||
ws := memdb.NewWatchSet()
|
||||
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
|
||||
return len(vol.ReadAllocs) == 1 && len(vol.PastClaims) == 0
|
||||
}, time.Second*2, 10*time.Millisecond)
|
||||
|
||||
require.True(watcher.watchers[vol.ID+vol.Namespace].isRunning())
|
||||
|
||||
// the watcher will have incremented the index so we need to make sure
|
||||
// our inserts will trigger new events
|
||||
index, _ = srv.State().LatestIndex()
|
||||
|
||||
// remaining alloc's job is stopped (alloc is not marked terminal)
|
||||
alloc2.Job.Stop = true
|
||||
index++
|
||||
err = srv.State().UpsertJob(index, alloc2.Job)
|
||||
require.NoError(err)
|
||||
|
||||
// job deregistration writes a claim with no allocations or nodes
|
||||
claim = &structs.CSIVolumeClaim{
|
||||
Mode: structs.CSIVolumeClaimRelease,
|
||||
}
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
|
||||
require.NoError(err)
|
||||
|
||||
// all claims have been released and watcher is stopped
|
||||
require.Eventually(func() bool {
|
||||
ws := memdb.NewWatchSet()
|
||||
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
|
||||
return len(vol.ReadAllocs) == 1 && len(vol.PastClaims) == 0
|
||||
}, time.Second*2, 10*time.Millisecond)
|
||||
|
||||
require.Eventually(func() bool {
|
||||
return !watcher.watchers[vol.ID+vol.Namespace].isRunning()
|
||||
}, time.Second*1, 10*time.Millisecond)
|
||||
|
||||
// the watcher will have incremented the index so we need to make sure
|
||||
// our inserts will trigger new events
|
||||
index, _ = srv.State().LatestIndex()
|
||||
|
||||
// create a new claim
|
||||
alloc3 := mock.Alloc()
|
||||
alloc3.ClientStatus = structs.AllocClientStatusRunning
|
||||
index++
|
||||
err = srv.State().UpsertAllocs(index, []*structs.Allocation{alloc3})
|
||||
require.NoError(err)
|
||||
claim3 := &structs.CSIVolumeClaim{
|
||||
AllocationID: alloc3.ID,
|
||||
NodeID: node.ID,
|
||||
Mode: structs.CSIVolumeClaimRelease,
|
||||
}
|
||||
index++
|
||||
err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim3)
|
||||
require.NoError(err)
|
||||
|
||||
// a stopped watcher should restore itself on notification
|
||||
require.Eventually(func() bool {
|
||||
return watcher.watchers[vol.ID+vol.Namespace].isRunning()
|
||||
}, time.Second*1, 10*time.Millisecond)
|
||||
}
|
||||
|
||||
// TestVolumeWatch_RegisterDeregister tests the start and stop of
|
||||
// watchers around registration
|
||||
func TestVolumeWatch_RegisterDeregister(t *testing.T) {
|
||||
t.Parallel()
|
||||
require := require.New(t)
|
||||
|
||||
ctx, exitFn := context.WithCancel(context.Background())
|
||||
defer exitFn()
|
||||
|
||||
srv := &MockStatefulRPCServer{}
|
||||
srv.state = state.TestStateStore(t)
|
||||
srv.volumeUpdateBatcher = NewVolumeUpdateBatcher(
|
||||
CrossVolumeUpdateBatchDuration, srv, ctx)
|
||||
|
||||
index := uint64(100)
|
||||
|
||||
watcher := NewVolumesWatcher(testlog.HCLogger(t),
|
||||
srv, srv,
|
||||
LimitStateQueriesPerSecond,
|
||||
CrossVolumeUpdateBatchDuration)
|
||||
|
||||
watcher.SetEnabled(true, srv.State())
|
||||
require.Equal(0, len(watcher.watchers))
|
||||
|
||||
plugin := mock.CSIPlugin()
|
||||
node := testNode(nil, plugin, srv.State())
|
||||
alloc := mock.Alloc()
|
||||
alloc.ClientStatus = structs.AllocClientStatusComplete
|
||||
|
||||
// register a volume
|
||||
vol := testVolume(nil, plugin, alloc, node.ID)
|
||||
index++
|
||||
err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol})
|
||||
require.NoError(err)
|
||||
|
||||
require.Eventually(func() bool {
|
||||
return 1 == len(watcher.watchers)
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
// reap the volume and assert we've cleaned up
|
||||
w := watcher.watchers[vol.ID+vol.Namespace]
|
||||
w.Notify(vol)
|
||||
|
||||
require.Eventually(func() bool {
|
||||
ws := memdb.NewWatchSet()
|
||||
vol, _ := srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
|
||||
return len(vol.ReadAllocs) == 0 && len(vol.PastClaims) == 0
|
||||
}, time.Second*2, 10*time.Millisecond)
|
||||
|
||||
require.Eventually(func() bool {
|
||||
return !watcher.watchers[vol.ID+vol.Namespace].isRunning()
|
||||
}, time.Second*1, 10*time.Millisecond)
|
||||
|
||||
require.Equal(1, srv.countCSINodeDetachVolume, "node detach RPC count")
|
||||
require.Equal(1, srv.countCSIControllerDetachVolume, "controller detach RPC count")
|
||||
require.Equal(2, srv.countUpsertVolumeClaims, "upsert claims count")
|
||||
|
||||
// deregistering the volume doesn't cause an update that triggers
|
||||
// a watcher; we'll clean up this watcher in a GC later
|
||||
err = srv.State().CSIVolumeDeregister(index, vol.Namespace, []string{vol.ID})
|
||||
require.NoError(err)
|
||||
require.Equal(1, len(watcher.watchers))
|
||||
require.False(watcher.watchers[vol.ID+vol.Namespace].isRunning())
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package nomad

import (
	"github.com/hashicorp/nomad/nomad/structs"
)

// volumeWatcherRaftShim is the shim that provides the state watching
// methods. These should be set by the server and passed to the volume
// watcher.
type volumeWatcherRaftShim struct {
	// apply is used to apply a message to Raft
	apply raftApplyFn
}

// convertApplyErrors parses the results of a raftApply and returns the index at
// which it was applied and any error that occurred. Raft Apply returns two
// separate errors, Raft library errors and user returned errors from the FSM.
// This helper joins the errors by inspecting the applyResponse for an error.
func (shim *volumeWatcherRaftShim) convertApplyErrors(applyResp interface{}, index uint64, err error) (uint64, error) {
	if applyResp != nil {
		if fsmErr, ok := applyResp.(error); ok && fsmErr != nil {
			return index, fsmErr
		}
	}
	return index, err
}

func (shim *volumeWatcherRaftShim) UpsertVolumeClaims(req *structs.CSIVolumeClaimBatchRequest) (uint64, error) {
	fsmErrIntf, index, raftErr := shim.apply(structs.CSIVolumeClaimBatchRequestType, req)
	return shim.convertApplyErrors(fsmErrIntf, index, raftErr)
}
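
The error-joining behavior above means an FSM-level error in the apply response takes precedence over the Raft library error. A hedged sketch of that behavior, using a hand-rolled apply function whose shape is inferred from the UpsertVolumeClaims call above, not the server's real raftApplyFn:

// Sketch only: the apply func below is an assumed stand-in.
func exampleVolumeWatcherShimErrors() {
	shim := &volumeWatcherRaftShim{
		apply: func(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
			// Raft accepted the entry, but the FSM returned an error.
			return fmt.Errorf("fsm rejected request"), 1234, nil
		},
	}

	index, err := shim.UpsertVolumeClaims(&structs.CSIVolumeClaimBatchRequest{})
	// index == 1234 and err is the FSM error, even though the Raft
	// library itself reported no error.
	_, _ = index, err
}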
|
|
@ -82,6 +82,7 @@ type client struct {
|
|||
identityClient csipbv1.IdentityClient
|
||||
controllerClient CSIControllerClient
|
||||
nodeClient CSINodeClient
|
||||
logger hclog.Logger
|
||||
}
|
||||
|
||||
func (c *client) Close() error {
|
||||
|
@ -106,6 +107,7 @@ func NewClient(addr string, logger hclog.Logger) (CSIPlugin, error) {
|
|||
identityClient: csipbv1.NewIdentityClient(conn),
|
||||
controllerClient: csipbv1.NewControllerClient(conn),
|
||||
nodeClient: csipbv1.NewNodeClient(conn),
|
||||
logger: logger,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -318,17 +320,50 @@ func (c *client) ControllerValidateCapabilities(ctx context.Context, volumeID st
|
|||
		return err
	}

	if resp.Confirmed == nil {
		if resp.Message != "" {
			return fmt.Errorf("Volume validation failed, message: %s", resp.Message)
		}
	if resp.Message != "" {
		// this should only ever be set if Confirmed isn't set, but
		// it's not a validation failure.
		c.logger.Debug(resp.Message)
	}

		return fmt.Errorf("Volume validation failed")
	// The protobuf accessors below safely handle nil pointers.
	// The CSI spec says we can only assert the plugin has
	// confirmed the volume capabilities, not that it hasn't
	// confirmed them, so if the field is nil we have to assume
	// the volume is ok.
	confirmedCaps := resp.GetConfirmed().GetVolumeCapabilities()
	if confirmedCaps != nil {
		for _, requestedCap := range req.VolumeCapabilities {
			if !compareCapabilities(requestedCap, confirmedCaps) {
				return fmt.Errorf("volume capability validation failed: missing %v", req)
			}
		}
	}

	return nil
}

// compareCapabilities returns true if the 'got' capabilities contains
// the 'expected' capability
func compareCapabilities(expected *csipbv1.VolumeCapability, got []*csipbv1.VolumeCapability) bool {
	for _, cap := range got {
		if expected.GetAccessMode().GetMode() != cap.GetAccessMode().GetMode() {
			continue
		}
		// AccessType Block is an empty struct even if set, so the
		// only way to test for it is to check that the AccessType
		// isn't Mount.
		if expected.GetMount() == nil && cap.GetMount() != nil {
			continue
		}
		if expected.GetMount() != cap.GetMount() {
			continue
		}
		return true
	}
	return false
}

//
// Node Endpoints
//
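
As a worked example of the matching rules above, using the same protobuf constructors that appear in the client tests later in this change: a requested block-access capability with mode MULTI_NODE_MULTI_WRITER is not satisfied by a confirmed capability whose mode is SINGLE_NODE_WRITER, even though the access types agree.

// Sketch only, illustrating compareCapabilities.
func exampleCompareCapabilities() bool {
	expected := &csipbv1.VolumeCapability{
		AccessType: &csipbv1.VolumeCapability_Block{
			Block: &csipbv1.VolumeCapability_BlockVolume{},
		},
		AccessMode: &csipbv1.VolumeCapability_AccessMode{
			Mode: csipbv1.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
		},
	}
	got := []*csipbv1.VolumeCapability{{
		AccessType: &csipbv1.VolumeCapability_Block{
			Block: &csipbv1.VolumeCapability_BlockVolume{},
		},
		AccessMode: &csipbv1.VolumeCapability_AccessMode{
			Mode: csipbv1.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
		},
	}}

	// Returns false: the access modes differ, so the candidate is
	// skipped and no match is found.
	return compareCapabilities(expected, got)
}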
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
|
||||
csipbv1 "github.com/container-storage-interface/spec/lib/go/csi"
|
||||
"github.com/golang/protobuf/ptypes/wrappers"
|
||||
"github.com/hashicorp/nomad/nomad/structs"
|
||||
fake "github.com/hashicorp/nomad/plugins/csi/testing"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
@ -473,6 +474,95 @@ func TestClient_RPC_ControllerUnpublishVolume(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestClient_RPC_ControllerValidateVolume(t *testing.T) {
|
||||
|
||||
cases := []struct {
|
||||
Name string
|
||||
ResponseErr error
|
||||
Response *csipbv1.ValidateVolumeCapabilitiesResponse
|
||||
ExpectedErr error
|
||||
}{
|
||||
{
|
||||
Name: "handles underlying grpc errors",
|
||||
ResponseErr: fmt.Errorf("some grpc error"),
|
||||
ExpectedErr: fmt.Errorf("some grpc error"),
|
||||
},
|
||||
{
|
||||
Name: "handles empty success",
|
||||
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{},
|
||||
ResponseErr: nil,
|
||||
ExpectedErr: nil,
|
||||
},
|
||||
{
|
||||
Name: "handles validate success",
|
||||
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{
|
||||
Confirmed: &csipbv1.ValidateVolumeCapabilitiesResponse_Confirmed{
|
||||
VolumeContext: map[string]string{},
|
||||
VolumeCapabilities: []*csipbv1.VolumeCapability{
|
||||
{
|
||||
AccessType: &csipbv1.VolumeCapability_Block{
|
||||
Block: &csipbv1.VolumeCapability_BlockVolume{},
|
||||
},
|
||||
AccessMode: &csipbv1.VolumeCapability_AccessMode{
|
||||
Mode: csipbv1.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ResponseErr: nil,
|
||||
ExpectedErr: nil,
|
||||
},
|
||||
{
|
||||
Name: "handles validation failure",
|
||||
Response: &csipbv1.ValidateVolumeCapabilitiesResponse{
|
||||
Confirmed: &csipbv1.ValidateVolumeCapabilitiesResponse_Confirmed{
|
||||
VolumeContext: map[string]string{},
|
||||
VolumeCapabilities: []*csipbv1.VolumeCapability{
|
||||
{
|
||||
AccessType: &csipbv1.VolumeCapability_Block{
|
||||
Block: &csipbv1.VolumeCapability_BlockVolume{},
|
||||
},
|
||||
AccessMode: &csipbv1.VolumeCapability_AccessMode{
|
||||
Mode: csipbv1.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ResponseErr: nil,
|
||||
ExpectedErr: fmt.Errorf("volume capability validation failed"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.Name, func(t *testing.T) {
|
||||
_, cc, _, client := newTestClient()
|
||||
defer client.Close()
|
||||
|
||||
requestedCaps := &VolumeCapability{
|
||||
AccessType: VolumeAccessTypeBlock,
|
||||
AccessMode: VolumeAccessModeMultiNodeMultiWriter,
|
||||
MountVolume: &structs.CSIMountOptions{ // should be ignored
|
||||
FSType: "ext4",
|
||||
MountFlags: []string{"noatime", "errors=remount-ro"},
|
||||
},
|
||||
}
|
||||
cc.NextValidateVolumeCapabilitiesResponse = c.Response
|
||||
cc.NextErr = c.ResponseErr
|
||||
|
||||
err := client.ControllerValidateCapabilities(
|
||||
context.TODO(), "volumeID", requestedCaps)
|
||||
if c.ExpectedErr != nil {
|
||||
require.Error(t, c.ExpectedErr, err, c.Name)
|
||||
} else {
|
||||
require.NoError(t, err, c.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestClient_RPC_NodeStageVolume(t *testing.T) {
|
||||
cases := []struct {
|
||||
Name string
|
||||
|
|
|
@ -44,10 +44,11 @@ func (f *IdentityClient) Probe(ctx context.Context, in *csipbv1.ProbeRequest, op
|
|||
|
||||
// ControllerClient is a CSI controller client used for testing
|
||||
type ControllerClient struct {
|
||||
NextErr error
|
||||
NextCapabilitiesResponse *csipbv1.ControllerGetCapabilitiesResponse
|
||||
NextPublishVolumeResponse *csipbv1.ControllerPublishVolumeResponse
|
||||
NextUnpublishVolumeResponse *csipbv1.ControllerUnpublishVolumeResponse
|
||||
NextErr error
|
||||
NextCapabilitiesResponse *csipbv1.ControllerGetCapabilitiesResponse
|
||||
NextPublishVolumeResponse *csipbv1.ControllerPublishVolumeResponse
|
||||
NextUnpublishVolumeResponse *csipbv1.ControllerUnpublishVolumeResponse
|
||||
NextValidateVolumeCapabilitiesResponse *csipbv1.ValidateVolumeCapabilitiesResponse
|
||||
}
|
||||
|
||||
// NewControllerClient returns a new ControllerClient
|
||||
|
@ -60,6 +61,7 @@ func (f *ControllerClient) Reset() {
|
|||
f.NextCapabilitiesResponse = nil
|
||||
f.NextPublishVolumeResponse = nil
|
||||
f.NextUnpublishVolumeResponse = nil
|
||||
f.NextValidateVolumeCapabilitiesResponse = nil
|
||||
}
|
||||
|
||||
func (c *ControllerClient) ControllerGetCapabilities(ctx context.Context, in *csipbv1.ControllerGetCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ControllerGetCapabilitiesResponse, error) {
|
||||
|
@ -75,7 +77,7 @@ func (c *ControllerClient) ControllerUnpublishVolume(ctx context.Context, in *cs
|
|||
}
|
||||
|
||||
func (c *ControllerClient) ValidateVolumeCapabilities(ctx context.Context, in *csipbv1.ValidateVolumeCapabilitiesRequest, opts ...grpc.CallOption) (*csipbv1.ValidateVolumeCapabilitiesResponse, error) {
|
||||
panic("not implemented") // TODO: Implement
|
||||
return c.NextValidateVolumeCapabilitiesResponse, c.NextErr
|
||||
}
|
||||
|
||||
// NodeClient is a CSI Node client used for testing
|
||||
|
|
|
@ -426,7 +426,7 @@ var xxx_messageInfo_FingerprintRequest proto.InternalMessageInfo
|
|||
|
||||
type FingerprintResponse struct {
|
||||
// Attributes are key/value pairs that annotate the nomad client and can be
|
||||
// used in scheduling contraints and affinities.
|
||||
// used in scheduling constraints and affinities.
|
||||
Attributes map[string]*proto1.Attribute `protobuf:"bytes,1,rep,name=attributes,proto3" json:"attributes,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
|
||||
// Health is used to determine the state of the health the driver is in.
|
||||
// Health can be one of the following states:
|
||||
|
|
|
@ -109,7 +109,7 @@ message FingerprintResponse {
|
|||
|
||||
|
||||
// Attributes are key/value pairs that annotate the nomad client and can be
|
||||
// used in scheduling contraints and affinities.
|
||||
// used in scheduling constraints and affinities.
|
||||
map<string, hashicorp.nomad.plugins.shared.structs.Attribute> attributes = 1;
|
||||
|
||||
enum HealthState {
|
||||
|
|
|
@ -78,7 +78,7 @@ func (h *DriverHarness) Kill() {
|
|||
// MkAllocDir creates a temporary directory and allocdir structure.
|
||||
// If enableLogs is set to true a logmon instance will be started to write logs
|
||||
// to the LogDir of the task
|
||||
// A cleanup func is returned and should be defered so as to not leak dirs
|
||||
// A cleanup func is returned and should be deferred so as to not leak dirs
|
||||
// between tests.
|
||||
func (h *DriverHarness) MkAllocDir(t *drivers.TaskConfig, enableLogs bool) func() {
|
||||
dir, err := ioutil.TempDir("", "nomad_driver_harness-")
|
||||
|
|
|
@ -2072,6 +2072,15 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
|
|||
require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))
|
||||
require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))
|
||||
|
||||
taskName := job.TaskGroups[0].Tasks[0].Name
|
||||
|
||||
adr := structs.AllocatedDeviceResource{
|
||||
Type: "gpu",
|
||||
Vendor: "nvidia",
|
||||
Name: "1080ti",
|
||||
DeviceIDs: []string{uuid.Generate()},
|
||||
}
|
||||
|
||||
// Create allocs that are part of the old deployment
|
||||
var allocs []*structs.Allocation
|
||||
for i := 0; i < 10; i++ {
|
||||
|
@ -2082,6 +2091,7 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
|
|||
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
|
||||
alloc.DeploymentID = d.ID
|
||||
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
|
||||
alloc.AllocatedResources.Tasks[taskName].Devices = []*structs.AllocatedDeviceResource{&adr}
|
||||
allocs = append(allocs, alloc)
|
||||
}
|
||||
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
|
||||
|
@ -2155,13 +2165,16 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
|
|||
}
|
||||
h.AssertEvalStatus(t, structs.EvalStatusComplete)
|
||||
|
||||
// Verify the network did not change
|
||||
// Verify the allocated networks and devices did not change
|
||||
rp := structs.Port{Label: "admin", Value: 5000}
|
||||
for _, alloc := range out {
|
||||
for _, resources := range alloc.TaskResources {
|
||||
for _, resources := range alloc.AllocatedResources.Tasks {
|
||||
if resources.Networks[0].ReservedPorts[0] != rp {
|
||||
t.Fatalf("bad: %#v", alloc)
|
||||
}
|
||||
			if len(resources.Devices) == 0 || !reflect.DeepEqual(resources.Devices[0], &adr) {
				t.Fatalf("bad: devices changed: %#v", alloc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -614,22 +614,25 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
|
|||
continue
|
||||
}
|
||||
|
||||
// Restore the network offers from the existing allocation.
|
||||
// Restore the network and device offers from the existing allocation.
|
||||
// We do not allow network resources (reserved/dynamic ports)
|
||||
// to be updated. This is guarded in taskUpdated, so we can
|
||||
// safely restore those here.
|
||||
for task, resources := range option.TaskResources {
|
||||
var networks structs.Networks
|
||||
var devices []*structs.AllocatedDeviceResource
|
||||
if update.Alloc.AllocatedResources != nil {
|
||||
if tr, ok := update.Alloc.AllocatedResources.Tasks[task]; ok {
|
||||
networks = tr.Networks
|
||||
devices = tr.Devices
|
||||
}
|
||||
} else if tr, ok := update.Alloc.TaskResources[task]; ok {
|
||||
networks = tr.Networks
|
||||
}
|
||||
|
||||
// Add thhe networks back
|
||||
// Add the networks and devices back
|
||||
resources.Networks = networks
|
||||
resources.Devices = devices
|
||||
}
|
||||
|
||||
// Create a shallow copy
|
||||
|
@ -892,15 +895,17 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
|
|||
return false, true, nil
|
||||
}
|
||||
|
||||
// Restore the network offers from the existing allocation.
|
||||
// Restore the network and device offers from the existing allocation.
|
||||
// We do not allow network resources (reserved/dynamic ports)
|
||||
// to be updated. This is guarded in taskUpdated, so we can
|
||||
// safely restore those here.
|
||||
for task, resources := range option.TaskResources {
|
||||
var networks structs.Networks
|
||||
var devices []*structs.AllocatedDeviceResource
|
||||
if existing.AllocatedResources != nil {
|
||||
if tr, ok := existing.AllocatedResources.Tasks[task]; ok {
|
||||
networks = tr.Networks
|
||||
devices = tr.Devices
|
||||
}
|
||||
} else if tr, ok := existing.TaskResources[task]; ok {
|
||||
networks = tr.Networks
|
||||
|
@ -908,6 +913,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
|
|||
|
||||
// Add the networks back
|
||||
resources.Networks = networks
|
||||
resources.Devices = devices
|
||||
}
|
||||
|
||||
// Create a shallow copy
|
||||
|
|
|
@ -87,7 +87,7 @@ compile
|
|||
|
||||
EOF
|
||||
|
||||
echo '=======>>>> Retreiving mac compiled binaries'
|
||||
echo '=======>>>> Retrieving mac compiled binaries'
|
||||
rsync -avz --ignore-existing ${remote_macos_host}:"${REPO_REMOTE_PATH}/pkg/" "${REPO}/pkg"
|
||||
|
||||
ssh ${remote_macos_host} rm -rf "${TMP_WORKSPACE}"
|
||||
|
|
|
@ -5,5 +5,6 @@
|
|||
|
||||
Setting `disableAnalytics` to true will prevent any data from being sent.
|
||||
*/
|
||||
"disableAnalytics": false
|
||||
"disableAnalytics": false,
|
||||
"proxy": "http://127.0.0.1:4646"
|
||||
}
|
||||
|
|
|
@ -15,22 +15,11 @@ export default Component.extend({
|
|||
},
|
||||
|
||||
generateUrl() {
|
||||
let urlSegments = {
|
||||
job: this.job.get('name'),
|
||||
};
|
||||
|
||||
if (this.taskGroup) {
|
||||
urlSegments.taskGroup = this.taskGroup.get('name');
|
||||
}
|
||||
|
||||
if (this.task) {
|
||||
urlSegments.task = this.task.get('name');
|
||||
}
|
||||
|
||||
if (this.allocation) {
|
||||
urlSegments.allocation = this.allocation.get('shortId');
|
||||
}
|
||||
|
||||
return generateExecUrl(this.router, urlSegments);
|
||||
return generateExecUrl(this.router, {
|
||||
job: this.job,
|
||||
taskGroup: this.taskGroup,
|
||||
task: this.task,
|
||||
allocation: this.allocation,
|
||||
});
|
||||
},
|
||||
});
|
||||
|
|
|
@ -70,9 +70,9 @@ export default Component.extend({
|
|||
|
||||
openInNewWindow(job, taskGroup, task) {
|
||||
let url = generateExecUrl(this.router, {
|
||||
job: job.name,
|
||||
taskGroup: taskGroup.name,
|
||||
task: task.name,
|
||||
job,
|
||||
taskGroup,
|
||||
task,
|
||||
});
|
||||
|
||||
openExecUrl(url);
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
import Component from '@ember/component';
|
||||
import { computed } from '@ember/object';
|
||||
|
||||
export default Component.extend({
|
||||
tagName: '',
|
||||
|
||||
activeClass: computed('taskState.state', function() {
|
||||
if (this.taskState && this.taskState.state === 'running') {
|
||||
return 'is-active';
|
||||
}
|
||||
}),
|
||||
|
||||
finishedClass: computed('taskState.finishedAt', function() {
|
||||
if (this.taskState && this.taskState.finishedAt) {
|
||||
return 'is-finished';
|
||||
}
|
||||
}),
|
||||
});
|
|
@ -0,0 +1,61 @@
|
|||
import Component from '@ember/component';
|
||||
import { computed } from '@ember/object';
|
||||
import { sort } from '@ember/object/computed';
|
||||
|
||||
export default Component.extend({
|
||||
tagName: '',
|
||||
|
||||
tasks: null,
|
||||
taskStates: null,
|
||||
|
||||
lifecyclePhases: computed('tasks.@each.lifecycle', 'taskStates.@each.state', function() {
|
||||
const tasksOrStates = this.taskStates || this.tasks;
|
||||
const lifecycles = {
|
||||
prestarts: [],
|
||||
sidecars: [],
|
||||
mains: [],
|
||||
};
|
||||
|
||||
tasksOrStates.forEach(taskOrState => {
|
||||
const task = taskOrState.task || taskOrState;
|
||||
lifecycles[`${task.lifecycleName}s`].push(taskOrState);
|
||||
});
|
||||
|
||||
const phases = [];
|
||||
|
||||
if (lifecycles.prestarts.length || lifecycles.sidecars.length) {
|
||||
phases.push({
|
||||
name: 'Prestart',
|
||||
isActive: lifecycles.prestarts.some(state => state.state === 'running'),
|
||||
});
|
||||
}
|
||||
|
||||
if (lifecycles.sidecars.length || lifecycles.mains.length) {
|
||||
phases.push({
|
||||
name: 'Main',
|
||||
isActive: lifecycles.mains.some(state => state.state === 'running'),
|
||||
});
|
||||
}
|
||||
|
||||
return phases;
|
||||
}),
|
||||
|
||||
sortedLifecycleTaskStates: sort('taskStates', function(a, b) {
|
||||
return getTaskSortPrefix(a.task).localeCompare(getTaskSortPrefix(b.task));
|
||||
}),
|
||||
|
||||
sortedLifecycleTasks: sort('tasks', function(a, b) {
|
||||
return getTaskSortPrefix(a).localeCompare(getTaskSortPrefix(b));
|
||||
}),
|
||||
});
|
||||
|
||||
const lifecycleNameSortPrefix = {
|
||||
prestart: 0,
|
||||
sidecar: 1,
|
||||
main: 2,
|
||||
};
|
||||
|
||||
function getTaskSortPrefix(task) {
|
||||
// Prestarts first, then sidecars, then mains
|
||||
return `${lifecycleNameSortPrefix[task.lifecycleName]}-${task.name}`;
|
||||
}
|
|
@ -5,6 +5,12 @@ import RSVP from 'rsvp';
|
|||
import { logger } from 'nomad-ui/utils/classes/log';
|
||||
import timeout from 'nomad-ui/utils/timeout';
|
||||
|
||||
class MockAbortController {
|
||||
abort() {
|
||||
/* noop */
|
||||
}
|
||||
}
|
||||
|
||||
export default Component.extend({
|
||||
token: service(),
|
||||
|
||||
|
@ -45,12 +51,25 @@ export default Component.extend({
|
|||
logger: logger('logUrl', 'logParams', function logFetch() {
|
||||
// If the log request can't settle in one second, the client
|
||||
// must be unavailable and the server should be used instead
|
||||
|
||||
// AbortControllers don't exist in IE11, so provide a mock if it doesn't exist
|
||||
const aborter = window.AbortController ? new AbortController() : new MockAbortController();
|
||||
const timing = this.useServer ? this.serverTimeout : this.clientTimeout;
|
||||
|
||||
// Capture the state of useServer at logger create time to avoid a race
|
||||
// between the stdout logger and stderr logger running at once.
|
||||
const useServer = this.useServer;
|
||||
return url =>
|
||||
RSVP.race([this.token.authorizedRequest(url), timeout(timing)]).then(
|
||||
response => response,
|
||||
RSVP.race([
|
||||
this.token.authorizedRequest(url, { signal: aborter.signal }),
|
||||
timeout(timing),
|
||||
]).then(
|
||||
response => {
|
||||
return response;
|
||||
},
|
||||
error => {
|
||||
if (this.useServer) {
|
||||
aborter.abort();
|
||||
if (useServer) {
|
||||
this.set('noConnection', true);
|
||||
} else {
|
||||
this.send('failoverToServer');
|
||||
|
@ -62,6 +81,7 @@ export default Component.extend({
|
|||
|
||||
actions: {
|
||||
setMode(mode) {
|
||||
if (this.mode === mode) return;
|
||||
this.logger.stop();
|
||||
this.set('mode', mode);
|
||||
},
|
||||
|
|
|
@ -5,6 +5,15 @@ import { alias } from '@ember/object/computed';
|
|||
import { task } from 'ember-concurrency';
|
||||
|
||||
export default Controller.extend({
|
||||
otherTaskStates: computed('model.task.taskGroup.tasks.@each.name', function() {
|
||||
const taskName = this.model.task.name;
|
||||
return this.model.allocation.states.rejectBy('name', taskName);
|
||||
}),
|
||||
|
||||
prestartTaskStates: computed('otherTaskStates.@each.lifecycle', function() {
|
||||
return this.otherTaskStates.filterBy('task.lifecycle');
|
||||
}),
|
||||
|
||||
network: alias('model.resources.networks.firstObject'),
|
||||
ports: computed('network.reservedPorts.[]', 'network.dynamicPorts.[]', function() {
|
||||
return (this.get('network.reservedPorts') || [])
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
import attr from 'ember-data/attr';
|
||||
import Fragment from 'ember-data-model-fragments/fragment';
|
||||
import { fragmentOwner } from 'ember-data-model-fragments/attributes';
|
||||
|
||||
export default Fragment.extend({
|
||||
task: fragmentOwner(),
|
||||
|
||||
hook: attr('string'),
|
||||
sidecar: attr('boolean'),
|
||||
});
|
|
@ -1,6 +1,7 @@
|
|||
import attr from 'ember-data/attr';
|
||||
import Fragment from 'ember-data-model-fragments/fragment';
|
||||
import { fragmentArray, fragmentOwner } from 'ember-data-model-fragments/attributes';
|
||||
import { fragment, fragmentArray, fragmentOwner } from 'ember-data-model-fragments/attributes';
|
||||
import { computed } from '@ember/object';
|
||||
|
||||
export default Fragment.extend({
|
||||
taskGroup: fragmentOwner(),
|
||||
|
@ -9,6 +10,14 @@ export default Fragment.extend({
|
|||
driver: attr('string'),
|
||||
kind: attr('string'),
|
||||
|
||||
lifecycle: fragment('lifecycle'),
|
||||
|
||||
lifecycleName: computed('lifecycle', 'lifecycle.sidecar', function() {
|
||||
if (this.lifecycle && this.lifecycle.sidecar) return 'sidecar';
|
||||
if (this.lifecycle && this.lifecycle.hook === 'prestart') return 'prestart';
|
||||
return 'main';
|
||||
}),
|
||||
|
||||
reservedMemory: attr('number'),
|
||||
reservedCPU: attr('number'),
|
||||
reservedDisk: attr('number'),
|
||||
|
|
|
@ -72,7 +72,8 @@ export default Service.extend({
|
|||
// This authorizedRawRequest is necessary in order to fetch data
|
||||
// with the guarantee of a token but without the automatic region
|
||||
// param since the region cannot be known at this point.
|
||||
authorizedRawRequest(url, options = { credentials: 'include' }) {
|
||||
authorizedRawRequest(url, options = {}) {
|
||||
const credentials = 'include';
|
||||
const headers = {};
|
||||
const token = this.secret;
|
||||
|
||||
|
@ -80,7 +81,7 @@ export default Service.extend({
|
|||
headers['X-Nomad-Token'] = token;
|
||||
}
|
||||
|
||||
return fetch(url, assign(options, { headers }));
|
||||
return fetch(url, assign(options, { headers, credentials }));
|
||||
},
|
||||
|
||||
authorizedRequest(url, options) {
|
||||
|
|
|
@ -8,13 +8,15 @@
|
|||
@import './components/ember-power-select';
|
||||
@import './components/empty-message';
|
||||
@import './components/error-container';
|
||||
@import './components/exec';
|
||||
@import './components/exec-button';
|
||||
@import './components/exec-window';
|
||||
@import './components/fs-explorer';
|
||||
@import './components/gutter';
|
||||
@import './components/gutter-toggle';
|
||||
@import './components/image-file.scss';
|
||||
@import './components/inline-definitions';
|
||||
@import './components/job-diff';
|
||||
@import './components/lifecycle-chart';
|
||||
@import './components/loading-spinner';
|
||||
@import './components/metrics';
|
||||
@import './components/node-status-light';
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
.exec-button {
|
||||
color: $ui-gray-800;
|
||||
border-color: $ui-gray-300;
|
||||
|
||||
span {
|
||||
color: $ui-gray-800;
|
||||
}
|
||||
|
||||
.icon:first-child:not(:last-child) {
|
||||
width: 0.9rem;
|
||||
height: 0.9rem;
|
||||
margin-left: 0;
|
||||
margin-right: 0.5em;
|
||||
fill: currentColor;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,152 @@
|
|||
.exec-window {
|
||||
display: flex;
|
||||
position: absolute;
|
||||
left: 0;
|
||||
right: 0;
|
||||
top: 3.5rem; // nav.navbar.is-popup height
|
||||
bottom: 0;
|
||||
|
||||
.terminal-container {
|
||||
flex-grow: 1;
|
||||
background: black;
|
||||
padding: 16px;
|
||||
height: 100%;
|
||||
position: relative;
|
||||
color: white;
|
||||
|
||||
.terminal {
|
||||
height: 100%;
|
||||
|
||||
.xterm .xterm-viewport {
|
||||
overflow-y: auto;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
&.loading {
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
background: black;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.task-group-tree {
|
||||
background-color: $ui-gray-900;
|
||||
color: white;
|
||||
padding: 16px;
|
||||
width: 200px;
|
||||
flex-shrink: 0;
|
||||
overflow-y: auto;
|
||||
|
||||
.title {
|
||||
text-transform: uppercase;
|
||||
color: $grey-lighter;
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
.icon {
|
||||
color: $ui-gray-500;
|
||||
}
|
||||
|
||||
.toggle-button {
|
||||
position: relative;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: white;
|
||||
font-size: inherit;
|
||||
line-height: 1.5;
|
||||
width: 100%;
|
||||
text-align: left;
|
||||
overflow-wrap: break-word;
|
||||
padding: 6px 0 5px 17px;
|
||||
|
||||
.icon {
|
||||
position: absolute;
|
||||
left: 0;
|
||||
padding: 3px 3px 0 0;
|
||||
margin-left: -3px;
|
||||
}
|
||||
|
||||
// Adapted from fs-explorer
|
||||
&.is-loading::after {
|
||||
animation: spinAround 750ms infinite linear;
|
||||
border: 2px solid $grey-light;
|
||||
border-radius: 290486px;
|
||||
border-right-color: transparent;
|
||||
border-top-color: transparent;
|
||||
opacity: 0.3;
|
||||
content: '';
|
||||
display: inline-block;
|
||||
height: 1em;
|
||||
width: 1em;
|
||||
margin-left: 0.5em;
|
||||
}
|
||||
}
|
||||
|
||||
.task-list {
|
||||
.task-item {
|
||||
padding: 0 8px 0 19px;
|
||||
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
|
||||
.border-and-label {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
height: 100%;
|
||||
width: 100%;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.border {
|
||||
position: absolute;
|
||||
border-left: 1px solid $ui-gray-700;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.is-active {
|
||||
position: absolute;
|
||||
top: 7.5px;
|
||||
left: -9.75px;
|
||||
|
||||
stroke: $ui-gray-900;
|
||||
stroke-width: 5px;
|
||||
fill: white;
|
||||
}
|
||||
|
||||
.task-label {
|
||||
padding: 6px 0 5px 13px;
|
||||
overflow-wrap: break-word;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.icon {
|
||||
visibility: hidden;
|
||||
width: 16px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
&:hover .icon.show-on-hover {
|
||||
visibility: visible;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.toggle-button,
|
||||
.task-item {
|
||||
font-weight: 500;
|
||||
|
||||
&:hover {
|
||||
background-color: $ui-gray-800;
|
||||
border-radius: 4px;
|
||||
|
||||
.is-active {
|
||||
stroke: $ui-gray-800;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,169 +0,0 @@
|
|||
.tree-and-terminal {
|
||||
display: flex;
|
||||
position: absolute;
|
||||
left: 0;
|
||||
right: 0;
|
||||
top: 3.5rem; // nav.navbar.is-popup height
|
||||
bottom: 0;
|
||||
|
||||
.terminal-container {
|
||||
flex-grow: 1;
|
||||
background: black;
|
||||
padding: 16px;
|
||||
height: 100%;
|
||||
position: relative;
|
||||
color: white;
|
||||
|
||||
.terminal {
|
||||
height: 100%;
|
||||
|
||||
.xterm .xterm-viewport {
|
||||
overflow-y: auto;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
&.loading {
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
background: black;
|
||||
height: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
.task-group-tree {
|
||||
background-color: $ui-gray-900;
|
||||
color: white;
|
||||
padding: 16px;
|
||||
width: 200px;
|
||||
flex-shrink: 0;
|
||||
overflow-y: auto;
|
||||
|
||||
.title {
|
||||
text-transform: uppercase;
|
||||
color: $grey-lighter;
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
.icon {
|
||||
color: $ui-gray-500;
|
||||
}
|
||||
|
||||
.toggle-button {
|
||||
position: relative;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: white;
|
||||
font-size: inherit;
|
||||
line-height: 1.5;
|
||||
width: 100%;
|
||||
text-align: left;
|
||||
overflow-wrap: break-word;
|
||||
padding: 6px 0 5px 17px;
|
||||
|
||||
.icon {
|
||||
position: absolute;
|
||||
left: 0;
|
||||
padding: 3px 3px 0 0;
|
||||
margin-left: -3px;
|
||||
}
|
||||
|
||||
// Adapted from fs-explorer
|
||||
&.is-loading::after {
|
||||
animation: spinAround 750ms infinite linear;
|
||||
border: 2px solid $grey-light;
|
||||
border-radius: 290486px;
|
||||
border-right-color: transparent;
|
||||
border-top-color: transparent;
|
||||
opacity: 0.3;
|
||||
content: '';
|
||||
display: inline-block;
|
||||
height: 1em;
|
||||
width: 1em;
|
||||
margin-left: 0.5em;
|
||||
}
|
||||
}
|
||||
|
||||
.task-list {
|
||||
.task-item {
|
||||
padding: 0 8px 0 19px;
|
||||
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
|
||||
.border-and-label {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
height: 100%;
|
||||
width: 100%;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.border {
|
||||
position: absolute;
|
||||
border-left: 1px solid $ui-gray-700;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.is-active {
|
||||
position: absolute;
|
||||
top: 7.5px;
|
||||
left: -9.75px;
|
||||
|
||||
stroke: $ui-gray-900;
|
||||
stroke-width: 5px;
|
||||
fill: white;
|
||||
}
|
||||
|
||||
.task-label {
|
||||
padding: 6px 0 5px 13px;
|
||||
overflow-wrap: break-word;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.icon {
|
||||
visibility: hidden;
|
||||
width: 16px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
&:hover .icon.show-on-hover {
|
||||
visibility: visible;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.toggle-button,
|
||||
.task-item {
|
||||
font-weight: 500;
|
||||
|
||||
&:hover {
|
||||
background-color: $ui-gray-800;
|
||||
border-radius: 4px;
|
||||
|
||||
.is-active {
|
||||
stroke: $ui-gray-800;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.exec-button {
|
||||
color: $ui-gray-800;
|
||||
border-color: $ui-gray-300;
|
||||
|
||||
span {
|
||||
color: $ui-gray-800;
|
||||
}
|
||||
|
||||
.icon:first-child:not(:last-child) {
|
||||
width: 0.9rem;
|
||||
height: 0.9rem;
|
||||
margin-left: 0;
|
||||
margin-right: 0.5em;
|
||||
fill: currentColor;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
.lifecycle-chart {
|
||||
padding-top: 2rem;
|
||||
position: relative;
|
||||
|
||||
.lifecycle-phases {
|
||||
position: absolute;
|
||||
top: 1.5em;
|
||||
bottom: 1.5em;
|
||||
right: 1.5em;
|
||||
left: 1.5em;
|
||||
|
||||
.divider {
|
||||
position: absolute;
|
||||
left: 25%;
|
||||
height: 100%;
|
||||
|
||||
stroke: $ui-gray-200;
|
||||
stroke-width: 3px;
|
||||
stroke-dasharray: 1, 7;
|
||||
stroke-dashoffset: 1;
|
||||
stroke-linecap: square;
|
||||
}
|
||||
}
|
||||
|
||||
.lifecycle-phase {
|
||||
position: absolute;
|
||||
bottom: 0;
|
||||
top: 0;
|
||||
|
||||
border-top: 2px solid transparent;
|
||||
|
||||
.name {
|
||||
padding: 0.5rem 0.9rem;
|
||||
font-size: $size-7;
|
||||
font-weight: $weight-semibold;
|
||||
color: $ui-gray-500;
|
||||
}
|
||||
|
||||
&.is-active {
|
||||
background: $white-bis;
|
||||
border-top: 2px solid $vagrant-blue;
|
||||
|
||||
.name {
|
||||
color: $vagrant-blue;
|
||||
}
|
||||
}
|
||||
|
||||
&.prestart {
|
||||
left: 0;
|
||||
right: 75%;
|
||||
}
|
||||
|
||||
&.main {
|
||||
left: 25%;
|
||||
right: 0;
|
||||
}
|
||||
}
|
||||
|
||||
.lifecycle-chart-rows {
|
||||
margin-top: 2.5em;
|
||||
}
|
||||
|
||||
.lifecycle-chart-row {
|
||||
position: relative;
|
||||
|
||||
.task {
|
||||
margin: 0.55em 0.9em;
|
||||
padding: 0.3em 0.55em;
|
||||
border: 1px solid $grey-blue;
|
||||
border-radius: $radius;
|
||||
background: white;
|
||||
|
||||
.name {
|
||||
font-weight: $weight-semibold;
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
}
|
||||
}
|
||||
|
||||
&:hover {
|
||||
.name a {
|
||||
text-decoration: underline;
|
||||
}
|
||||
}
|
||||
|
||||
.lifecycle {
|
||||
font-size: $size-7;
|
||||
color: $ui-gray-400;
|
||||
}
|
||||
}
|
||||
|
||||
&.is-active {
|
||||
.task {
|
||||
border-color: $nomad-green;
|
||||
background: lighten($nomad-green, 50%);
|
||||
|
||||
.lifecycle {
|
||||
color: $ui-gray-500;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
&.is-finished {
|
||||
.task {
|
||||
color: $ui-gray-400;
|
||||
}
|
||||
}
|
||||
|
||||
&.main {
|
||||
margin-left: 25%;
|
||||
}
|
||||
|
||||
&.prestart {
|
||||
margin-right: 75%;
|
||||
}
|
||||
|
||||
&:last-child .task {
|
||||
margin-bottom: 0.9em;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
$ui-gray-200: #dce0e6;
|
||||
$ui-gray-300: #bac1cc;
|
||||
$ui-gray-400: #8e96a3;
|
||||
$ui-gray-500: #6f7682;
|
||||
$ui-gray-700: #525761;
|
||||
$ui-gray-800: #373a42;
|