From f8596a36021b3a1b0258dfc234055330d48ea1ff Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 27 Apr 2020 22:04:49 -0600 Subject: [PATCH 1/4] env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `/sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `gopsutil` cannot determine the max CPU speed, it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo), use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types. 
--- client/fingerprint/env_aws.go | 436 +++++++++++++++++++++++++++-- client/fingerprint/env_aws_test.go | 98 ++++++- 2 files changed, 512 insertions(+), 22 deletions(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 1e1d47127..a1c65a9e6 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -21,14 +21,14 @@ import ( const ( // AwsMetadataTimeout is the timeout used when contacting the AWS metadata - // service + // services. AwsMetadataTimeout = 2 * time.Second ) // map of instance type to approximate speed, in Mbits/s // Estimates from http://stackoverflow.com/a/35806587 // This data is meant for a loose approximation -var ec2InstanceSpeedMap = map[*regexp.Regexp]int{ +var ec2NetSpeedTable = map[*regexp.Regexp]int{ regexp.MustCompile("t2.nano"): 30, regexp.MustCompile("t2.micro"): 70, regexp.MustCompile("t2.small"): 125, @@ -46,6 +46,353 @@ var ec2InstanceSpeedMap = map[*regexp.Regexp]int{ regexp.MustCompile(`.*\.32xlarge`): 10000, } +type ec2Specs struct { + mhz float64 + cores int + model string +} + +func (e ec2Specs) ticks() int { + return int(e.mhz) * e.cores +} + +func specs(ghz float64, vCores int, model string) ec2Specs { + return ec2Specs{ + mhz: ghz * 1000, + cores: vCores, + model: model, + } +} + +// Map of instance type to documented CPU speed. +// +// Most values are taken from https://aws.amazon.com/ec2/instance-types/. +// Values for a1 & m6g (Graviton) are taken from https://en.wikichip.org/wiki/annapurna_labs/alpine/al73400 +// Values for inf1 are taken from launching a inf1.xlarge and looking at /proc/cpuinfo +// +// In a few cases, AWS has upgraded the generation of CPU while keeping the same +// instance designation. Since it is possible to launch on the lower performance +// CPU, that one is used as the spec for the instance type. +// +// This table is provided as a best-effort to determine the number of CPU ticks +// available for use by Nomad tasks. 
If an instance type is missing, the fallback +// behavior is to use values from go-psutil, which is only capable of reading +// "current" CPU MHz. +var ec2ProcSpeedTable = map[string]ec2Specs{ + // -- General Purpose -- + + // a1 + "a1.medium": specs(2.3, 1, "AWS Graviton"), + "a1.large": specs(2.3, 2, "AWS Graviton"), + "a1.xlarge": specs(2.3, 4, "AWS Graviton"), + "a1.2xlarge": specs(2.3, 8, "AWS Graviton"), + "a1.4xlarge": specs(2.3, 16, "AWS Graviton"), + "a1.metal": specs(2.3, 16, "AWS Graviton"), + + // t3 + "t3.nano": specs(2.5, 2, "2.5 GHz Intel Scalable"), + "t3.micro": specs(2.5, 2, "2.5 GHz Intel Scalable"), + "t3.small": specs(2.5, 2, "2.5 GHz Intel Scalable"), + "t3.medium": specs(2.5, 2, "2.5 GHz Intel Scalable"), + "t3.large": specs(2.5, 2, "2.5 GHz Intel Scalable"), + "t3.xlarge": specs(2.5, 4, "2.5 GHz Intel Scalable"), + "t3.2xlarge": specs(2.5, 8, "2.5 GHz Intel Scalable"), + + // t3a + "t3a.nano": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "t3a.micro": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "t3a.small": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "t3a.medium": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "t3a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "t3a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"), + "t3a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"), + + // t2 + "t2.nano": specs(3.3, 1, "3.3 GHz Intel Scalable"), + "t2.micro": specs(3.3, 1, "3.3 GHz Intel Scalable"), + "t2.small": specs(3.3, 1, "3.3 GHz Intel Scalable"), + "t2.medium": specs(3.3, 2, "3.3 GHz Intel Scalable"), + "t2.large": specs(3.0, 2, "3.0 GHz Intel Scalable"), + "t2.xlarge": specs(3.0, 4, "3.0 GHz Intel Scalable"), + "t2.2xlarge": specs(3.0, 8, "3.0 GHz Intel Scalable"), + + // m6g + "m6g.medium": specs(2.3, 1, "AWS Graviton2 Neoverse"), + "m6g.large": specs(2.3, 2, "AWS Graviton2 Neoverse"), + "m6g.xlarge": specs(2.3, 4, "AWS Graviton2 Neoverse"), + "m6g.2xlarge": specs(2.3, 8, "AWS Graviton2 Neoverse"), + 
"m6g.4xlarge": specs(2.3, 16, "AWS Graviton2 Neoverse"), + "m6g.8xlarge": specs(2.3, 32, "AWS Graviton2 Neoverse"), + "m6g.12xlarge": specs(2.3, 48, "AWS Graviton2 Neoverse"), + "m6g.16xlarge": specs(2.3, 64, "AWS Graviton2 Neoverse"), + + // m5, m5d + "m5.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"), + "m5.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"), + "m5.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"), + "m5.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"), + "m5.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"), + "m5.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"), + "m5.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"), + "m5.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"), + "m5.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"), + "m5d.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum"), + "m5d.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum"), + "m5d.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum"), + "m5d.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum"), + "m5d.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum"), + "m5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum"), + "m5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum"), + "m5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"), + "m5d.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum"), + + // m5a, m5ad + "m5a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "m5a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"), + "m5a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"), + "m5a.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"), + "m5a.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"), + "m5a.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"), + "m5a.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"), + "m5a.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.xlarge": 
specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"), + "m5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"), + + // m5n, m5dn + "m5n.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"), + "m5n.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"), + "m5n.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"), + "m5n.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"), + "m5n.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"), + "m5n.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"), + "m5n.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"), + "m5n.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + "m5dn.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"), + "m5dn.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"), + "m5dn.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"), + "m5dn.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"), + "m5dn.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"), + "m5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"), + "m5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"), + "m5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + + // m4 + "m4.large": specs(2.3, 2, "2.3 GHz Intel Xeon® E5-2686 v4"), + "m4.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon® E5-2686 v4"), + "m4.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon® E5-2686 v4"), + "m4.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon® E5-2686 v4"), + "m4.10xlarge": specs(2.3, 40, "2.3 GHz Intel Xeon® E5-2686 v4"), + "m4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon® E5-2686 v4"), + + // -- Compute Optimized -- + + // c5, c5d + "c5.large": specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"), + "c5.xlarge": specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"), + "c5.2xlarge": specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 
8000"), + "c5.4xlarge": specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"), + "c5.9xlarge": specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"), + "c5.12xlarge": specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"), + "c5.18xlarge": specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"), + "c5.24xlarge": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"), + "c5.metal": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"), + "c5d.large": specs(3.4, 2, "3.4 GHz Intel Xeon Platinum 8000"), + "c5d.xlarge": specs(3.4, 4, "3.4 GHz Intel Xeon Platinum 8000"), + "c5d.2xlarge": specs(3.4, 8, "3.4 GHz Intel Xeon Platinum 8000"), + "c5d.4xlarge": specs(3.4, 16, "3.4 GHz Intel Xeon Platinum 8000"), + "c5d.9xlarge": specs(3.4, 36, "3.4 GHz Intel Xeon Platinum 8000"), + "c5d.12xlarge": specs(3.6, 48, "3.6 GHz Intel Xeon Scalable"), + "c5d.18xlarge": specs(3.6, 72, "3.6 GHz Intel Xeon Scalable"), + "c5d.24xlarge": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"), + "c5d.metal": specs(3.6, 96, "3.6 GHz Intel Xeon Scalable"), + + // c5n + "c5n.large": specs(3.0, 2, "3.0 GHz Intel Xeon Platinum"), + "c5n.xlarge": specs(3.0, 4, "3.0 GHz Intel Xeon Platinum"), + "c5n.2xlarge": specs(3.0, 8, "3.0 GHz Intel Xeon Platinum"), + "c5n.4xlarge": specs(3.0, 16, "3.0 GHz Intel Xeon Platinum"), + "c5n.9xlarge": specs(3.0, 36, "3.0 GHz Intel Xeon Platinum"), + "c5n.18xlarge": specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"), + "c5n.metal": specs(3.0, 72, "3.0 GHz Intel Xeon Platinum"), + + // c4 + "c4.large": specs(2.9, 2, "2.9 GHz Intel Xeon E5-2666 v3"), + "c4.xlarge": specs(2.9, 4, "2.9 GHz Intel Xeon E5-2666 v3"), + "c4.2xlarge": specs(2.9, 8, "2.9 GHz Intel Xeon E5-2666 v3"), + "c4.4xlarge": specs(2.9, 16, "2.9 GHz Intel Xeon E5-2666 v3"), + "c4.8xlarge": specs(2.9, 36, "2.9 GHz Intel Xeon E5-2666 v3"), + + // -- Memory Optimized -- + + // r5, r5d + "r5.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon 
Platinum 8175"), + "r5.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"), + "r5.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.large": specs(3.1, 2, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"), + "r5d.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Platinum 8175"), + + // r5a, r5ad + "r5a.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "r5a.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"), + "r5a.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"), + "r5a.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"), + "r5a.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"), + "r5a.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"), + "r5a.16xlarge": specs(2.5, 64, "2.5 GHz AMD EPYC 7000 series"), + "r5a.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.large": specs(2.5, 2, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.xlarge": specs(2.5, 4, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.2xlarge": specs(2.5, 8, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.4xlarge": specs(2.5, 16, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.8xlarge": specs(2.5, 32, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.12xlarge": specs(2.5, 48, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.16xlarge": specs(2.5, 
64, "2.5 GHz AMD EPYC 7000 series"), + "r5ad.24xlarge": specs(2.5, 96, "2.5 GHz AMD EPYC 7000 series"), + + // r5n + "r5n.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"), + "r5n.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"), + "r5n.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"), + "r5n.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"), + "r5n.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"), + "r5n.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"), + "r5n.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"), + "r5n.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + "r5dn.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"), + "r5dn.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"), + "r5dn.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"), + "r5dn.4xlarge": specs(3.1, 16, "3.1 GHz Intel Xeon Scalable"), + "r5dn.8xlarge": specs(3.1, 32, "3.1 GHz Intel Xeon Scalable"), + "r5dn.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"), + "r5dn.16xlarge": specs(3.1, 64, "3.1 GHz Intel Xeon Scalable"), + "r5dn.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + + // r4 + "r4.large": specs(2.3, 2, "2.3 GHz Intel Xeon E5-2686 v4"), + "r4.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"), + "r4.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"), + "r4.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"), + "r4.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"), + "r4.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"), + + // x1e + "x1e.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E7-8880 v3"), + "x1e.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E7-8880 v3"), + "x1e.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E7-8880 v3"), + "x1e.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E7-8880 v3"), + "x1e.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"), + "x1e.32xlarge": specs(2.3, 128, "2.3 GHz Intel Xeon E7-8880 v3"), + + // x1 + "x1.16xlarge": specs(2.3, 64, "2.3 GHz Intel 
Xeon E7-8880 v3"), + "x1.32xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E7-8880 v3"), + + // high-memory + "u-6tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"), + "u-9tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"), + "u-12tb1.metal": specs(2.1, 448, "2.1 GHz Intel Xeon Platinum 8176M"), + "u-18tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"), + "u-24tb1.metal": specs(2.7, 448, "2.7 GHz Intel Xeon Scalable"), + + // z1d + "z1d.large": specs(4.0, 2, "4.0 GHz Intel Xeon Scalable"), + "z1d.xlarge": specs(4.0, 4, "4.0 GHz Intel Xeon Scalable"), + "z1d.2xlarge": specs(4.0, 8, "4.0 GHz Intel Xeon Scalable"), + "z1d.3xlarge": specs(4.0, 12, "4.0 GHz Intel Xeon Scalable"), + "z1d.6xlarge": specs(4.0, 24, "4.0 GHz Intel Xeon Scalable"), + "z1d.12xlarge": specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"), + "z1d.metal": specs(4.0, 48, "4.0 GHz Intel Xeon Scalable"), + + // -- Accelerated Computing -- + + // p3, p3dn + "p3.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5-2686 v4"), + "p3.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"), + "p3.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"), + "p3dn.24xlarge": specs(2.5, 96, "2.5 GHz Intel Xeon P-8175M"), + + // p2 + "p2.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"), + "p2.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"), + "p2.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"), + + // inf1 + "inf1.xlarge": specs(3.0, 4, "3.0 GHz Intel Xeon Platinum 8275CL"), + "inf1.2xlarge": specs(3.0, 8, "3.0 GHz Intel Xeon Platinum 8275CL"), + "inf1.6xlarge": specs(3.0, 24, "3.0 GHz Intel Xeon Platinum 8275CL"), + "inf1.24xlarge": specs(3.0, 96, "3.0 GHz Intel Xeon Platinum 8275CL"), + + // g4dn + "g4dn.xlarge": specs(2.5, 4, "2.5 GHz Cascade Lake 24C"), + "g4dn.2xlarge": specs(2.5, 8, "2.5 GHz Cascade Lake 24C"), + "g4dn.4xlarge": specs(2.5, 16, "2.5 GHz Cascade Lake 24C"), + "g4dn.8xlarge": specs(2.5, 32, "2.5 GHz Cascade Lake 24C"), + "g4dn.16xlarge": 
specs(2.5, 64, "2.5 GHz Cascade Lake 24C"), + "g4dn.12xlarge": specs(2.5, 48, "2.5 GHz Cascade Lake 24C"), + "g4dn.metal": specs(2.5, 96, "2.5 GHz Cascade Lake 24C"), + + // g3 + "g3s.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5-2686 v4"), + "g3s.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5-2686 v4"), + "g3s.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5-2686 v4"), + "g3s.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5-2686 v4"), + + // f1 + "f1.2xlarge": specs(2.3, 8, "Intel Xeon E5-2686 v4"), + "f1.4xlarge": specs(2.3, 16, "Intel Xeon E5-2686 v4"), + "f1.16xlarge": specs(2.3, 64, "Intel Xeon E5-2686 v4"), + + // -- Storage Optimized -- + + // i3 + "i3.large": specs(2.3, 2, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.xlarge": specs(2.3, 4, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.4xlarge": specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"), + "i3.metal": specs(2.3, 72, "2.3 GHz Intel Xeon E5 2686 v4"), + + // i3en + "i3en.large": specs(3.1, 2, "3.1 GHz Intel Xeon Scalable"), + "i3en.xlarge": specs(3.1, 4, "3.1 GHz Intel Xeon Scalable"), + "i3en.2xlarge": specs(3.1, 8, "3.1 GHz Intel Xeon Scalable"), + "i3en.3xlarge": specs(3.1, 12, "3.1 GHz Intel Xeon Scalable"), + "i3en.6xlarge": specs(3.1, 24, "3.1 GHz Intel Xeon Scalable"), + "i3en.12xlarge": specs(3.1, 48, "3.1 GHz Intel Xeon Scalable"), + "i3en.24xlarge": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + "i3en.metal": specs(3.1, 96, "3.1 GHz Intel Xeon Scalable"), + + // d2 + "d2.xlarge": specs(2.4, 4, "2.4 GHz Intel Xeon E5-2676 v3"), + "d2.2xlarge": specs(2.4, 8, "2.4 GHz Intel Xeon E5-2676 v3"), + "d2.4xlarge": specs(2.4, 16, "2.4 GHz Intel Xeon E5-2676 v3"), + "d2.8xlarge": specs(2.4, 36, "2.4 GHz Intel Xeon E5-2676 v3"), + + // h1 + "h1.2xlarge": specs(2.3, 8, "2.3 GHz Intel Xeon E5 2686 v4"), + "h1.4xlarge": 
specs(2.3, 16, "2.3 GHz Intel Xeon E5 2686 v4"), + "h1.8xlarge": specs(2.3, 32, "2.3 GHz Intel Xeon E5 2686 v4"), + "h1.16xlarge": specs(2.3, 64, "2.3 GHz Intel Xeon E5 2686 v4"), +} + // EnvAWSFingerprint is used to fingerprint AWS metadata type EnvAWSFingerprint struct { StaticFingerprinter @@ -128,25 +475,52 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F response.AddAttribute(key, v) } - // newNetwork is populated and added to the Nodes resources - var newNetwork *structs.NetworkResource + // accumulate resource information, then assign to response + var resources *structs.Resources + var nodeResources *structs.NodeResources // copy over network specific information if val, ok := response.Attributes["unique.platform.aws.local-ipv4"]; ok && val != "" { response.AddAttribute("unique.network.ip-address", val) - - newNetwork = &structs.NetworkResource{ - Device: "eth0", - IP: val, - CIDR: val + "/32", - MBits: f.throughput(request, ec2meta, val), - } - - response.NodeResources = &structs.NodeResources{ - Networks: []*structs.NetworkResource{newNetwork}, + nodeResources = new(structs.NodeResources) + nodeResources.Networks = []*structs.NetworkResource{ + { + Device: "eth0", + IP: val, + CIDR: val + "/32", + MBits: f.throughput(request, ec2meta, val), + }, } } + // copy over CPU speed information + if specs := f.lookupCPU(ec2meta); specs != nil { + response.AddAttribute("cpu.modelname", specs.model) + f.logger.Debug("lookup ec2 cpu model name", "model", specs.model) + + response.AddAttribute("cpu.frequency", fmt.Sprintf("%.0f", specs.mhz)) + f.logger.Debug("lookup ec2 cpu frequency", "MHz", log.Fmt("%.0f", specs.mhz)) + + response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.cores)) + f.logger.Debug("lookup ec2 cpu cores", "cores", specs.cores) + + if ticks := specs.ticks(); request.Config.CpuCompute <= 0 { + response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks)) + f.logger.Debug("setting ec2 cpu ticks", 
"ticks", ticks) + resources = new(structs.Resources) + resources.CPU = ticks + if nodeResources == nil { + nodeResources = new(structs.NodeResources) + } + nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)} + } + } else { + f.logger.Warn("no cpu specification found for this instance type") + } + + response.Resources = resources + response.NodeResources = nodeResources + // populate Links response.AddLink("aws.ec2", fmt.Sprintf("%s.%s", response.Attributes["platform.aws.placement.availability-zone"], @@ -156,6 +530,28 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F return nil } +func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (string, error) { + response, err := ec2meta.GetMetadata("instance-type") + if err != nil { + return "", err + } + return strings.TrimSpace(response), nil +} + +func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *ec2Specs { + instanceType, err := f.instanceType(ec2meta) + if err != nil { + f.logger.Error("error reading instance-type", "error", err) + return nil + } + for iType, specs := range ec2ProcSpeedTable { + if strings.EqualFold(iType, instanceType) { + return &specs + } + } + return nil +} + func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2metadata.EC2Metadata, ip string) int { throughput := request.Config.NetworkSpeed if throughput != 0 { @@ -180,17 +576,15 @@ func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2 // EnvAWSFingerprint uses lookup table to approximate network speeds func (f *EnvAWSFingerprint) linkSpeed(ec2meta *ec2metadata.EC2Metadata) int { - - resp, err := ec2meta.GetMetadata("instance-type") + instanceType, err := f.instanceType(ec2meta) if err != nil { f.logger.Error("error reading instance-type", "error", err) return 0 } - key := strings.Trim(resp, "\n") netSpeed := 0 - for reg, speed := range ec2InstanceSpeedMap { - if reg.MatchString(key) { + for reg, 
speed := range ec2NetSpeedTable { + if reg.MatchString(instanceType) { netSpeed = speed break } @@ -210,11 +604,11 @@ func ec2MetaClient(endpoint string, timeout time.Duration) (*ec2metadata.EC2Meta c = c.WithEndpoint(endpoint) } - session, err := session.NewSession(c) + sess, err := session.NewSession(c) if err != nil { return nil, err } - return ec2metadata.New(session, c), nil + return ec2metadata.New(sess, c), nil } func isAWS(ec2meta *ec2metadata.EC2Metadata) bool { diff --git a/client/fingerprint/env_aws_test.go b/client/fingerprint/env_aws_test.go index 8ab53af05..598ad3653 100644 --- a/client/fingerprint/env_aws_test.go +++ b/client/fingerprint/env_aws_test.go @@ -202,6 +202,74 @@ func TestNetworkFingerprint_AWS_IncompleteImitation(t *testing.T) { require.Nil(t, response.NodeResources) } +func TestCPUFingerprint_AWS_InstanceFound(t *testing.T) { + endpoint, cleanup := startFakeEC2Metadata(t, awsStubs) + defer cleanup() + + f := NewEnvAWSFingerprint(testlog.HCLogger(t)) + f.(*EnvAWSFingerprint).endpoint = endpoint + + node := &structs.Node{Attributes: make(map[string]string)} + + request := &FingerprintRequest{Config: &config.Config{}, Node: node} + var response FingerprintResponse + err := f.Fingerprint(request, &response) + require.NoError(t, err) + require.True(t, response.Detected) + require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"]) + require.Equal(t, "2500", response.Attributes["cpu.frequency"]) + require.Equal(t, "8", response.Attributes["cpu.numcores"]) + require.Equal(t, "20000", response.Attributes["cpu.totalcompute"]) + require.Equal(t, 20000, response.Resources.CPU) + require.Equal(t, int64(20000), response.NodeResources.Cpu.CpuShares) +} + +func TestCPUFingerprint_AWS_OverrideCompute(t *testing.T) { + endpoint, cleanup := startFakeEC2Metadata(t, awsStubs) + defer cleanup() + + f := NewEnvAWSFingerprint(testlog.HCLogger(t)) + f.(*EnvAWSFingerprint).endpoint = endpoint + + node := &structs.Node{Attributes: 
make(map[string]string)} + + request := &FingerprintRequest{Config: &config.Config{ + CpuCompute: 99999, + }, Node: node} + var response FingerprintResponse + err := f.Fingerprint(request, &response) + require.NoError(t, err) + require.True(t, response.Detected) + require.Equal(t, "2.5 GHz AMD EPYC 7000 series", response.Attributes["cpu.modelname"]) + require.Equal(t, "2500", response.Attributes["cpu.frequency"]) + require.Equal(t, "8", response.Attributes["cpu.numcores"]) + require.NotContains(t, response.Attributes, "cpu.totalcompute") + require.Nil(t, response.Resources) // defaults in cpu fingerprinter + require.Zero(t, response.NodeResources.Cpu) // defaults in cpu fingerprinter +} + +func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) { + endpoint, cleanup := startFakeEC2Metadata(t, unknownInstanceType) + defer cleanup() + + f := NewEnvAWSFingerprint(testlog.HCLogger(t)) + f.(*EnvAWSFingerprint).endpoint = endpoint + + node := &structs.Node{Attributes: make(map[string]string)} + + request := &FingerprintRequest{Config: &config.Config{}, Node: node} + var response FingerprintResponse + err := f.Fingerprint(request, &response) + require.NoError(t, err) + require.True(t, response.Detected) + require.NotContains(t, response.Attributes, "cpu.modelname") + require.NotContains(t, response.Attributes, "cpu.frequency") + require.NotContains(t, response.Attributes, "cpu.numcores") + require.NotContains(t, response.Attributes, "cpu.totalcompute") + require.Nil(t, response.Resources) + require.Nil(t, response.NodeResources) +} + /// Utility functions for tests func startFakeEC2Metadata(t *testing.T, endpoints []endpoint) (endpoint string, cleanup func()) { @@ -252,7 +320,7 @@ var awsStubs = []endpoint{ { Uri: "/latest/meta-data/instance-type", ContentType: "text/plain", - Body: "m3.2xlarge", + Body: "t3a.2xlarge", }, { Uri: "/latest/meta-data/local-hostname", @@ -276,6 +344,34 @@ var awsStubs = []endpoint{ }, } +var unknownInstanceType = []endpoint{ + { + Uri: 
"/latest/meta-data/ami-id", + ContentType: "text/plain", + Body: "ami-1234", + }, + { + Uri: "/latest/meta-data/hostname", + ContentType: "text/plain", + Body: "ip-10-0-0-207.us-west-2.compute.internal", + }, + { + Uri: "/latest/meta-data/placement/availability-zone", + ContentType: "text/plain", + Body: "us-west-2a", + }, + { + Uri: "/latest/meta-data/instance-id", + ContentType: "text/plain", + Body: "i-b3ba3875", + }, + { + Uri: "/latest/meta-data/instance-type", + ContentType: "text/plain", + Body: "xyz123.uber", + }, +} + // noNetworkAWSStubs mimics an EC2 instance but without local ip address // may happen in environments with odd EC2 Metadata emulation var noNetworkAWSStubs = []endpoint{ From 5ddc6077013104a038c5acb2ea6b59a1a8cc677d Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Wed, 29 Apr 2020 10:33:53 -0600 Subject: [PATCH 2/4] env_aws: fixup log line Co-Authored-By: Mahmood Ali --- client/fingerprint/env_aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index a1c65a9e6..a43134d2d 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -515,7 +515,7 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)} } } else { - f.logger.Warn("no cpu specification found for this instance type") + f.logger.Warn("failed to find the cpu specification for this instance type") } response.Resources = resources From 67303b666c833f75511e62fc27d8e59ef154ea05 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Wed, 29 Apr 2020 10:34:26 -0600 Subject: [PATCH 3/4] env_aws: downgrade log line Co-Authored-By: Mahmood Ali --- client/fingerprint/env_aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index a43134d2d..7973df568 100644 --- a/client/fingerprint/env_aws.go +++ 
b/client/fingerprint/env_aws.go @@ -541,7 +541,7 @@ func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (stri func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *ec2Specs { instanceType, err := f.instanceType(ec2meta) if err != nil { - f.logger.Error("error reading instance-type", "error", err) + f.logger.Warn("failed to read EC2 metadata instance-type", "error", err) return nil } for iType, specs := range ec2ProcSpeedTable { From 880c4e23d3f2b513f65f73a1e903f8b9c431eadb Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Wed, 29 Apr 2020 10:47:36 -0600 Subject: [PATCH 4/4] env_aws: combine 3 log lines into 1 --- client/fingerprint/env_aws.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 7973df568..0b2466fd3 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -496,13 +496,9 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F // copy over CPU speed information if specs := f.lookupCPU(ec2meta); specs != nil { response.AddAttribute("cpu.modelname", specs.model) - f.logger.Debug("lookup ec2 cpu model name", "model", specs.model) - response.AddAttribute("cpu.frequency", fmt.Sprintf("%.0f", specs.mhz)) - f.logger.Debug("lookup ec2 cpu frequency", "MHz", log.Fmt("%.0f", specs.mhz)) - response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.cores)) - f.logger.Debug("lookup ec2 cpu cores", "cores", specs.cores) + f.logger.Debug("lookup ec2 cpu", "cores", specs.cores, "MHz", log.Fmt("%.0f", specs.mhz), "model", specs.model) if ticks := specs.ticks(); request.Config.CpuCompute <= 0 { response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))