open-nomad/client/fingerprint/env_aws_test.go

452 lines
13 KiB
Go
Raw Normal View History

2015-08-28 16:31:20 +00:00
package fingerprint
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"github.com/hashicorp/nomad/client/config"
2018-06-13 22:33:25 +00:00
"github.com/hashicorp/nomad/helper/testlog"
2015-08-28 16:31:20 +00:00
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
2015-08-28 16:31:20 +00:00
)
func TestEnvAWSFingerprint_nonAws(t *testing.T) {
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = "http://127.0.0.1/latest"
2015-08-28 16:31:20 +00:00
node := &structs.Node{
Attributes: make(map[string]string),
}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.Empty(t, response.Attributes)
2015-08-28 16:31:20 +00:00
}
func TestEnvAWSFingerprint_aws(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
2015-08-28 16:31:20 +00:00
node := &structs.Node{
Attributes: make(map[string]string),
}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
2015-08-28 16:31:20 +00:00
keys := []string{
"platform.aws.ami-id",
2016-01-23 02:12:16 +00:00
"unique.platform.aws.hostname",
"unique.platform.aws.instance-id",
"platform.aws.instance-type",
2016-01-23 02:12:16 +00:00
"unique.platform.aws.local-hostname",
"unique.platform.aws.local-ipv4",
"unique.platform.aws.public-hostname",
"unique.platform.aws.public-ipv4",
"platform.aws.placement.availability-zone",
2016-01-23 02:12:16 +00:00
"unique.network.ip-address",
2015-08-28 16:31:20 +00:00
}
for _, k := range keys {
assertNodeAttributeContains(t, response.Attributes, k)
2015-08-28 16:31:20 +00:00
}
require.NotEmpty(t, response.Links)
// confirm we have at least instance-id and ami-id
for _, k := range []string{"aws.ec2"} {
assertNodeLinksContains(t, response.Links, k)
}
2015-08-28 16:31:20 +00:00
}
2015-09-22 21:56:04 +00:00
func TestNetworkFingerprint_AWS(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
2015-09-22 21:56:04 +00:00
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
2015-09-22 21:56:04 +00:00
node := &structs.Node{
Attributes: make(map[string]string),
}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
2015-09-22 21:56:04 +00:00
assertNodeAttributeContains(t, response.Attributes, "unique.network.ip-address")
2015-09-23 04:22:23 +00:00
require.NotNil(t, response.NodeResources)
require.Len(t, response.NodeResources.Networks, 1)
2015-09-23 04:22:23 +00:00
// Test at least the first Network Resource
2018-10-03 16:47:18 +00:00
net := response.NodeResources.Networks[0]
require.NotEmpty(t, net.IP, "Expected Network Resource to have an IP")
require.NotEmpty(t, net.CIDR, "Expected Network Resource to have a CIDR")
require.NotEmpty(t, net.Device, "Expected Network Resource to have a Device Name")
2015-09-22 21:56:04 +00:00
}
func TestNetworkFingerprint_AWS_network(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
{
node := &structs.Node{
Attributes: make(map[string]string),
}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected, "expected response to be applicable")
assertNodeAttributeContains(t, response.Attributes, "unique.network.ip-address")
require.NotNil(t, response.NodeResources)
require.Len(t, response.NodeResources.Networks, 1)
// Test at least the first Network Resource
2018-10-03 16:47:18 +00:00
net := response.NodeResources.Networks[0]
require.NotEmpty(t, net.IP, "Expected Network Resource to have an IP")
require.NotEmpty(t, net.CIDR, "Expected Network Resource to have a CIDR")
require.NotEmpty(t, net.Device, "Expected Network Resource to have a Device Name")
require.Equal(t, 1000, net.MBits)
}
// Try again this time setting a network speed in the config
{
node := &structs.Node{
Attributes: make(map[string]string),
}
cfg := &config.Config{
NetworkSpeed: 10,
}
request := &FingerprintRequest{Config: cfg, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
assertNodeAttributeContains(t, response.Attributes, "unique.network.ip-address")
require.NotNil(t, response.NodeResources)
require.Len(t, response.NodeResources.Networks, 1)
// Test at least the first Network Resource
2018-10-03 16:47:18 +00:00
net := response.NodeResources.Networks[0]
require.NotEmpty(t, net.IP, "Expected Network Resource to have an IP")
require.NotEmpty(t, net.CIDR, "Expected Network Resource to have a CIDR")
require.NotEmpty(t, net.Device, "Expected Network Resource to have a Device Name")
require.Equal(t, 10, net.MBits)
}
}
func TestNetworkFingerprint_AWS_NoNetwork(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, noNetworkAWSStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
2015-09-22 21:56:04 +00:00
node := &structs.Node{
Attributes: make(map[string]string),
2015-09-22 21:56:04 +00:00
}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected, "expected response to be applicable")
require.Equal(t, "ami-1234", response.Attributes["platform.aws.ami-id"])
require.Nil(t, response.NodeResources.Networks)
}
// TestNetworkFingerprint_AWS_IncompleteImitation fingerprints against
// incompleteAWSImitationStubs and expects the environment not to be detected:
// no error, Detected false, no "platform.aws.ami-id" attribute and no node
// resources.
func TestNetworkFingerprint_AWS_IncompleteImitation(t *testing.T) {
	metaURL, stop := startFakeEC2Metadata(t, incompleteAWSImitationStubs)
	defer stop()

	fp := NewEnvAWSFingerprint(testlog.HCLogger(t))
	fp.(*EnvAWSFingerprint).endpoint = metaURL

	req := &FingerprintRequest{
		Config: &config.Config{},
		Node:   &structs.Node{Attributes: make(map[string]string)},
	}
	var resp FingerprintResponse
	require.NoError(t, fp.Fingerprint(req, &resp))

	require.False(t, resp.Detected, "expected response not to be applicable")
	require.NotContains(t, resp.Attributes, "platform.aws.ami-id")
	require.Nil(t, resp.NodeResources)
}
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
func TestCPUFingerprint_AWS_InstanceFound(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
node := &structs.Node{Attributes: make(map[string]string)}
request := &FingerprintRequest{Config: &config.Config{}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected)
require.Equal(t, "2200", response.Attributes["cpu.frequency"])
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
require.Equal(t, "8", response.Attributes["cpu.numcores"])
require.Equal(t, "17600", response.Attributes["cpu.totalcompute"])
require.Equal(t, 17600, response.Resources.CPU)
require.Equal(t, int64(17600), response.NodeResources.Cpu.CpuShares)
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
}
func TestCPUFingerprint_AWS_OverrideCompute(t *testing.T) {
endpoint, cleanup := startFakeEC2Metadata(t, awsStubs)
defer cleanup()
f := NewEnvAWSFingerprint(testlog.HCLogger(t))
f.(*EnvAWSFingerprint).endpoint = endpoint
node := &structs.Node{Attributes: make(map[string]string)}
request := &FingerprintRequest{Config: &config.Config{
CpuCompute: 99999,
}, Node: node}
var response FingerprintResponse
err := f.Fingerprint(request, &response)
require.NoError(t, err)
require.True(t, response.Detected)
require.Equal(t, "2200", response.Attributes["cpu.frequency"])
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
require.Equal(t, "8", response.Attributes["cpu.numcores"])
require.NotContains(t, response.Attributes, "cpu.totalcompute")
require.Nil(t, response.Resources) // defaults in cpu fingerprinter
require.Zero(t, response.NodeResources.Cpu) // defaults in cpu fingerprinter
}
// TestCPUFingerprint_AWS_InstanceNotFound fingerprints against the
// unknownInstanceType stubs. The environment must still be detected, but no
// cpu.* attributes may be set and both Resources and NodeResources must stay
// nil.
func TestCPUFingerprint_AWS_InstanceNotFound(t *testing.T) {
	metaURL, stop := startFakeEC2Metadata(t, unknownInstanceType)
	defer stop()

	fp := NewEnvAWSFingerprint(testlog.HCLogger(t))
	fp.(*EnvAWSFingerprint).endpoint = metaURL

	req := &FingerprintRequest{
		Config: &config.Config{},
		Node:   &structs.Node{Attributes: make(map[string]string)},
	}
	var resp FingerprintResponse
	require.NoError(t, fp.Fingerprint(req, &resp))
	require.True(t, resp.Detected)

	// None of the CPU attributes may be present for an unrecognised type.
	for _, key := range []string{
		"cpu.modelname",
		"cpu.frequency",
		"cpu.numcores",
		"cpu.totalcompute",
	} {
		require.NotContains(t, resp.Attributes, key)
	}

	require.Nil(t, resp.Resources)
	require.Nil(t, resp.NodeResources)
}
// Utility functions for tests
// startFakeEC2Metadata starts a local HTTP test server that imitates the EC2
// metadata service. A request whose RequestURI exactly equals the Uri of one
// of the given endpoints is answered with that endpoint's Content-Type header
// and its Body followed by a newline. Any other request falls through without
// the handler writing anything, which net/http turns into an empty 200
// response.
//
// It returns the base URL to configure on the fingerprinter (the server URL
// plus "/latest") and a cleanup function that shuts the server down; callers
// are expected to defer cleanup.
func startFakeEC2Metadata(t *testing.T, endpoints []endpoint) (endpoint string, cleanup func()) {
	t.Helper()

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		for _, e := range endpoints {
			if r.RequestURI != e.Uri {
				continue
			}
			w.Header().Set("Content-Type", e.ContentType)
			fmt.Fprintln(w, e.Body)
			// Stop at the first match so a duplicated Uri cannot emit
			// several bodies into a single response.
			return
		}
	}))

	return ts.URL + "/latest", ts.Close
}
// routes is a collection of stubbed metadata endpoints. The JSON tag allows it
// to be decoded from a document containing an "endpoints" array.
// NOTE(review): not referenced in the visible part of this file; presumably
// used by other tests in the package — verify before changing.
type routes struct {
// Endpoints holds the stubbed endpoints.
Endpoints []*endpoint `json:"endpoints"`
}
// endpoint describes one stubbed metadata response served by
// startFakeEC2Metadata.
type endpoint struct {
// Uri is compared for exact equality against the incoming request's
// RequestURI.
Uri string `json:"uri"`
// ContentType is sent as the Content-Type response header.
ContentType string `json:"content-type"`
// Body is written as the response body, followed by a newline.
Body string `json:"body"`
}
// awsStubs mimics normal EC2 instance metadata
var awsStubs = []endpoint{
{
Uri: "/latest/meta-data/ami-id",
ContentType: "text/plain",
Body: "ami-1234",
},
{
Uri: "/latest/meta-data/hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/placement/availability-zone",
ContentType: "text/plain",
Body: "us-west-2a",
},
{
Uri: "/latest/meta-data/instance-id",
ContentType: "text/plain",
Body: "i-b3ba3875",
},
{
Uri: "/latest/meta-data/instance-type",
ContentType: "text/plain",
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
Body: "t3a.2xlarge",
},
{
Uri: "/latest/meta-data/local-hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/local-ipv4",
ContentType: "text/plain",
Body: "10.0.0.207",
},
{
Uri: "/latest/meta-data/public-hostname",
ContentType: "text/plain",
Body: "ec2-54-191-117-175.us-west-2.compute.amazonaws.com",
},
{
Uri: "/latest/meta-data/public-ipv4",
ContentType: "text/plain",
Body: "54.191.117.175",
},
{
Uri: "/latest/meta-data/mac",
ContentType: "text/plain",
Body: "0a:20:d2:42:b3:55",
},
2015-09-22 21:56:04 +00:00
}
env_aws: use best-effort lookup table for CPU performance in EC2 Fixes #7681 The current behavior of the CPU fingerprinter in AWS is that it reads the **current** speed from `/proc/cpuinfo` (`CPU MHz` field). This is because the max CPU frequency is not available by reading anything on the EC2 instance itself. Normally on Linux one would look at e.g. `sys/devices/system/cpu/cpuN/cpufreq/cpuinfo_max_freq` or perhaps parse the values from the `CPU max MHz` field in `/proc/cpuinfo`, but those values are not available. Furthermore, no metadata about the CPU is made available in the EC2 metadata service. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html Since `go-psutil` cannot determine the max CPU speed it defaults to the current CPU speed, which could be basically any number between 0 and the true max. This is particularly bad on large, powerful reserved instances which often idle at ~800 MHz while Nomad does its fingerprinting (typically IO bound), which Nomad then uses as the max, which results in severe loss of available resources. Since the CPU specification is unavailable programmatically (at least not without sudo) use a best-effort lookup table. This table was generated by going through every instance type in AWS documentation and copy-pasting the numbers. https://aws.amazon.com/ec2/instance-types/ This approach obviously is not ideal as future instance types will need to be added as they are introduced to AWS. However, using the table should only be an improvement over the status quo since right now Nomad miscalculates available CPU resources on all instance types.
2020-04-28 04:04:49 +00:00
// unknownInstanceType mimics EC2 instance metadata that reports the instance
// type "xyz123.uber". TestCPUFingerprint_AWS_InstanceNotFound uses it and
// expects no cpu.* attributes and no resources for that type. Only the five
// endpoints below are stubbed; there are no hostname/IP entries beyond
// "hostname".
var unknownInstanceType = []endpoint{
{
Uri: "/latest/meta-data/ami-id",
ContentType: "text/plain",
Body: "ami-1234",
},
{
Uri: "/latest/meta-data/hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/placement/availability-zone",
ContentType: "text/plain",
Body: "us-west-2a",
},
{
Uri: "/latest/meta-data/instance-id",
ContentType: "text/plain",
Body: "i-b3ba3875",
},
{
// Presumably chosen because it is not a real EC2 instance type — the
// test using these stubs expects the CPU lookup to find nothing.
Uri: "/latest/meta-data/instance-type",
ContentType: "text/plain",
Body: "xyz123.uber",
},
}
// noNetworkAWSStubs mimics an EC2 instance whose metadata carries no local IP
// address: the local-ipv4 entry is stubbed with an empty Body. This may happen
// in environments with odd EC2 metadata emulation.
var noNetworkAWSStubs = []endpoint{
{
Uri: "/latest/meta-data/ami-id",
ContentType: "text/plain",
Body: "ami-1234",
},
{
Uri: "/latest/meta-data/hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/placement/availability-zone",
ContentType: "text/plain",
Body: "us-west-2a",
},
{
Uri: "/latest/meta-data/instance-id",
ContentType: "text/plain",
Body: "i-b3ba3875",
},
{
Uri: "/latest/meta-data/instance-type",
ContentType: "text/plain",
Body: "m3.2xlarge",
},
{
Uri: "/latest/meta-data/local-hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
// Deliberately empty: this is the "no network" condition under test.
Uri: "/latest/meta-data/local-ipv4",
ContentType: "text/plain",
Body: "",
},
{
Uri: "/latest/meta-data/public-hostname",
ContentType: "text/plain",
Body: "ec2-54-191-117-175.us-west-2.compute.amazonaws.com",
},
{
Uri: "/latest/meta-data/public-ipv4",
ContentType: "text/plain",
Body: "54.191.117.175",
},
}
// incompleteAWSImitationStubs mimics environments where some AWS endpoints
// return empty, namely Hetzner. Only hostname, instance-id, local-ipv4 (with
// an empty Body) and public-ipv4 are stubbed; in particular there is no
// ami-id entry. TestNetworkFingerprint_AWS_IncompleteImitation expects such
// an environment not to be detected as AWS.
var incompleteAWSImitationStubs = []endpoint{
{
Uri: "/latest/meta-data/hostname",
ContentType: "text/plain",
Body: "ip-10-0-0-207.us-west-2.compute.internal",
},
{
Uri: "/latest/meta-data/instance-id",
ContentType: "text/plain",
Body: "i-b3ba3875",
},
{
// Stubbed, but with an empty Body.
Uri: "/latest/meta-data/local-ipv4",
ContentType: "text/plain",
Body: "",
},
{
Uri: "/latest/meta-data/public-ipv4",
ContentType: "text/plain",
Body: "54.191.117.175",
},
}