e693d15a5b
Previously, Nomad was using a hand-made lookup table for looking up EC2 CPU performance characteristics (core count + speed = ticks). This data was incomplete and incorrect depending on region. The AWS API has the correct data but requires API keys to use (i.e. should not be queried directly from Nomad). This change introduces a lookup table generated by a small command line tool in Nomad's tools module which uses the Amazon AWS API. Running the tool requires AWS_* environment variables set. $ # in nomad/tools/cpuinfo $ go run . Going forward, Nomad can incorporate regeneration of the lookup table somewhere in the CI pipeline so that we remain up-to-date on the latest offerings from EC2. Fixes #7830
288 lines
8.3 KiB
Go
288 lines
8.3 KiB
Go
package fingerprint
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
"github.com/aws/aws-sdk-go/aws/awserr"
|
|
"github.com/aws/aws-sdk-go/aws/ec2metadata"
|
|
"github.com/aws/aws-sdk-go/aws/session"
|
|
log "github.com/hashicorp/go-hclog"
|
|
|
|
cleanhttp "github.com/hashicorp/go-cleanhttp"
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
)
|
|
|
|
// AwsMetadataTimeout is the default upper bound applied to the HTTP client
// used for requests to the AWS metadata services.
const AwsMetadataTimeout = 2 * time.Second
|
|
|
|
// ec2NetSpeedTable maps EC2 instance-type patterns to an approximate network
// speed, in Mbits/s.
//
// Estimates from http://stackoverflow.com/a/35806587
// This data is meant for a loose approximation.
//
// Every pattern is anchored and has its dots escaped so that it only matches
// a complete instance type name (previously "t2.micro" also matched strings
// such as "t2xmicro", and the size-class patterns matched any string that
// merely contained the suffix).
//
// The exact entries for c4.8xlarge and x1.16xlarge overlap with the generic
// size-class entries below them. Because Go map iteration order is
// unspecified, code reading this table must prefer the exact entry over the
// wildcard when both match.
var ec2NetSpeedTable = map[*regexp.Regexp]int{
	// exact instance types
	regexp.MustCompile(`^t2\.nano$`):     30,
	regexp.MustCompile(`^t2\.micro$`):    70,
	regexp.MustCompile(`^t2\.small$`):    125,
	regexp.MustCompile(`^t2\.medium$`):   300,
	regexp.MustCompile(`^m3\.medium$`):   400,
	regexp.MustCompile(`^c4\.8xlarge$`):  4000,
	regexp.MustCompile(`^x1\.16xlarge$`): 5000,

	// generic size classes, for any instance family
	regexp.MustCompile(`^.*\.large$`):    500,
	regexp.MustCompile(`^.*\.xlarge$`):   750,
	regexp.MustCompile(`^.*\.2xlarge$`):  1000,
	regexp.MustCompile(`^.*\.4xlarge$`):  2000,
	regexp.MustCompile(`^.*\.8xlarge$`):  10000,
	regexp.MustCompile(`^.*\.10xlarge$`): 10000,
	regexp.MustCompile(`^.*\.16xlarge$`): 10000,
	regexp.MustCompile(`^.*\.32xlarge$`): 10000,
}
|
|
|
|
// EnvAWSFingerprint is used to fingerprint AWS metadata. It queries the EC2
// metadata service and records what it finds on the fingerprint response as
// node attributes, network/CPU resources and an "aws.ec2" link.
type EnvAWSFingerprint struct {
	// StaticFingerprinter is embedded from elsewhere in this package.
	// NOTE(review): presumably marks this fingerprinter as non-periodic —
	// confirm against its definition.
	StaticFingerprinter

	// endpoint for EC2 metadata as expected by AWS SDK. It is derived from the
	// AWS_ENV_URL environment variable with a trailing "/meta-data/" removed;
	// when empty, no endpoint override is passed to the SDK.
	endpoint string

	// logger is the fingerprinter's logger, named "env_aws".
	logger log.Logger
}
|
|
|
|
// NewEnvAWSFingerprint is used to create a fingerprint from AWS metadata
|
|
func NewEnvAWSFingerprint(logger log.Logger) Fingerprint {
|
|
f := &EnvAWSFingerprint{
|
|
logger: logger.Named("env_aws"),
|
|
endpoint: strings.TrimSuffix(os.Getenv("AWS_ENV_URL"), "/meta-data/"),
|
|
}
|
|
return f
|
|
}
|
|
|
|
// Fingerprint queries the EC2 metadata service and, when the node appears to
// be running on AWS, populates response with:
//
//   - "platform.aws.*" attributes (namespaced as unique where they identify
//     the node),
//   - "unique.network.ip-address" and a host-mode network resource built from
//     the local IPv4 address,
//   - "unique.platform.aws.public-ipv6" from the first IPv6 address of the
//     primary MAC,
//   - CPU attributes and resources looked up from the instance type,
//   - an "aws.ec2" link of the form "<availability-zone>.<instance-id>".
//
// It returns nil without marking the response as detected when the metadata
// service cannot be identified as AWS. A non-nil error is returned when the
// metadata client cannot be constructed or when a metadata read fails with an
// AWS error that is neither a request failure nor a URL error.
func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error {
	cfg := request.Config

	timeout := AwsMetadataTimeout

	// Check if we should tighten the timeout
	if cfg.ReadBoolDefault(TightenNetworkTimeoutsConfig, false) {
		timeout = 1 * time.Millisecond
	}

	ec2meta, err := ec2MetaClient(f.endpoint, timeout)
	if err != nil {
		return fmt.Errorf("failed to setup ec2Metadata client: %v", err)
	}

	// Not on AWS (or the metadata service is unreachable): nothing to add,
	// and this is not treated as an error.
	if !isAWS(ec2meta) {
		return nil
	}

	// Keys and whether they should be namespaced as unique. Any key whose value
	// uniquely identifies a node, such as ip, should be marked as unique. When
	// marked as unique, the key isn't included in the computed node class.
	keys := map[string]bool{
		"ami-id":                      false,
		"hostname":                    true,
		"instance-id":                 true,
		"instance-type":               false,
		"local-hostname":              true,
		"local-ipv4":                  true,
		"public-hostname":             true,
		"public-ipv4":                 true,
		"mac":                         true,
		"placement/availability-zone": false,
	}

	for k, unique := range keys {
		resp, err := ec2meta.GetMetadata(k)
		v := strings.TrimSpace(resp)
		// NOTE(review): the empty-value check runs before the error checks, so
		// a read that yields an empty string is logged as empty and skipped
		// regardless of err; the error branches below are only reached when a
		// non-empty value comes back together with an error. Presumably a
		// failed GetMetadata returns "" — TODO confirm whether that ordering
		// is intended.
		if v == "" {
			f.logger.Debug("read an empty value", "attribute", k)
			continue
		} else if awsErr, ok := err.(awserr.RequestFailure); ok {
			f.logger.Debug("could not read attribute value", "attribute", k, "error", awsErr)
			continue
		} else if awsErr, ok := err.(awserr.Error); ok {
			// if it's a URL error, assume we're not in an AWS environment
			// TODO: better way to detect AWS? Check xen virtualization?
			if _, ok := awsErr.OrigErr().(*url.Error); ok {
				return nil
			}

			// not sure what other errors it would return
			return err
		}

		// Only non-empty values reach this point. The metadata path is turned
		// into an attribute name by prefixing "platform.aws." and replacing
		// "/" with ".".
		key := "platform.aws." + strings.Replace(k, "/", ".", -1)
		if unique {
			key = structs.UniqueNamespace(key)
		}

		response.AddAttribute(key, v)
	}

	// accumulate resource information, then assign to response
	// Both stay nil unless the network or CPU sections below fill them in.
	var resources *structs.Resources
	var nodeResources *structs.NodeResources

	// copy over network specific information
	if val, ok := response.Attributes["unique.platform.aws.local-ipv4"]; ok && val != "" {
		response.AddAttribute("unique.network.ip-address", val)
		nodeResources = new(structs.NodeResources)
		// A single host-mode network on "eth0" is reported, with a /32 CIDR
		// for the local address and a bandwidth chosen by f.throughput.
		nodeResources.Networks = []*structs.NetworkResource{
			{
				Mode:   "host",
				Device: "eth0",
				IP:     val,
				CIDR:   val + "/32",
				MBits:  f.throughput(request, ec2meta, val),
			},
		}
	}

	// copy over IPv6 network specific information
	if val, ok := response.Attributes["unique.platform.aws.mac"]; ok && val != "" {
		k := "network/interfaces/macs/" + val + "/ipv6s"
		addrsStr, err := ec2meta.GetMetadata(k)
		addrsStr = strings.TrimSpace(addrsStr)
		// Same check order as the attribute loop above: an empty value is
		// logged and ignored before err is inspected.
		if addrsStr == "" {
			f.logger.Debug("read an empty value", "attribute", k)
		} else if awsErr, ok := err.(awserr.RequestFailure); ok {
			f.logger.Debug("could not read attribute value", "attribute", k, "error", awsErr)
		} else if awsErr, ok := err.(awserr.Error); ok {
			// if it's a URL error, assume we're not in an AWS environment
			// TODO: better way to detect AWS? Check xen virtualization?
			if _, ok := awsErr.OrigErr().(*url.Error); ok {
				return nil
			}

			// not sure what other errors it would return
			return err
		} else {
			// The value is newline separated; only the first address is
			// recorded.
			addrs := strings.SplitN(addrsStr, "\n", 2)
			response.AddAttribute("unique.platform.aws.public-ipv6", addrs[0])
		}
	}

	// copy over CPU speed information
	if specs := f.lookupCPU(ec2meta); specs != nil {
		response.AddAttribute("cpu.frequency", fmt.Sprintf("%d", specs.MHz))
		response.AddAttribute("cpu.numcores", fmt.Sprintf("%d", specs.Cores))
		f.logger.Debug("lookup ec2 cpu", "cores", specs.Cores, "ghz", log.Fmt("%.1f", specs.GHz()))

		// The total compute attribute and CPU resources are only set when the
		// client config does not carry a positive CpuCompute value.
		// NOTE(review): presumably a positive CpuCompute is an operator
		// override that must win over the lookup — confirm.
		if ticks := specs.Ticks(); request.Config.CpuCompute <= 0 {
			response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks))
			f.logger.Debug("setting ec2 cpu", "ticks", ticks)
			resources = new(structs.Resources)
			resources.CPU = ticks
			// nodeResources may already exist from the network section.
			if nodeResources == nil {
				nodeResources = new(structs.NodeResources)
			}
			nodeResources.Cpu = structs.NodeCpuResources{CpuShares: int64(ticks)}
		}
	} else {
		f.logger.Warn("failed to find the cpu specification for this instance type")
	}

	response.Resources = resources
	response.NodeResources = nodeResources

	// populate Links
	// Either component is the empty string if the corresponding attribute was
	// not recorded above.
	response.AddLink("aws.ec2", fmt.Sprintf("%s.%s",
		response.Attributes["platform.aws.placement.availability-zone"],
		response.Attributes["unique.platform.aws.instance-id"]))
	response.Detected = true

	return nil
}
|
|
|
|
func (f *EnvAWSFingerprint) instanceType(ec2meta *ec2metadata.EC2Metadata) (string, error) {
|
|
response, err := ec2meta.GetMetadata("instance-type")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return strings.TrimSpace(response), nil
|
|
}
|
|
|
|
func (f *EnvAWSFingerprint) lookupCPU(ec2meta *ec2metadata.EC2Metadata) *CPU {
|
|
instanceType, err := f.instanceType(ec2meta)
|
|
if err != nil {
|
|
f.logger.Warn("failed to read EC2 metadata instance-type", "error", err)
|
|
return nil
|
|
}
|
|
return LookupEC2CPU(instanceType)
|
|
}
|
|
|
|
func (f *EnvAWSFingerprint) throughput(request *FingerprintRequest, ec2meta *ec2metadata.EC2Metadata, ip string) int {
|
|
throughput := request.Config.NetworkSpeed
|
|
if throughput != 0 {
|
|
return throughput
|
|
}
|
|
|
|
throughput = f.linkSpeed(ec2meta)
|
|
if throughput != 0 {
|
|
return throughput
|
|
}
|
|
|
|
if request.Node.Resources != nil && len(request.Node.Resources.Networks) > 0 {
|
|
for _, n := range request.Node.Resources.Networks {
|
|
if n.IP == ip {
|
|
return n.MBits
|
|
}
|
|
}
|
|
}
|
|
|
|
return defaultNetworkSpeed
|
|
}
|
|
|
|
// EnvAWSFingerprint uses lookup table to approximate network speeds
|
|
func (f *EnvAWSFingerprint) linkSpeed(ec2meta *ec2metadata.EC2Metadata) int {
|
|
instanceType, err := f.instanceType(ec2meta)
|
|
if err != nil {
|
|
f.logger.Error("error reading instance-type", "error", err)
|
|
return 0
|
|
}
|
|
|
|
netSpeed := 0
|
|
for reg, speed := range ec2NetSpeedTable {
|
|
if reg.MatchString(instanceType) {
|
|
netSpeed = speed
|
|
break
|
|
}
|
|
}
|
|
|
|
return netSpeed
|
|
}
|
|
|
|
func ec2MetaClient(endpoint string, timeout time.Duration) (*ec2metadata.EC2Metadata, error) {
|
|
client := &http.Client{
|
|
Timeout: timeout,
|
|
Transport: cleanhttp.DefaultTransport(),
|
|
}
|
|
|
|
c := aws.NewConfig().WithHTTPClient(client).WithMaxRetries(0)
|
|
if endpoint != "" {
|
|
c = c.WithEndpoint(endpoint)
|
|
}
|
|
|
|
sess, err := session.NewSession(c)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ec2metadata.New(sess, c), nil
|
|
}
|
|
|
|
func isAWS(ec2meta *ec2metadata.EC2Metadata) bool {
|
|
v, err := ec2meta.GetMetadata("ami-id")
|
|
v = strings.TrimSpace(v)
|
|
return err == nil && v != ""
|
|
}
|