Merge pull request #5405 from hashicorp/e2e_metrics

Prometheus metrics for the e2e environment
Preetha 2019-03-21 09:30:12 -05:00 committed by GitHub
commit fac6d8c918
14 changed files with 350 additions and 1 deletion

e2e/fabio/fabio.nomad

@@ -0,0 +1,28 @@
job "fabio" {
datacenters = ["dc1", "dc2"]
type = "system"
group "fabio" {
task "fabio" {
driver = "docker"
config {
image = "fabiolb/fabio"
network_mode = "host"
}
resources {
cpu = 100
memory = 64
network {
mbits = 20
port "lb" {
static = 9999
}
port "ui" {
static = 9998
}
}
}
}
}
}

@@ -0,0 +1,50 @@
job "hello" {
datacenters = ["dc1"]
update {
max_parallel = 1
min_healthy_time = "15s"
auto_revert = true
}
group "hello" {
count = 3
task "hello" {
driver = "raw_exec"
config {
command = "local/hello"
}
artifact {
source = "https://s3.amazonaws.com/nomad-community-demo/hellov1"
destination = "local/hello"
mode = "file"
}
resources {
cpu = 500
memory = 256
network {
mbits = 10
port "web" {}
}
}
service {
name = "hello"
tags = ["urlprefix-/"]
port = "web"
check {
name = "alive"
type = "http"
path = "/"
interval = "10s"
timeout = "2s"
}
}
}
}
}

@@ -0,0 +1,57 @@
job "redis" {
datacenters = ["dc1", "dc2"]
group "cache" {
count = 4
update {
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "30s"
progress_deadline = "1m"
}
restart {
mode = "fail"
attempts = 0
}
reschedule {
attempts = 3
interval = "10m"
unlimited = false
}
spread {
attribute = "${node.datacenter}"
weight = 100
}
task "redis" {
driver = "docker"
config {
image = "redis:4.0"
port_map {
db = 6379
}
}
resources {
cpu = 500
memory = 256
network {
mbits = 10
port "db" {}
}
}
service {
name = "redis-cache"
tags = ["global", "cache"]
port = "db"
check {
name = "alive"
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
}
}
}

@@ -0,0 +1,69 @@
package metrics

import (
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/stretchr/testify/require"
)

var metrics = flag.Bool("metrics", false, "run metrics tests")

func WaitForCluster(t *testing.T, nomadClient *api.Client) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(t, nomadClient)
	// Ensure that at least one client node is in ready state
	e2eutil.WaitForNodesReady(t, nomadClient, 1)
}

// TestMetrics runs fabio/prometheus and waits for those to succeed.
// After that, every job in the input directory is registered and its
// allocations are waited on. Unlike other e2e tests, this test does not
// clean up after itself.
func TestMetrics(t *testing.T) {
	if !*metrics {
		t.Skip("skipping test in non-integration mode.")
	}
	require := require.New(t)

	// Build Nomad api client
	nomadClient, err := api.NewClient(api.DefaultConfig())
	require.Nil(err)

	WaitForCluster(t, nomadClient)

	// Run fabio
	fabioAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "../fabio/fabio.nomad", "fabio")
	require.NotEmpty(fabioAllocs)

	// Run prometheus
	prometheusAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "../prometheus/prometheus.nomad", "prometheus")
	require.NotEmpty(prometheusAllocs)

	// List all job files in the input directory, run them, and wait for allocations
	var jobFiles []string
	root := "input/"
	err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if strings.HasSuffix(path, ".nomad") {
			jobFiles = append(jobFiles, path)
		}
		return nil
	})
	require.Nil(err)

	for _, file := range jobFiles {
		uuid := uuid.Generate()
		jobId := "metrics" + uuid[0:8]
		fmt.Println("Registering ", file)
		allocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, file, jobId)
		require.NotEmpty(allocs)
	}

	// Print where Prometheus can be reached (fabio's static lb port)
	clientAddr := nomadClient.Address()
	clientIP := clientAddr[0:strings.LastIndex(clientAddr, ":")]
	fmt.Printf("Prometheus Metrics available at %s:9999\n", clientIP)
}
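The test ends by printing where the metrics can be reached; it does not assert anything about them. Below is a minimal sketch of a follow-up check that is not part of this commit: checkPrometheusTargets is a hypothetical helper that assumes fabio's static lb port 9999 and Prometheus's standard /api/v1/targets HTTP API, and takes the same scheme-prefixed clientIP string that TestMetrics derives from nomadClient.Address().

// Hypothetical helper, not in this commit; it needs an extra "net/http" import.
// It asserts that fabio (lb port 9999, urlprefix-/ tag) routes to Prometheus
// by calling Prometheus's standard targets API and expecting a 200.
func checkPrometheusTargets(t *testing.T, clientIP string) {
	require := require.New(t)

	// clientIP is scheme-prefixed, e.g. "http://10.0.1.5", as derived in TestMetrics.
	url := fmt.Sprintf("%s:9999/api/v1/targets", clientIP)

	resp, err := http.Get(url)
	require.Nil(err)
	defer resp.Body.Close()

	// A 200 means fabio routed the request and Prometheus answered.
	require.Equal(http.StatusOK, resp.StatusCode)
}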

@@ -0,0 +1,76 @@
job "prometheus" {
datacenters = ["dc1", "dc2"]
type = "service"
group "monitoring" {
count = 1
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
ephemeral_disk {
size = 300
}
task "prometheus" {
template {
change_mode = "noop"
destination = "local/prometheus.yml"
data = <<EOH
---
global:
scrape_interval: 5s
evaluation_interval: 5s
scrape_configs:
- job_name: 'nomad_metrics'
consul_sd_configs:
- server: '{{ env "NOMAD_IP_prometheus_ui" }}:8500'
services: ['nomad-client', 'nomad']
relabel_configs:
- source_labels: ['__meta_consul_tags']
regex: '(.*)http(.*)'
action: keep
scrape_interval: 5s
metrics_path: /v1/metrics
params:
format: ['prometheus']
EOH
}
driver = "docker"
config {
image = "prom/prometheus:latest"
volumes = [
"local/prometheus.yml:/etc/prometheus/prometheus.yml"
]
port_map {
prometheus_ui = 9090
}
}
resources {
network {
mbits = 10
port "prometheus_ui" {}
}
}
service {
name = "prometheus"
tags = ["urlprefix-/"]
port = "prometheus_ui"
check {
name = "prometheus_ui port alive"
type = "http"
path = "/-/healthy"
interval = "10s"
timeout = "2s"
}
}
}
}
}
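The consul_sd_configs block above means Prometheus needs no static list of Nomad agents: it asks Consul for every instance of the nomad and nomad-client services, keeps the http-tagged ones, and scrapes each at /v1/metrics. A minimal sketch of inspecting that same discovery data by hand, not part of this commit, assuming a Consul agent on the default port 8500:

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Consul's catalog API lists every registered instance of a service; these
	// are the targets Prometheus discovers via consul_sd_configs.
	resp, err := http.Get("http://127.0.0.1:8500/v1/catalog/service/nomad-client")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// JSON array of node names and addresses for the nomad-client service.
	fmt.Println(string(body))
}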

@@ -17,4 +17,12 @@ consul {
vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}
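For context on what this stanza turns on (not part of the diff): with prometheus_metrics = true, each Nomad agent serves Prometheus exposition-format metrics from its HTTP API at /v1/metrics?format=prometheus, which is exactly the metrics_path and format param the prometheus.yml template above scrapes. A minimal sketch of checking an agent directly, assuming Nomad's default HTTP port 4646 on localhost:

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Assumes a local Nomad agent running with the telemetry stanza above;
	// format=prometheus selects the Prometheus text exposition format.
	resp, err := http.Get("http://127.0.0.1:4646/v1/metrics?format=prometheus")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Prints the raw payload that Prometheus will scrape from this agent.
	fmt.Println(string(body))
}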

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,11 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}