Merge pull request #5405 from hashicorp/e2e_metrics
Prometheus metrics for the e2e environment
commit fac6d8c918
@@ -0,0 +1,28 @@
job "fabio" {
  datacenters = ["dc1", "dc2"]
  type        = "system"

  group "fabio" {
    task "fabio" {
      driver = "docker"
      config {
        image        = "fabiolb/fabio"
        network_mode = "host"
      }

      resources {
        cpu    = 100
        memory = 64
        network {
          mbits = 20
          port "lb" {
            static = 9999
          }
          port "ui" {
            static = 9998
          }
        }
      }
    }
  }
}

@@ -0,0 +1,50 @@
job "hello" {
  datacenters = ["dc1"]

  update {
    max_parallel     = 1
    min_healthy_time = "15s"
    auto_revert      = true
  }

  group "hello" {

    count = 3

    task "hello" {
      driver = "raw_exec"

      config {
        command = "local/hello"
      }

      artifact {
        source      = "https://s3.amazonaws.com/nomad-community-demo/hellov1"
        destination = "local/hello"
        mode        = "file"
      }

      resources {
        cpu    = 500
        memory = 256
        network {
          mbits = 10
          port "web" {}
        }
      }

      service {
        name = "hello"
        tags = ["urlprefix-/"]
        port = "web"
        check {
          name     = "alive"
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

@@ -0,0 +1,57 @@
job "redis" {
  datacenters = ["dc1", "dc2"]

  group "cache" {
    count = 4
    update {
      max_parallel      = 1
      min_healthy_time  = "5s"
      healthy_deadline  = "30s"
      progress_deadline = "1m"
    }
    restart {
      mode     = "fail"
      attempts = 0
    }
    reschedule {
      attempts  = 3
      interval  = "10m"
      unlimited = false
    }
    spread {
      attribute = "${node.datacenter}"
      weight    = 100
    }
    task "redis" {
      driver = "docker"

      config {
        image = "redis:4.0"
        port_map {
          db = 6379
        }
      }

      resources {
        cpu    = 500
        memory = 256
        network {
          mbits = 10
          port "db" {}
        }
      }

      service {
        name = "redis-cache"
        tags = ["global", "cache"]
        port = "db"
        check {
          name     = "alive"
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

@@ -0,0 +1,69 @@
package metrics

import (
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/stretchr/testify/require"
)

var metrics = flag.Bool("metrics", false, "run metrics tests")

// WaitForCluster blocks until the cluster has elected a leader and has at
// least one client node in the ready state.
func WaitForCluster(t *testing.T, nomadClient *api.Client) {
	// Ensure the cluster has a leader before running tests
	e2eutil.WaitForLeader(t, nomadClient)
	// Ensure that at least one client node is in the ready state
	e2eutil.WaitForNodesReady(t, nomadClient, 1)
}

// TestMetrics runs fabio and prometheus and waits for their allocations to
// succeed. After that, every job file in the input directory is registered
// and its allocations are awaited. Unlike other e2e tests, this test does
// not clean up after itself.
func TestMetrics(t *testing.T) {
	if !*metrics {
		t.Skip("skipping test in non-integration mode.")
	}
	require := require.New(t)

	// Build a Nomad API client
	nomadClient, err := api.NewClient(api.DefaultConfig())
	require.Nil(err)
	WaitForCluster(t, nomadClient)

	// Run fabio
	fabioAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "../fabio/fabio.nomad", "fabio")
	require.NotEmpty(fabioAllocs)

	// Run prometheus
	prometheusAllocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "../prometheus/prometheus.nomad", "prometheus")
	require.NotEmpty(prometheusAllocs)

	// List all job files in the input directory, register each one, and wait
	// for its allocations.
	var jobFiles []string

	root := "input/"
	err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if strings.HasSuffix(path, ".nomad") {
			jobFiles = append(jobFiles, path)
		}
		return nil
	})
	require.Nil(err)

	for _, file := range jobFiles {
		uuid := uuid.Generate()
		jobId := "metrics" + uuid[0:8]
		fmt.Println("Registering ", file)
		allocs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, file, jobId)
		require.NotEmpty(allocs)
	}

	clientAddr := nomadClient.Address()
	clientIP := clientAddr[0:strings.LastIndex(clientAddr, ":")]
	fmt.Printf("Prometheus Metrics available at %s:9999\n", clientIP)
}

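Since TestMetrics deliberately leaves everything running and only prints where Prometheus can be reached, a manual follow-up query can confirm that scraping actually happened. The sketch below is not part of this PR: the address mirrors the test's final print statement, it assumes fabio's urlprefix-/ route resolves to the prometheus service, and the metric name is an illustrative assumption.

// query_prometheus.go - hypothetical follow-up check, not part of this PR.
// Asks Prometheus (reached through fabio on the :9999 address printed by
// TestMetrics) whether a Nomad metric has been scraped. The address and the
// metric name "nomad_client_uptime" are illustrative assumptions.
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	base := "http://127.0.0.1:9999" // address printed by TestMetrics
	q := url.Values{"query": {"nomad_client_uptime"}}

	resp, err := http.Get(base + "/api/v1/query?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}
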
@@ -0,0 +1,76 @@
job "prometheus" {
  datacenters = ["dc1", "dc2"]
  type        = "service"

  group "monitoring" {
    count = 1
    restart {
      attempts = 2
      interval = "30m"
      delay    = "15s"
      mode     = "fail"
    }
    ephemeral_disk {
      size = 300
    }

    task "prometheus" {
      template {
        change_mode = "noop"
        destination = "local/prometheus.yml"
        data = <<EOH
---
global:
  scrape_interval:     5s
  evaluation_interval: 5s

scrape_configs:

  - job_name: 'nomad_metrics'

    consul_sd_configs:
    - server: '{{ env "NOMAD_IP_prometheus_ui" }}:8500'
      services: ['nomad-client', 'nomad']

    relabel_configs:
    - source_labels: ['__meta_consul_tags']
      regex: '(.*)http(.*)'
      action: keep

    scrape_interval: 5s
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
EOH
      }
      driver = "docker"
      config {
        image = "prom/prometheus:latest"
        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml"
        ]
        port_map {
          prometheus_ui = 9090
        }
      }
      resources {
        network {
          mbits = 10
          port "prometheus_ui" {}
        }
      }
      service {
        name = "prometheus"
        tags = ["urlprefix-/"]
        port = "prometheus_ui"
        check {
          name     = "prometheus_ui port alive"
          type     = "http"
          path     = "/-/healthy"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}

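The scrape configuration above expects every Nomad agent to serve Prometheus-formatted metrics at /v1/metrics, which is what the telemetry stanzas added below enable. As a quick sanity check against a single agent, a throwaway program along these lines could fetch that endpoint directly. This is not part of this PR; the local agent address (Nomad's default HTTP port) is an assumption.

// check_metrics.go - hypothetical sanity check, not part of this PR.
// Fetches the Prometheus-formatted metrics endpoint that the scrape config
// above points Prometheus at. The agent address is an assumption.
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://127.0.0.1:4646/v1/metrics?format=prometheus")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	// Print the status and the first few hundred bytes of the exposition output.
	fmt.Println(resp.Status)
	if len(body) > 400 {
		body = body[:400]
	}
	fmt.Println(string(body))
}
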
@@ -17,4 +17,12 @@ consul {
vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -21,3 +21,11 @@ vault {
  enabled = true
  address = "http://active.vault.service.consul:8200"
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,10 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}

@@ -19,3 +19,11 @@ vault {
  token = ""
}

telemetry {
  collection_interval = "1s"
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
}