4c4895e19c
The `Metrics` suite uses prometheus to scrape Nomad metrics so that we're testing the full user experience of extracting metrics from Nomad. With the addition of mTLS, we need to make sure prometheus also has mTLS configuration because the metrics endpoint is protected. Update the Nomad client configuration and prometheus job to bind-mount the client's certs into the task so that the job can use these certs to scrape the server. This is a temporary solution that gets the job passing; we should give the job its own certificates (issued by Vault?) when we've done some of the infrastructure rework we'd like.
107 lines
2.1 KiB
HCL
107 lines
2.1 KiB
HCL
# Nomad job that runs a single Prometheus instance which discovers Nomad
# agents through Consul and scrapes their /v1/metrics endpoint over mTLS.
job "prometheus" {
  datacenters = ["dc1", "dc2"]
  type        = "service"

  # Only place this job on Linux clients.
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "monitoring" {
    count = 1

    # Allow 2 restarts within a 30m window, waiting 15s between attempts;
    # once exhausted, mark the allocation as failed instead of retrying.
    restart {
      attempts = 2
      interval = "30m"
      delay    = "15s"
      mode     = "fail"
    }

    ephemeral_disk {
      size = 300
    }

    network {
      # Dynamically allocated host port mapped to port 9090 in the task.
      port "prometheus_ui" {
        to = 9090
      }
    }

    task "prometheus" {
      # Renders the Prometheus configuration into the task's local/
      # directory; it is mounted into the container by `volumes` below.
      template {
        # Take no action on the task when the rendered template changes.
        change_mode = "noop"
        destination = "local/prometheus.yml"

        # The heredoc body is YAML: its indentation is significant and
        # must be preserved exactly.
        data = <<EOH
---
global:
  scrape_interval: 5s
  evaluation_interval: 5s

scrape_configs:

  - job_name: 'nomad_metrics'

    consul_sd_configs:
      - server: '{{ env "NOMAD_IP_prometheus_ui" }}:8500'
        services: ['nomad-client', 'nomad']

    relabel_configs:
      - source_labels: ['__meta_consul_tags']
        regex: '(.*)http(.*)'
        action: keep

    scheme: https
    tls_config:
      ca_file: '/etc/nomad.d/tls/ca.crt'
      cert_file: '/etc/nomad.d/tls/agent.crt'
      key_file: '/etc/nomad.d/tls/agent.key'

    scrape_interval: 5s
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
EOH
      }

      driver = "docker"

      config {
        # NOTE(review): `latest` is a floating tag; consider pinning a
        # specific version if reproducible test runs matter.
        image = "prom/prometheus:latest"

        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml",
        ]

        # TODO: https://github.com/hashicorp/nomad/issues/11484
        # This is very much not how we should do this, because it
        # exposes the client's mTLS cert to the task and lets the
        # prometheus masquerade as the client.
        mount {
          type     = "bind"
          target   = "/etc/nomad.d/tls"
          source   = "/etc/nomad.d/tls"
          readonly = true
        }

        ports = ["prometheus_ui"]
      }

      service {
        name = "prometheus"
        tags = ["urlprefix-/"]
        port = "prometheus_ui"

        # HTTP health check against /-/healthy on the prometheus_ui port.
        check {
          name     = "prometheus_ui port alive"
          type     = "http"
          path     = "/-/healthy"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}