9f05d62338
Use HCP Consul and HCP Vault for the Consul and Vault clusters used in E2E testing. This has the following benefits:

* Without the need to support mTLS bootstrapping for Consul and Vault, we can simplify the mTLS configuration by leaning on Terraform instead of janky bash shell scripting.
* Vault bootstrapping is no longer required, so we can eliminate even more janky shell scripting.
* Our E2E exercises HCP, which is important to us as an organization.
* With the reduction in configurability, we can simplify the Terraform configuration and drop the complicated `provision.sh`/`provision.ps1` scripts we were using previously. We can template Nomad configuration files and upload them with the `file` provisioner.
* Packer builds for Linux and Windows become much simpler.

tl;dr: way less janky shell scripting!
106 lines
2 KiB
HCL
106 lines
2 KiB
HCL
# Runs a single Prometheus server (Docker) that discovers scrape targets
# through Consul and pulls metrics from each target's /v1/metrics endpoint
# over HTTPS, authenticating with the TLS material found in /etc/nomad.d/tls.
job "prometheus" {
  datacenters = ["dc1", "dc2"]
  type        = "service"

  # The task bind-mounts a Unix-style host path and uses the docker driver,
  # so only place it on Linux clients.
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "monitoring" {
    count = 1

    # Allow at most 2 restarts per 30m window, 15s apart; once that budget is
    # spent, fail the allocation instead of restarting again.
    restart {
      attempts = 2
      interval = "30m"
      delay    = "15s"
      mode     = "fail"
    }

    ephemeral_disk {
      size = 300
    }

    network {
      # Dynamically allocated host port forwarded to port 9090 inside the
      # task's network namespace.
      port "prometheus_ui" {
        to = 9090
      }
    }

    task "prometheus" {
      # Renders the Prometheus configuration into the task's local/ directory.
      # change_mode = "noop": a re-render neither restarts nor signals the task.
      template {
        change_mode = "noop"
        destination = "local/prometheus.yml"

        # The heredoc body is YAML and is written out verbatim, so its
        # indentation is significant and must start at column 0.
        data = <<EOH
---
global:
  scrape_interval: 5s
  evaluation_interval: 5s

scrape_configs:

  - job_name: 'nomad_metrics'

    consul_sd_configs:
    - server: '{{ env "NOMAD_IP_prometheus_ui" }}:8500'

    relabel_configs:
    - source_labels: ['__meta_consul_tags']
      regex: '(.*)http(.*)'
      action: keep

    scheme: https
    tls_config:
      ca_file: '/etc/nomad.d/tls/ca.crt'
      cert_file: '/etc/nomad.d/tls/agent.crt'
      key_file: '/etc/nomad.d/tls/agent.key'

    scrape_interval: 5s
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
EOH

      }

      driver = "docker"

      config {
        # NOTE(review): `latest` is an unpinned tag; consider pinning a
        # specific version if reproducible test runs matter.
        image = "prom/prometheus:latest"

        # Expose the rendered template at the path Prometheus reads by default.
        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml",
        ]

        # TODO: https://github.com/hashicorp/nomad/issues/11484
        # This is very much not how we should do this, because it
        # exposes the client's mTLS cert to the task and lets
        # Prometheus masquerade as the client.
        mount {
          type     = "bind"
          target   = "/etc/nomad.d/tls"
          source   = "/etc/nomad.d/tls"
          readonly = true
        }

        ports = ["prometheus_ui"]
      }

      service {
        name = "prometheus"
        # NOTE(review): "urlprefix-/" looks like a Fabio routing tag;
        # confirm a router in the cluster actually consumes it.
        tags = ["urlprefix-/"]
        port = "prometheus_ui"

        # HTTP health check against Prometheus' /-/healthy endpoint.
        check {
          name     = "prometheus_ui port alive"
          type     = "http"
          path     = "/-/healthy"
          interval = "10s"
          timeout  = "2s"
        }
      }
    }
  }
}
|