diff --git a/.changelog/17938.txt b/.changelog/17938.txt new file mode 100644 index 000000000..ab6c87bd6 --- /dev/null +++ b/.changelog/17938.txt @@ -0,0 +1,3 @@ +```release-note:improvement +metrics: Add `allocs.memory.max_allocated` to report the value of tasks' `memory_max` resource +``` diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index d173913f2..3f64299ed 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -1482,9 +1482,11 @@ func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) { func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { alloc := tr.Alloc() var allocatedMem float32 + var allocatedMemMax float32 if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil { // Convert to bytes to match other memory metrics allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024 + allocatedMemMax = float32(taskRes.Memory.MemoryMaxMB) * 1024 * 1024 } ms := ru.ResourceUsage.MemoryStats @@ -1508,6 +1510,10 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"}, allocatedMem, tr.baseLabels) } + if allocatedMemMax > 0 { + metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_allocated"}, + allocatedMemMax, tr.baseLabels) + } } // TODO Remove Backwardscompat or use tr.Alloc()? diff --git a/website/content/docs/operations/metrics-reference.mdx b/website/content/docs/operations/metrics-reference.mdx index 369872855..35749c13e 100644 --- a/website/content/docs/operations/metrics-reference.mdx +++ b/website/content/docs/operations/metrics-reference.mdx @@ -205,6 +205,7 @@ task driver; not all task drivers can provide all metrics. 
| `nomad.client.allocs.memory.cache` | Amount of memory cached by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | | `nomad.client.allocs.memory.kernel_max_usage` | Maximum amount of memory ever used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | | `nomad.client.allocs.memory.kernel_usage` | Amount of memory used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | +| `nomad.client.allocs.memory.max_allocated` | Maximum amount of memory allocated to the task when memory oversubscription is enabled (the task's `memory_max`) | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | | `nomad.client.allocs.memory.max_usage` | Maximum amount of memory ever used by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | | `nomad.client.allocs.memory.rss` | Amount of RSS memory consumed by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group | | `nomad.client.allocs.memory.swap` | Amount of memory swapped by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |