Backport of metrics: report task memory_max value into release/1.6.x (#18004)

This pull request was automerged via backport-assistant
This commit is contained in:
hc-github-team-nomad-core 2023-07-19 15:50:34 -05:00 committed by GitHub
parent b7689e87ec
commit b1bfb59394
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 0 deletions

3
.changelog/17938.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
metrics: Add `allocs.memory.max_allocated` to report the value of each task's `memory_max` resource
```

View File

@ -1482,9 +1482,11 @@ func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) {
func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
alloc := tr.Alloc()
var allocatedMem float32
var allocatedMemMax float32
if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
// Convert to bytes to match other memory metrics
allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
allocatedMemMax = float32(taskRes.Memory.MemoryMaxMB) * 1024 * 1024
}
ms := ru.ResourceUsage.MemoryStats
@ -1508,6 +1510,10 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
allocatedMem, tr.baseLabels)
}
if allocatedMemMax > 0 {
metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "max_allocated"},
allocatedMemMax, tr.baseLabels)
}
}
// TODO Remove Backwardscompat or use tr.Alloc()?

View File

@ -205,6 +205,7 @@ task driver; not all task drivers can provide all metrics.
| `nomad.client.allocs.memory.cache` | Amount of memory cached by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.kernel_max_usage` | Maximum amount of memory ever used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.kernel_usage` | Amount of memory used by the kernel for this task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.max_allocated` | Maximum amount of memory allocated to the task when memory oversubscription is used (the task's `memory_max` value) | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.max_usage` | Maximum amount of memory ever used by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.rss` | Amount of RSS memory consumed by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |
| `nomad.client.allocs.memory.swap` | Amount of memory swapped by the task | Bytes | Gauge | alloc_id, host, job, namespace, task, task_group |