cli: Add a nomad operator client state command (#15469)

Signed-off-by: dttung2905 <ttdao.2015@accountancy.smu.edu.sg>
This commit is contained in:
Dao Thanh Tung 2023-01-11 23:03:31 +08:00 committed by GitHub
parent 76bed82192
commit 09b25d71b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 850 additions and 0 deletions

3
.changelog/15469.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
cli: add a nomad operator client state command
```

View File

@ -614,6 +614,12 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"operator client-state": func() (cli.Command, error) {
return &OperatorClientStateCommand{
Meta: meta,
}, nil
},
"operator debug": func() (cli.Command, error) {
return &OperatorDebugCommand{
Meta: meta,

View File

@ -0,0 +1,129 @@
package command
import (
"encoding/json"
"fmt"
"strings"
"github.com/hashicorp/go-hclog"
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/state"
"github.com/posener/complete"
)
// OperatorClientStateCommand implements the "operator client-state" CLI
// command, which prints the stored client state as JSON.
type OperatorClientStateCommand struct {
	Meta
}
// Help returns the usage text for the command with surrounding whitespace
// trimmed.
func (c *OperatorClientStateCommand) Help() string {
	const usage = `
Usage: nomad operator client-state <path_to_nomad_dir>
Emits a representation of the stored client state in JSON format.
`
	return strings.TrimSpace(usage)
}
// AutocompleteFlags returns an empty flag set: the command offers no flags
// for shell completion.
func (c *OperatorClientStateCommand) AutocompleteFlags() complete.Flags {
	noFlags := complete.Flags{}
	return noFlags
}
// AutocompleteArgs returns complete.PredictNothing, so no completions are
// suggested for the positional argument.
func (c *OperatorClientStateCommand) AutocompleteArgs() complete.Predictor {
	return complete.PredictNothing
}
// Synopsis returns the one-line description of the command.
func (c *OperatorClientStateCommand) Synopsis() string {
	const summary = "Dump the nomad client state"
	return summary
}
// Name returns the full command name, matching the key under which the
// command is registered.
func (c *OperatorClientStateCommand) Name() string {
	return "operator client-state"
}
// Run opens the client state database located in the directory given as the
// sole argument and prints every stored allocation, together with its
// deployment status and per-task runner state, as one JSON document.
//
// It returns 0 on success and 1 when the argument count is wrong or when any
// read, decode, or serialization step fails; failures are reported on the
// command's error UI.
func (c *OperatorClientStateCommand) Run(args []string) int {
	if len(args) != 1 {
		c.Ui.Error("This command takes one argument: <nomad-data-dir>")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	logger := hclog.L()
	db, err := state.NewBoltStateDB(logger, args[0])
	if err != nil {
		c.Ui.Error(fmt.Sprintf("failed to open client state: %v", err))
		return 1
	}
	defer db.Close()

	// NOTE(review): the second return value is intentionally discarded here,
	// as in the original; presumably it carries per-allocation read failures
	// — confirm against GetAllAllocations before relying on completeness.
	allocs, _, err := db.GetAllAllocations()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("failed to get allocations: %v", err))
		return 1
	}

	data := make(map[string]*clientStateAlloc, len(allocs))
	for _, alloc := range allocs {
		allocID := alloc.ID

		deployState, err := db.GetDeploymentStatus(allocID)
		if err != nil {
			c.Ui.Error(fmt.Sprintf("failed to get deployment status for %s: %v", allocID, err))
			return 1
		}

		// Collect the task names defensively: a stored allocation may have no
		// job attached, or its job may not contain the allocation's task
		// group. In either case the allocation is still emitted, just without
		// task state, instead of panicking on a nil dereference.
		var taskNames []string
		if alloc.Job != nil {
			if tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup); tg != nil {
				for _, jt := range tg.Tasks {
					taskNames = append(taskNames, jt.Name)
				}
			}
		}

		tasks := map[string]*taskState{}
		for _, name := range taskNames {
			ls, rs, err := db.GetTaskRunnerState(allocID, name)
			if err != nil {
				c.Ui.Error(fmt.Sprintf("failed to get task runner state %s: %v", allocID, err))
				return 1
			}

			// Tasks without a task handle are skipped, as before. The extra
			// nil check on ls covers the case where no local state was
			// returned at all (NOTE(review): confirm GetTaskRunnerState can
			// return a nil local state without an error).
			if ls == nil || ls.TaskHandle == nil {
				continue
			}

			// Decode the opaque driver state into a generic value so it is
			// emitted as structured JSON.
			var ds any
			if err := ls.TaskHandle.GetDriverState(&ds); err != nil {
				c.Ui.Error(fmt.Sprintf("failed to parse driver state %s: %v", allocID, err))
				return 1
			}

			tasks[name] = &taskState{
				LocalState:  ls,
				RemoteState: rs,
				DriverState: ds,
			}
		}

		data[allocID] = &clientStateAlloc{
			Alloc:        alloc,
			DeployStatus: deployState,
			Tasks:        tasks,
		}
	}

	output := debugOutput{
		Allocations: data,
	}
	bytes, err := json.Marshal(output)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("failed to serialize client state: %v", err))
		return 1
	}

	c.Ui.Output(string(bytes))
	return 0
}
// debugOutput is the top-level document serialized by the client-state
// command. Allocations is keyed by allocation ID.
type debugOutput struct {
	Allocations map[string]*clientStateAlloc
}
// clientStateAlloc groups everything emitted for a single allocation: the
// allocation itself, its deployment status, and the state of its tasks keyed
// by task name.
type clientStateAlloc struct {
	Alloc        any
	DeployStatus any
	Tasks        map[string]*taskState
}
// taskState holds the state emitted for one task: the task runner's local
// state, its remote state, and the driver state decoded from the task handle.
type taskState struct {
	LocalState  *trstate.LocalState
	RemoteState any
	DriverState any
}

View File

@ -0,0 +1,29 @@
package command
import (
"strings"
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
)
// TestOperatorClientStateCommand verifies that the command fails with its
// usage text when given the wrong number of arguments, and that it exits
// successfully with output containing an empty JSON object when pointed at a
// fresh temporary directory.
func TestOperatorClientStateCommand(t *testing.T) {
	ci.Parallel(t)

	mockUI := cli.NewMockUi()
	command := &OperatorClientStateCommand{Meta: Meta{Ui: mockUI}}

	// Anything other than exactly one argument must fail with the usage hint.
	require.Equal(t, 1, command.Run([]string{"some", "bad", "args"}))
	errOut := mockUI.ErrorWriter.String()
	if !strings.Contains(errOut, commandErrorText(command)) {
		t.Fatalf("expected help output, got: %s", errOut)
	}
	mockUI.ErrorWriter.Reset()

	// Running against a newly created temporary directory must succeed.
	emptyDir := t.TempDir()
	require.Equal(t, 0, command.Run([]string{emptyDir}))
	require.Contains(t, mockUI.OutputWriter.String(), "{}")
}

View File

@ -0,0 +1,679 @@
---
layout: docs
page_title: 'Commands: operator client-state'
description: >
The `operator client-state` command generates a representation of the
stored client state in JSON format.
---
# Command: operator client-state
The `operator client-state` command generates a representation of the
stored client state in JSON format.
## Usage
```plaintext
nomad operator client-state <path_to_nomad_dir>
```
## Example
The output of this command can be piped to `jq` for further filtering and analysis:
```shell-session
$ nomad operator client-state /path/to/nomad/dir | jq
{
"Allocations": {
"3b0ed734-f721-45d3-420a-3d96926b3f1d": {
"Alloc": {
"ID": "3b0ed734-f721-45d3-420a-3d96926b3f1d",
"Namespace": "default",
"EvalID": "042fbfeb-0c75-e696-b9b8-e3b4328a4988",
"Name": "docs.example[0]",
"NodeID": "00d48d89-d512-3ee6-4b95-271b72415916",
"NodeName": "YOUR_NOMAD_NAME_HERE",
"JobID": "docs",
"Job": {
"Stop": false,
"Region": "global",
"Namespace": "default",
"ID": "docs",
"ParentID": "",
"Name": "docs",
"Type": "service",
"Priority": 50,
"AllAtOnce": false,
"Datacenters": [
"dc1"
],
"Constraints": null,
"Affinities": null,
"Spreads": null,
"TaskGroups": [
{
"Name": "example",
"Count": 1,
"Update": {
"Stagger": 30000000000,
"MaxParallel": 1,
"HealthCheck": "checks",
"MinHealthyTime": 10000000000,
"HealthyDeadline": 300000000000,
"ProgressDeadline": 600000000000,
"AutoRevert": false,
"AutoPromote": false,
"Canary": 0
},
"Migrate": {
"MaxParallel": 1,
"HealthCheck": "checks",
"MinHealthyTime": 10000000000,
"HealthyDeadline": 300000000000
},
"Constraints": null,
"Scaling": null,
"RestartPolicy": {
"Attempts": 2,
"Interval": 1800000000000,
"Delay": 15000000000,
"Mode": "fail"
},
"Tasks": [
{
"Name": "server",
"Driver": "docker",
"User": "",
"Config": {
"args": [
"-listen",
":5678",
"-text",
"hello world"
],
"image": "hashicorp/http-echo",
"ports": [
"http"
]
},
"Env": null,
"Services": null,
"Vault": null,
"Templates": null,
"Constraints": null,
"Affinities": null,
"Resources": {
"CPU": 100,
"Cores": 0,
"MemoryMB": 300,
"MemoryMaxMB": 0,
"DiskMB": 0,
"IOPS": 0,
"Networks": null,
"Devices": null
},
"RestartPolicy": {
"Attempts": 2,
"Interval": 1800000000000,
"Delay": 15000000000,
"Mode": "fail"
},
"DispatchPayload": null,
"Lifecycle": null,
"Meta": null,
"KillTimeout": 5000000000,
"LogConfig": {
"MaxFiles": 10,
"MaxFileSizeMB": 10
},
"Artifacts": null,
"Leader": false,
"ShutdownDelay": 0,
"VolumeMounts": null,
"ScalingPolicies": null,
"KillSignal": "",
"Kind": "",
"CSIPluginConfig": null
}
],
"EphemeralDisk": {
"Sticky": false,
"SizeMB": 300,
"Migrate": false
},
"Meta": null,
"ReschedulePolicy": {
"Attempts": 0,
"Interval": 0,
"Delay": 30000000000,
"DelayFunction": "exponential",
"MaxDelay": 3600000000000,
"Unlimited": true
},
"Affinities": null,
"Spreads": null,
"Networks": [
{
"Mode": "",
"Device": "",
"CIDR": "",
"IP": "",
"MBits": 0,
"DNS": null,
"ReservedPorts": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostNetwork": "default"
}
],
"DynamicPorts": null
}
],
"Consul": {
"Namespace": ""
},
"Services": null,
"Volumes": null,
"ShutdownDelay": null,
"StopAfterClientDisconnect": null,
"MaxClientDisconnect": null
}
],
"Update": {
"Stagger": 30000000000,
"MaxParallel": 1,
"HealthCheck": "",
"MinHealthyTime": 0,
"HealthyDeadline": 0,
"ProgressDeadline": 0,
"AutoRevert": false,
"AutoPromote": false,
"Canary": 0
},
"Multiregion": null,
"Periodic": null,
"ParameterizedJob": null,
"Dispatched": false,
"DispatchIdempotencyToken": "",
"Payload": null,
"Meta": null,
"ConsulToken": "",
"ConsulNamespace": "",
"VaultToken": "",
"VaultNamespace": "",
"NomadTokenID": "",
"Status": "pending",
"StatusDescription": "",
"Stable": false,
"Version": 0,
"SubmitTime": 1670925631564348000,
"CreateIndex": 14,
"ModifyIndex": 14,
"JobModifyIndex": 14
},
"TaskGroup": "example",
"Resources": {
"CPU": 100,
"Cores": 0,
"MemoryMB": 300,
"MemoryMaxMB": 300,
"DiskMB": 300,
"IOPS": 0,
"Networks": [
{
"Mode": "",
"Device": "",
"CIDR": "",
"IP": "192.168.1.9",
"MBits": 0,
"DNS": null,
"ReservedPorts": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostNetwork": "default"
}
],
"DynamicPorts": null
}
],
"Devices": null
},
"SharedResources": {
"CPU": 0,
"Cores": 0,
"MemoryMB": 0,
"MemoryMaxMB": 0,
"DiskMB": 300,
"IOPS": 0,
"Networks": [
{
"Mode": "",
"Device": "",
"CIDR": "",
"IP": "192.168.1.9",
"MBits": 0,
"DNS": null,
"ReservedPorts": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostNetwork": "default"
}
],
"DynamicPorts": null
}
],
"Devices": null
},
"TaskResources": {
"server": {
"CPU": 100,
"Cores": 0,
"MemoryMB": 300,
"MemoryMaxMB": 0,
"DiskMB": 0,
"IOPS": 0,
"Networks": null,
"Devices": null
}
},
"AllocatedResources": {
"Tasks": {
"server": {
"Cpu": {
"CpuShares": 100,
"ReservedCores": null
},
"Memory": {
"MemoryMB": 300,
"MemoryMaxMB": 0
},
"Networks": null,
"Devices": null
}
},
"TaskLifecycles": {
"server": null
},
"Shared": {
"Networks": [
{
"Mode": "",
"Device": "",
"CIDR": "",
"IP": "192.168.1.9",
"MBits": 0,
"DNS": null,
"ReservedPorts": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostNetwork": "default"
}
],
"DynamicPorts": null
}
],
"DiskMB": 300,
"Ports": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostIP": "192.168.1.9"
}
]
}
},
"Metrics": {
"NodesEvaluated": 1,
"NodesFiltered": 0,
"NodesAvailable": {
"dc1": 1
},
"ClassFiltered": null,
"ConstraintFiltered": null,
"NodesExhausted": 0,
"ClassExhausted": null,
"DimensionExhausted": null,
"QuotaExhausted": null,
"ResourcesExhausted": null,
"Scores": null,
"ScoreMetaData": [
{
"NodeID": "00d48d89-d512-3ee6-4b95-271b72415916",
"Scores": {
"binpack": 0.014787748194725047,
"job-anti-affinity": 0,
"node-affinity": 0,
"node-reschedule-penalty": 0
},
"NormScore": 0.014787748194725047
}
],
"AllocationTime": 64877,
"CoalescedFailures": 0
},
"DesiredStatus": "run",
"DesiredDescription": "",
"DesiredTransition": {
"Migrate": null,
"Reschedule": null,
"ForceReschedule": null,
"NoShutdownDelay": null
},
"ClientStatus": "pending",
"ClientDescription": "",
"TaskStates": null,
"AllocStates": null,
"PreviousAllocation": "",
"NextAllocation": "",
"DeploymentID": "dc4c0c22-3bc7-a17a-5d7b-bce06a692293",
"DeploymentStatus": null,
"RescheduleTracker": null,
"NetworkStatus": null,
"FollowupEvalID": "",
"PreemptedAllocations": null,
"PreemptedByAllocation": "",
"SigningKeyID": "e596c865-adad-78cc-0266-94640594e5a2",
"CreateIndex": 15,
"ModifyIndex": 15,
"AllocModifyIndex": 15,
"CreateTime": 1670925631566156300,
"ModifyTime": 1670925631566156300
},
"DeployStatus": {
"Healthy": true,
"Timestamp": "2022-12-13T18:00:45.351354859+08:00",
"Canary": false,
"ModifyIndex": 0
},
"Tasks": {
"server": {
"LocalState": {
"Hooks": {
"artifacts": {
"PrestartDone": true,
"Data": null,
"Env": null
},
"devices": {
"PrestartDone": true,
"Data": null,
"Env": null
},
"dispatch_payload": {
"PrestartDone": true,
"Data": null,
"Env": null
},
"identity": {
"PrestartDone": false,
"Data": null,
"Env": null
},
"logmon": {
"PrestartDone": false,
"Data": {
"reattach_config": "{\"Protocol\":\"grpc\",\"Network\":\"unix\",\"Addr\":\"/tmp/plugin2391577039\",\"Pid\":97576}"
},
"Env": null
},
"script_checks": {
"PrestartDone": false,
"Data": null,
"Env": null
},
"task_dir": {
"PrestartDone": false,
"Data": {
"is_done": "true"
},
"Env": null
},
"validate": {
"PrestartDone": true,
"Data": null,
"Env": null
},
"volumes": {
"PrestartDone": false,
"Data": null,
"Env": null
}
},
"DriverNetwork": {
"PortMap": null,
"IP": "172.17.0.2",
"AutoAdvertise": false
},
"TaskHandle": {
"Version": 1,
"Config": {
"ID": "3b0ed734-f721-45d3-420a-3d96926b3f1d/server/c7ffd87f",
"JobName": "docs",
"JobID": "docs",
"TaskGroupName": "example",
"Name": "server",
"Namespace": "default",
"NodeName": "YOUR_NOMAD_NAME_HERE",
"NodeID": "00d48d89-d512-3ee6-4b95-271b72415916",
"Env": {
"NOMAD_ADDR_http": "192.168.1.9:5678",
"NOMAD_ALLOC_DIR": "/alloc",
"NOMAD_ALLOC_ID": "3b0ed734-f721-45d3-420a-3d96926b3f1d",
"NOMAD_ALLOC_INDEX": "0",
"NOMAD_ALLOC_NAME": "docs.example[0]",
"NOMAD_ALLOC_PORT_http": "5678",
"NOMAD_CPU_LIMIT": "100",
"NOMAD_DC": "dc1",
"NOMAD_GROUP_NAME": "example",
"NOMAD_HOST_ADDR_http": "192.168.1.9:5678",
"NOMAD_HOST_IP_http": "192.168.1.9",
"NOMAD_HOST_PORT_http": "5678",
"NOMAD_IP_http": "192.168.1.9",
"NOMAD_JOB_ID": "docs",
"NOMAD_JOB_NAME": "docs",
"NOMAD_MEMORY_LIMIT": "300",
"NOMAD_NAMESPACE": "default",
"NOMAD_PARENT_CGROUP": "/nomad",
"NOMAD_PORT_http": "5678",
"NOMAD_REGION": "global",
"NOMAD_SECRETS_DIR": "/secrets",
"NOMAD_SHORT_ALLOC_ID": "3b0ed734",
"NOMAD_TASK_DIR": "/local",
"NOMAD_TASK_NAME": "server"
},
"DeviceEnv": {},
"Resources": {
"NomadResources": {
"Cpu": {
"CpuShares": 100,
"ReservedCores": null
},
"Memory": {
"MemoryMB": 300,
"MemoryMaxMB": 0
},
"Networks": null,
"Devices": null
},
"LinuxResources": {
"CPUPeriod": 0,
"CPUQuota": 0,
"CPUShares": 100,
"MemoryLimitBytes": 314572800,
"OOMScoreAdj": 0,
"CpusetCpus": "",
"CpusetCgroupPath": "",
"PercentTicks": 0.0023148148148148147
},
"Ports": [
{
"Label": "http",
"Value": 5678,
"To": 0,
"HostIP": "192.168.1.9"
}
]
},
"Devices": null,
"Mounts": null,
"User": "",
"AllocDir": "/home/test/alloc/3b0ed734-f721-45d3-420a-3d96926b3f1d",
"StdoutPath": "/home/test/alloc/3b0ed734-f721-45d3-420a-3d96926b3f1d/alloc/logs/.server.stdout.fifo",
"StderrPath": "/home/test/alloc/3b0ed734-f721-45d3-420a-3d96926b3f1d/alloc/logs/.server.stderr.fifo",
"AllocID": "3b0ed734-f721-45d3-420a-3d96926b3f1d",
"NetworkIsolation": null,
"DNS": null
},
"State": "",
"DriverState": "g6tDb250YWluZXJJRNoAQDdkNmQxMGVjZTY1YmQ2ZjY0MDk1YzdiMWI2NjViMzUyOTI1NDc5NmQ0YzE3ODY2YzdlNmE1ZTE2YWQ0NzI3YzitRHJpdmVyTmV0d29ya4OtQXV0b0FkdmVydGlzZcKiSVCqMTcyLjE3LjAuMqdQb3J0TWFwwK5SZWF0dGFjaENvbmZpZ4SkQWRkcrUvdG1wL3BsdWdpbjE4Mjc2MjczOTCnTmV0d29ya6R1bml4o1BpZNIAAX2qqFByb3RvY29spGdycGM="
},
"RunComplete": false
},
"RemoteState": {
"State": "running",
"Failed": false,
"Restarts": 0,
"LastRestart": "0001-01-01T00:00:00Z",
"StartedAt": "2022-12-13T10:00:35.350231151Z",
"FinishedAt": "0001-01-01T00:00:00Z",
"Events": [
{
"Type": "Received",
"Time": 1670925631571274800,
"Message": "",
"DisplayMessage": "Task received by client",
"Details": {},
"FailsTask": false,
"RestartReason": "",
"SetupError": "",
"DriverError": "",
"ExitCode": 0,
"Signal": 0,
"KillTimeout": 0,
"KillError": "",
"KillReason": "",
"StartDelay": 0,
"DownloadError": "",
"ValidationError": "",
"DiskLimit": 0,
"FailedSibling": "",
"VaultError": "",
"TaskSignalReason": "",
"TaskSignal": "",
"DriverMessage": "",
"GenericSource": ""
},
{
"Type": "Task Setup",
"Time": 1670925631572548900,
"Message": "Building Task Directory",
"DisplayMessage": "Building Task Directory",
"Details": {
"message": "Building Task Directory"
},
"FailsTask": false,
"RestartReason": "",
"SetupError": "",
"DriverError": "",
"ExitCode": 0,
"Signal": 0,
"KillTimeout": 0,
"KillError": "",
"KillReason": "",
"StartDelay": 0,
"DownloadError": "",
"ValidationError": "",
"DiskLimit": 0,
"FailedSibling": "",
"VaultError": "",
"TaskSignalReason": "",
"TaskSignal": "",
"DriverMessage": "",
"GenericSource": ""
},
{
"Type": "Driver",
"Time": 1670925631589523700,
"Message": "",
"DisplayMessage": "Downloading image",
"Details": {
"image": "hashicorp/http-echo:latest"
},
"FailsTask": false,
"RestartReason": "",
"SetupError": "",
"DriverError": "",
"ExitCode": 0,
"Signal": 0,
"KillTimeout": 0,
"KillError": "",
"KillReason": "",
"StartDelay": 0,
"DownloadError": "",
"ValidationError": "",
"DiskLimit": 0,
"FailedSibling": "",
"VaultError": "",
"TaskSignalReason": "",
"TaskSignal": "",
"DriverMessage": "Downloading image",
"GenericSource": ""
},
{
"Type": "Started",
"Time": 1670925635350228700,
"Message": "",
"DisplayMessage": "Task started by client",
"Details": {},
"FailsTask": false,
"RestartReason": "",
"SetupError": "",
"DriverError": "",
"ExitCode": 0,
"Signal": 0,
"KillTimeout": 0,
"KillError": "",
"KillReason": "",
"StartDelay": 0,
"DownloadError": "",
"ValidationError": "",
"DiskLimit": 0,
"FailedSibling": "",
"VaultError": "",
"TaskSignalReason": "",
"TaskSignal": "",
"DriverMessage": "",
"GenericSource": ""
}
],
"TaskHandle": null
},
"DriverState": {
"ContainerID": "7d6d10ece65bd6f64095c7b1b665b3529254796d4c17866c7e6a5e16ad4727c8",
"DriverNetwork": {
"AutoAdvertise": false,
"IP": "172.17.0.2",
"PortMap": null
},
"ReattachConfig": {
"Addr": "/tmp/plugin1827627390",
"Network": "unix",
"Pid": 97706,
"Protocol": "grpc"
}
}
}
}
}
}
}
```

View File

@ -659,6 +659,10 @@
}
]
},
{
"title": "client-state",
"path": "commands/operator/client-state"
},
{
"title": "debug",
"path": "commands/operator/debug"