From 15089f055f1da7cf5fa49c1be638e5dc4f157069 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Thu, 17 Mar 2022 13:56:14 -0400 Subject: [PATCH] api: add related evals to eval details (#12305) The `related` query param is used to indicate that the request should return a list of related (next, previous, and blocked) evaluations. Co-authored-by: Jasmine Dahilig --- .changelog/12305.txt | 3 + api/evaluations.go | 24 +++++ api/evaluations_test.go | 19 ++++ command/agent/eval_endpoint.go | 3 + command/agent/eval_endpoint_test.go | 41 ++++++++ nomad/eval_endpoint.go | 34 +++++-- nomad/eval_endpoint_test.go | 78 ++++++++++----- nomad/state/state_store.go | 49 ++++++++++ nomad/state/state_store_test.go | 119 +++++++++++++++++++++++ nomad/structs/structs.go | 72 +++++++++++++- website/content/api-docs/evaluations.mdx | 85 +++++++++++----- 11 files changed, 466 insertions(+), 61 deletions(-) create mode 100644 .changelog/12305.txt diff --git a/.changelog/12305.txt b/.changelog/12305.txt new file mode 100644 index 000000000..8d9425eed --- /dev/null +++ b/.changelog/12305.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: Add `related` query parameter to the Evaluation details endpoint +``` diff --git a/api/evaluations.go b/api/evaluations.go index c8404dfc4..62d699ef3 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -71,6 +71,7 @@ type Evaluation struct { NextEval string PreviousEval string BlockedEval string + RelatedEvals []*EvaluationStub FailedTGAllocs map[string]*AllocationMetric ClassEligibility map[string]bool EscapedComputedClass bool @@ -84,6 +85,29 @@ type Evaluation struct { ModifyTime int64 } +// EvaluationStub is used to serialize parts of an evaluation returned in the +// RelatedEvals field of an Evaluation. +type EvaluationStub struct { + ID string + Priority int + Type string + TriggeredBy string + Namespace string + JobID string + NodeID string + DeploymentID string + Status string + StatusDescription string + WaitUntil time.Time + NextEval string + PreviousEval string + BlockedEval string + CreateIndex uint64 + ModifyIndex uint64 + CreateTime int64 + ModifyTime int64 +} + // EvalIndexSort is a wrapper to sort evaluations by CreateIndex. // We reverse the test so that we get the highest index first. type EvalIndexSort []*Evaluation diff --git a/api/evaluations_test.go b/api/evaluations_test.go index 226db8460..b084e4fbb 100644 --- a/api/evaluations_test.go +++ b/api/evaluations_test.go @@ -126,6 +126,25 @@ func TestEvaluations_Info(t *testing.T) { // Check that we got the right result require.NotNil(t, result) require.Equal(t, resp.EvalID, result.ID) + + // Register the job again to get a related eval + resp, wm, err = jobs.Register(job, nil) + evals, _, err := e.List(nil) + require.NoError(t, err) + + // Find an eval that should have related evals + for _, eval := range evals { + if eval.NextEval != "" || eval.PreviousEval != "" || eval.BlockedEval != "" { + result, qm, err := e.Info(eval.ID, &QueryOptions{ + Params: map[string]string{ + "related": "true", + }, + }) + require.NoError(t, err) + assertQueryMeta(t, qm) + require.NotNil(t, result.RelatedEvals) + } + } } func TestEvaluations_Allocations(t *testing.T) { diff --git a/command/agent/eval_endpoint.go b/command/agent/eval_endpoint.go index a51c9e940..1be0e24ba 100644 --- a/command/agent/eval_endpoint.go +++ b/command/agent/eval_endpoint.go @@ -80,6 +80,9 @@ func (s *HTTPServer) evalQuery(resp http.ResponseWriter, req *http.Request, eval return nil, nil } + query := req.URL.Query() + args.IncludeRelated = query.Get("related") == "true" + var out structs.SingleEvalResponse if err := s.agent.RPC("Eval.GetEval", &args, &out); err != nil { return nil, err diff --git a/command/agent/eval_endpoint_test.go b/command/agent/eval_endpoint_test.go index 15217ea1b..df651d462 100644 --- a/command/agent/eval_endpoint_test.go +++ b/command/agent/eval_endpoint_test.go @@ -200,3 +200,44 @@ func TestHTTP_EvalQuery(t *testing.T) { } }) } + +func TestHTTP_EvalQueryWithRelated(t *testing.T) { + t.Parallel() + httpTest(t, nil, func(s *TestAgent) { + // Directly manipulate the state + state := s.Agent.server.State() + eval1 := mock.Eval() + eval2 := mock.Eval() + + // Link related evals + eval1.NextEval = eval2.ID + eval2.PreviousEval = eval1.ID + + err := state.UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1, eval2}) + require.NoError(t, err) + + // Make the HTTP request + req, err := http.NewRequest("GET", fmt.Sprintf("/v1/evaluation/%s?related=true", eval1.ID), nil) + require.NoError(t, err) + respW := httptest.NewRecorder() + + // Make the request + obj, err := s.Server.EvalSpecificRequest(respW, req) + require.NoError(t, err) + + // Check for the index + require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-Index")) + require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-KnownLeader")) + require.NotEmpty(t, respW.Result().Header.Get("X-Nomad-LastContact")) + + // Check the eval + e := obj.(*structs.Evaluation) + require.Equal(t, eval1.ID, e.ID) + + // Check for the related evals + expected := []*structs.EvaluationStub{ + eval2.Stub(), + } + require.Equal(t, expected, e.RelatedEvals) + }) +} diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index b3e4d371e..2d6af727c 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -53,23 +53,39 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest, queryOpts: &args.QueryOptions, queryMeta: &reply.QueryMeta, run: func(ws memdb.WatchSet, state *state.StateStore) error { - // Look for the job - out, err := state.EvalByID(ws, args.EvalID) + var related []*structs.EvaluationStub + + // Look for the eval + eval, err := state.EvalByID(ws, args.EvalID) if err != nil { - return err + return fmt.Errorf("failed to lookup eval: %v", err) } - // Setup the output - reply.Eval = out - if out != nil { + if eval != nil { // Re-check namespace in case it differs from request. - if !allowNsOp(aclObj, out.Namespace) { + if !allowNsOp(aclObj, eval.Namespace) { return structs.ErrPermissionDenied } - reply.Index = out.ModifyIndex + // Lookup related evals if requested. + if args.IncludeRelated { + related, err = state.EvalsRelatedToID(ws, eval.ID) + if err != nil { + return fmt.Errorf("failed to lookup related evals: %v", err) + } + + // Use a copy to avoid modifying the original eval. + eval = eval.Copy() + eval.RelatedEvals = related + } + } + + // Setup the output. + reply.Eval = eval + if eval != nil { + reply.Index = eval.ModifyIndex } else { - // Use the last index that affected the nodes table + // Use the last index that affected the evals table index, err := state.Index("evals") if err != nil { return err diff --git a/nomad/eval_endpoint_test.go b/nomad/eval_endpoint_test.go index 500b3502c..b23a9f2b2 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -30,36 +30,60 @@ func TestEvalEndpoint_GetEval(t *testing.T) { // Create the register request eval1 := mock.Eval() - s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1}) + eval2 := mock.Eval() - // Lookup the eval - get := &structs.EvalSpecificRequest{ - EvalID: eval1.ID, - QueryOptions: structs.QueryOptions{Region: "global"}, - } - var resp structs.SingleEvalResponse - if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { - t.Fatalf("err: %v", err) - } - if resp.Index != 1000 { - t.Fatalf("Bad index: %d %d", resp.Index, 1000) - } + // Link the evals + eval1.NextEval = eval2.ID + eval2.PreviousEval = eval1.ID - if !reflect.DeepEqual(eval1, resp.Eval) { - t.Fatalf("bad: %#v %#v", eval1, resp.Eval) - } + err := s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1, eval2}) + require.NoError(t, err) - // Lookup non-existing node - get.EvalID = uuid.Generate() - if err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp); err != nil { - t.Fatalf("err: %v", err) - } - if resp.Index != 1000 { - t.Fatalf("Bad index: %d %d", resp.Index, 1000) - } - if resp.Eval != nil { - t.Fatalf("unexpected eval") - } + t.Run("lookup eval", func(t *testing.T) { + get := &structs.EvalSpecificRequest{ + EvalID: eval1.ID, + QueryOptions: structs.QueryOptions{Region: "global"}, + } + var resp structs.SingleEvalResponse + err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp) + require.NoError(t, err) + require.EqualValues(t, 1000, resp.Index, "bad index") + require.Equal(t, eval1, resp.Eval) + }) + + t.Run("lookup non-existing eval", func(t *testing.T) { + get := &structs.EvalSpecificRequest{ + EvalID: uuid.Generate(), + QueryOptions: structs.QueryOptions{Region: "global"}, + } + var resp structs.SingleEvalResponse + err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp) + require.NoError(t, err) + require.EqualValues(t, 1000, resp.Index, "bad index") + require.Nil(t, resp.Eval, "unexpected eval") + }) + + t.Run("lookup related evals", func(t *testing.T) { + get := &structs.EvalSpecificRequest{ + EvalID: eval1.ID, + QueryOptions: structs.QueryOptions{Region: "global"}, + IncludeRelated: true, + } + var resp structs.SingleEvalResponse + err := msgpackrpc.CallWithCodec(codec, "Eval.GetEval", get, &resp) + require.NoError(t, err) + require.EqualValues(t, 1000, resp.Index, "bad index") + require.Equal(t, eval1.ID, resp.Eval.ID) + + // Make sure we didn't modify the eval on a read request. + require.Nil(t, eval1.RelatedEvals) + + // Check for the related evals + expected := []*structs.EvaluationStub{ + eval2.Stub(), + } + require.Equal(t, expected, resp.Eval.RelatedEvals) + }) } func TestEvalEndpoint_GetEval_ACL(t *testing.T) { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 42f2ebf1c..24b9e6f8c 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -3177,6 +3177,55 @@ func (s *StateStore) EvalByID(ws memdb.WatchSet, id string) (*structs.Evaluation return nil, nil } +// EvalsRelatedToID is used to retrieve the evals that are related (next, +// previous, or blocked) to the provided eval ID. +func (s *StateStore) EvalsRelatedToID(ws memdb.WatchSet, id string) ([]*structs.EvaluationStub, error) { + txn := s.db.ReadTxn() + + raw, err := txn.First("evals", "id", id) + if err != nil { + return nil, fmt.Errorf("eval lookup failed: %v", err) + } + if raw == nil { + return nil, nil + } + eval := raw.(*structs.Evaluation) + + relatedEvals := []*structs.EvaluationStub{} + todo := eval.RelatedIDs() + done := map[string]bool{ + eval.ID: true, // don't place the requested eval in the related list. + } + + for len(todo) > 0 { + // Pop the first value from the todo list. + current := todo[0] + todo = todo[1:] + if current == "" { + continue + } + + // Skip value if we already have it in the results. + if done[current] { + continue + } + + eval, err := s.EvalByID(ws, current) + if err != nil { + return nil, err + } + if eval == nil { + continue + } + + todo = append(todo, eval.RelatedIDs()...) + relatedEvals = append(relatedEvals, eval.Stub()) + done[eval.ID] = true + } + + return relatedEvals, nil +} + // EvalsByIDPrefix is used to lookup evaluations by prefix in a particular // namespace func (s *StateStore) EvalsByIDPrefix(ws memdb.WatchSet, namespace, id string, sort SortOption) (memdb.ResultIterator, error) { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index fa2a2759e..0859b6e54 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -4598,6 +4598,125 @@ func TestStateStore_EvalsByIDPrefix_Namespaces(t *testing.T) { require.False(t, watchFired(ws)) } +func TestStateStore_EvalsRelatedToID(t *testing.T) { + t.Parallel() + + state := testStateStore(t) + + // Create sample evals. + e1 := mock.Eval() + e2 := mock.Eval() + e3 := mock.Eval() + e4 := mock.Eval() + e5 := mock.Eval() + e6 := mock.Eval() + + // Link evals. + // This is not accurate for a real scenario, but it's helpful for testing + // the general approach. + // + // e1 -> e2 -> e3 -> e5 + // └─-> e4 (blocked) -> e6 + e1.NextEval = e2.ID + e2.PreviousEval = e1.ID + + e2.NextEval = e3.ID + e3.PreviousEval = e2.ID + + e3.BlockedEval = e4.ID + e4.PreviousEval = e3.ID + + e3.NextEval = e5.ID + e5.PreviousEval = e3.ID + + e4.NextEval = e6.ID + e6.PreviousEval = e4.ID + + // Create eval not in chain. + e7 := mock.Eval() + + // Create eval with GC'ed related eval. + e8 := mock.Eval() + e8.NextEval = uuid.Generate() + + err := state.UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{e1, e2, e3, e4, e5, e6, e7, e8}) + require.NoError(t, err) + + testCases := []struct { + name string + id string + expected []string + }{ + { + name: "linear history", + id: e1.ID, + expected: []string{ + e2.ID, + e3.ID, + e4.ID, + e5.ID, + e6.ID, + }, + }, + { + name: "linear history from middle", + id: e4.ID, + expected: []string{ + e1.ID, + e2.ID, + e3.ID, + e5.ID, + e6.ID, + }, + }, + { + name: "eval not in chain", + id: e7.ID, + expected: []string{}, + }, + { + name: "eval with gc", + id: e8.ID, + expected: []string{}, + }, + { + name: "non-existing eval", + id: uuid.Generate(), + expected: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ws := memdb.NewWatchSet() + related, err := state.EvalsRelatedToID(ws, tc.id) + require.NoError(t, err) + + got := []string{} + for _, e := range related { + got = append(got, e.ID) + } + require.ElementsMatch(t, tc.expected, got) + }) + } + + t.Run("blocking query", func(t *testing.T) { + ws := memdb.NewWatchSet() + _, err := state.EvalsRelatedToID(ws, e2.ID) + require.NoError(t, err) + + // Update an eval off the chain and make sure watchset doesn't fire. + e7.Status = structs.EvalStatusComplete + state.UpsertEvals(structs.MsgTypeTestSetup, 1001, []*structs.Evaluation{e7}) + require.False(t, watchFired(ws)) + + // Update an eval in the chain and make sure watchset does fire. + e3.Status = structs.EvalStatusComplete + state.UpsertEvals(structs.MsgTypeTestSetup, 1001, []*structs.Evaluation{e3}) + require.True(t, watchFired(ws)) + }) +} + func TestStateStore_UpdateAllocsFromClient(t *testing.T) { ci.Parallel(t) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8f2974a3b..325c07f76 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -828,7 +828,8 @@ type EvalDeleteRequest struct { // EvalSpecificRequest is used when we just need to specify a target evaluation type EvalSpecificRequest struct { - EvalID string + EvalID string + IncludeRelated bool QueryOptions } @@ -10586,6 +10587,10 @@ type Evaluation struct { // to constraints or lacking resources. BlockedEval string + // RelatedEvals is a list of all the evaluations that are related (next, + // previous, or blocked) to this one. It may be nil if not requested. + RelatedEvals []*EvaluationStub + // FailedTGAllocs are task groups which have allocations that could not be // made, but the metrics are persisted so that the user can use the feedback // to determine the cause. @@ -10632,6 +10637,27 @@ type Evaluation struct { ModifyTime int64 } +type EvaluationStub struct { + ID string + Namespace string + Priority int + Type string + TriggeredBy string + JobID string + NodeID string + DeploymentID string + Status string + StatusDescription string + WaitUntil time.Time + NextEval string + PreviousEval string + BlockedEval string + CreateIndex uint64 + ModifyIndex uint64 + CreateTime int64 + ModifyTime int64 +} + // GetID implements the IDGetter interface, required for pagination. func (e *Evaluation) GetID() string { if e == nil { @@ -10664,6 +10690,50 @@ func (e *Evaluation) GoString() string { return fmt.Sprintf("", e.ID, e.JobID, e.Namespace) } +func (e *Evaluation) RelatedIDs() []string { + if e == nil { + return nil + } + + ids := []string{e.NextEval, e.PreviousEval, e.BlockedEval} + related := make([]string, 0, len(ids)) + + for _, id := range ids { + if id != "" { + related = append(related, id) + } + } + + return related +} + +func (e *Evaluation) Stub() *EvaluationStub { + if e == nil { + return nil + } + + return &EvaluationStub{ + ID: e.ID, + Namespace: e.Namespace, + Priority: e.Priority, + Type: e.Type, + TriggeredBy: e.TriggeredBy, + JobID: e.JobID, + NodeID: e.NodeID, + DeploymentID: e.DeploymentID, + Status: e.Status, + StatusDescription: e.StatusDescription, + WaitUntil: e.WaitUntil, + NextEval: e.NextEval, + PreviousEval: e.PreviousEval, + BlockedEval: e.BlockedEval, + CreateIndex: e.CreateIndex, + ModifyIndex: e.ModifyIndex, + CreateTime: e.CreateTime, + ModifyTime: e.ModifyTime, + } +} + func (e *Evaluation) Copy() *Evaluation { if e == nil { return nil diff --git a/website/content/api-docs/evaluations.mdx b/website/content/api-docs/evaluations.mdx index 6cb2f34d4..bd501342e 100644 --- a/website/content/api-docs/evaluations.mdx +++ b/website/content/api-docs/evaluations.mdx @@ -129,41 +129,78 @@ The table below shows this endpoint's support for must be the full UUID, not the short 8-character one. This is specified as part of the path. +- `related` `(bool: false)` - Specifies if related evaluations should be + returned. Related evaluations are the ones that can be reached by following + the trail of IDs for `NextEval`, `PreviousEval`, and `BlockedEval`. This is + specified as a query parameter. + ### Sample Request ```shell-session $ curl \ - https://localhost:4646/v1/evaluation/5456bd7a-9fc0-c0dd-6131-cbee77f57577 + https://localhost:4646/v1/evaluation/2deb5f06-a100-f01a-3316-5e501a4965e7?related=true ``` ### Sample Response ```json { - "ID": "5456bd7a-9fc0-c0dd-6131-cbee77f57577", - "Priority": 50, - "Type": "service", - "TriggeredBy": "job-register", - "JobID": "example", - "JobModifyIndex": 52, - "NodeID": "", - "NodeModifyIndex": 0, - "Status": "complete", - "StatusDescription": "", - "Wait": 0, - "NextEval": "", - "PreviousEval": "", - "BlockedEval": "", - "FailedTGAllocs": null, - "ClassEligibility": null, - "EscapedComputedClass": false, - "AnnotatePlan": false, - "SnapshotIndex": 53, - "QueuedAllocations": { - "cache": 0 + "CreateIndex": 28, + "CreateTime": 1647394818583344000, + "FailedTGAllocs": { + "cache": { + "AllocationTime": 4111, + "ClassExhausted": null, + "ClassFiltered": null, + "CoalescedFailures": 0, + "ConstraintFiltered": null, + "DimensionExhausted": null, + "NodesAvailable": { + "dc1": 0 + }, + "NodesEvaluated": 0, + "NodesExhausted": 0, + "NodesFiltered": 0, + "QuotaExhausted": null, + "ResourcesExhausted": null, + "ScoreMetaData": null, + "Scores": null + } }, - "CreateIndex": 53, - "ModifyIndex": 55 + "ID": "2deb5f06-a100-f01a-3316-5e501a4965e7", + "JobID": "example", + "ModifyIndex": 28, + "ModifyTime": 1647394818583344000, + "Namespace": "default", + "PreviousEval": "0f98f7ea-59ae-4d90-d9bd-b8ce80b9e100", + "Priority": 50, + "RelatedEvals": [ + { + "BlockedEval": "2deb5f06-a100-f01a-3316-5e501a4965e7", + "CreateIndex": 27, + "CreateTime": 1647394818582736000, + "DeploymentID": "79ae0a49-acf6-0fcf-183f-8646f3167b88", + "ID": "0f98f7ea-59ae-4d90-d9bd-b8ce80b9e100", + "JobID": "example", + "ModifyIndex": 30, + "ModifyTime": 1647394818583565000, + "Namespace": "default", + "NextEval": "", + "NodeID": "", + "PreviousEval": "", + "Priority": 50, + "Status": "complete", + "StatusDescription": "", + "TriggeredBy": "node-drain", + "Type": "service", + "WaitUntil": null + } + ], + "SnapshotIndex": 27, + "Status": "blocked", + "StatusDescription": "created to place remaining allocations", + "TriggeredBy": "queued-allocs", + "Type": "service" } ```