diff --git a/.changelog/15117.txt b/.changelog/15117.txt new file mode 100644 index 000000000..a272e5c2c --- /dev/null +++ b/.changelog/15117.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: Improved performance of eval delete with large filter sets +``` diff --git a/api/evaluations.go b/api/evaluations.go index e7b699716..4206ffde6 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -62,6 +62,16 @@ func (e *Evaluations) Delete(evalIDs []string, w *WriteOptions) (*WriteMeta, err return wm, nil } +// DeleteOpts is used to batch delete evaluations using a filter. +func (e *Evaluations) DeleteOpts(req *EvalDeleteRequest, w *WriteOptions) (*EvalDeleteResponse, *WriteMeta, error) { + resp := &EvalDeleteResponse{} + wm, err := e.client.delete("/v1/evaluations", &req, resp, w) + if err != nil { + return nil, nil, err + } + return resp, wm, nil +} + // Allocations is used to retrieve a set of allocations given // an evaluation ID. func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*AllocationListStub, *QueryMeta, error) { @@ -140,9 +150,14 @@ type EvaluationStub struct { type EvalDeleteRequest struct { EvalIDs []string + Filter string WriteRequest } +type EvalDeleteResponse struct { + Count int +} + type EvalCountResponse struct { Count int QueryMeta diff --git a/command/agent/eval_endpoint.go b/command/agent/eval_endpoint.go index da7fc6c2a..e52285a0c 100644 --- a/command/agent/eval_endpoint.go +++ b/command/agent/eval_endpoint.go @@ -56,13 +56,19 @@ func (s *HTTPServer) evalsDeleteRequest(resp http.ResponseWriter, req *http.Requ numIDs := len(args.EvalIDs) - // Ensure the number of evaluation IDs included in the request is within - // bounds. 
- if numIDs < 1 { - return nil, CodedError(http.StatusBadRequest, "request does not include any evaluation IDs") - } else if numIDs > structs.MaxUUIDsPerWriteRequest { + if args.Filter != "" && numIDs > 0 { + return nil, CodedError(http.StatusBadRequest, + "evals cannot be deleted by both ID and filter") + } + if args.Filter == "" && numIDs == 0 { + return nil, CodedError(http.StatusBadRequest, + "evals must be deleted by either ID or filter") + } + + // If an explicit list of evaluation IDs is sent, ensure it's within bounds + if numIDs > structs.MaxUUIDsPerWriteRequest { return nil, CodedError(http.StatusBadRequest, fmt.Sprintf( - "request includes %v evaluations IDs, must be %v or fewer", + "request includes %v evaluation IDs, must be %v or fewer", numIDs, structs.MaxUUIDsPerWriteRequest)) } @@ -73,8 +79,9 @@ func (s *HTTPServer) evalsDeleteRequest(resp http.ResponseWriter, req *http.Requ if err := s.agent.RPC(structs.EvalDeleteRPCMethod, &args, &reply); err != nil { return nil, err } + setIndex(resp, reply.Index) - return nil, nil + return reply, nil } func (s *HTTPServer) EvalSpecificRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) { diff --git a/command/agent/eval_endpoint_test.go b/command/agent/eval_endpoint_test.go index 68614567c..8891506d6 100644 --- a/command/agent/eval_endpoint_test.go +++ b/command/agent/eval_endpoint_test.go @@ -138,7 +138,7 @@ func TestHTTP_EvalsDelete(t *testing.T) { // Make the request and check the response. 
obj, err := s.Server.EvalsRequest(respW, req) require.Equal(t, - CodedError(http.StatusBadRequest, "request does not include any evaluation IDs"), err) + CodedError(http.StatusBadRequest, "evals must be deleted by either ID or filter"), err) require.Nil(t, obj) }) }, @@ -169,7 +169,7 @@ func TestHTTP_EvalsDelete(t *testing.T) { obj, err := s.Server.EvalsRequest(respW, req) require.Equal(t, CodedError(http.StatusBadRequest, - "request includes 8000 evaluations IDs, must be 7281 or fewer"), err) + "request includes 8000 evaluation IDs, must be 7281 or fewer"), err) require.Nil(t, obj) }) }, @@ -223,8 +223,10 @@ func TestHTTP_EvalsDelete(t *testing.T) { // Make the request and check the response. obj, err := s.Server.EvalsRequest(respW, req) - require.Nil(t, err) - require.Nil(t, obj) + require.NoError(t, err) + require.NotNil(t, obj) + deleteResp := obj.(structs.EvalDeleteResponse) + require.Equal(t, deleteResp.Count, 1) // Ensure the eval is not found. readEval, err := s.Agent.server.State().EvalByID(nil, mockEval.ID) diff --git a/command/eval_delete.go b/command/eval_delete.go index 7609c2b10..45c638b25 100644 --- a/command/eval_delete.go +++ b/command/eval_delete.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "strings" - "time" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" @@ -58,7 +57,9 @@ Eval Delete Options: -filter Specifies an expression used to filter evaluations by for deletion. When using this flag, it is advisable to ensure the syntax is correct using the - eval list command first. + eval list command first. Note that deleting evals by filter is imprecise: + for sets of evals larger than a single raft log batch, evals can be inserted + behind the cursor and therefore be missed. -yes Bypass the confirmation prompt if an evaluation ID was not provided. 
@@ -148,32 +149,7 @@ func (e *EvalDeleteCommand) Run(args []string) int { e.deleteByArg = true exitCode, err = e.handleEvalArgDelete(args[0]) default: - - // Track the next token, so we can iterate all pages that match the - // passed filter. - var nextToken string - - // It is possible the filter matches a large number of evaluations - // which means we need to run a number of batch deletes. Perform - // iteration here rather than recursion in later function, so we avoid - // any potential issues with stack size limits. - for { - exitCode, nextToken, err = e.handleFlagFilterDelete(nextToken) - - // If there is another page of evaluations matching the filter, - // iterate the loop and delete the next batch of evals. We pause - // for a 500ms rather than just run as fast as the code and machine - // possibly can. This means deleting 13million evals will take - // roughly 13-15 mins, which seems reasonable. It is worth noting, - // we do not expect operators to delete this many evals in a single - // run and expect more careful filtering options to be used. - if nextToken != "" { - time.Sleep(500 * time.Millisecond) - continue - } else { - break - } - } + exitCode, err = e.handleDeleteByFilter(e.filter) } // Do not exit if we got an error as it's possible this was on the @@ -228,93 +204,6 @@ func (e *EvalDeleteCommand) handleEvalArgDelete(evalID string) (int, error) { return code, err } -// handleFlagFilterDelete handles deletion of evaluations discovered using -// the filter. It is unknown how many will match the operator criteria so -// this function batches lookup and delete requests into sensible numbers. -func (e *EvalDeleteCommand) handleFlagFilterDelete(nt string) (int, string, error) { - - evalsToDelete, nextToken, err := e.batchLookupEvals(nt) - if err != nil { - return 1, "", err - } - - numEvalsToDelete := len(evalsToDelete) - - // The filter flags are operator controlled, therefore ensure we - // actually found some evals to delete. 
Otherwise, inform the operator - // their flags are potentially incorrect. - if numEvalsToDelete == 0 { - if e.numDeleted > 0 { - return 0, "", nil - } else { - return 1, "", errors.New("failed to find any evals that matched filter criteria") - } - } - - if code, actioned, err := e.batchDelete(evalsToDelete); err != nil { - return code, "", err - } else if !actioned { - return code, "", nil - } - - e.Ui.Info(fmt.Sprintf("Successfully deleted batch of %v %s", - numEvalsToDelete, correctGrammar("evaluation", numEvalsToDelete))) - - return 0, nextToken, nil -} - -// batchLookupEvals handles batched lookup of evaluations using the operator -// provided filter. The lookup is performed a maximum number of 3 times to -// ensure their size is limited and the number of evals to delete doesn't exceed -// the total allowable in a single call. -// -// The JSON serialized evaluation API object is 350-380B in size. -// 2426 * 380B (3.8e-4 MB) = 0.92MB. We may want to make this configurable -// in the future, but this is counteracted by the CLI logic which will loop -// until the user tells it to exit, or all evals matching the filter are -// deleted. 2426 * 3 falls below the maximum limit for eval IDs in a single -// delete request (set by MaxEvalIDsPerDeleteRequest). -func (e *EvalDeleteCommand) batchLookupEvals(nextToken string) ([]*api.Evaluation, string, error) { - - var evalsToDelete []*api.Evaluation - currentNextToken := nextToken - - // Call List 3 times to accumulate the maximum number if eval IDs supported - // in a single Delete request. See math above. - for i := 0; i < 3; i++ { - - // Generate the query options using the passed next token and filter. The - // per page value is less than the total number we can include in a single - // delete request. This keeps the maximum size of the return object at a - // reasonable size. 
- opts := api.QueryOptions{ - Filter: e.filter, - PerPage: 2426, - NextToken: currentNextToken, - } - - evalList, meta, err := e.client.Evaluations().List(&opts) - if err != nil { - return nil, "", err - } - - if len(evalList) > 0 { - evalsToDelete = append(evalsToDelete, evalList...) - } - - // Store the next token no matter if it is empty or populated. - currentNextToken = meta.NextToken - - // If there is no next token, ensure we exit and avoid any new loops - // which will result in duplicate IDs. - if currentNextToken == "" { - break - } - } - - return evalsToDelete, currentNextToken, nil -} - // batchDelete is responsible for deleting the passed evaluations and asking // any confirmation questions along the way. It will ask whether the operator // want to list the evals before deletion, and optionally ask for confirmation @@ -404,3 +293,38 @@ func correctGrammar(word string, num int) string { } return word } + +func (e *EvalDeleteCommand) handleDeleteByFilter(filterExpr string) (int, error) { + + // If the user did not wish to bypass the confirmation step, ask this now + // and handle the response. + if !e.yes && !e.deleteByArg { + + resp, _, err := e.client.Evaluations().Count(&api.QueryOptions{ + Filter: filterExpr, + }) + if err != nil { + return 1, err + } + + code, deleteEvals := e.askQuestion(fmt.Sprintf( + "Are you sure you want to delete %d evals? 
[y/N]", + resp.Count), "Cancelling eval deletion") + e.Ui.Output("") + + if !deleteEvals { + return code, nil + } + } + + resp, _, err := e.client.Evaluations().DeleteOpts(&api.EvalDeleteRequest{ + Filter: filterExpr, + }, nil) + if err != nil { + return 1, err + } + e.numDeleted = resp.Count + + return 0, nil + +} diff --git a/nomad/eval_endpoint.go b/nomad/eval_endpoint.go index bedaf45a8..ed5dc15ad 100644 --- a/nomad/eval_endpoint.go +++ b/nomad/eval_endpoint.go @@ -11,6 +11,7 @@ import ( log "github.com/hashicorp/go-hclog" memdb "github.com/hashicorp/go-memdb" multierror "github.com/hashicorp/go-multierror" + version "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/nomad/state" @@ -24,6 +25,8 @@ const ( DefaultDequeueTimeout = time.Second ) +var minVersionEvalDeleteByFilter = version.Must(version.NewVersion("1.4.3")) + // Eval endpoint is used for eval interactions type Eval struct { srv *Server @@ -438,12 +441,37 @@ func (e *Eval) Delete( return structs.ErrPermissionDenied } + if args.Filter != "" && !ServersMeetMinimumVersion( + e.srv.Members(), e.srv.Region(), minVersionEvalDeleteByFilter, true) { + return fmt.Errorf( + "all servers must be running version %v or later to delete evals by filter", + minVersionEvalDeleteByFilter) + } + if args.Filter != "" && len(args.EvalIDs) > 0 { + return fmt.Errorf("evals cannot be deleted by both ID and filter") + } + if args.Filter == "" && len(args.EvalIDs) == 0 { + return fmt.Errorf("evals must be deleted by either ID or filter") + } + // The eval broker must be disabled otherwise Nomad's state will likely get // wild in a very un-fun way. if e.srv.evalBroker.Enabled() { return errors.New("eval broker is enabled; eval broker must be paused to delete evals") } + if args.Filter != "" { + count, index, err := e.deleteEvalsByFilter(args) + if err != nil { + return err + } + + // Update the index and return. 
+ reply.Index = index + reply.Count = count + return nil + } + // Grab the state snapshot, so we can look up relevant eval information. serverStateSnapshot, err := e.srv.State().Snapshot() if err != nil { @@ -451,6 +479,8 @@ func (e *Eval) Delete( } ws := memdb.NewWatchSet() + count := 0 + // Iterate the evaluations and ensure they are safe to delete. It is // possible passed evals are not safe to delete and would make Nomads state // a little wonky. The nature of the RPC return error, means a single @@ -471,6 +501,7 @@ func (e *Eval) Delete( if !ok { return fmt.Errorf("eval %s is not safe to delete", evalInfo.ID) } + count++ } // Generate the Raft request object using the reap request object. This @@ -490,9 +521,97 @@ func (e *Eval) Delete( // Update the index and return. reply.Index = index + reply.Count = count return nil } +// deleteEvalsByFilter deletes evaluations in batches based on the filter. It +// returns a count, the index, and any error +func (e *Eval) deleteEvalsByFilter(args *structs.EvalDeleteRequest) (int, uint64, error) { + count := 0 + index := uint64(0) + + filter, err := bexpr.CreateEvaluator(args.Filter) + if err != nil { + return count, index, err + } + + // Note that deleting evals by filter is imprecise: For sets of evals larger + // than a single batch eval inserts may occur behind the cursor and therefore + // be missed. This imprecision is not considered to hurt this endpoint's + // purpose of reducing pressure on servers during periods of heavy scheduling + // activity. 
+ snap, err := e.srv.State().Snapshot() + if err != nil { + return count, index, fmt.Errorf("failed to lookup state snapshot: %v", err) + } + + iter, err := snap.Evals(nil, state.SortDefault) + if err != nil { + return count, index, err + } + + // We *can* send larger raft logs but rough benchmarks for deleting 1M evals + // show that a smaller page size strikes a balance between throughput and + // time we block the FSM apply for other operations + perPage := structs.MaxUUIDsPerWriteRequest / 10 + + raftReq := structs.EvalReapRequest{ + Filter: args.Filter, + PerPage: int32(perPage), + UserInitiated: true, + WriteRequest: args.WriteRequest, + } + + // Note: Paginator is designed around fetching a single page for a single + // RPC call and finalizes its state after that page. So we're doing our own + // pagination here. + pageCount := 0 + lastToken := "" + + for { + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + deleteOk, err := snap.EvalIsUserDeleteSafe(nil, eval) + if !deleteOk || err != nil { + continue + } + match, err := filter.Evaluate(eval) + if !match || err != nil { + continue + } + pageCount++ + lastToken = eval.ID + + if pageCount >= perPage { + raftReq.PerPage = int32(pageCount) + _, index, err = e.srv.raftApply(structs.EvalDeleteRequestType, &raftReq) + if err != nil { + return count, index, err + } + count += pageCount + + pageCount = 0 + raftReq.NextToken = lastToken + } + } + + // send last batch if it's partial + if pageCount > 0 { + raftReq.PerPage = int32(pageCount) + _, index, err = e.srv.raftApply(structs.EvalDeleteRequestType, &raftReq) + if err != nil { + return count, index, err + } + count += pageCount + } + + return count, index, nil +} + // List is used to get a list of the evaluations in the system func (e *Eval) List(args *structs.EvalListRequest, reply *structs.EvalListResponse) error { if done, err := e.srv.forward("Eval.List", args, args, reply); done { diff --git a/nomad/eval_endpoint_test.go 
b/nomad/eval_endpoint_test.go index 18ac753ae..91eff63f7 100644 --- a/nomad/eval_endpoint_test.go +++ b/nomad/eval_endpoint_test.go @@ -8,6 +8,7 @@ import ( "time" memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/go-set" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" @@ -864,6 +865,94 @@ func TestEvalEndpoint_Delete(t *testing.T) { must.EqError(t, err, structs.ErrPermissionDenied.Error()) }) + t.Run("successful delete by filter", func(t *testing.T) { + + testServer, rootToken, cleanup := setup(t) + defer cleanup() + codec := rpcClient(t, testServer) + + // Ensure broker is disabled + setBrokerEnabled(t, testServer, false) + + evalCount := 10000 + index := uint64(100) + + store := testServer.fsm.State() + + // Create a large set of pending evaluations + + evals := []*structs.Evaluation{} + for i := 0; i < evalCount; i++ { + mockEval := mock.Eval() + evals = append(evals, mockEval) + } + must.NoError(t, store.UpsertEvals( + structs.MsgTypeTestSetup, index, evals)) + + // Create some evaluations we don't want to delete + + evalsToKeep := []*structs.Evaluation{} + for i := 0; i < 3; i++ { + mockEval := mock.Eval() + mockEval.JobID = "keepme" + evalsToKeep = append(evalsToKeep, mockEval) + } + index++ + must.NoError(t, store.UpsertEvals( + structs.MsgTypeTestSetup, index, evalsToKeep)) + + // Create a job with running allocs and evaluations those allocs reference + + job := mock.Job() + job.ID = "notsafetodelete" + job.Status = structs.JobStatusRunning + index++ + must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, job)) + + evalsNotSafeToDelete := []*structs.Evaluation{} + for i := 0; i < 3; i++ { + mockEval := mock.Eval() + mockEval.JobID = job.ID + evalsNotSafeToDelete = append(evalsNotSafeToDelete, mockEval) + } + index++ + must.NoError(t, store.UpsertEvals( + structs.MsgTypeTestSetup, index, evalsNotSafeToDelete)) + + allocs := []*structs.Allocation{} + for i := 0; i < 
3; i++ { + alloc := mock.Alloc() + alloc.ClientStatus = structs.AllocClientStatusRunning + alloc.EvalID = evalsNotSafeToDelete[i].ID + allocs = append(allocs, alloc) + } + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs)) + + // Delete all the unwanted evals + + get := &structs.EvalDeleteRequest{ + Filter: "JobID != \"keepme\"", + WriteRequest: structs.WriteRequest{AuthToken: rootToken.SecretID, Region: "global"}, + } + var resp structs.EvalDeleteResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, structs.EvalDeleteRPCMethod, get, &resp)) + must.Eq(t, resp.Count, evalCount) + + // Assert we didn't delete the filtered evals + gotKeptEvals, err := store.EvalsByJob(nil, job.Namespace, "keepme") + must.NoError(t, err) + must.Len(t, 3, gotKeptEvals) + must.Eq(t, set.From(evalsToKeep), set.From(gotKeptEvals)) + + // Assert we didn't delete the evals that were not safe to delete + gotNotSafeEvals, err := store.EvalsByJob(nil, job.Namespace, "notsafetodelete") + must.NoError(t, err) + must.Len(t, 3, gotNotSafeEvals) + must.Eq(t, set.From(evalsNotSafeToDelete), set.From(gotNotSafeEvals)) + + }) + } func TestEvalEndpoint_List(t *testing.T) { diff --git a/nomad/fsm.go b/nomad/fsm.go index 2fa59c043..f7d17bf1a 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -804,6 +804,14 @@ func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } + if req.Filter != "" { + if err := n.state.DeleteEvalsByFilter(index, req.Filter, req.NextToken, req.PerPage); err != nil { + n.logger.Error("DeleteEvalsByFilter failed", "error", err) + return err + } + return nil + } + if err := n.state.DeleteEval(index, req.Evals, req.Allocs, req.UserInitiated); err != nil { n.logger.Error("DeleteEval failed", "error", err) return err diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 9540346f5..8101c7483 100644 --- a/nomad/state/state_store.go +++ 
b/nomad/state/state_store.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/hashicorp/go-bexpr" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" "github.com/hashicorp/go-multierror" @@ -3143,6 +3144,66 @@ func (s *StateStore) updateEvalModifyIndex(txn *txn, index uint64, evalID string return nil } +// DeleteEvalsByFilter is used to delete all evals that are both safe to delete +// and match a filter. +func (s *StateStore) DeleteEvalsByFilter(index uint64, filterExpr string, pageToken string, perPage int32) error { + txn := s.db.WriteTxn(index) + defer txn.Abort() + + // These are always user-initiated, so ensure the eval broker is paused. + _, schedConfig, err := s.schedulerConfigTxn(txn) + if err != nil { + return err + } + if schedConfig == nil || !schedConfig.PauseEvalBroker { + return errors.New("eval broker is enabled; eval broker must be paused to delete evals") + } + + filter, err := bexpr.CreateEvaluator(filterExpr) + if err != nil { + return err + } + + iter, err := s.Evals(nil, SortDefault) + if err != nil { + return fmt.Errorf("failed to lookup evals: %v", err) + } + + // Note: Paginator imports this package for testing so we can't just use + // Paginator + pageCount := int32(0) + + for { + if pageCount >= perPage { + break + } + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + if eval.ID < pageToken { + continue + } + + deleteOk, err := s.EvalIsUserDeleteSafe(nil, eval) + if !deleteOk || err != nil { + continue + } + match, err := filter.Evaluate(eval) + if !match || err != nil { + continue + } + if err := txn.Delete("evals", eval); err != nil { + return fmt.Errorf("eval delete failed: %v", err) + } + pageCount++ + } + + err = txn.Commit() + return err +} + // EvalIsUserDeleteSafe ensures an evaluation is safe to delete based on its // related allocation and job information. 
This follows similar, but different // rules to the eval reap checking, to ensure evaluations for running allocs or diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index b5a628d22..1f1030ab1 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -4498,6 +4499,67 @@ func TestStateStore_DeleteEval_UserInitiated(t *testing.T) { require.Nil(t, mockEval2Lookup) } +// TestStateStore_DeleteEvalsByFilter_Pagination tests the pagination logic for +// deleting evals by filter; the business logic is tested more fully in the eval +// endpoint tests. +func TestStateStore_DeleteEvalsByFilter_Pagination(t *testing.T) { + + evalCount := 100 + index := uint64(100) + + store := testStateStore(t) + + // Create a set of pending evaluations + + schedulerConfig := &structs.SchedulerConfiguration{ + PauseEvalBroker: true, + CreateIndex: index, + ModifyIndex: index, + } + must.NoError(t, store.SchedulerSetConfig(index, schedulerConfig)) + + evals := []*structs.Evaluation{} + for i := 0; i < evalCount; i++ { + mockEval := mock.Eval() + evals = append(evals, mockEval) + } + index++ + must.NoError(t, store.UpsertEvals( + structs.MsgTypeTestSetup, index, evals)) + + // Delete one page + index++ + must.NoError(t, store.DeleteEvalsByFilter(index, "JobID != \"\"", "", 10)) + + countRemaining := func() (string, int) { + lastSeen := "" + remaining := 0 + + iter, err := store.Evals(nil, SortDefault) + must.NoError(t, err) + for { + raw := iter.Next() + if raw == nil { + break + } + eval := raw.(*structs.Evaluation) + lastSeen = eval.ID + remaining++ + } + return lastSeen, remaining + } + + lastSeen, remaining := countRemaining() + must.Eq(t, 90, remaining) + + // Delete starting from lastSeen, which 
should only delete 1 + index++ + must.NoError(t, store.DeleteEvalsByFilter(index, "JobID != \"\"", lastSeen, 10)) + + _, remaining = countRemaining() + must.Eq(t, 89, remaining) +} + func TestStateStore_EvalIsUserDeleteSafe(t *testing.T) { ci.Parallel(t) diff --git a/nomad/structs/eval.go b/nomad/structs/eval.go index cb74a3e3e..a690ae304 100644 --- a/nomad/structs/eval.go +++ b/nomad/structs/eval.go @@ -14,11 +14,17 @@ const ( // not be greater than MaxEvalIDsPerDeleteRequest. type EvalDeleteRequest struct { EvalIDs []string + + // Filter specifies the go-bexpr filter expression to be used for deleting a + // set of evaluations that matches the filter + Filter string + WriteRequest } // EvalDeleteResponse is the response object when one or more evaluation are // deleted manually by an operator. type EvalDeleteResponse struct { + Count int // how many Evaluations were safe to delete and/or matched the filter WriteMeta } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f79ff996c..dc3d83b1e 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -849,8 +849,14 @@ type EvalUpdateRequest struct { // Eval.Delete use the same Raft message when performing deletes so we do not // need more Raft message types. type EvalReapRequest struct { - Evals []string - Allocs []string + Evals []string // slice of Evaluation IDs + Allocs []string // slice of Allocation IDs + + // Filter specifies the go-bexpr filter expression to be used for + // filtering the data prior to returning a response + Filter string + PerPage int32 + NextToken string // UserInitiated tracks whether this reap request is the result of an // operator request. 
If this is true, the FSM needs to ensure the eval diff --git a/website/content/docs/commands/eval/delete.mdx b/website/content/docs/commands/eval/delete.mdx index 4939c12f8..4ce92e3fa 100644 --- a/website/content/docs/commands/eval/delete.mdx +++ b/website/content/docs/commands/eval/delete.mdx @@ -36,7 +36,10 @@ When ACLs are enabled, this command requires a `management` token. ## Delete Options - `-filter`: Specifies an expression used to filter evaluations by for - deletion. + deletion. When using this flag, it is advisable to ensure the syntax is + correct using the eval list command first. Note that deleting evals by filter + is imprecise: for sets of evals larger than a single raft log batch, evals can + be inserted behind the cursor and therefore be missed. - `-yes`: Bypass the confirmation prompt if an evaluation ID was not provided. @@ -53,13 +56,6 @@ Delete all evaluations with status `pending` for the `example` job: ```shell-session $ nomad eval delete -filter='Status == "pending" and JobID == "example"' -Do you want to list evals (3) before deletion? [y/N] y - -ID Priority Triggered By Job ID Namespace Node ID Status Placement Failures -cef92121 50 job-register example default pending false -1c905ca0 50 job-register example default pending false -b9e77692 50 job-register example default pending false - Are you sure you want to delete 3 evals? [y/N] y Successfully deleted 3 evaluations