2023-04-10 15:36:59 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2015-07-23 23:00:19 +00:00
|
|
|
package nomad
|
|
|
|
|
|
|
|
import (
|
2022-07-06 14:30:11 +00:00
|
|
|
"errors"
|
2015-07-24 04:58:51 +00:00
|
|
|
"fmt"
|
2022-02-16 16:40:30 +00:00
|
|
|
"net/http"
|
2015-07-23 23:00:19 +00:00
|
|
|
"time"
|
|
|
|
|
2022-12-01 15:05:15 +00:00
|
|
|
"github.com/armon/go-metrics"
|
2022-11-07 13:53:19 +00:00
|
|
|
"github.com/hashicorp/go-bexpr"
|
2022-12-01 15:05:15 +00:00
|
|
|
"github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/go-memdb"
|
|
|
|
"github.com/hashicorp/go-multierror"
|
|
|
|
"github.com/hashicorp/go-version"
|
2018-09-15 23:23:13 +00:00
|
|
|
|
2017-10-02 22:49:20 +00:00
|
|
|
"github.com/hashicorp/nomad/acl"
|
2017-02-08 04:31:23 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/state"
|
2022-03-09 01:54:17 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/state/paginator"
|
2015-07-23 23:00:19 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
2016-10-26 21:52:48 +00:00
|
|
|
"github.com/hashicorp/nomad/scheduler"
|
2015-07-23 23:00:19 +00:00
|
|
|
)
|
|
|
|
|
2015-07-24 04:58:51 +00:00
|
|
|
// DefaultDequeueTimeout is the dequeue timeout applied when a request does
// not supply a positive one.
const DefaultDequeueTimeout = time.Second
|
|
|
|
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
// minVersionEvalDeleteByFilter is the minimum version all servers must be
// running before Eval.Delete accepts a request that deletes evals by filter.
var minVersionEvalDeleteByFilter = version.Must(version.NewVersion("1.4.3"))
|
|
|
|
|
2015-07-23 23:00:19 +00:00
|
|
|
// Eval is the RPC endpoint used for eval interactions. Its methods are the
// handlers for the "Eval.*" RPCs such as Eval.GetEval and Eval.Dequeue.
|
|
|
|
type Eval struct {
|
2018-09-15 23:23:13 +00:00
|
|
|
// srv is the server whose state store, eval broker and raft apply path the
// handlers operate on.
srv *Server
|
2022-12-01 15:05:15 +00:00
|
|
|
// ctx is the RPC context handed to Authenticate and to the TLS certificate
// level validation at the start of each handler.
ctx *RPCContext
|
|
|
|
// logger is the endpoint logger; NewEvalEndpoint derives it from the server
// logger under the name "eval".
logger hclog.Logger
|
|
|
|
}
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2022-12-01 15:05:15 +00:00
|
|
|
func NewEvalEndpoint(srv *Server, ctx *RPCContext) *Eval {
|
|
|
|
return &Eval{srv: srv, ctx: ctx, logger: srv.logger.Named("eval")}
|
2015-07-23 23:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// GetEval is used to request information about a specific evaluation
|
|
|
|
func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
|
|
|
|
reply *structs.SingleEvalResponse) error {
|
2023-01-25 19:33:06 +00:00
|
|
|
|
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
2015-07-23 23:00:19 +00:00
|
|
|
if done, err := e.srv.forward("Eval.GetEval", args, args, reply); done {
|
|
|
|
return err
|
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricRead, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2015-07-23 23:00:19 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now())
|
|
|
|
|
2019-10-01 20:06:24 +00:00
|
|
|
// Check for read-job permissions before performing blocking query.
|
|
|
|
allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob)
|
2023-01-25 19:33:06 +00:00
|
|
|
aclObj, err := e.srv.ResolveACL(args)
|
2019-10-01 20:06:24 +00:00
|
|
|
if err != nil {
|
2017-10-02 22:49:20 +00:00
|
|
|
return err
|
2019-10-01 20:06:24 +00:00
|
|
|
} else if !allowNsOp(aclObj, args.RequestNamespace()) {
|
2017-10-02 22:49:20 +00:00
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
2015-10-29 23:12:25 +00:00
|
|
|
// Setup the blocking query
|
|
|
|
opts := blockingOptions{
|
|
|
|
queryOpts: &args.QueryOptions,
|
|
|
|
queryMeta: &reply.QueryMeta,
|
2017-02-08 04:31:23 +00:00
|
|
|
run: func(ws memdb.WatchSet, state *state.StateStore) error {
|
2022-03-17 17:56:14 +00:00
|
|
|
var related []*structs.EvaluationStub
|
|
|
|
|
|
|
|
// Look for the eval
|
|
|
|
eval, err := state.EvalByID(ws, args.EvalID)
|
2015-12-22 22:44:33 +00:00
|
|
|
if err != nil {
|
2022-03-17 17:56:14 +00:00
|
|
|
return fmt.Errorf("failed to lookup eval: %v", err)
|
2015-10-29 23:12:25 +00:00
|
|
|
}
|
2015-07-23 23:00:19 +00:00
|
|
|
|
2022-03-17 17:56:14 +00:00
|
|
|
if eval != nil {
|
2019-10-01 20:06:24 +00:00
|
|
|
// Re-check namespace in case it differs from request.
|
2022-03-17 17:56:14 +00:00
|
|
|
if !allowNsOp(aclObj, eval.Namespace) {
|
2019-10-01 20:06:24 +00:00
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
2022-03-17 17:56:14 +00:00
|
|
|
// Lookup related evals if requested.
|
|
|
|
if args.IncludeRelated {
|
|
|
|
related, err = state.EvalsRelatedToID(ws, eval.ID)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to lookup related evals: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use a copy to avoid modifying the original eval.
|
|
|
|
eval = eval.Copy()
|
|
|
|
eval.RelatedEvals = related
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Setup the output.
|
|
|
|
reply.Eval = eval
|
|
|
|
if eval != nil {
|
|
|
|
reply.Index = eval.ModifyIndex
|
2015-10-29 23:12:25 +00:00
|
|
|
} else {
|
2022-03-17 17:56:14 +00:00
|
|
|
// Use the last index that affected the evals table
|
2017-02-08 04:31:23 +00:00
|
|
|
index, err := state.Index("evals")
|
2015-10-29 23:12:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
reply.Index = index
|
|
|
|
}
|
2015-07-23 23:00:19 +00:00
|
|
|
|
2015-10-29 23:12:25 +00:00
|
|
|
// Set the query response
|
|
|
|
e.srv.setQueryMeta(&reply.QueryMeta)
|
|
|
|
return nil
|
|
|
|
}}
|
|
|
|
return e.srv.blockingRPC(&opts)
|
2015-07-23 23:00:19 +00:00
|
|
|
}
|
2015-07-24 04:58:51 +00:00
|
|
|
|
|
|
|
// Dequeue is used to dequeue a pending evaluation
|
|
|
|
func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
|
2015-08-12 22:25:31 +00:00
|
|
|
reply *structs.EvalDequeueResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-24 15:52:07 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-07-24 04:58:51 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-07-24 04:58:51 +00:00
|
|
|
// Ensure there is at least one scheduler
|
|
|
|
if len(args.Schedulers) == 0 {
|
|
|
|
return fmt.Errorf("dequeue requires at least one scheduler type")
|
|
|
|
}
|
|
|
|
|
2016-10-26 21:52:48 +00:00
|
|
|
// Check that there isn't a scheduler version mismatch
|
|
|
|
if args.SchedulerVersion != scheduler.SchedulerVersion {
|
|
|
|
return fmt.Errorf("dequeue disallowed: calling scheduler version is %d; leader version is %d",
|
|
|
|
args.SchedulerVersion, scheduler.SchedulerVersion)
|
|
|
|
}
|
|
|
|
|
2015-07-24 04:58:51 +00:00
|
|
|
// Ensure there is a default timeout
|
|
|
|
if args.Timeout <= 0 {
|
|
|
|
args.Timeout = DefaultDequeueTimeout
|
|
|
|
}
|
|
|
|
|
2022-07-06 14:13:48 +00:00
|
|
|
// If the eval broker is paused, attempt to block and wait for a state
|
|
|
|
// change before returning. This avoids a tight loop and mimics the
|
|
|
|
// behaviour where there are no evals to process.
|
|
|
|
//
|
|
|
|
// The call can return because either the timeout is reached or the broker
|
|
|
|
// SetEnabled function was called to modify its state. It is possible this
|
|
|
|
// is because of leadership transition, therefore the RPC should exit to
|
|
|
|
// allow all safety checks and RPC forwarding to occur again.
|
|
|
|
//
|
|
|
|
// The log line is trace, because the default worker timeout is 500ms which
|
|
|
|
// produces a large amount of logging.
|
|
|
|
if !e.srv.evalBroker.Enabled() {
|
|
|
|
message := e.srv.evalBroker.enabledNotifier.WaitForChange(args.Timeout)
|
|
|
|
e.logger.Trace("eval broker wait for un-pause", "message", message)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-07-24 04:58:51 +00:00
|
|
|
// Attempt the dequeue
|
2015-08-12 22:25:31 +00:00
|
|
|
eval, token, err := e.srv.evalBroker.Dequeue(args.Schedulers, args.Timeout)
|
2015-07-24 04:58:51 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Provide the output if any
|
|
|
|
if eval != nil {
|
2017-09-13 20:47:01 +00:00
|
|
|
// Get the index that the worker should wait until before scheduling.
|
2019-03-01 23:23:39 +00:00
|
|
|
waitIndex, err := e.getWaitIndex(eval.Namespace, eval.JobID, eval.ModifyIndex)
|
2017-09-13 20:47:01 +00:00
|
|
|
if err != nil {
|
2017-09-14 21:27:00 +00:00
|
|
|
var mErr multierror.Error
|
2021-01-14 20:46:35 +00:00
|
|
|
_ = multierror.Append(&mErr, err)
|
2017-09-14 21:27:00 +00:00
|
|
|
|
|
|
|
// We have dequeued the evaluation but won't be returning it to the
|
|
|
|
// worker so Nack the eval.
|
|
|
|
if err := e.srv.evalBroker.Nack(eval.ID, token); err != nil {
|
2021-01-14 20:46:35 +00:00
|
|
|
_ = multierror.Append(&mErr, err)
|
2017-09-14 21:27:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &mErr
|
2017-09-13 20:47:01 +00:00
|
|
|
}
|
|
|
|
|
2015-07-24 04:58:51 +00:00
|
|
|
reply.Eval = eval
|
2015-08-12 22:25:31 +00:00
|
|
|
reply.Token = token
|
2017-09-13 20:47:01 +00:00
|
|
|
reply.WaitIndex = waitIndex
|
2015-07-24 04:58:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Set the query response
|
|
|
|
e.srv.setQueryMeta(&reply.QueryMeta)
|
|
|
|
return nil
|
|
|
|
}
|
2015-07-24 05:11:25 +00:00
|
|
|
|
2017-09-13 20:47:01 +00:00
|
|
|
// getWaitIndex returns the wait index that should be used by the worker before
|
|
|
|
// invoking the scheduler. The index should be the highest modify index of any
|
|
|
|
// evaluation for the job. This prevents scheduling races for the same job when
|
|
|
|
// there are blocked evaluations.
|
2019-03-04 21:44:14 +00:00
|
|
|
func (e *Eval) getWaitIndex(namespace, job string, evalModifyIndex uint64) (uint64, error) {
|
2017-09-13 20:47:01 +00:00
|
|
|
snap, err := e.srv.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
evals, err := snap.EvalsByJob(nil, namespace, job)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2019-03-05 23:19:07 +00:00
|
|
|
// Since dequeueing evals is concurrent with applying Raft messages to
|
2019-03-04 21:44:14 +00:00
|
|
|
// the state store, initialize to the currently dequeued eval's index
|
|
|
|
// in case it isn't in the snapshot used by EvalsByJob yet.
|
|
|
|
max := evalModifyIndex
|
2017-09-13 20:47:01 +00:00
|
|
|
for _, eval := range evals {
|
|
|
|
if max < eval.ModifyIndex {
|
|
|
|
max = eval.ModifyIndex
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return max, nil
|
|
|
|
}
|
|
|
|
|
2015-07-24 05:11:25 +00:00
|
|
|
// Ack is used to acknowledge completion of a dequeued evaluation
|
2015-08-12 22:25:31 +00:00
|
|
|
func (e *Eval) Ack(args *structs.EvalAckRequest,
|
2015-07-24 05:11:25 +00:00
|
|
|
reply *structs.GenericResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-26 20:04:25 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-07-24 05:11:25 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Ack", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-07-24 05:11:25 +00:00
|
|
|
// Ack the EvalID
|
2015-08-12 22:25:31 +00:00
|
|
|
if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
|
2015-07-24 05:11:25 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-11-16 21:10:11 +00:00
|
|
|
|
2022-11-18 13:38:17 +00:00
|
|
|
// Wake up the eval cancelation reaper. This never blocks; if the buffer is
|
|
|
|
// full we know it's going to get picked up by the reaper so we don't need
|
|
|
|
// another send on that channel.
|
|
|
|
select {
|
|
|
|
case e.srv.reapCancelableEvalsCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
2022-11-17 21:40:41 +00:00
|
|
|
return nil
|
2015-07-24 05:11:25 +00:00
|
|
|
}
|
|
|
|
|
2021-08-30 09:08:12 +00:00
|
|
|
// Nack is used to negative acknowledge completion of a dequeued evaluation.
|
2015-08-12 22:25:31 +00:00
|
|
|
func (e *Eval) Nack(args *structs.EvalAckRequest,
|
2015-07-24 05:11:25 +00:00
|
|
|
reply *structs.GenericResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-26 20:04:25 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-07-24 05:11:25 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Nack", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-07-24 05:11:25 +00:00
|
|
|
// Nack the EvalID
|
2015-08-12 22:25:31 +00:00
|
|
|
if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
|
2015-07-24 05:11:25 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2015-08-15 21:16:40 +00:00
|
|
|
|
|
|
|
// Update is used to perform an update of an Eval if it is outstanding.
|
|
|
|
func (e *Eval) Update(args *structs.EvalUpdateRequest,
|
|
|
|
reply *structs.GenericResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-26 20:04:25 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-08-15 21:16:40 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Update", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-08-15 21:16:40 +00:00
|
|
|
// Ensure there is only a single update with token
|
|
|
|
if len(args.Evals) != 1 {
|
|
|
|
return fmt.Errorf("only a single eval can be updated")
|
|
|
|
}
|
|
|
|
eval := args.Evals[0]
|
|
|
|
|
|
|
|
// Verify the evaluation is outstanding, and that the tokens match.
|
2015-10-23 17:22:44 +00:00
|
|
|
if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
|
|
|
|
return err
|
2015-08-15 21:16:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Update via Raft
|
|
|
|
_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index
|
|
|
|
reply.Index = index
|
|
|
|
return nil
|
|
|
|
}
|
2015-08-15 22:42:44 +00:00
|
|
|
|
2015-09-07 21:17:11 +00:00
|
|
|
// Create is used to make a new evaluation
|
|
|
|
func (e *Eval) Create(args *structs.EvalUpdateRequest,
|
|
|
|
reply *structs.GenericResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-26 20:04:25 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-09-07 21:17:11 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Create", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-09-07 21:17:11 +00:00
|
|
|
// Ensure there is only a single update with token
|
|
|
|
if len(args.Evals) != 1 {
|
|
|
|
return fmt.Errorf("only a single eval can be created")
|
|
|
|
}
|
|
|
|
eval := args.Evals[0]
|
|
|
|
|
2015-09-07 21:21:38 +00:00
|
|
|
// Verify the parent evaluation is outstanding, and that the tokens match.
|
2015-10-23 17:22:44 +00:00
|
|
|
if err := e.srv.evalBroker.OutstandingReset(eval.PreviousEval, args.EvalToken); err != nil {
|
|
|
|
return err
|
2015-09-07 21:21:38 +00:00
|
|
|
}
|
|
|
|
|
2015-09-07 21:17:11 +00:00
|
|
|
// Look for the eval
|
|
|
|
snap, err := e.srv.fsm.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-02-08 04:31:23 +00:00
|
|
|
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := snap.EvalByID(ws, eval.ID)
|
2015-09-07 21:17:11 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if out != nil {
|
|
|
|
return fmt.Errorf("evaluation already exists")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update via Raft
|
|
|
|
_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index
|
|
|
|
reply.Index = index
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-20 23:03:53 +00:00
|
|
|
// Reblock is used to reinsert an existing blocked evaluation into the blocked
|
|
|
|
// evaluation tracker.
|
|
|
|
func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error {
|
2023-01-26 20:04:25 +00:00
|
|
|
|
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2016-05-20 23:03:53 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2016-05-20 23:03:53 +00:00
|
|
|
// Ensure there is only a single update with token
|
|
|
|
if len(args.Evals) != 1 {
|
|
|
|
return fmt.Errorf("only a single eval can be reblocked")
|
|
|
|
}
|
|
|
|
eval := args.Evals[0]
|
|
|
|
|
|
|
|
// Verify the evaluation is outstanding, and that the tokens match.
|
|
|
|
if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Look for the eval
|
|
|
|
snap, err := e.srv.fsm.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-02-08 04:31:23 +00:00
|
|
|
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
out, err := snap.EvalByID(ws, eval.ID)
|
2016-05-20 23:03:53 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if out == nil {
|
|
|
|
return fmt.Errorf("evaluation does not exist")
|
|
|
|
}
|
|
|
|
if out.Status != structs.EvalStatusBlocked {
|
|
|
|
return fmt.Errorf("evaluation not blocked")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reblock the eval
|
2016-05-31 18:39:03 +00:00
|
|
|
e.srv.blockedEvals.Reblock(eval, args.EvalToken)
|
2016-05-20 23:03:53 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-15 22:42:44 +00:00
|
|
|
// Reap is used to cleanup dead evaluations and allocations
|
2022-07-06 14:30:11 +00:00
|
|
|
func (e *Eval) Reap(args *structs.EvalReapRequest,
|
2015-08-15 22:42:44 +00:00
|
|
|
reply *structs.GenericResponse) error {
|
2022-02-05 01:35:20 +00:00
|
|
|
|
2023-01-26 20:04:25 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
|
|
|
|
2022-02-05 01:35:20 +00:00
|
|
|
// Ensure the connection was initiated by another server if TLS is used.
|
|
|
|
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
|
|
|
|
if err != nil {
|
2015-08-15 22:42:44 +00:00
|
|
|
return err
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Reap", args, args, reply); done {
|
|
|
|
return err
|
2022-02-02 20:03:18 +00:00
|
|
|
}
|
2023-01-26 20:04:25 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-02-05 01:35:20 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())
|
2022-02-02 20:03:18 +00:00
|
|
|
|
2015-08-15 22:42:44 +00:00
|
|
|
// Update via Raft
|
|
|
|
_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index
|
|
|
|
reply.Index = index
|
|
|
|
return nil
|
|
|
|
}
|
2015-09-06 23:01:16 +00:00
|
|
|
|
2022-07-06 14:30:11 +00:00
|
|
|
// Delete is used by operators to delete evaluations during severe outages. It
|
|
|
|
// differs from Reap while duplicating some behavior to ensure we have the
|
|
|
|
// correct controls for user initiated deletions.
|
|
|
|
func (e *Eval) Delete(
|
|
|
|
args *structs.EvalDeleteRequest,
|
|
|
|
reply *structs.EvalDeleteResponse) error {
|
|
|
|
|
2023-01-25 19:33:06 +00:00
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
2022-07-06 14:30:11 +00:00
|
|
|
if done, err := e.srv.forward(structs.EvalDeleteRPCMethod, args, args, reply); done {
|
|
|
|
return err
|
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricWrite, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-07-06 14:30:11 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "delete"}, time.Now())
|
|
|
|
|
|
|
|
// This RPC endpoint is very destructive and alters Nomad's core state,
|
|
|
|
// meaning only those with management tokens can call it.
|
2023-01-25 19:33:06 +00:00
|
|
|
if aclObj, err := e.srv.ResolveACL(args); err != nil {
|
2022-07-06 14:30:11 +00:00
|
|
|
return err
|
|
|
|
} else if aclObj != nil && !aclObj.IsManagement() {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
if args.Filter != "" && !ServersMeetMinimumVersion(
|
|
|
|
e.srv.Members(), e.srv.Region(), minVersionEvalDeleteByFilter, true) {
|
|
|
|
return fmt.Errorf(
|
|
|
|
"all servers must be running version %v or later to delete evals by filter",
|
|
|
|
minVersionEvalDeleteByFilter)
|
|
|
|
}
|
|
|
|
if args.Filter != "" && len(args.EvalIDs) > 0 {
|
|
|
|
return fmt.Errorf("evals cannot be deleted by both ID and filter")
|
|
|
|
}
|
|
|
|
if args.Filter == "" && len(args.EvalIDs) == 0 {
|
|
|
|
return fmt.Errorf("evals must be deleted by either ID or filter")
|
|
|
|
}
|
|
|
|
|
2022-07-06 14:30:11 +00:00
|
|
|
// The eval broker must be disabled otherwise Nomad's state will likely get
|
|
|
|
// wild in a very un-fun way.
|
|
|
|
if e.srv.evalBroker.Enabled() {
|
|
|
|
return errors.New("eval broker is enabled; eval broker must be paused to delete evals")
|
|
|
|
}
|
|
|
|
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
if args.Filter != "" {
|
|
|
|
count, index, err := e.deleteEvalsByFilter(args)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index and return.
|
|
|
|
reply.Index = index
|
|
|
|
reply.Count = count
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-07-06 14:30:11 +00:00
|
|
|
// Grab the state snapshot, so we can look up relevant eval information.
|
|
|
|
serverStateSnapshot, err := e.srv.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to lookup state snapshot: %v", err)
|
|
|
|
}
|
|
|
|
ws := memdb.NewWatchSet()
|
|
|
|
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
count := 0
|
|
|
|
|
2022-07-06 14:30:11 +00:00
|
|
|
// Iterate the evaluations and ensure they are safe to delete. It is
|
|
|
|
// possible passed evals are not safe to delete and would make Nomads state
|
|
|
|
// a little wonky. The nature of the RPC return error, means a single
|
|
|
|
// unsafe eval ID fails the whole call.
|
|
|
|
for _, evalID := range args.EvalIDs {
|
|
|
|
|
|
|
|
evalInfo, err := serverStateSnapshot.EvalByID(ws, evalID)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to lookup eval: %v", err)
|
|
|
|
}
|
|
|
|
if evalInfo == nil {
|
|
|
|
return errors.New("eval not found")
|
|
|
|
}
|
2022-10-28 13:10:33 +00:00
|
|
|
ok, err := serverStateSnapshot.EvalIsUserDeleteSafe(ws, evalInfo)
|
2022-07-06 14:30:11 +00:00
|
|
|
if err != nil {
|
2022-10-28 13:10:33 +00:00
|
|
|
return err
|
2022-07-06 14:30:11 +00:00
|
|
|
}
|
2022-10-28 13:10:33 +00:00
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("eval %s is not safe to delete", evalInfo.ID)
|
2022-07-06 14:30:11 +00:00
|
|
|
}
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
count++
|
2022-07-06 14:30:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Generate the Raft request object using the reap request object. This
|
|
|
|
// avoids adding new Raft messages types and follows the existing reap
|
|
|
|
// flow.
|
|
|
|
raftReq := structs.EvalReapRequest{
|
|
|
|
Evals: args.EvalIDs,
|
|
|
|
UserInitiated: true,
|
|
|
|
WriteRequest: args.WriteRequest,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update via Raft.
|
|
|
|
_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, &raftReq)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index and return.
|
|
|
|
reply.Index = index
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
reply.Count = count
|
2022-07-06 14:30:11 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
eval delete: move batching of deletes into RPC handler and state (#15117)
During unusual outage recovery scenarios on large clusters, a backlog of
millions of evaluations can appear. In these cases, the `eval delete` command can
put excessive load on the cluster by listing large sets of evals to extract the
IDs and then sending large batches of IDs. Although the command's batch size
was carefully tuned, we still need to JSON deserialize, re-serialize to
MessagePack, send the log entries through raft, and get the FSM applied.
To improve performance of this recovery case, move the batching process into the
RPC handler and the state store. The design here is a little weird, so let's
look at the failed options first:
* A naive solution here would be to just send the filter as the raft request and
let the FSM apply delete the whole set in a single operation. Benchmarking with
1M evals on a 3 node cluster demonstrated this can block the FSM apply for
several minutes, which puts the cluster at risk if there's a leadership
failover (the barrier write can't be made while this apply is in-flight).
* A less naive but still bad solution would be to have the RPC handler filter
and paginate, and then hand a list of IDs to the existing raft log
entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and
took roughly an hour to complete.
Instead, we're filtering and paginating in the RPC handler to find a page token,
and then passing both the filter and page token in the raft log. The FSM apply
recreates the paginator using the filter and page token to get roughly the same
page of evaluations, which it then deletes. The pagination process is fairly
cheap (only about 5% of the total FSM apply time), so counter-intuitively this
rework ends up being much faster. A benchmark of 1M evaluations showed this
blocked the FSM apply for 20-30ms at a time (typical for normal operations) and
completes in less than 4 minutes.
Note that, as with the existing design, this delete is not consistent: a new
evaluation inserted "behind" the cursor of the pagination will fail to be
deleted.
2022-11-14 19:08:13 +00:00
|
|
|
// deleteEvalsByFilter deletes evaluations in batches based on the filter. It
|
|
|
|
// returns a count, the index, and any error
|
|
|
|
func (e *Eval) deleteEvalsByFilter(args *structs.EvalDeleteRequest) (int, uint64, error) {
|
|
|
|
count := 0
|
|
|
|
index := uint64(0)
|
|
|
|
|
|
|
|
filter, err := bexpr.CreateEvaluator(args.Filter)
|
|
|
|
if err != nil {
|
|
|
|
return count, index, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note that deleting evals by filter is imprecise: For sets of evals larger
|
|
|
|
// than a single batch eval inserts may occur behind the cursor and therefore
|
|
|
|
// be missed. This imprecision is not considered to hurt this endpoint's
|
|
|
|
// purpose of reducing pressure on servers during periods of heavy scheduling
|
|
|
|
// activity.
|
|
|
|
snap, err := e.srv.State().Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
return count, index, fmt.Errorf("failed to lookup state snapshot: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
iter, err := snap.Evals(nil, state.SortDefault)
|
|
|
|
if err != nil {
|
|
|
|
return count, index, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// We *can* send larger raft logs but rough benchmarks for deleting 1M evals
|
|
|
|
// show that a smaller page size strikes a balance between throughput and
|
|
|
|
// time we block the FSM apply for other operations
|
|
|
|
perPage := structs.MaxUUIDsPerWriteRequest / 10
|
|
|
|
|
|
|
|
raftReq := structs.EvalReapRequest{
|
|
|
|
Filter: args.Filter,
|
|
|
|
PerPage: int32(perPage),
|
|
|
|
UserInitiated: true,
|
|
|
|
WriteRequest: args.WriteRequest,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note: Paginator is designed around fetching a single page for a single
|
|
|
|
// RPC call and finalizes its state after that page. So we're doing our own
|
|
|
|
// pagination here.
|
|
|
|
pageCount := 0
|
|
|
|
lastToken := ""
|
|
|
|
|
|
|
|
for {
|
|
|
|
raw := iter.Next()
|
|
|
|
if raw == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
eval := raw.(*structs.Evaluation)
|
|
|
|
deleteOk, err := snap.EvalIsUserDeleteSafe(nil, eval)
|
|
|
|
if !deleteOk || err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
match, err := filter.Evaluate(eval)
|
|
|
|
if !match || err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
pageCount++
|
|
|
|
lastToken = eval.ID
|
|
|
|
|
|
|
|
if pageCount >= perPage {
|
|
|
|
raftReq.PerPage = int32(pageCount)
|
|
|
|
_, index, err = e.srv.raftApply(structs.EvalDeleteRequestType, &raftReq)
|
|
|
|
if err != nil {
|
|
|
|
return count, index, err
|
|
|
|
}
|
|
|
|
count += pageCount
|
|
|
|
|
|
|
|
pageCount = 0
|
|
|
|
raftReq.NextToken = lastToken
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// send last batch if it's partial
|
|
|
|
if pageCount > 0 {
|
|
|
|
raftReq.PerPage = int32(pageCount)
|
|
|
|
_, index, err = e.srv.raftApply(structs.EvalDeleteRequestType, &raftReq)
|
|
|
|
if err != nil {
|
|
|
|
return count, index, err
|
|
|
|
}
|
|
|
|
count += pageCount
|
|
|
|
}
|
|
|
|
|
|
|
|
return count, index, nil
|
|
|
|
}
|
|
|
|
|
2015-09-06 23:01:16 +00:00
|
|
|
// List is used to get a list of the evaluations in the system
|
2022-02-10 17:50:34 +00:00
|
|
|
func (e *Eval) List(args *structs.EvalListRequest, reply *structs.EvalListResponse) error {
|
2023-01-25 19:33:06 +00:00
|
|
|
|
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
2015-09-06 23:01:16 +00:00
|
|
|
if done, err := e.srv.forward("Eval.List", args, args, reply); done {
|
|
|
|
return err
|
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricList, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2015-09-06 23:01:16 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now())
|
|
|
|
|
2022-02-10 17:50:34 +00:00
|
|
|
namespace := args.RequestNamespace()
|
|
|
|
|
2017-10-02 23:53:50 +00:00
|
|
|
// Check for read-job permissions
|
2023-01-25 19:33:06 +00:00
|
|
|
aclObj, err := e.srv.ResolveACL(args)
|
2022-07-11 20:42:17 +00:00
|
|
|
if err != nil {
|
2017-10-02 23:53:50 +00:00
|
|
|
return err
|
2022-07-11 20:42:17 +00:00
|
|
|
}
|
|
|
|
if !aclObj.AllowNsOp(namespace, acl.NamespaceCapabilityReadJob) {
|
2017-10-02 23:53:50 +00:00
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-07-11 20:42:17 +00:00
|
|
|
allow := aclObj.AllowNsOpFunc(acl.NamespaceCapabilityReadJob)
|
2017-10-02 23:53:50 +00:00
|
|
|
|
2022-02-16 16:40:30 +00:00
|
|
|
if args.Filter != "" {
|
|
|
|
// Check for incompatible filtering.
|
|
|
|
hasLegacyFilter := args.FilterJobID != "" || args.FilterEvalStatus != ""
|
|
|
|
if hasLegacyFilter {
|
|
|
|
return structs.ErrIncompatibleFiltering
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-29 01:34:56 +00:00
|
|
|
// Setup the blocking query
|
2022-03-09 01:54:17 +00:00
|
|
|
sort := state.SortOption(args.Reverse)
|
2015-10-29 01:34:56 +00:00
|
|
|
opts := blockingOptions{
|
2015-10-29 21:47:39 +00:00
|
|
|
queryOpts: &args.QueryOptions,
|
|
|
|
queryMeta: &reply.QueryMeta,
|
2021-12-10 18:43:03 +00:00
|
|
|
run: func(ws memdb.WatchSet, store *state.StateStore) error {
|
2015-10-29 01:34:56 +00:00
|
|
|
// Scan all the evaluations
|
2017-02-08 04:31:23 +00:00
|
|
|
var err error
|
2015-12-24 10:46:59 +00:00
|
|
|
var iter memdb.ResultIterator
|
2022-03-09 01:54:17 +00:00
|
|
|
var opts paginator.StructsTokenizerOptions
|
2022-02-10 17:50:34 +00:00
|
|
|
|
2022-07-11 20:42:17 +00:00
|
|
|
// Get the namespaces the user is allowed to access.
|
|
|
|
allowableNamespaces, err := allowedNSes(aclObj, store, allow)
|
|
|
|
if err == structs.ErrPermissionDenied {
|
|
|
|
// return empty evals if token isn't authorized for any
|
|
|
|
// namespace, matching other endpoints
|
|
|
|
reply.Evaluations = make([]*structs.Evaluation, 0)
|
|
|
|
} else if err != nil {
|
|
|
|
return err
|
2015-12-24 10:46:59 +00:00
|
|
|
} else {
|
2022-07-11 20:42:17 +00:00
|
|
|
if prefix := args.QueryOptions.Prefix; prefix != "" {
|
|
|
|
iter, err = store.EvalsByIDPrefix(ws, namespace, prefix, sort)
|
|
|
|
opts = paginator.StructsTokenizerOptions{
|
|
|
|
WithID: true,
|
|
|
|
}
|
|
|
|
} else if namespace != structs.AllNamespacesSentinel {
|
|
|
|
iter, err = store.EvalsByNamespaceOrdered(ws, namespace, sort)
|
|
|
|
opts = paginator.StructsTokenizerOptions{
|
|
|
|
WithCreateIndex: true,
|
|
|
|
WithID: true,
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iter, err = store.Evals(ws, sort)
|
|
|
|
opts = paginator.StructsTokenizerOptions{
|
|
|
|
WithCreateIndex: true,
|
|
|
|
WithID: true,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2022-03-09 01:54:17 +00:00
|
|
|
}
|
2015-10-29 01:34:56 +00:00
|
|
|
|
2022-07-11 20:42:17 +00:00
|
|
|
iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool {
|
|
|
|
if eval := raw.(*structs.Evaluation); eval != nil {
|
|
|
|
return args.ShouldBeFiltered(eval)
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
|
|
|
|
tokenizer := paginator.NewStructsTokenizer(iter, opts)
|
|
|
|
filters := []paginator.Filter{
|
|
|
|
paginator.NamespaceFilter{
|
|
|
|
AllowableNamespaces: allowableNamespaces,
|
|
|
|
},
|
2015-10-29 01:34:56 +00:00
|
|
|
}
|
2022-03-09 01:54:17 +00:00
|
|
|
|
2022-07-11 20:42:17 +00:00
|
|
|
var evals []*structs.Evaluation
|
|
|
|
paginator, err := paginator.NewPaginator(iter, tokenizer, filters, args.QueryOptions,
|
|
|
|
func(raw interface{}) error {
|
|
|
|
eval := raw.(*structs.Evaluation)
|
|
|
|
evals = append(evals, eval)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return structs.NewErrRPCCodedf(
|
|
|
|
http.StatusBadRequest, "failed to create result paginator: %v", err)
|
|
|
|
}
|
2021-12-10 18:43:03 +00:00
|
|
|
|
2022-07-11 20:42:17 +00:00
|
|
|
nextToken, err := paginator.Page()
|
|
|
|
if err != nil {
|
|
|
|
return structs.NewErrRPCCodedf(
|
|
|
|
http.StatusBadRequest, "failed to read result page: %v", err)
|
|
|
|
}
|
2022-02-16 16:40:30 +00:00
|
|
|
|
2022-07-11 20:42:17 +00:00
|
|
|
reply.QueryMeta.NextToken = nextToken
|
|
|
|
reply.Evaluations = evals
|
2022-02-16 16:40:30 +00:00
|
|
|
}
|
2021-12-10 18:43:03 +00:00
|
|
|
|
2015-10-29 01:34:56 +00:00
|
|
|
// Use the last index that affected the jobs table
|
2021-12-10 18:43:03 +00:00
|
|
|
index, err := store.Index("evals")
|
2015-10-29 01:34:56 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
reply.Index = index
|
|
|
|
|
|
|
|
// Set the query response
|
|
|
|
e.srv.setQueryMeta(&reply.QueryMeta)
|
|
|
|
return nil
|
|
|
|
}}
|
|
|
|
return e.srv.blockingRPC(&opts)
|
2015-09-06 23:01:16 +00:00
|
|
|
}
|
2015-09-06 23:14:41 +00:00
|
|
|
|
2022-11-07 13:53:19 +00:00
|
|
|
// Count is used to get a list of the evaluations in the system
|
|
|
|
func (e *Eval) Count(args *structs.EvalCountRequest, reply *structs.EvalCountResponse) error {
|
2023-01-25 19:33:06 +00:00
|
|
|
|
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
2022-11-07 13:53:19 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Count", args, args, reply); done {
|
|
|
|
return err
|
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricList, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2022-11-07 13:53:19 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "count"}, time.Now())
|
|
|
|
namespace := args.RequestNamespace()
|
|
|
|
|
|
|
|
// Check for read-job permissions
|
2023-01-25 19:33:06 +00:00
|
|
|
aclObj, err := e.srv.ResolveACL(args)
|
2022-11-07 13:53:19 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !aclObj.AllowNsOp(namespace, acl.NamespaceCapabilityReadJob) {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
allow := aclObj.AllowNsOpFunc(acl.NamespaceCapabilityReadJob)
|
|
|
|
|
|
|
|
var filter *bexpr.Evaluator
|
|
|
|
if args.Filter != "" {
|
|
|
|
filter, err = bexpr.CreateEvaluator(args.Filter)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Setup the blocking query. This is only superficially like Eval.List,
|
|
|
|
// because we don't any concerns about pagination, sorting, and legacy
|
|
|
|
// filter fields.
|
|
|
|
opts := blockingOptions{
|
|
|
|
queryOpts: &args.QueryOptions,
|
|
|
|
queryMeta: &reply.QueryMeta,
|
|
|
|
run: func(ws memdb.WatchSet, store *state.StateStore) error {
|
|
|
|
// Scan all the evaluations
|
|
|
|
var err error
|
|
|
|
var iter memdb.ResultIterator
|
|
|
|
|
|
|
|
// Get the namespaces the user is allowed to access.
|
|
|
|
allowableNamespaces, err := allowedNSes(aclObj, store, allow)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if prefix := args.QueryOptions.Prefix; prefix != "" {
|
|
|
|
iter, err = store.EvalsByIDPrefix(ws, namespace, prefix, state.SortDefault)
|
|
|
|
} else if namespace != structs.AllNamespacesSentinel {
|
|
|
|
iter, err = store.EvalsByNamespace(ws, namespace)
|
|
|
|
} else {
|
|
|
|
iter, err = store.Evals(ws, state.SortDefault)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
count := 0
|
|
|
|
|
|
|
|
iter = memdb.NewFilterIterator(iter, func(raw interface{}) bool {
|
|
|
|
if raw == nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
eval := raw.(*structs.Evaluation)
|
|
|
|
if allowableNamespaces != nil && !allowableNamespaces[eval.Namespace] {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if filter != nil {
|
|
|
|
ok, err := filter.Evaluate(eval)
|
|
|
|
if err != nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return !ok
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
})
|
|
|
|
|
|
|
|
for {
|
|
|
|
raw := iter.Next()
|
|
|
|
if raw == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
count++
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use the last index that affected the jobs table
|
|
|
|
index, err := store.Index("evals")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
reply.Index = index
|
|
|
|
reply.Count = count
|
|
|
|
|
|
|
|
// Set the query response
|
|
|
|
e.srv.setQueryMeta(&reply.QueryMeta)
|
|
|
|
return nil
|
|
|
|
}}
|
|
|
|
|
|
|
|
return e.srv.blockingRPC(&opts)
|
|
|
|
}
|
|
|
|
|
2015-09-06 23:14:41 +00:00
|
|
|
// Allocations is used to list the allocations for an evaluation
|
|
|
|
func (e *Eval) Allocations(args *structs.EvalSpecificRequest,
|
|
|
|
reply *structs.EvalAllocationsResponse) error {
|
2023-01-25 19:33:06 +00:00
|
|
|
|
|
|
|
authErr := e.srv.Authenticate(e.ctx, args)
|
2015-09-06 23:14:41 +00:00
|
|
|
if done, err := e.srv.forward("Eval.Allocations", args, args, reply); done {
|
|
|
|
return err
|
|
|
|
}
|
2023-01-25 21:37:24 +00:00
|
|
|
e.srv.MeasureRPCRate("eval", structs.RateMetricList, args)
|
2023-01-25 19:33:06 +00:00
|
|
|
if authErr != nil {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
2015-09-06 23:14:41 +00:00
|
|
|
defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now())
|
|
|
|
|
2017-10-03 00:21:40 +00:00
|
|
|
// Check for read-job permissions
|
2019-10-01 20:06:24 +00:00
|
|
|
allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob)
|
2023-01-25 19:33:06 +00:00
|
|
|
aclObj, err := e.srv.ResolveACL(args)
|
2019-10-01 20:06:24 +00:00
|
|
|
if err != nil {
|
2017-10-03 00:21:40 +00:00
|
|
|
return err
|
2019-10-01 20:06:24 +00:00
|
|
|
} else if !allowNsOp(aclObj, args.RequestNamespace()) {
|
2017-10-03 00:21:40 +00:00
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
2015-10-29 23:20:57 +00:00
|
|
|
// Setup the blocking query
|
|
|
|
opts := blockingOptions{
|
|
|
|
queryOpts: &args.QueryOptions,
|
|
|
|
queryMeta: &reply.QueryMeta,
|
2017-02-08 04:31:23 +00:00
|
|
|
run: func(ws memdb.WatchSet, state *state.StateStore) error {
|
2015-10-29 23:20:57 +00:00
|
|
|
// Capture the allocations
|
2017-02-08 04:31:23 +00:00
|
|
|
allocs, err := state.AllocsByEval(ws, args.EvalID)
|
2015-10-29 23:20:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-09-06 23:14:41 +00:00
|
|
|
|
2015-10-29 23:20:57 +00:00
|
|
|
// Convert to a stub
|
|
|
|
if len(allocs) > 0 {
|
2019-10-01 20:06:24 +00:00
|
|
|
// Evaluations do not span namespaces so just check the
|
|
|
|
// first allocs namespace.
|
|
|
|
ns := allocs[0].Namespace
|
|
|
|
if ns != args.RequestNamespace() && !allowNsOp(aclObj, ns) {
|
|
|
|
return structs.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
2015-10-29 23:20:57 +00:00
|
|
|
reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
|
|
|
|
for _, alloc := range allocs {
|
2020-10-09 05:21:41 +00:00
|
|
|
reply.Allocations = append(reply.Allocations, alloc.Stub(nil))
|
2015-10-29 23:20:57 +00:00
|
|
|
}
|
|
|
|
}
|
2015-09-06 23:14:41 +00:00
|
|
|
|
2015-10-29 23:20:57 +00:00
|
|
|
// Use the last index that affected the allocs table
|
2017-02-08 04:31:23 +00:00
|
|
|
index, err := state.Index("allocs")
|
2015-10-29 23:20:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
reply.Index = index
|
2015-09-06 23:14:41 +00:00
|
|
|
|
2015-10-29 23:20:57 +00:00
|
|
|
// Set the query response
|
|
|
|
e.srv.setQueryMeta(&reply.QueryMeta)
|
|
|
|
return nil
|
|
|
|
}}
|
|
|
|
return e.srv.blockingRPC(&opts)
|
2015-09-06 23:14:41 +00:00
|
|
|
}
|