open-nomad/api/evaluations.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package api

import (
	"sort"
	"time"
)

// Evaluations is used to query the evaluation endpoints.
// It holds only the Client through which its methods issue their requests.
type Evaluations struct {
	client *Client
}

// Evaluations returns a new handle on the evaluations. The handle issues all
// of its requests through c.
func (c *Client) Evaluations() *Evaluations {
	handle := new(Evaluations)
	handle.client = c
	return handle
}

// List queries /v1/evaluations with the given options and returns the decoded
// evaluations ordered by EvalIndexSort, i.e. highest CreateIndex first.
// When the query fails, both the slice and the QueryMeta are nil.
func (e *Evaluations) List(q *QueryOptions) ([]*Evaluation, *QueryMeta, error) {
	var evals []*Evaluation

	meta, err := e.client.query("/v1/evaluations", &evals, q)
	if err != nil {
		return nil, nil, err
	}

	ordered := EvalIndexSort(evals)
	sort.Sort(ordered)
	return evals, meta, nil
}

// PrefixList is a convenience wrapper around List that sets only the Prefix
// query option and leaves every other option at its zero value.
func (e *Evaluations) PrefixList(prefix string) ([]*Evaluation, *QueryMeta, error) {
	opts := &QueryOptions{Prefix: prefix}
	return e.List(opts)
}

// Count is used to get a count of evaluations. It queries
// /v1/evaluations/count with the given options and returns the decoded
// EvalCountResponse together with the query metadata.
//
// On failure it returns nil for both the response and the QueryMeta, like the
// other query methods on Evaluations, so callers never observe a partially
// decoded response alongside a non-nil error.
func (e *Evaluations) Count(q *QueryOptions) (*EvalCountResponse, *QueryMeta, error) {
	var resp *EvalCountResponse
	qm, err := e.client.query("/v1/evaluations/count", &resp, q)
	if err != nil {
		return nil, nil, err
	}
	return resp, qm, nil
}

// Info fetches a single evaluation by querying /v1/evaluation/<evalID> with
// the given options. When the query fails, both the evaluation and the
// QueryMeta are nil.
func (e *Evaluations) Info(evalID string, q *QueryOptions) (*Evaluation, *QueryMeta, error) {
	endpoint := "/v1/evaluation/" + evalID
	eval := new(Evaluation)

	meta, err := e.client.query(endpoint, eval, q)
	if err != nil {
		return nil, nil, err
	}
	return eval, meta, nil
}

// Delete batch deletes evaluations by ID. It sends an EvalDeleteRequest
// carrying only evalIDs to /v1/evaluations and does not decode a response
// body. When the request fails, the returned WriteMeta is nil.
func (e *Evaluations) Delete(evalIDs []string, w *WriteOptions) (*WriteMeta, error) {
	body := &EvalDeleteRequest{EvalIDs: evalIDs}

	meta, err := e.client.delete("/v1/evaluations", body, nil, w)
	if err != nil {
		return nil, err
	}
	return meta, nil
}

// DeleteOpts is used to batch delete evaluations using a filter. It sends req
// to /v1/evaluations and decodes the reply into an EvalDeleteResponse.
// When the request fails, both the response and the WriteMeta are nil.
func (e *Evaluations) DeleteOpts(req *EvalDeleteRequest, w *WriteOptions) (*EvalDeleteResponse, *WriteMeta, error) {
	resp := &EvalDeleteResponse{}
	// req is already a pointer, so it is handed over as-is (the same shape
	// Delete passes) instead of wrapping it in a second level of indirection.
	wm, err := e.client.delete("/v1/evaluations", req, resp, w)
	if err != nil {
		return nil, nil, err
	}
	return resp, wm, nil
}

// Allocations queries /v1/evaluation/<evalID>/allocations with the given
// options and returns the decoded allocation stubs after ordering them with
// AllocIndexSort. When the query fails, both the slice and the QueryMeta
// are nil.
func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*AllocationListStub, *QueryMeta, error) {
	endpoint := "/v1/evaluation/" + evalID + "/allocations"

	var stubs []*AllocationListStub
	meta, err := e.client.query(endpoint, &stubs, q)
	if err != nil {
		return nil, nil, err
	}

	ordered := AllocIndexSort(stubs)
	sort.Sort(ordered)
	return stubs, meta, nil
}

// Evaluation status strings exported by this package.
// NOTE(review): presumably these are the values carried in Evaluation.Status —
// confirm against the server.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled" // identifier is spelled with two Ls, the value with one
)

// Evaluation is used to serialize an evaluation. It is the type that
// Evaluations.List and Evaluations.Info decode API responses into, and
// EvalIndexSort orders values of this type by their CreateIndex field.
//
// NOTE(review): the units of CreateTime and ModifyTime (int64) are not
// visible in this file — confirm before converting them to time.Time.
type Evaluation struct {
	ID                   string
	Priority             int
	Type                 string
	TriggeredBy          string
	Namespace            string
	JobID                string
	JobModifyIndex       uint64
	NodeID               string
	NodeModifyIndex      uint64
	DeploymentID         string
	Status               string
	StatusDescription    string
	Wait                 time.Duration
	WaitUntil            time.Time
	NextEval             string
	PreviousEval         string
	BlockedEval          string
	RelatedEvals         []*EvaluationStub
	FailedTGAllocs       map[string]*AllocationMetric
	ClassEligibility     map[string]bool
	EscapedComputedClass bool
	QuotaLimitReached    string
	AnnotatePlan         bool
	QueuedAllocations    map[string]int
	SnapshotIndex        uint64
	CreateIndex          uint64
	ModifyIndex          uint64
	CreateTime           int64
	ModifyTime           int64
}

// EvaluationStub is used to serialize parts of an evaluation returned in the
// RelatedEvals field of an Evaluation. Every field it declares has a field of
// the same name and type on Evaluation; it simply carries fewer of them.
type EvaluationStub struct {
	ID                string
	Priority          int
	Type              string
	TriggeredBy       string
	Namespace         string
	JobID             string
	NodeID            string
	DeploymentID      string
	Status            string
	StatusDescription string
	WaitUntil         time.Time
	NextEval          string
	PreviousEval      string
	BlockedEval       string
	CreateIndex       uint64
	ModifyIndex       uint64
	CreateTime        int64
	ModifyTime        int64
}

// EvalDeleteRequest is the request body sent to /v1/evaluations by
// Evaluations.Delete (which fills in only EvalIDs) and by
// Evaluations.DeleteOpts (which sends the caller-supplied request).
// NOTE(review): Filter is presumably a server-side filter expression selecting
// the evaluations to delete — confirm its syntax against the server.
type EvalDeleteRequest struct {
	EvalIDs []string
	Filter  string
	WriteRequest
}

// EvalDeleteResponse is the response body that Evaluations.DeleteOpts decodes
// and returns.
// NOTE(review): Count is presumably the number of evaluations deleted —
// confirm against the server.
type EvalDeleteResponse struct {
	Count int
}

// EvalCountResponse is the response body that Evaluations.Count decodes from
// /v1/evaluations/count and returns. It embeds QueryMeta alongside the count.
type EvalCountResponse struct {
	Count int
	QueryMeta
}

// EvalIndexSort provides the Len, Less and Swap methods needed to sort a slice
// of evaluations with sort.Sort. The ordering is by CreateIndex, descending,
// so the evaluation with the highest index ends up first.
type EvalIndexSort []*Evaluation

// Len reports how many evaluations the slice holds.
func (e EvalIndexSort) Len() int { return len(e) }

// Less reports whether the evaluation at i must be placed before the one at j.
// The comparison is deliberately reversed: a larger CreateIndex sorts earlier.
func (e EvalIndexSort) Less(i, j int) bool {
	left, right := e[i], e[j]
	return right.CreateIndex < left.CreateIndex
}

// Swap exchanges the evaluations at positions i and j.
func (e EvalIndexSort) Swap(i, j int) {
	held := e[i]
	e[i] = e[j]
	e[j] = held
}
[COMPLIANCE] Add Copyright and License Headers 2023-04-10 15:36:59 +00:00			`// Copyright (c) HashiCorp, Inc.`
			`// SPDX-License-Identifier: MPL-2.0`

api: finishing jobs 2015-09-09 01:42:34 +00:00			`package api`

			`import (`
api: sort all list responses 2015-09-17 19:40:51 +00:00			`"sort"`
api: finishing jobs 2015-09-09 01:42:34 +00:00			`"time"`
			`)`

api: working on evaluations 2015-09-09 20:48:56 +00:00			`// Evaluations is used to query the evaluation endpoints.`
			`type Evaluations struct {`
			`client *Client`
			`}`

			`// Evaluations returns a new handle on the evaluations.`
			`func (c Client) Evaluations() Evaluations {`
			`return &Evaluations{client: c}`
			`}`

			`// List is used to dump all of the evaluations.`
			`func (e Evaluations) List(q QueryOptions) ([]Evaluation, QueryMeta, error) {`
			`var resp []*Evaluation`
			`qm, err := e.client.query("/v1/evaluations", &resp, q)`
			`if err != nil {`
			`return nil, nil, err`
			`}`
api: sort all list responses 2015-09-17 19:40:51 +00:00			`sort.Sort(EvalIndexSort(resp))`
api: working on evaluations 2015-09-09 20:48:56 +00:00			`return resp, qm, nil`
			`}`

Refactoring continued * Refactor other cli commands to new design * Add PrefixList method to api package * Add more tests 2015-12-24 10:46:59 +00:00			`func (e Evaluations) PrefixList(prefix string) ([]Evaluation, *QueryMeta, error) {`
			`return e.List(&QueryOptions{Prefix: prefix})`
			`}`

API for `Eval.Count` (#15147) Add a new `Eval.Count` RPC and associated HTTP API endpoints. This API is designed to support interactive use in the `nomad eval delete` command to get a count of evals expected to be deleted before doing so. The state store operations to do this sort of thing are somewhat expensive, but it's cheaper than serializing a big list of evals to JSON. Note that although it seems like this could be done as an extra parameter and response field on `Eval.List`, having it as its own endpoint avoids having to change the response body shape and lets us avoid handling the legacy filter params supported by `Eval.List`. 2022-11-07 13:53:19 +00:00			`// Count is used to get a count of evaluations.`
			`func (e Evaluations) Count(q QueryOptions) (EvalCountResponse, QueryMeta, error) {`
			`var resp *EvalCountResponse`
			`qm, err := e.client.query("/v1/evaluations/count", &resp, q)`
			`if err != nil {`
			`return resp, nil, err`
			`}`
			`return resp, qm, nil`
			`}`

api: working on evaluations 2015-09-09 20:48:56 +00:00			`// Info is used to query a single evaluation by its ID.`
			`func (e Evaluations) Info(evalID string, q QueryOptions) (Evaluation, QueryMeta, error) {`
			`var resp Evaluation`
			`qm, err := e.client.query("/v1/evaluation/"+evalID, &resp, q)`
			`if err != nil {`
			`return nil, nil, err`
			`}`
			`return &resp, qm, nil`
			`}`

core: allow deleting of evaluations (#13492) * core: add eval delete RPC and core functionality. * agent: add eval delete HTTP endpoint. * api: add eval delete API functionality. * cli: add eval delete command. * docs: add eval delete website documentation. 2022-07-06 14:30:11 +00:00			`// Delete is used to batch delete evaluations using their IDs.`
			`func (e Evaluations) Delete(evalIDs []string, w WriteOptions) (*WriteMeta, error) {`
			`req := EvalDeleteRequest{`
			`EvalIDs: evalIDs,`
			`}`
			`wm, err := e.client.delete("/v1/evaluations", &req, nil, w)`
			`if err != nil {`
			`return nil, err`
			`}`
			`return wm, nil`
			`}`

eval delete: move batching of deletes into RPC handler and state (#15117) During unusual outage recovery scenarios on large clusters, a backlog of millions of evaluations can appear. In these cases, the `eval delete` command can put excessive load on the cluster by listing large sets of evals to extract the IDs and then sending larges batches of IDs. Although the command's batch size was carefully tuned, we still need to be JSON deserialize, re-serialize to MessagePack, send the log entries through raft, and get the FSM applied. To improve performance of this recovery case, move the batching process into the RPC handler and the state store. The design here is a little weird, so let's look a the failed options first: * A naive solution here would be to just send the filter as the raft request and let the FSM apply delete the whole set in a single operation. Benchmarking with 1M evals on a 3 node cluster demonstrated this can block the FSM apply for several minutes, which puts the cluster at risk if there's a leadership failover (the barrier write can't be made while this apply is in-flight). * A less naive but still bad solution would be to have the RPC handler filter and paginate, and then hand a list of IDs to the existing raft log entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and took roughly an hour to complete. Instead, we're filtering and paginating in the RPC handler to find a page token, and then passing both the filter and page token in the raft log. The FSM apply recreates the paginator using the filter and page token to get roughly the same page of evaluations, which it then deletes. The pagination process is fairly cheap (only abut 5% of the total FSM apply time), so counter-intuitively this rework ends up being much faster. A benchmark of 1M evaluations showed this blocked the FSM apply for 20-30ms at a time (typical for normal operations) and completes in less than 4 minutes. 
Note that, as with the existing design, this delete is not consistent: a new evaluation inserted "behind" the cursor of the pagination will fail to be deleted. 2022-11-14 19:08:13 +00:00			`// DeleteOpts is used to batch delete evaluations using a filter.`
			`func (e Evaluations) DeleteOpts(req EvalDeleteRequest, w WriteOptions) (EvalDeleteResponse, *WriteMeta, error) {`
			`resp := &EvalDeleteResponse{}`
			`wm, err := e.client.delete("/v1/evaluations", &req, resp, w)`
			`if err != nil {`
			`return nil, nil, err`
			`}`
			`return resp, wm, nil`
			`}`

api: working on evaluations 2015-09-09 20:48:56 +00:00			`// Allocations is used to retrieve a set of allocations given`
			`// an evaluation ID.`
api: use stub structs 2015-09-14 02:55:47 +00:00			`func (e Evaluations) Allocations(evalID string, q QueryOptions) ([]AllocationListStub, QueryMeta, error) {`
			`var resp []*AllocationListStub`
api: working on evaluations 2015-09-09 20:48:56 +00:00			`qm, err := e.client.query("/v1/evaluation/"+evalID+"/allocations", &resp, q)`
			`if err != nil {`
			`return nil, nil, err`
			`}`
api: sort all list responses 2015-09-17 19:40:51 +00:00			`sort.Sort(AllocIndexSort(resp))`
api: working on evaluations 2015-09-09 20:48:56 +00:00			`return resp, qm, nil`
			`}`

cli: do not import structs, use API package only. (#13938) 2022-08-02 14:33:08 +00:00			`const (`
			`EvalStatusBlocked = "blocked"`
			`EvalStatusPending = "pending"`
			`EvalStatusComplete = "complete"`
			`EvalStatusFailed = "failed"`
			`EvalStatusCancelled = "canceled"`
			`)`

api: finishing jobs 2015-09-09 01:42:34 +00:00			`// Evaluation is used to serialize an evaluation.`
			`type Evaluation struct {`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`ID string`
			`Priority int`
			`Type string`
			`TriggeredBy string`
Sync namespace changes 2017-09-07 23:56:15 +00:00			`Namespace string`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`JobID string`
			`JobModifyIndex uint64`
			`NodeID string`
			`NodeModifyIndex uint64`
initial impl 2017-06-26 21:23:52 +00:00			`DeploymentID string`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`Status string`
			`StatusDescription string`
			`Wait time.Duration`
Rename DelayCeiling to MaxDelay 2018-03-13 15:06:26 +00:00			`WaitUntil time.Time`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`NextEval string`
			`PreviousEval string`
			`BlockedEval string`
api: add related evals to eval details (#12305) The `related` query param is used to indicate that the request should return a list of related (next, previous, and blocked) evaluations. Co-authored-by: Jasmine Dahilig <jasmine@hashicorp.com> 2022-03-17 17:56:14 +00:00			`RelatedEvals []*EvaluationStub`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`FailedTGAllocs map[string]*AllocationMetric`
			`ClassEligibility map[string]bool`
			`EscapedComputedClass bool`
sync 2017-10-13 21:36:02 +00:00			`QuotaLimitReached string`
Add metrics to show allocations on the client This PR adds the following metrics to the client: client.allocations.migrating client.allocations.blocked client.allocations.pending client.allocations.running client.allocations.terminal Also adds some missing fields to the API version of the evaluation. 2017-03-09 20:37:41 +00:00			`AnnotatePlan bool`
			`QueuedAllocations map[string]int`
			`SnapshotIndex uint64`
			`CreateIndex uint64`
			`ModifyIndex uint64`
add create and modify timestamps to evaluations (#5881) 2019-08-07 16:50:35 +00:00			`CreateTime int64`
			`ModifyTime int64`
api: sort all list responses 2015-09-17 19:40:51 +00:00			`}`

api: add related evals to eval details (#12305) The `related` query param is used to indicate that the request should return a list of related (next, previous, and blocked) evaluations. Co-authored-by: Jasmine Dahilig <jasmine@hashicorp.com> 2022-03-17 17:56:14 +00:00			`// EvaluationStub is used to serialize parts of an evaluation returned in the`
			`// RelatedEvals field of an Evaluation.`
			`type EvaluationStub struct {`
			`ID string`
			`Priority int`
			`Type string`
			`TriggeredBy string`
			`Namespace string`
			`JobID string`
			`NodeID string`
			`DeploymentID string`
			`Status string`
			`StatusDescription string`
			`WaitUntil time.Time`
			`NextEval string`
			`PreviousEval string`
			`BlockedEval string`
			`CreateIndex uint64`
			`ModifyIndex uint64`
			`CreateTime int64`
			`ModifyTime int64`
			`}`

core: allow deleting of evaluations (#13492) * core: add eval delete RPC and core functionality. * agent: add eval delete HTTP endpoint. * api: add eval delete API functionality. * cli: add eval delete command. * docs: add eval delete website documentation. 2022-07-06 14:30:11 +00:00			`type EvalDeleteRequest struct {`
			`EvalIDs []string`
eval delete: move batching of deletes into RPC handler and state (#15117) During unusual outage recovery scenarios on large clusters, a backlog of millions of evaluations can appear. In these cases, the `eval delete` command can put excessive load on the cluster by listing large sets of evals to extract the IDs and then sending larges batches of IDs. Although the command's batch size was carefully tuned, we still need to be JSON deserialize, re-serialize to MessagePack, send the log entries through raft, and get the FSM applied. To improve performance of this recovery case, move the batching process into the RPC handler and the state store. The design here is a little weird, so let's look a the failed options first: * A naive solution here would be to just send the filter as the raft request and let the FSM apply delete the whole set in a single operation. Benchmarking with 1M evals on a 3 node cluster demonstrated this can block the FSM apply for several minutes, which puts the cluster at risk if there's a leadership failover (the barrier write can't be made while this apply is in-flight). * A less naive but still bad solution would be to have the RPC handler filter and paginate, and then hand a list of IDs to the existing raft log entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and took roughly an hour to complete. Instead, we're filtering and paginating in the RPC handler to find a page token, and then passing both the filter and page token in the raft log. The FSM apply recreates the paginator using the filter and page token to get roughly the same page of evaluations, which it then deletes. The pagination process is fairly cheap (only abut 5% of the total FSM apply time), so counter-intuitively this rework ends up being much faster. A benchmark of 1M evaluations showed this blocked the FSM apply for 20-30ms at a time (typical for normal operations) and completes in less than 4 minutes. 
Note that, as with the existing design, this delete is not consistent: a new evaluation inserted "behind" the cursor of the pagination will fail to be deleted. 2022-11-14 19:08:13 +00:00			`Filter string`
core: allow deleting of evaluations (#13492) * core: add eval delete RPC and core functionality. * agent: add eval delete HTTP endpoint. * api: add eval delete API functionality. * cli: add eval delete command. * docs: add eval delete website documentation. 2022-07-06 14:30:11 +00:00			`WriteRequest`
			`}`

eval delete: move batching of deletes into RPC handler and state (#15117) During unusual outage recovery scenarios on large clusters, a backlog of millions of evaluations can appear. In these cases, the `eval delete` command can put excessive load on the cluster by listing large sets of evals to extract the IDs and then sending larges batches of IDs. Although the command's batch size was carefully tuned, we still need to be JSON deserialize, re-serialize to MessagePack, send the log entries through raft, and get the FSM applied. To improve performance of this recovery case, move the batching process into the RPC handler and the state store. The design here is a little weird, so let's look a the failed options first: * A naive solution here would be to just send the filter as the raft request and let the FSM apply delete the whole set in a single operation. Benchmarking with 1M evals on a 3 node cluster demonstrated this can block the FSM apply for several minutes, which puts the cluster at risk if there's a leadership failover (the barrier write can't be made while this apply is in-flight). * A less naive but still bad solution would be to have the RPC handler filter and paginate, and then hand a list of IDs to the existing raft log entry. Benchmarks showed this blocked the FSM apply for 20-30s at a time and took roughly an hour to complete. Instead, we're filtering and paginating in the RPC handler to find a page token, and then passing both the filter and page token in the raft log. The FSM apply recreates the paginator using the filter and page token to get roughly the same page of evaluations, which it then deletes. The pagination process is fairly cheap (only abut 5% of the total FSM apply time), so counter-intuitively this rework ends up being much faster. A benchmark of 1M evaluations showed this blocked the FSM apply for 20-30ms at a time (typical for normal operations) and completes in less than 4 minutes. 
Note that, as with the existing design, this delete is not consistent: a new evaluation inserted "behind" the cursor of the pagination will fail to be deleted. 2022-11-14 19:08:13 +00:00			`type EvalDeleteResponse struct {`
			`Count int`
			`}`

API for `Eval.Count` (#15147) Add a new `Eval.Count` RPC and associated HTTP API endpoints. This API is designed to support interactive use in the `nomad eval delete` command to get a count of evals expected to be deleted before doing so. The state store operations to do this sort of thing are somewhat expensive, but it's cheaper than serializing a big list of evals to JSON. Note that although it seems like this could be done as an extra parameter and response field on `Eval.List`, having it as its own endpoint avoids having to change the response body shape and lets us avoid handling the legacy filter params supported by `Eval.List`. 2022-11-07 13:53:19 +00:00			`type EvalCountResponse struct {`
			`Count int`
			`QueryMeta`
			`}`

api: sort all list responses 2015-09-17 19:40:51 +00:00			`// EvalIndexSort is a wrapper to sort evaluations by CreateIndex.`
			`// We reverse the test so that we get the highest index first.`
			`type EvalIndexSort []*Evaluation`

			`func (e EvalIndexSort) Len() int {`
			`return len(e)`
			`}`

			`func (e EvalIndexSort) Less(i, j int) bool {`
			`return e[i].CreateIndex > e[j].CreateIndex`
			`}`

			`func (e EvalIndexSort) Swap(i, j int) {`
			`e[i], e[j] = e[j], e[i]`
api: finishing jobs 2015-09-09 01:42:34 +00:00			`}`