open-nomad/command/agent/eval_endpoint.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package agent

import (
	"fmt"
	"net/http"
	"strings"

	"github.com/hashicorp/nomad/nomad/structs"
)

// EvalsRequest is the entry point for /v1/evaluations and is responsible for
// handling both the listing of evaluations and their bulk deletion. The
// latter is a dangerous operation, and callers should use the eval delete
// command to perform it.
func (s *HTTPServer) EvalsRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	switch req.Method {
	case http.MethodGet:
		return s.evalsListRequest(resp, req)
	case http.MethodDelete:
		return s.evalsDeleteRequest(resp, req)
	default:
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}
}
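
// evalsListRequest handles the GET case of /v1/evaluations. Alongside the
// standard query options it reads the optional "status" and "job" query
// parameters to filter the returned evaluations, for example
// GET /v1/evaluations?status=pending&job=example (illustrative values only).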
func (s *HTTPServer) evalsListRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	args := structs.EvalListRequest{}
	if s.parse(resp, req, &args.Region, &args.QueryOptions) {
		return nil, nil
	}

	query := req.URL.Query()
	args.FilterEvalStatus = query.Get("status")
	args.FilterJobID = query.Get("job")

	var out structs.EvalListResponse
	if err := s.agent.RPC("Eval.List", &args, &out); err != nil {
		return nil, err
	}

	setMeta(resp, &out.QueryMeta)
	if out.Evaluations == nil {
		out.Evaluations = make([]*structs.Evaluation, 0)
	}
	return out.Evaluations, nil
}
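
// evalsDeleteRequest handles the DELETE case of /v1/evaluations. The body is
// decoded into a structs.EvalDeleteRequest and must name either a list of
// evaluation IDs or a filter expression, but not both; batching of
// filter-based deletes is performed by the RPC handler and state store
// rather than here. A sketch of the two accepted body shapes, assuming the
// struct's default JSON field names (illustrative values only):
//
//	{"EvalIDs": ["<eval-uuid>", "<eval-uuid>"]}
//	{"Filter": "Status == \"pending\""}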
func (s *HTTPServer) evalsDeleteRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {

	var args structs.EvalDeleteRequest
	if err := decodeBody(req, &args); err != nil {
		return nil, CodedError(http.StatusBadRequest, err.Error())
	}

	numIDs := len(args.EvalIDs)

	if args.Filter != "" && numIDs > 0 {
		return nil, CodedError(http.StatusBadRequest,
			"evals cannot be deleted by both ID and filter")
	}
	if args.Filter == "" && numIDs == 0 {
		return nil, CodedError(http.StatusBadRequest,
			"evals must be deleted by either ID or filter")
	}

	// If an explicit list of evaluation IDs is sent, ensure it is within bounds.
	if numIDs > structs.MaxUUIDsPerWriteRequest {
		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf(
"request includes %v evaluation IDs, must be %v or fewer",
numIDs, structs.MaxUUIDsPerWriteRequest))
}

	// Pass the write request to populate all meta fields.
	s.parseWriteRequest(req, &args.WriteRequest)

	var reply structs.EvalDeleteResponse
	if err := s.agent.RPC(structs.EvalDeleteRPCMethod, &args, &reply); err != nil {
		return nil, err
	}
	setIndex(resp, reply.Index)
	return reply, nil
}
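
// EvalSpecificRequest serves /v1/evaluation/<eval id>, dispatching to the
// allocations listing when the path ends in "/allocations" and to the
// single-evaluation lookup otherwise.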
func (s *HTTPServer) EvalSpecificRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	path := strings.TrimPrefix(req.URL.Path, "/v1/evaluation/")
	switch {
	case strings.HasSuffix(path, "/allocations"):
		evalID := strings.TrimSuffix(path, "/allocations")
		return s.evalAllocations(resp, req, evalID)
	default:
		return s.evalQuery(resp, req, path)
	}
}
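
// evalAllocations returns the allocation list stubs associated with the
// given evaluation via the Eval.Allocations RPC.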
func (s *HTTPServer) evalAllocations(resp http.ResponseWriter, req *http.Request, evalID string) (interface{}, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	args := structs.EvalSpecificRequest{
		EvalID: evalID,
	}
	if s.parse(resp, req, &args.Region, &args.QueryOptions) {
		return nil, nil
	}

	var out structs.EvalAllocationsResponse
	if err := s.agent.RPC("Eval.Allocations", &args, &out); err != nil {
		return nil, err
	}

	setMeta(resp, &out.QueryMeta)
	if out.Allocations == nil {
		out.Allocations = make([]*structs.AllocListStub, 0)
	}
	return out.Allocations, nil
}
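
// evalQuery returns a single evaluation via the Eval.GetEval RPC, including
// related evaluations when the "related=true" query parameter is set.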
func (s *HTTPServer) evalQuery(resp http.ResponseWriter, req *http.Request, evalID string) (interface{}, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	args := structs.EvalSpecificRequest{
		EvalID: evalID,
	}
	if s.parse(resp, req, &args.Region, &args.QueryOptions) {
		return nil, nil
	}

	query := req.URL.Query()
	args.IncludeRelated = query.Get("related") == "true"

	var out structs.SingleEvalResponse
	if err := s.agent.RPC("Eval.GetEval", &args, &out); err != nil {
		return nil, err
	}

	setMeta(resp, &out.QueryMeta)
	if out.Eval == nil {
		return nil, CodedError(http.StatusNotFound, "eval not found")
	}
	return out.Eval, nil
}
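
// EvalsCountRequest returns a count of the evaluations matching the request's
// query options via the Eval.Count RPC.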
func (s *HTTPServer) EvalsCountRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != http.MethodGet {
		return nil, CodedError(http.StatusMethodNotAllowed, ErrInvalidMethod)
	}

	args := structs.EvalCountRequest{}
	if s.parse(resp, req, &args.Region, &args.QueryOptions) {
		return nil, nil
	}

	var out structs.EvalCountResponse
	if err := s.agent.RPC("Eval.Count", &args, &out); err != nil {
		return nil, err
	}

	setMeta(resp, &out.QueryMeta)
	return &out, nil
}