open-nomad/nomad/job_endpoint_hooks.go
Seth Hoenig ba728f8f97
api: enable support for setting original job source (#16763)
* api: enable support for setting original source alongside job

This PR adds support for setting job source material along with
the registration of a job.

This includes a new HTTP endpoint and a new RPC endpoint for
making queries for the original source of a job. The
HTTP endpoint is /v1/job/<id>/submission?version=<version> and
the RPC method is Job.GetJobSubmission.

The job source (if submitted; doing so is always optional) is
stored in the job_submission memdb table, separately from the
actual job. This way we do not incur the overhead of reading the
large string field throughout normal job operations.

The server config now includes job_max_source_size for configuring
the maximum size the job source may be, before the server simply
drops the source material. This should help prevent Bad Things from
happening when huge jobs are submitted. If the value is set to 0,
all job source material will be dropped.

* api: avoid writing var content to disk for parsing

* api: move submission validation into RPC layer

* api: return an error if updating a job submission without namespace or job id

* api: be exact about the job index we associate a submission with (modify)

* api: reword api docs scheduling

* api: prune all but the last 6 job submissions

* api: protect against nil job submission in job validation

* api: set max job source size in test server

* api: fixups from pr
2023-04-11 08:45:08 -05:00
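
The commit message above documents the new read path; as a rough illustration
(not part of this file), the snippet below sketches how that HTTP endpoint
could be queried with only the standard library. The agent address, job ID,
and version are placeholders, and the response is decoded into a generic map
rather than the real JobSubmission struct, which is defined elsewhere in the
codebase.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// GET /v1/job/<id>/submission?version=<version> for a job named "example"
	// at version 0, against a local agent on Nomad's default HTTP port.
	url := "http://127.0.0.1:4646/v1/job/example/submission?version=0"

	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode generically; the concrete JobSubmission fields (source, variables,
	// variable flags, and so on) are not defined in this file.
	var submission map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&submission); err != nil {
		panic(err)
	}
	fmt.Println(submission)
}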


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package nomad

import (
	"fmt"

	"github.com/dustin/go-humanize"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	attrVaultVersion      = `${attr.vault.version}`
	attrConsulVersion     = `${attr.consul.version}`
	attrNomadVersion      = `${attr.nomad.version}`
	attrNomadServiceDisco = `${attr.nomad.service_discovery}`
)

var (
	// vaultConstraint is the implicit constraint added to jobs requesting a
	// Vault token
	vaultConstraint = &structs.Constraint{
		LTarget: attrVaultVersion,
		RTarget: ">= 0.6.1",
		Operand: structs.ConstraintSemver,
	}

	// consulServiceDiscoveryConstraint is the implicit constraint added to
	// task groups which include services utilising the Consul provider. The
	// Consul version is pinned to a minimum of that which introduced the
	// namespace feature.
	consulServiceDiscoveryConstraint = &structs.Constraint{
		LTarget: attrConsulVersion,
		RTarget: ">= 1.7.0",
		Operand: structs.ConstraintSemver,
	}

	// nativeServiceDiscoveryConstraint is the constraint injected into task
	// groups that utilise Nomad's native service discovery feature. This is
	// needed, as operators can disable the client functionality, and therefore
	// we need to ensure task groups are placed where they can run
	// successfully.
	nativeServiceDiscoveryConstraint = &structs.Constraint{
		LTarget: attrNomadServiceDisco,
		RTarget: "true",
		Operand: "=",
	}

	// nativeServiceDiscoveryChecksConstraint is the constraint injected into task
	// groups that utilize Nomad's native service discovery checks feature. This
	// is needed, as operators can have versions of Nomad pre-v1.4 mixed into a
	// cluster with v1.4 servers, causing jobs to be placed on incompatible
	// clients.
	nativeServiceDiscoveryChecksConstraint = &structs.Constraint{
		LTarget: attrNomadVersion,
		RTarget: ">= 1.4.0",
		Operand: structs.ConstraintSemver,
	}
)

type admissionController interface {
	Name() string
}

type jobMutator interface {
	admissionController
	Mutate(*structs.Job) (out *structs.Job, warnings []error, err error)
}

type jobValidator interface {
	admissionController
	Validate(*structs.Job) (warnings []error, err error)
}
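
// As an illustrative sketch only (not one of the hooks defined in this file),
// any additional mutator just needs a Name and a Mutate method to satisfy the
// interfaces above; the admission pipeline threads the job through each
// registered mutator in turn. A hypothetical example:
//
//	type jobRegionDefaulter struct{}
//
//	func (jobRegionDefaulter) Name() string { return "region_defaulter" }
//
//	func (jobRegionDefaulter) Mutate(job *structs.Job) (*structs.Job, []error, error) {
//		// Default an empty region rather than rejecting the job.
//		if job.Region == "" {
//			job.Region = "global"
//		}
//		return job, nil, nil
//	}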

func (j *Job) admissionControllers(job *structs.Job) (out *structs.Job, warnings []error, err error) {
	// Mutators run before validators, so validators view the final rendered
	// job. This means mutators must handle invalid jobs.
	out, warnings, err = j.admissionMutators(job)
	if err != nil {
		return nil, nil, err
	}

	validateWarnings, err := j.admissionValidators(job)
	if err != nil {
		return nil, nil, err
	}
	warnings = append(warnings, validateWarnings...)

	return out, warnings, nil
}

// admissionMutators returns an updated job as well as warnings or an error.
func (j *Job) admissionMutators(job *structs.Job) (_ *structs.Job, warnings []error, err error) {
	var w []error
	for _, mutator := range j.mutators {
		job, w, err = mutator.Mutate(job)
		j.logger.Trace("job mutate results", "mutator", mutator.Name(), "warnings", w, "error", err)
		if err != nil {
			return nil, nil, fmt.Errorf("error in job mutator %s: %v", mutator.Name(), err)
		}
		warnings = append(warnings, w...)
	}
	return job, warnings, err
}

// admissionValidators returns a slice of validation warnings and a multierror
// of validation failures.
func (j *Job) admissionValidators(origJob *structs.Job) ([]error, error) {
	// ensure job is not mutated
	job := origJob.Copy()

	var warnings []error
	var errs error

	for _, validator := range j.validators {
		w, err := validator.Validate(job)
		j.logger.Trace("job validate results", "validator", validator.Name(), "warnings", w, "error", err)
		if err != nil {
			errs = multierror.Append(errs, err)
		}
		warnings = append(warnings, w...)
	}

	return warnings, errs
}

// jobCanonicalizer calls job.Canonicalize (sets defaults and initializes
// fields) and returns any errors as warnings.
type jobCanonicalizer struct {
	srv *Server
}

func (c *jobCanonicalizer) Name() string {
	return "canonicalize"
}

func (c *jobCanonicalizer) Mutate(job *structs.Job) (*structs.Job, []error, error) {
	job.Canonicalize()

	// If the job priority is not set, we fall back on the default specified
	// in the server config.
	if job.Priority == 0 {
		job.Priority = c.srv.GetConfig().JobDefaultPriority
	}

	return job, nil, nil
}

// jobImpliedConstraints adds constraints to a job implied by other job fields
// and blocks.
type jobImpliedConstraints struct{}

func (jobImpliedConstraints) Name() string {
	return "constraints"
}

func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, error) {
	// Get the Vault blocks in the job
	vaultBlocks := j.Vault()

	// Get the required signals
	signals := j.RequiredSignals()

	// Identify which task groups are utilising Nomad native service discovery.
	nativeServiceDisco := j.RequiredNativeServiceDiscovery()

	// Identify which task groups are utilising Consul service discovery.
	consulServiceDisco := j.RequiredConsulServiceDiscovery()

	// Hot path
	if len(signals) == 0 && len(vaultBlocks) == 0 &&
		nativeServiceDisco.Empty() && len(consulServiceDisco) == 0 {
		return j, nil, nil
	}

	// Iterate through all the task groups within the job and add any required
	// constraints. When adding new implicit constraints, they should go inside
	// this single loop, with a new constraintMatcher if needed.
	for _, tg := range j.TaskGroups {

		// If the task group utilises Vault, run the mutator.
		if _, ok := vaultBlocks[tg.Name]; ok {
			mutateConstraint(constraintMatcherLeft, tg, vaultConstraint)
		}

		// Check whether the task group is using signals. In the case that it
		// is, we flatten the signals and build a constraint, then run the
		// mutator.
		if tgSignals, ok := signals[tg.Name]; ok {
			required := helper.UniqueMapSliceValues(tgSignals)
			sigConstraint := getSignalConstraint(required)
			mutateConstraint(constraintMatcherFull, tg, sigConstraint)
		}

		// If the task group utilises Nomad service discovery, run the mutator.
		if nativeServiceDisco.Basic.Contains(tg.Name) {
			mutateConstraint(constraintMatcherFull, tg, nativeServiceDiscoveryConstraint)
		}

		// If the task group utilizes NSD checks, run the mutator.
		if nativeServiceDisco.Checks.Contains(tg.Name) {
			mutateConstraint(constraintMatcherFull, tg, nativeServiceDiscoveryChecksConstraint)
		}

		// If the task group utilises Consul service discovery, run the mutator.
		if ok := consulServiceDisco[tg.Name]; ok {
			mutateConstraint(constraintMatcherLeft, tg, consulServiceDiscoveryConstraint)
		}
	}

	return j, nil, nil
}

// constraintMatcher is a custom type which helps control how constraints are
// identified as being present within a task group.
type constraintMatcher uint

const (
	// constraintMatcherFull ensures that a constraint is only considered found
	// when the two constraints match in full. This check is performed using
	// the structs.Constraint Equal function.
	constraintMatcherFull constraintMatcher = iota

	// constraintMatcherLeft ensures that a constraint is considered found if
	// only the constraint's LTarget is matched. This allows an existing
	// constraint to override the proposed implicit one.
	constraintMatcherLeft
)
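
// As an illustration (not part of the upstream file): under
// constraintMatcherLeft, an operator-supplied constraint such as
//
//	&structs.Constraint{
//		LTarget: attrVaultVersion,
//		RTarget: ">= 1.12.0",
//		Operand: structs.ConstraintSemver,
//	}
//
// already counts as a match for vaultConstraint because only the LTarget is
// compared, so the implicit constraint is not appended and the operator's
// stricter bound wins. Under constraintMatcherFull the same constraint would
// only match if LTarget, RTarget, and Operand were all equal, so the implicit
// constraint would still be added alongside it.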

// mutateConstraint is a generic mutator used to set implicit constraints
// within the task group if they are needed.
func mutateConstraint(matcher constraintMatcher, taskGroup *structs.TaskGroup, constraint *structs.Constraint) {

	var found bool

	// It's possible to switch on the matcher within the constraint loop to
	// reduce repetition; however, that would mean switching per constraint,
	// so we do it once here instead.
	switch matcher {
	case constraintMatcherFull:
		for _, c := range taskGroup.Constraints {
			if c.Equal(constraint) {
				found = true
				break
			}
		}
	case constraintMatcherLeft:
		for _, c := range taskGroup.Constraints {
			if c.LTarget == constraint.LTarget {
				found = true
				break
			}
		}
	}

	// If we didn't find a suitable constraint match, add one.
	if !found {
		taskGroup.Constraints = append(taskGroup.Constraints, constraint)
	}
}

// jobValidate validates a Job and task drivers and returns an error if there is
// a validation problem or if the Job is of a type a user is not allowed to
// submit.
type jobValidate struct {
	srv *Server
}

func (*jobValidate) Name() string {
	return "validate"
}

func (v *jobValidate) Validate(job *structs.Job) (warnings []error, err error) {
	validationErrors := new(multierror.Error)
	if err := job.Validate(); err != nil {
		multierror.Append(validationErrors, err)
	}

	// Get any warnings
	jobWarnings := job.Warnings()
	if jobWarnings != nil {
		if multi, ok := jobWarnings.(*multierror.Error); ok {
			// Unpack multiple warnings
			warnings = append(warnings, multi.Errors...)
		} else {
			warnings = append(warnings, jobWarnings)
		}
	}

	// TODO: Validate the driver configurations. These had to be removed in 0.9
	// to support driver plugins, but see issue: #XXXX for more info.

	if job.Type == structs.JobTypeCore {
		multierror.Append(validationErrors, fmt.Errorf("job type cannot be core"))
	}

	if len(job.Payload) != 0 {
		multierror.Append(validationErrors, fmt.Errorf("job can't be submitted with a payload, only dispatched"))
	}

	if job.Priority < structs.JobMinPriority || job.Priority > v.srv.config.JobMaxPriority {
		multierror.Append(validationErrors, fmt.Errorf("job priority must be between [%d, %d]", structs.JobMinPriority, v.srv.config.JobMaxPriority))
	}

	return warnings, validationErrors.ErrorOrNil()
}

type memoryOversubscriptionValidate struct {
	srv *Server
}

func (*memoryOversubscriptionValidate) Name() string {
	return "memory_oversubscription"
}

func (v *memoryOversubscriptionValidate) Validate(job *structs.Job) (warnings []error, err error) {
	_, c, err := v.srv.State().SchedulerConfig()
	if err != nil {
		return nil, err
	}

	if c != nil && c.MemoryOversubscriptionEnabled {
		return nil, nil
	}

	for _, tg := range job.TaskGroups {
		for _, t := range tg.Tasks {
			if t.Resources != nil && t.Resources.MemoryMaxMB != 0 {
				warnings = append(warnings, fmt.Errorf("Memory oversubscription is not enabled; Task \"%v.%v\" memory_max value will be ignored. Update the Scheduler Configuration to allow oversubscription.", tg.Name, t.Name))
			}
		}
	}

	return warnings, err
}

// submissionController is used to protect against job source sizes that exceed
// the maximum set in the server config as job_max_source_size.
//
// Such jobs will have their source discarded and emit a warning, but the job
// itself will still continue with being registered.
func (j *Job) submissionController(args *structs.JobRegisterRequest) error {
	if args.Submission == nil {
		return nil
	}

	maxSize := j.srv.GetConfig().JobMaxSourceSize
	submission := args.Submission

	// discard the submission if the source + variables is larger than the
	// maximum allowable size as set in the server config
	totalSize := len(submission.Source)
	totalSize += len(submission.Variables)
	for key, value := range submission.VariableFlags {
		totalSize += len(key)
		totalSize += len(value)
	}

	if totalSize > maxSize {
		args.Submission = nil
		totalSizeHuman := humanize.Bytes(uint64(totalSize))
		maxSizeHuman := humanize.Bytes(uint64(maxSize))
		return fmt.Errorf("job source size of %s exceeds maximum of %s and will be discarded", totalSizeHuman, maxSizeHuman)
	}

	return nil
}
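
As a rough, self-contained sketch of the size accounting performed by
submissionController (the inputs are stand-ins for the submission fields used
above, and the limit is an arbitrary example rather than a real default):

package main

import (
	"fmt"

	"github.com/dustin/go-humanize"
)

func main() {
	// Stand-ins for submission.Source, submission.Variables, and
	// submission.VariableFlags; every byte of each counts toward the limit.
	source := `job "example" { group "web" { task "app" {} } }`
	variables := `region = "eu-west-1"`
	variableFlags := map[string]string{"datacenter": "dc1"}

	totalSize := len(source) + len(variables)
	for key, value := range variableFlags {
		totalSize += len(key) + len(value)
	}

	// Example limit only; the real value comes from job_max_source_size.
	maxSize := 1024 * 1024

	if totalSize > maxSize {
		fmt.Printf("job source size of %s exceeds maximum of %s and will be discarded\n",
			humanize.Bytes(uint64(totalSize)), humanize.Bytes(uint64(maxSize)))
		return
	}
	fmt.Printf("job source size %s is within the %s limit\n",
		humanize.Bytes(uint64(totalSize)), humanize.Bytes(uint64(maxSize)))
}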