2015-06-01 15:49:10 +00:00
package structs
2015-06-05 22:21:17 +00:00
import (
"bytes"
2016-03-14 22:46:06 +00:00
"crypto/md5"
2015-11-21 20:34:01 +00:00
"crypto/sha1"
2016-03-14 22:46:06 +00:00
"crypto/sha256"
"crypto/sha512"
2017-12-08 19:50:14 +00:00
"encoding/base32"
2016-03-14 22:46:06 +00:00
"encoding/hex"
2015-09-15 17:46:10 +00:00
"errors"
2015-06-05 22:41:03 +00:00
"fmt"
2015-11-21 20:34:01 +00:00
"io"
2016-11-03 21:14:52 +00:00
"net"
2017-12-21 09:32:12 +00:00
"net/url"
2016-10-05 20:41:29 +00:00
"os"
2016-03-18 19:01:46 +00:00
"path/filepath"
2015-11-16 22:25:19 +00:00
"reflect"
2015-10-11 19:50:16 +00:00
"regexp"
2016-10-20 20:55:35 +00:00
"sort"
2015-12-18 20:26:28 +00:00
"strconv"
2015-09-16 00:38:23 +00:00
"strings"
2015-06-05 22:41:03 +00:00
"time"
2015-06-05 22:21:17 +00:00
2017-08-30 16:58:42 +00:00
"golang.org/x/crypto/blake2b"
2015-12-01 00:51:56 +00:00
"github.com/gorhill/cronexpr"
2016-08-16 19:05:15 +00:00
"github.com/hashicorp/consul/api"
2017-11-13 17:32:52 +00:00
multierror "github.com/hashicorp/go-multierror"
2017-08-12 21:11:49 +00:00
"github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/acl"
2017-01-18 23:55:14 +00:00
"github.com/hashicorp/nomad/helper"
2015-11-27 03:26:00 +00:00
"github.com/hashicorp/nomad/helper/args"
2017-09-29 16:58:48 +00:00
"github.com/hashicorp/nomad/helper/uuid"
2015-12-18 20:26:28 +00:00
"github.com/mitchellh/copystructure"
2016-02-21 02:05:17 +00:00
"github.com/ugorji/go/codec"
2018-02-22 23:43:07 +00:00
"math"
2016-02-21 02:05:17 +00:00
hcodec "github.com/hashicorp/go-msgpack/codec"
2015-06-05 22:21:17 +00:00
)
2015-06-05 22:41:03 +00:00
// Package-level compiled pattern and encoding values.
var (
2017-08-12 21:11:49 +00:00
// validPolicyName is used to validate a policy name: 1 to 128 characters,
// each an ASCII letter, digit or '-'.
validPolicyName = regexp . MustCompile ( "^[a-zA-Z0-9-]{1,128}$" )
2017-12-08 23:54:04 +00:00
// b32 is a lowercase base32 encoding for use in URL friendly service hashes.
// NOTE(review): the alphabet literal is already lowercase, so the
// strings.ToLower call is a no-op as written.
b32 = base32 . NewEncoding ( strings . ToLower ( "abcdefghijklmnopqrstuvwxyz234567" ) )
2015-06-05 22:41:03 +00:00
)
2015-06-01 15:49:10 +00:00
// MessageType identifies the kind of request a message carries. It is an
// 8-bit value; IgnoreUnknownTypeFlag (128) occupies its highest bit.
type MessageType uint8
// The request types below are numbered sequentially from zero via iota, so
// each constant's value is determined solely by its position in this list.
// NOTE(review): presumably these values are persisted or exchanged between
// servers, in which case entries must only be appended — confirm before
// reordering or inserting.
const (
2015-07-07 16:51:42 +00:00
NodeRegisterRequestType MessageType = iota
NodeDeregisterRequestType
2015-07-04 01:41:36 +00:00
NodeUpdateStatusRequestType
2015-09-07 02:55:38 +00:00
NodeUpdateDrainRequestType
2015-07-07 16:51:42 +00:00
JobRegisterRequestType
JobDeregisterRequestType
2015-07-23 22:52:38 +00:00
EvalUpdateRequestType
EvalDeleteRequestType
2015-08-04 21:04:33 +00:00
AllocUpdateRequestType
2015-08-26 01:00:14 +00:00
AllocClientUpdateRequestType
2016-08-03 23:08:30 +00:00
ReconcileJobSummariesRequestType
2016-08-19 01:14:58 +00:00
VaultAccessorRegisterRequestType
2018-03-11 17:53:22 +00:00
VaultAccessorDeregisterRequestType
2017-05-05 20:52:01 +00:00
ApplyPlanResultsRequestType
2017-06-26 21:23:52 +00:00
DeploymentStatusUpdateRequestType
DeploymentPromoteRequestType
2017-06-27 17:31:32 +00:00
DeploymentAllocHealthRequestType
2017-06-29 19:32:37 +00:00
DeploymentDeleteRequestType
2017-07-06 19:49:13 +00:00
JobStabilityRequestType
2017-08-08 03:53:07 +00:00
ACLPolicyUpsertRequestType
ACLPolicyDeleteRequestType
2017-08-12 22:44:05 +00:00
ACLTokenUpsertRequestType
ACLTokenDeleteRequestType
2017-08-21 01:19:26 +00:00
ACLTokenBootstrapRequestType
2017-12-18 21:16:23 +00:00
AutopilotRequestType
2018-03-14 00:52:12 +00:00
UpsertNodeEventsType
2018-03-14 22:32:18 +00:00
JobBatchDeregisterRequestType
2018-02-23 01:38:44 +00:00
AllocUpdateDesiredTransitionRequestType
2018-02-27 00:34:42 +00:00
NodeUpdateEligibilityRequestType
2018-03-09 22:15:21 +00:00
BatchNodeUpdateDrainRequestType
2015-06-01 15:49:10 +00:00
)
const (
// IgnoreUnknownTypeFlag is set along with a MessageType
// to indicate that the message type can be safely ignored
// if it is not recognized. This is for future proofing, so
// that new commands can be added in a way that won't cause
// old servers to crash when the FSM attempts to process them.
IgnoreUnknownTypeFlag MessageType = 128
2016-05-28 01:14:34 +00:00
// ApiMajorVersion is returned as part of the Status.Version request.
// It should be incremented anytime the APIs are changed in a way
// that would break clients, to allow for sane client versioning.
ApiMajorVersion = 1
// ApiMinorVersion is returned as part of the Status.Version request.
// It should be incremented anytime the APIs are changed to allow
// for sane client versioning. Minor changes should be compatible
// within the major version.
ApiMinorVersion = 1
// ProtocolVersion, APIMajorVersion and APIMinorVersion are string keys.
// NOTE(review): presumably they index VersionResponse.Versions — confirm
// against the Status.Version handler.
ProtocolVersion = "protocol"
APIMajorVersion = "api.major"
APIMinorVersion = "api.minor"
2017-07-06 03:44:49 +00:00
// GetterModeAny, GetterModeFile and GetterModeDir are getter mode names.
// NOTE(review): presumably the download modes accepted for task
// artifacts — confirm against their users.
GetterModeAny = "any"
GetterModeFile = "file"
GetterModeDir = "dir"
2017-08-12 21:11:49 +00:00
// maxPolicyDescriptionLength limits a policy description length
maxPolicyDescriptionLength = 256
2017-08-12 21:36:10 +00:00
// maxTokenNameLength limits an ACL token name length
2018-02-20 17:51:34 +00:00
maxTokenNameLength = 256
2017-08-12 21:36:10 +00:00
// ACLClientToken and ACLManagementToken are the only types of tokens
ACLClientToken = "client"
ACLManagementToken = "management"
2017-08-21 04:31:45 +00:00
// DefaultNamespace is the default namespace. It is what RequestNamespace
// returns when a request leaves its Namespace field empty.
2017-09-07 23:56:15 +00:00
DefaultNamespace = "default"
// DefaultNamespaceDescription is the description text for DefaultNamespace.
DefaultNamespaceDescription = "Default shared namespace"
2018-01-26 02:15:21 +00:00
// JitterFraction is the limit on the amount of jitter we apply
// to a user specified MaxQueryTime. We divide the specified time by
// the fraction. So 16 == 6.25% limit of jitter. This jitter is also
// applied to RPCHoldTimeout.
JitterFraction = 16
2018-03-12 01:00:13 +00:00
// MaxRetainedNodeEvents is the maximum number of node events that will be
// retained for a single node
MaxRetainedNodeEvents = 10
2015-06-01 15:49:10 +00:00
)
2015-06-05 22:21:17 +00:00
2017-08-18 20:06:25 +00:00
// Context defines the scope in which a search for a Nomad object operates, and
// is also used to query the matching index value for this context.
2017-08-11 21:21:35 +00:00
type Context string
// Known Context values. Note that Deployments maps to the singular string
// "deployment" while every other value is plural.
const (
2017-08-18 20:06:25 +00:00
Allocs Context = "allocs"
Deployments Context = "deployment"
Evals Context = "evals"
Jobs Context = "jobs"
Nodes Context = "nodes"
2017-09-07 23:56:15 +00:00
Namespaces Context = "namespaces"
2017-10-13 21:36:02 +00:00
Quotas Context = "quotas"
2017-08-26 00:04:23 +00:00
All Context = "all"
2017-08-11 21:21:35 +00:00
)
2017-09-07 23:56:15 +00:00
// NamespacedID pairs an object ID with the namespace it belongs to. Both
// fields are plain strings, so the struct is comparable and may serve as a
// map key.
type NamespacedID struct {
	ID        string
	Namespace string
}
2018-03-19 17:12:12 +00:00
// String renders the tuple as `<ns: "NAMESPACE", id: "ID">`, quoting both
// values with the %q verb.
func (n NamespacedID) String() string {
	const layout = "<ns: %q, id: %q>"
	return fmt.Sprintf(layout, n.Namespace, n.ID)
}
2015-06-05 22:41:03 +00:00
// RPCInfo is used to describe common information about a query or write
// request: its target region, whether it is a read, whether a stale read is
// acceptable, and whether it has been forwarded.
type RPCInfo interface {
RequestRegion ( ) string
IsRead ( ) bool
AllowStaleRead ( ) bool
2018-03-27 01:10:43 +00:00
IsForwarded ( ) bool
SetForwarded ( )
}
// InternalRpcInfo carries internal-only metadata attached to an RPC. It must
// NOT be mirrored in the API package.
type InternalRpcInfo struct {
	// Forwarded records whether the RPC has been forwarded.
	Forwarded bool
}
// IsForwarded reports whether the RPC was forwarded from another server, as
// recorded in the Forwarded field.
func (i *InternalRpcInfo) IsForwarded() bool {
	forwarded := i.Forwarded
	return forwarded
}
// SetForwarded marks that the RPC is being forwarded from another server by
// setting Forwarded to true. The pointer receiver makes the change visible to
// the caller; there is no way to clear the flag through this method.
func ( i * InternalRpcInfo ) SetForwarded ( ) {
i . Forwarded = true
2015-06-05 22:41:03 +00:00
}
// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
2015-07-03 23:04:24 +00:00
// The target region for this query
Region string
2017-09-07 23:56:15 +00:00
// Namespace is the target namespace for the query. RequestNamespace
// substitutes DefaultNamespace when this is empty.
Namespace string
2015-06-07 18:18:59 +00:00
// If set, wait until query exceeds given index. Must be provided
// with MaxQueryTime.
MinQueryIndex uint64
// Provided with MinQueryIndex to wait for change.
MaxQueryTime time . Duration
2015-06-05 22:41:03 +00:00
// If set, any follower can service the request. Results
// may be arbitrarily stale.
AllowStale bool
2015-12-22 22:44:33 +00:00
// If set, used as prefix for resource list searches
Prefix string
2017-08-21 03:12:11 +00:00
2017-10-12 22:16:33 +00:00
// AuthToken is the secret portion of the ACL token used for the request
AuthToken string
2018-03-27 01:10:43 +00:00
// InternalRpcInfo supplies the internal-only Forwarded flag and its
// IsForwarded/SetForwarded methods.
InternalRpcInfo
2015-06-05 22:41:03 +00:00
}
// RequestRegion returns the region this query targets.
func (q QueryOptions) RequestRegion() string {
	region := q.Region
	return region
}
2017-09-07 23:56:15 +00:00
// RequestNamespace returns the namespace this query targets, falling back to
// DefaultNamespace when none was specified.
func (q QueryOptions) RequestNamespace() string {
	if ns := q.Namespace; ns != "" {
		return ns
	}
	return DefaultNamespace
}
2015-06-05 22:41:03 +00:00
// IsRead always reports true: QueryOptions only ever accompanies reads.
func (q QueryOptions) IsRead() bool {
	return true
}
// AllowStaleRead reports whether the caller opted into stale reads through
// the AllowStale field.
func (q QueryOptions) AllowStaleRead() bool {
	stale := q.AllowStale
	return stale
}
// WriteRequest holds the options common to write requests. Its IsRead and
// AllowStaleRead methods always report false.
type WriteRequest struct {
2015-07-03 23:04:24 +00:00
// The target region for this write
2015-06-05 22:41:03 +00:00
Region string
2017-08-21 03:12:11 +00:00
2017-09-07 23:56:15 +00:00
// Namespace is the target namespace for the write. RequestNamespace
// substitutes DefaultNamespace when this is empty.
Namespace string
2017-10-12 22:16:33 +00:00
// AuthToken is the secret portion of the ACL token used for the request
AuthToken string
2018-03-27 01:10:43 +00:00
// InternalRpcInfo supplies the internal-only Forwarded flag and its
// IsForwarded/SetForwarded methods.
InternalRpcInfo
2015-06-05 22:41:03 +00:00
}
// RequestRegion returns the region this write targets.
func (w WriteRequest) RequestRegion() string {
	region := w.Region
	return region
}
2017-09-07 23:56:15 +00:00
// RequestNamespace returns the namespace this write targets, falling back to
// DefaultNamespace when none was specified.
func (w WriteRequest) RequestNamespace() string {
	if ns := w.Namespace; ns != "" {
		return ns
	}
	return DefaultNamespace
}
2015-06-05 22:41:03 +00:00
// IsRead always reports false: a WriteRequest only ever accompanies writes.
func (w WriteRequest) IsRead() bool {
	return false
}
// AllowStaleRead always reports false; writes are never served stale.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}
// QueryMeta lets a query response carry potentially useful metadata about
// the query that produced it.
type QueryMeta struct {
	// Index is the index associated with the read.
	Index uint64

	// LastContact is, when AllowStale is used, the time elapsed since the
	// last contact between the follower and the leader. It can be used to
	// gauge staleness.
	LastContact time.Duration

	// KnownLeader indicates whether there is a known leader node.
	KnownLeader bool
}
2016-01-13 18:19:53 +00:00
// WriteMeta allows a write response to include potentially
2015-07-03 23:04:24 +00:00
// useful metadata about the write.
type WriteMeta struct {
// This is the index associated with the write
Index uint64
}
2015-06-07 18:18:59 +00:00
2015-09-07 03:31:32 +00:00
// NodeRegisterRequest is used for the Node.Register endpoint
2015-06-07 18:18:59 +00:00
// to register a node as being a schedulable entity.
2015-07-07 16:51:42 +00:00
type NodeRegisterRequest struct {
2018-05-12 00:26:25 +00:00
// Node is the node to register.
Node * Node
// NodeEvent is an event supplied together with the registration.
// NOTE(review): presumably recorded on the node — confirm in the FSM.
NodeEvent * NodeEvent
2015-07-03 23:04:24 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2015-09-07 03:31:32 +00:00
// NodeDeregisterRequest is used for the Node.Deregister endpoint
2015-07-04 01:41:36 +00:00
// to deregister a node as being a schedulable entity.
2015-07-07 16:51:42 +00:00
type NodeDeregisterRequest struct {
2015-07-04 01:41:36 +00:00
// NodeID names the node to deregister.
NodeID string
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2016-05-23 18:09:31 +00:00
// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
// be contacted at for RPCs.
2016-06-11 03:26:15 +00:00
RPCAdvertiseAddr string
2016-05-23 18:09:31 +00:00
2016-05-28 01:14:34 +00:00
// RPCMajorVersion is the major version number the Nomad Server
// supports
2016-06-11 03:26:15 +00:00
RPCMajorVersion int32
2016-05-28 01:14:34 +00:00
// RPCMinorVersion is the minor version number the Nomad Server
// supports
2016-06-11 03:26:15 +00:00
RPCMinorVersion int32
2016-05-27 10:45:09 +00:00
// Datacenter is the datacenter that a Nomad server belongs to
Datacenter string
2016-05-23 18:09:31 +00:00
}
2015-09-07 03:31:32 +00:00
// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
2015-07-04 01:41:36 +00:00
// to update the status of a node.
2015-07-07 16:51:42 +00:00
type NodeUpdateStatusRequest struct {
2018-05-11 21:53:41 +00:00
// NodeID names the node whose status is updated; Status is the new value.
NodeID string
Status string
// NodeEvent is an event supplied together with the status update.
// NOTE(review): presumably recorded on the node — confirm in the FSM.
NodeEvent * NodeEvent
2015-07-04 01:41:36 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2018-02-23 18:42:43 +00:00
// NodeUpdateDrainRequest is used for updating the drain strategy of a node.
2015-09-07 02:55:38 +00:00
type NodeUpdateDrainRequest struct {
2018-02-23 18:42:43 +00:00
// NodeID names the node being updated; DrainStrategy is its new strategy.
NodeID string
DrainStrategy * DrainStrategy
2018-03-08 19:06:30 +00:00
2018-04-03 22:24:20 +00:00
// COMPAT Remove in version 0.10
// As part of Nomad 0.8 we have deprecated the drain boolean in favor of a
// drain strategy but we need to handle the upgrade path where the Raft log
// contains drain updates with just the drain boolean being manipulated.
Drain bool
2018-03-08 19:06:30 +00:00
// MarkEligible marks the node as eligible if removing the drain strategy.
MarkEligible bool
2018-05-10 23:30:54 +00:00
// NodeEvent is the event added to the node
NodeEvent * NodeEvent
2015-09-07 02:55:38 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2018-03-09 22:15:21 +00:00
// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a
// batch of nodes
type BatchNodeUpdateDrainRequest struct {
// Updates is a mapping of nodes to their updated drain strategy
// NOTE(review): keys are presumably node IDs — confirm against callers.
Updates map [ string ] * DrainUpdate
2018-05-10 23:30:54 +00:00
// NodeEvents is a mapping of the node to the event to add to the node
NodeEvents map [ string ] * NodeEvent
2018-03-09 22:15:21 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
// DrainUpdate describes a change to the drain state of a single node.
type DrainUpdate struct {
	// DrainStrategy is the strategy the node should use from now on.
	DrainStrategy *DrainStrategy

	// MarkEligible marks the node as eligible if the drain strategy is
	// being removed.
	MarkEligible bool
}
2018-02-27 00:34:42 +00:00
// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility
// of a node.
type NodeUpdateEligibilityRequest struct {
// NodeID names the node being updated; Eligibility is its new value.
NodeID string
Eligibility string
2018-05-11 21:32:34 +00:00
// NodeEvent is the event added to the node
NodeEvent * NodeEvent
2015-09-07 02:55:38 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2017-12-13 17:36:03 +00:00
// NodeEvaluateRequest is used to re-evaluate the node named by NodeID.
2015-08-16 01:20:35 +00:00
type NodeEvaluateRequest struct {
NodeID string
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2015-07-06 21:23:15 +00:00
// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
2016-08-16 06:11:57 +00:00
// NodeID names the target node.
NodeID string
// NOTE(review): SecretID is presumably the node's secret used to
// authenticate the caller — confirm against the endpoint.
SecretID string
2015-08-16 01:20:35 +00:00
// QueryOptions supplies the common read-query flags.
QueryOptions
2015-07-06 21:23:15 +00:00
}
2017-08-10 19:24:11 +00:00
// SearchResponse is used to return matches and information about whether
2017-08-04 15:08:12 +00:00
// the match list is truncated specific to each type of context.
2017-08-10 19:24:11 +00:00
type SearchResponse struct {
2017-08-10 16:40:14 +00:00
// Map of context types to ids which match a specified prefix
2017-08-11 21:21:35 +00:00
Matches map [ Context ] [ ] string
2017-08-07 14:16:24 +00:00
// Truncations indicates whether the matches for a particular context have
// been truncated
2017-08-11 21:21:35 +00:00
Truncations map [ Context ] bool
2017-08-07 14:16:24 +00:00
2017-07-28 21:48:15 +00:00
// QueryMeta carries the index, last-contact and known-leader metadata.
QueryMeta
}
2017-08-10 19:24:11 +00:00
// SearchRequest is used to parameterize a request, and returns a
2017-08-10 16:40:14 +00:00
// list of matches made up of jobs, allocations, evaluations, and/or nodes,
// along with whether or not the information returned is truncated.
2017-08-10 19:24:11 +00:00
type SearchRequest struct {
2017-08-10 16:40:14 +00:00
// Prefix is what ids are matched to. I.e., if the given prefix were
2017-08-04 22:18:49 +00:00
// "a", potential matches might be "abcd" or "aabb"
Prefix string
2017-08-07 14:16:24 +00:00
2017-08-10 16:40:14 +00:00
// Context is the type that can be matched against. A context can be a job,
// node, evaluation, allocation, or empty (indicating every context should be
2017-08-04 22:18:49 +00:00
// matched)
2017-08-11 21:21:35 +00:00
Context Context
2017-08-26 22:56:13 +00:00
// QueryOptions supplies the common read-query flags.
QueryOptions
2017-07-28 21:48:15 +00:00
}
2015-07-07 16:51:42 +00:00
// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
// Job is the job to register.
Job * Job
2016-06-08 23:48:02 +00:00
// If EnforceIndex is set then the job will only be registered if the passed
// JobModifyIndex matches the current Jobs index. If the index is zero, the
// register only occurs if the job is new.
EnforceIndex bool
JobModifyIndex uint64
2017-09-19 14:47:10 +00:00
// PolicyOverride is set when the user is attempting to override any policies
PolicyOverride bool
2015-07-07 16:51:42 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
2015-07-23 22:15:48 +00:00
// JobID names the job to deregister.
JobID string
2017-04-15 03:54:30 +00:00
// Purge controls whether the deregister purges the job from the system or
// whether the job is just marked as stopped and will be removed by the
// garbage collector
Purge bool
2015-07-07 16:51:42 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2018-03-14 22:32:18 +00:00
// JobBatchDeregisterRequest deregisters a batch of jobs and upserts
// evaluations in a single request.
type JobBatchDeregisterRequest struct {
	// Jobs is the set of jobs to deregister, keyed by namespace and ID, each
	// with its own deregistration options.
	Jobs map[NamespacedID]*JobDeregisterOptions

	// Evals is the set of evaluations to create.
	Evals []*Evaluation

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
// JobDeregisterOptions tunes how a single job is deregistered.
type JobDeregisterOptions struct {
	// Purge selects between purging the job from the system and merely
	// marking it as stopped, leaving removal to the garbage collector.
	Purge bool
}
2015-08-16 01:11:26 +00:00
// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
2018-05-08 22:26:36 +00:00
// JobID names the job to re-evaluate; EvalOptions tunes the evaluation.
JobID string
EvalOptions EvalOptions
2015-08-16 01:11:26 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2018-05-09 20:04:27 +00:00
// EvalOptions is used to encapsulate options when forcing a job evaluation
2018-05-08 22:26:36 +00:00
type EvalOptions struct {
// NOTE(review): ForceReschedule presumably requests rescheduling as part
// of the forced evaluation — confirm against the scheduler.
ForceReschedule bool
}
2015-07-23 21:41:18 +00:00
// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
2016-11-24 12:20:52 +00:00
// JobID names the target job.
JobID string
// NOTE(review): AllAllocs presumably widens allocation listings beyond
// the current job version — confirm against the endpoint.
AllAllocs bool
2015-08-15 19:59:10 +00:00
// QueryOptions supplies the common read-query flags.
QueryOptions
2015-07-23 21:41:18 +00:00
}
2015-09-06 19:18:45 +00:00
// JobListRequest parameterizes a job list request. It carries nothing beyond
// the common read-query options.
type JobListRequest struct {
	QueryOptions
}
2016-05-05 18:21:58 +00:00
// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
// Job is the job to plan.
Job * Job
Diff bool // Toggles an annotated diff
2017-09-19 14:47:10 +00:00
// PolicyOverride is set when the user is attempting to override any policies
PolicyOverride bool
2016-05-05 18:21:58 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2016-07-18 23:51:47 +00:00
// JobSummaryRequest asks for the summary of one specific job.
type JobSummaryRequest struct {
	// JobID names the job whose summary is wanted.
	JobID string

	// QueryOptions supplies the common read-query flags.
	QueryOptions
}
2017-01-20 18:33:52 +00:00
// JobDispatchRequest is used to dispatch a job based on a parameterized job
2016-11-26 02:04:55 +00:00
type JobDispatchRequest struct {
2016-12-14 20:50:08 +00:00
// JobID names the parameterized job to dispatch. Payload and Meta are
// passed along with the dispatch.
// NOTE(review): how Payload and Meta reach the dispatched job is not
// visible here — confirm against the dispatch endpoint.
JobID string
Payload [ ] byte
Meta map [ string ] string
2016-11-26 02:04:55 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2017-02-06 19:48:28 +00:00
// JobValidateRequest asks for a job to be validated.
type JobValidateRequest struct {
	// Job is the job to validate.
	Job *Job

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
2017-04-18 20:09:24 +00:00
// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
// JobID is the ID of the job being reverted
JobID string
2017-02-06 19:48:28 +00:00
2017-04-18 20:09:24 +00:00
// JobVersion is the version to revert to.
JobVersion uint64
2017-03-03 23:00:39 +00:00
2017-04-18 20:09:24 +00:00
// EnforcePriorVersion if set will enforce that the job is at the given
// version before reverting. It is a pointer so that "unset" (nil) can be
// told apart from version 0.
EnforcePriorVersion * uint64
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
2017-02-06 19:48:28 +00:00
}
2017-07-06 19:49:13 +00:00
// JobStabilityRequest is used to mark a job version as stable or unstable.
type JobStabilityRequest struct {
	// JobID and JobVersion select the job version whose stability is set.
	JobID      string
	JobVersion uint64

	// Stable is the stability value to record.
	Stable bool

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
// JobStabilityResponse is returned after marking a job as stable. It only
// carries the write metadata.
type JobStabilityResponse struct {
	WriteMeta
}
2015-09-06 21:28:29 +00:00
// NodeListRequest parameterizes a node list request. It carries nothing
// beyond the common read-query options.
type NodeListRequest struct {
	QueryOptions
}
2015-08-06 21:51:15 +00:00
// EvalUpdateRequest is used for upserting evaluations.
2015-07-23 22:52:38 +00:00
type EvalUpdateRequest struct {
2015-08-15 21:22:21 +00:00
// Evals is the set of evaluations to upsert.
Evals [ ] * Evaluation
// NOTE(review): EvalToken is presumably the token handed out when the
// evaluation was dequeued — confirm against the eval broker.
EvalToken string
2015-07-23 22:52:38 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
// EvalDeleteRequest is used for deleting evaluations. It also carries a list
// of allocations.
// NOTE(review): Evals and Allocs are presumably IDs to delete together —
// confirm against the FSM handler.
type EvalDeleteRequest struct {
2015-08-15 23:07:22 +00:00
Evals [ ] string
Allocs [ ] string
2015-07-23 22:52:38 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2015-07-23 23:00:19 +00:00
// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
// EvalID names the target evaluation.
EvalID string
2015-08-16 01:20:35 +00:00
// QueryOptions supplies the common read-query flags.
QueryOptions
2015-07-23 23:00:19 +00:00
}
2015-08-12 22:25:31 +00:00
// EvalAckRequest acknowledges (Ack) or rejects (Nack) a specific evaluation.
type EvalAckRequest struct {
	// EvalID names the evaluation being acked or nacked.
	EvalID string
	// Token accompanies the evaluation ID.
	// NOTE(review): presumably the token issued at dequeue time — confirm.
	Token string

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
2015-07-24 04:58:51 +00:00
// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
2016-10-26 21:52:48 +00:00
// NOTE(review): Schedulers presumably lists the scheduler types the caller
// can serve, Timeout bounds how long to wait, and SchedulerVersion
// identifies the caller's scheduler version — confirm against the broker.
Schedulers [ ] string
Timeout time . Duration
SchedulerVersion uint16
2015-07-24 04:58:51 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2015-09-06 23:01:16 +00:00
// EvalListRequest lists the evaluations. It carries nothing beyond the
// common read-query options.
type EvalListRequest struct {
	QueryOptions
}
2015-07-27 22:31:49 +00:00
// PlanRequest submits an allocation plan to the leader.
type PlanRequest struct {
	// Plan is the allocation plan being submitted.
	Plan *Plan

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
2017-05-05 20:52:01 +00:00
// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
// AllocUpdateRequest holds the allocation updates to be made by the
// scheduler. It is embedded, so its fields (including WriteRequest) are
// promoted onto this type.
AllocUpdateRequest
2017-07-04 20:31:01 +00:00
// Deployment is the deployment created or updated as a result of a
// scheduling event.
Deployment * Deployment
2017-05-11 19:49:04 +00:00
// DeploymentUpdates is a set of status updates to apply to the given
// deployments. This allows the scheduler to cancel any unneeded deployment
// because the job is stopped or the update block is removed.
DeploymentUpdates [ ] * DeploymentStatusUpdate
2017-12-18 16:03:55 +00:00
2017-12-18 21:13:16 +00:00
// EvalID is the eval ID of the plan being applied. The modify index of the
// evaluation is updated as part of applying the plan to ensure that subsequent
// scheduling events for the same job will wait for the index that last produced
// state changes. This is necessary for blocked evaluations since they can be
// processed many times, potentially making state updates, without the state of
// the evaluation itself being updated.
2017-12-18 16:03:55 +00:00
EvalID string
2017-05-05 20:52:01 +00:00
}
2015-08-04 21:04:33 +00:00
// AllocUpdateRequest is used to submit changes to allocations, either
2018-03-11 17:37:22 +00:00
// to cause evictions or to assign new allocations. Both can be done
2015-08-04 21:04:33 +00:00
// within a single transaction.
type AllocUpdateRequest struct {
// Alloc is the list of new allocations to assign
Alloc [ ] * Allocation
2016-02-21 19:31:27 +00:00
2018-01-16 14:55:35 +00:00
// Evals is the list of new evaluations to create
// Evals are valid only when used in the Raft RPC
Evals [ ] * Evaluation
2016-02-21 19:31:27 +00:00
// Job is the shared parent job of the allocations.
// It is pulled out since it is common to reduce payload size.
Job * Job
2015-08-26 01:12:51 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
2015-08-04 21:04:33 +00:00
}
2018-02-23 01:38:44 +00:00
// AllocUpdateDesiredTransitionRequest is used to submit changes to an
// allocation's desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
2018-02-21 18:58:04 +00:00
// Allocs is the mapping of allocation ids to their desired state
2018-02-23 01:38:44 +00:00
// transition
Allocs map [ string ] * DesiredTransition
// Evals is the set of evaluations to create
Evals [ ] * Evaluation
2018-02-21 18:58:04 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2015-09-06 22:34:28 +00:00
// AllocListRequest requests a list of allocations. It carries nothing beyond
// the common read-query options.
type AllocListRequest struct {
	QueryOptions
}
2015-09-06 22:46:45 +00:00
// AllocSpecificRequest queries one specific allocation.
type AllocSpecificRequest struct {
	// AllocID names the allocation to look up.
	AllocID string

	// QueryOptions supplies the common read-query flags.
	QueryOptions
}
2016-05-27 06:48:34 +00:00
// AllocsGetRequest is used to query a set of allocations
2016-02-01 21:57:35 +00:00
type AllocsGetRequest struct {
// AllocIDs lists the allocations to fetch.
AllocIDs [ ] string
// QueryOptions supplies the common read-query flags.
QueryOptions
}
2018-03-11 18:42:43 +00:00
// PeriodicForceRequest is used to force a specific periodic job.
2016-01-13 18:19:53 +00:00
type PeriodicForceRequest struct {
// JobID names the periodic job to force.
JobID string
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
}
2016-11-03 21:14:52 +00:00
// ServerMembersResponse holds the list of servers in a cluster together with
// the name, region and datacenter fields of a server.
// NOTE(review): the Server* fields presumably describe the responding
// server — confirm against the endpoint.
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}
// ServerMember describes one Nomad server agent in a cluster: its name,
// address and port, tags, status, and the minimum, maximum and current
// protocol and delegate versions.
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}
2016-08-18 21:31:44 +00:00
// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
2016-08-18 17:50:47 +00:00
type DeriveVaultTokenRequest struct {
// NOTE(review): NodeID and SecretID presumably identify and authenticate
// the requesting node — confirm against the endpoint.
NodeID string
SecretID string
// AllocID names the allocation; Tasks lists the tasks needing tokens.
AllocID string
Tasks [ ] string
// QueryOptions supplies the common read-query flags.
QueryOptions
}
2016-08-22 20:57:27 +00:00
// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
2016-08-19 01:14:58 +00:00
// Accessors is the set of accessors to operate on.
Accessors [ ] * VaultAccessor
}
// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
// AllocID, Task and NodeID name the allocation, task and node the token
// belongs to; Accessor is the reference to the Vault token.
AllocID string
Task string
NodeID string
Accessor string
2016-08-19 01:57:33 +00:00
// NOTE(review): CreationTTL is presumably the token TTL at creation, in
// seconds — confirm the unit against the Vault client.
CreationTTL int
2016-08-19 01:14:58 +00:00
// Raft Indexes
CreateIndex uint64
}
2016-08-18 21:31:44 +00:00
// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
2016-08-18 17:50:47 +00:00
type DeriveVaultTokenResponse struct {
2016-08-19 01:57:33 +00:00
// Tasks is a mapping between the task name and the wrapped token
2016-08-18 21:31:44 +00:00
Tasks map [ string ] string
2016-10-23 01:08:30 +00:00
2017-08-07 21:13:05 +00:00
// Error stores any error that occurred. Errors are stored here so we can
2016-10-23 01:08:30 +00:00
// communicate whether it is retriable
Error * RecoverableError
2016-08-18 17:50:47 +00:00
// QueryMeta carries the index, last-contact and known-leader metadata.
QueryMeta
}
2015-08-15 19:59:10 +00:00
// GenericRequest is used for requests that need no specific information
// beyond the common read-query options.
type GenericRequest struct {
	QueryOptions
}
2017-06-27 18:15:07 +00:00
// DeploymentListRequest lists the deployments. It carries nothing beyond the
// common read-query options.
type DeploymentListRequest struct {
	QueryOptions
}
2017-06-29 19:32:37 +00:00
// DeploymentDeleteRequest deletes deployments.
type DeploymentDeleteRequest struct {
	// Deployments lists the deployments to delete.
	// NOTE(review): entries are presumably deployment IDs — confirm.
	Deployments []string

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
2017-06-26 21:23:52 +00:00
// DeploymentStatusUpdateRequest updates the status of a deployment and can
// optionally create an evaluation atomically with that update.
type DeploymentStatusUpdateRequest struct {
	// Eval, when set, is created at the same time as the deployment status
	// is updated.
	Eval *Evaluation

	// DeploymentUpdate is the status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job optionally upserts a job. It is used when setting the allocation
	// health results in a deployment failure and the deployment auto-reverts
	// to the latest stable job.
	Job *Job
}
// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
// DeploymentID names the deployment the allocations belong to.
DeploymentID string
// Marks these allocations as healthy, allowing further allocations
// to be rolled.
HealthyAllocationIDs [ ] string
// Any unhealthy allocations fail the deployment
UnhealthyAllocationIDs [ ] string
2017-06-29 05:00:18 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
2017-06-26 21:23:52 +00:00
}
// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
// DeploymentAllocHealthRequest is the embedded original request; its
// fields are promoted onto this type.
DeploymentAllocHealthRequest
2018-04-06 20:11:58 +00:00
// Timestamp is the timestamp to use when setting the allocations health.
Timestamp time . Time
2017-06-26 21:23:52 +00:00
// An optional field to update the status of a deployment
DeploymentUpdate * DeploymentStatusUpdate
// Job is used to optionally upsert a job. This is used when setting the
// allocation health results in a deployment failure and the deployment
// auto-reverts to the latest stable job.
Job * Job
// An optional evaluation to create.
// NOTE(review): the original comment said "after promoting the canaries",
// which looks copied from the promote request — confirm intended wording.
Eval * Evaluation
}
// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
// DeploymentID names the deployment whose task groups are promoted.
DeploymentID string
// All is to promote all task groups
All bool
// Groups is used to set the promotion status per task group
2017-06-29 22:15:21 +00:00
Groups [ ] string
2017-06-29 05:00:18 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
2017-06-26 21:23:52 +00:00
}
// ApplyDeploymentPromoteRequest applies a promotion request via Raft.
type ApplyDeploymentPromoteRequest struct {
	// DeploymentPromoteRequest is the embedded original request; its fields
	// are promoted onto this type.
	DeploymentPromoteRequest

	// Eval is an optional evaluation to create after promoting the canaries.
	Eval *Evaluation
}
// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
// DeploymentID names the deployment to pause or resume.
DeploymentID string
// Pause sets the pause status
Pause bool
2017-06-29 05:00:18 +00:00
// WriteRequest supplies the target region, namespace and auth token.
WriteRequest
2017-06-26 21:23:52 +00:00
}
// DeploymentSpecificRequest makes a request that is specific to one
// particular deployment.
type DeploymentSpecificRequest struct {
	// DeploymentID names the target deployment.
	DeploymentID string

	// QueryOptions supplies the common read-query flags.
	QueryOptions
}
2017-06-29 05:00:18 +00:00
// DeploymentFailRequest fails one particular deployment.
type DeploymentFailRequest struct {
	// DeploymentID names the deployment to fail.
	DeploymentID string

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
// SingleDeploymentResponse responds with a single deployment.
type SingleDeploymentResponse struct {
	// Deployment is the deployment being returned.
	Deployment *Deployment

	// QueryMeta carries the index, last-contact and known-leader metadata.
	QueryMeta
}
2015-07-04 01:41:36 +00:00
// GenericResponse is used to respond to a request where no
// specific response information is needed. It only carries the write index.
type GenericResponse struct {
2015-07-03 23:04:24 +00:00
WriteMeta
}
2018-03-11 18:48:24 +00:00
// VersionResponse is used for the Status.Version response
2015-08-15 19:59:10 +00:00
type VersionResponse struct {
2015-08-15 20:08:06 +00:00
// Build is the build string; Versions maps a version name to its number.
// NOTE(review): the keys are presumably ProtocolVersion, APIMajorVersion
// and APIMinorVersion — confirm against the Status.Version handler.
Build string
Versions map [ string ] int
2015-08-15 19:59:10 +00:00
// QueryMeta carries the index, last-contact and known-leader metadata.
QueryMeta
}
2015-08-06 18:48:44 +00:00
// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
// NOTE(review): EvalID and EvalCreateIndex presumably describe the
// evaluation created for the registration, and JobModifyIndex the job's
// resulting modify index — confirm against the Job.Register endpoint.
EvalID string
EvalCreateIndex uint64
JobModifyIndex uint64
2017-05-10 03:52:47 +00:00
// Warnings contains any warnings about the given job. These may include
// deprecation warnings.
Warnings string
2015-08-06 18:48:44 +00:00
// QueryMeta is embedded here even though this is a response to a write.
QueryMeta
}
2015-08-06 21:17:18 +00:00
// JobDeregisterResponse responds to a job deregistration.
type JobDeregisterResponse struct {
	// NOTE(review): EvalID and EvalCreateIndex presumably describe the
	// evaluation created for the deregistration, and JobModifyIndex the
	// job's resulting modify index — confirm against the endpoint.
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// QueryMeta is embedded here even though this is a response to a write.
	QueryMeta
}
2018-03-14 22:32:18 +00:00
// JobBatchDeregisterResponse responds to a batch job deregistration.
type JobBatchDeregisterResponse struct {
	// JobEvals maps each job, keyed by namespace and ID, to the evaluation
	// created for it.
	JobEvals map[NamespacedID]string

	// QueryMeta is embedded here even though this is a response to a write.
	QueryMeta
}
2017-04-18 20:09:24 +00:00
// JobValidateResponse is the response from a validate request
type JobValidateResponse struct {
// DriverConfigValidated indicates whether the agent validated the driver
// config
DriverConfigValidated bool
// ValidationErrors is a list of validation errors
ValidationErrors [ ] string
2017-08-07 21:13:05 +00:00
// Error is a string version of any error that may have occurred
2017-04-18 20:09:24 +00:00
Error string
2017-05-10 03:52:47 +00:00
// Warnings contains any warnings about the given job. These may include
// deprecation warnings.
Warnings string
2017-04-18 20:09:24 +00:00
}
2015-08-06 23:39:20 +00:00
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
2015-08-23 00:37:50 +00:00
HeartbeatTTL time . Duration
2015-08-06 23:39:20 +00:00
EvalIDs [ ] string
EvalCreateIndex uint64
NodeModifyIndex uint64
2016-05-23 18:09:31 +00:00
// LeaderRPCAddr is the RPC address of the current Raft Leader. If
// empty, the current Nomad Server is in the minority of a partition.
LeaderRPCAddr string
// NumNodes is the number of Nomad nodes attached to this quorum of
// Nomad Servers at the time of the response. This value can
// fluctuate based on the health of the cluster between heartbeats.
NumNodes int32
// Servers is the full list of known Nomad servers in the local
// region.
Servers [ ] * NodeServerInfo
2015-08-06 23:39:20 +00:00
QueryMeta
}
2015-09-07 03:00:12 +00:00
// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
NodeModifyIndex uint64
2018-03-27 22:53:24 +00:00
EvalIDs [ ] string
EvalCreateIndex uint64
WriteMeta
}
2018-02-23 18:42:43 +00:00
2018-03-27 22:53:24 +00:00
// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
NodeModifyIndex uint64
2018-02-23 18:42:43 +00:00
EvalIDs [ ] string
EvalCreateIndex uint64
2018-03-27 22:53:24 +00:00
WriteMeta
2015-09-07 03:00:12 +00:00
}
2015-08-23 02:17:49 +00:00
// NodeAllocsResponse returns the allocations for a single node.
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}
2016-01-29 14:29:52 +00:00
// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
Allocs map [ string ] uint64
2017-10-02 19:18:33 +00:00
2017-10-10 00:23:26 +00:00
// MigrateTokens are used when ACLs are enabled to allow cross node,
// authenticated access to sticky volumes
2017-10-02 19:18:33 +00:00
MigrateTokens map [ string ] string
2016-01-29 14:29:52 +00:00
QueryMeta
}
2015-07-06 21:23:15 +00:00
// SingleNodeResponse returns a single node.
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}
2017-04-13 22:47:59 +00:00
// NodeListResponse is used for a list request
2015-09-06 21:28:29 +00:00
type NodeListResponse struct {
Nodes [ ] * NodeListStub
QueryMeta
}
2015-07-23 21:41:18 +00:00
// SingleJobResponse returns a single job.
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}
2016-07-22 06:13:07 +00:00
// JobSummaryResponse is used to return a single job summary
2016-07-21 21:43:21 +00:00
type JobSummaryResponse struct {
2016-07-18 23:51:47 +00:00
JobSummary * JobSummary
QueryMeta
}
2016-11-26 02:04:55 +00:00
type JobDispatchResponse struct {
DispatchedJobID string
EvalID string
EvalCreateIndex uint64
JobCreateIndex uint64
2017-02-17 19:43:14 +00:00
WriteMeta
2016-11-26 02:04:55 +00:00
}
2015-09-06 19:18:45 +00:00
// JobListResponse is the reply to a job list request.
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}
2017-06-30 00:16:20 +00:00
// JobVersionsRequest asks for the versions of a job.
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}
2017-04-13 22:47:59 +00:00
// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
Versions [ ] * Job
2017-06-30 00:16:20 +00:00
Diffs [ ] * JobDiff
2017-04-13 22:47:59 +00:00
QueryMeta
}
2016-05-05 18:21:58 +00:00
// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
2016-05-12 18:29:38 +00:00
// Annotations stores annotations explaining decisions the scheduler made.
Annotations * PlanAnnotations
2016-05-05 18:21:58 +00:00
2016-05-31 21:51:23 +00:00
// FailedTGAllocs is the placement failures per task group.
FailedTGAllocs map [ string ] * AllocMetric
2016-05-16 18:48:44 +00:00
// JobModifyIndex is the modification index of the job. The value can be
// used when running `nomad run` to ensure that the Job wasn’ t modified
// since the last plan. If the job is being created, the value is zero.
JobModifyIndex uint64
2016-05-05 18:21:58 +00:00
// CreatedEvals is the set of evaluations created by the scheduler. The
// reasons for this can be rolling-updates or blocked evals.
CreatedEvals [ ] * Evaluation
// Diff contains the diff of the job and annotations on whether the change
// causes an in-place update or create/destroy
Diff * JobDiff
2016-06-15 20:34:45 +00:00
// NextPeriodicLaunch is the time duration till the job would be launched if
// submitted.
NextPeriodicLaunch time . Time
2017-05-10 03:52:47 +00:00
// Warnings contains any warnings about the given job. These may include
// deprecation warnings.
Warnings string
2016-05-12 01:51:48 +00:00
WriteMeta
2016-05-05 18:21:58 +00:00
}
2015-09-06 22:46:45 +00:00
// SingleAllocResponse returns a single allocation.
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}
2016-02-01 21:57:35 +00:00
// AllocsGetResponse returns a set of allocations.
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}
2015-09-06 19:18:45 +00:00
// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
2015-09-06 23:14:41 +00:00
Allocations [ ] * AllocListStub
2015-09-06 19:18:45 +00:00
QueryMeta
}
// JobEvaluationsResponse returns the evaluations for a job.
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
2015-07-23 23:00:19 +00:00
// SingleEvalResponse returns a single evaluation.
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}
2015-08-12 22:25:31 +00:00
// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
Eval * Evaluation
Token string
2017-09-13 20:47:01 +00:00
// WaitIndex is the Raft index the worker should wait until invoking the
// scheduler.
WaitIndex uint64
2015-08-12 22:25:31 +00:00
QueryMeta
}
2017-09-13 20:47:01 +00:00
// GetWaitIndex reports the Raft index that state should have reached (or
// passed) before the scheduler is invoked.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
	// An explicitly sent wait index wins. It is populated on all responses
	// from 0.7.0 and above.
	if idx := e.WaitIndex; idx != 0 {
		return idx
	}

	// Otherwise fall back to the evaluation's modify index when one is set.
	if e.Eval != nil {
		return e.Eval.ModifyIndex
	}

	// This should never happen.
	return 1
}
2015-07-27 22:31:49 +00:00
// PlanResponse is what a PlanRequest returns.
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}
2015-09-06 22:34:28 +00:00
// AllocListResponse is the reply to an allocation list request.
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
2017-06-27 18:15:07 +00:00
// DeploymentListResponse is the reply to a deployment list request.
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}
2015-09-06 23:01:16 +00:00
// EvalListResponse is the reply to an evaluation list request.
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
2015-09-06 23:14:41 +00:00
// EvalAllocationsResponse returns the allocations for an evaluation.
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
2016-01-13 18:19:53 +00:00
// PeriodicForceResponse is the reply to a periodic job force launch.
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}
2017-06-26 21:23:52 +00:00
// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
EvalID string
EvalCreateIndex uint64
DeploymentModifyIndex uint64
2017-07-06 20:55:44 +00:00
// RevertedJobVersion is the version the job was reverted to. If unset, the
// job wasn't reverted
RevertedJobVersion * uint64
2017-06-29 05:00:18 +00:00
WriteMeta
2017-06-26 21:23:52 +00:00
}
2018-01-13 00:52:24 +00:00
// NodeConnQueryResponse answers a query of whether a server has a connection
// to a specific Node.
type NodeConnQueryResponse struct {
	// Connected reports whether a connection to the Client exists.
	Connected bool

	// Established is the time at which the connection was established.
	Established time.Time

	QueryMeta
}
2018-03-14 00:52:12 +00:00
// EmitNodeEventsRequest is a request to update the node events source with a
// new client-side event.
type EmitNodeEventsRequest struct {
	// NodeEvents maps a node ID to the list of events for that node.
	NodeEvents map[string][]*NodeEvent
	WriteRequest
}
// EmitNodeEventsResponse tells the client about the status of the node event
// source update.
type EmitNodeEventsResponse struct {
	Index uint64
	WriteMeta
}
// Subsystem names recorded in NodeEvent.Subsystem.
const (
	NodeEventSubsystemDrain     = "Drain"
	NodeEventSubsystemDriver    = "Driver"
	NodeEventSubsystemHeartbeat = "Heartbeat"
	NodeEventSubsystemCluster   = "Cluster"
)
// NodeEvent is a single unit representing a node's state change.
type NodeEvent struct {
	Message     string
	Subsystem   string
	Details     map[string]string
	Timestamp   time.Time
	CreateIndex uint64
}

// String renders the event's message, subsystem, details and timestamp on one
// line. Details are written as comma-joined "key: value" pairs ordered by key,
// so repeated calls produce the same text even though Go randomizes map
// iteration order.
func (ne *NodeEvent) String() string {
	// Collect and sort the keys first; formatting then follows key order.
	keys := make([]string, 0, len(ne.Details))
	for k := range ne.Details {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	details := make([]string, 0, len(keys))
	for _, k := range keys {
		details = append(details, fmt.Sprintf("%s: %s", k, ne.Details[k]))
	}

	return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String())
}
// Copy returns a new NodeEvent with the same field values as ne, whose Details
// map is the result of helper.CopyMapStringString. A nil receiver yields nil,
// matching the other Copy methods in this file and keeping callers that copy
// slices of events safe when an element is nil.
func (ne *NodeEvent) Copy() *NodeEvent {
	if ne == nil {
		return nil
	}
	c := new(NodeEvent)
	*c = *ne
	c.Details = helper.CopyMapStringString(ne.Details)
	return c
}
2018-05-10 23:54:43 +00:00
// NewNodeEvent creates a node event whose timestamp is the current time.
func NewNodeEvent() *NodeEvent {
	ne := new(NodeEvent)
	ne.Timestamp = time.Now()
	return ne
}
// SetMessage stores msg as the event's message and returns the same event so
// calls can be chained.
func (ne *NodeEvent) SetMessage(msg string) *NodeEvent {
	ne.Message = msg
	return ne
}
// SetSubsystem stores sys as the event's subsystem and returns the same event
// so calls can be chained.
func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent {
	ne.Subsystem = sys
	return ne
}
2018-05-12 00:26:25 +00:00
// SetTimestamp stores ts as the event's timestamp and returns the same event
// so calls can be chained.
func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent {
	ne.Timestamp = ts
	return ne
}
2018-05-10 23:54:43 +00:00
// AddDetail records the detail k=v on the event, creating the Details map on
// first use, and returns the same event so calls can be chained.
func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent {
	if ne.Details != nil {
		ne.Details[k] = v
		return ne
	}

	// First detail: the map has not been allocated yet.
	details := make(map[string]string, 1)
	details[k] = v
	ne.Details = details
	return ne
}
2015-07-03 23:57:48 +00:00
// Node status values, as accepted by ValidNodeStatus.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
2015-08-13 23:40:51 +00:00
// ShouldDrainNode checks if a given node status should trigger an
2015-08-06 23:39:20 +00:00
// evaluation. Some states don't require any further action.
2015-08-13 23:40:51 +00:00
func ShouldDrainNode ( status string ) bool {
2015-08-06 23:39:20 +00:00
switch status {
2015-09-07 02:47:02 +00:00
case NodeStatusInit , NodeStatusReady :
2015-08-06 23:39:20 +00:00
return false
2015-09-07 02:47:02 +00:00
case NodeStatusDown :
2015-08-06 23:39:20 +00:00
return true
default :
panic ( fmt . Sprintf ( "unhandled node status %s" , status ) )
}
}
// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus ( status string ) bool {
switch status {
2015-09-07 02:47:02 +00:00
case NodeStatusInit , NodeStatusReady , NodeStatusDown :
2015-08-06 23:39:20 +00:00
return true
default :
return false
}
}
2018-01-24 00:47:00 +00:00
const (
	// NodeSchedulingEligible and NodeSchedulingIneligible mark the node as
	// eligible or not, respectively, for receiving allocations. This is
	// orthogonal to the node status being ready.
	NodeSchedulingEligible   = "eligible"
	NodeSchedulingIneligible = "ineligible"
)
2018-02-23 23:56:36 +00:00
// DrainSpec describes a Node's desired drain behavior.
type DrainSpec struct {
	// Deadline is the duration after StartTime when the remaining
	// allocations on a draining Node should be told to stop.
	Deadline time.Duration

	// IgnoreSystemJobs allows systems jobs to remain on the node even though it
	// has been marked for draining.
	IgnoreSystemJobs bool
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification.
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains will
	// be forced.
	ForceDeadline time.Time
}

// Copy returns a copy of the drain strategy, or nil for a nil receiver. All
// fields are plain values, so a struct copy is sufficient.
func (d *DrainStrategy) Copy() *DrainStrategy {
	if d == nil {
		return nil
	}

	nd := new(DrainStrategy)
	*nd = *d
	return nd
}

// DeadlineTime returns a boolean whether the drain strategy allows an infinite
// duration or otherwise the deadline time. The force drain is captured by the
// deadline time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
	// Treat the nil case as a force drain so during an upgrade where a node may
	// not have a drain strategy but has Drain set to true, it is treated as a
	// force to mimic old behavior.
	if d == nil {
		return false, time.Time{}
	}

	ns := d.Deadline.Nanoseconds()
	switch {
	case ns < 0: // Force
		return false, time.Time{}
	case ns == 0: // Infinite
		return true, time.Time{}
	default:
		return false, d.ForceDeadline
	}
}

// Equal reports whether d and o describe the same drain strategy. Two nil
// strategies are equal; a nil and a non-nil strategy are not.
//
// ForceDeadline is compared with time.Time.Equal rather than ==, so that two
// values denoting the same instant are treated as equal even when they differ
// in location or in whether they carry a monotonic clock reading.
func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil || o == nil {
		// Equal only when both are nil.
		return d == o
	}

	return d.ForceDeadline.Equal(o.ForceDeadline) &&
		d.Deadline == o.Deadline &&
		d.IgnoreSystemJobs == o.IgnoreSystemJobs
}
2015-07-03 23:04:24 +00:00
// Node is a representation of a schedulable client node
type Node struct {
// ID is a unique identifier for the node. It can be constructed
// by doing a concatenation of the Name and Datacenter as a simple
// approach. Alternatively a UUID may be used.
ID string
2016-08-16 06:11:57 +00:00
// SecretID is an ID that is only known by the Node and the set of Servers.
// It is not accessible via the API and is used to authenticate nodes
2018-01-08 20:56:07 +00:00
// conducting privileged activities.
2016-08-16 06:11:57 +00:00
SecretID string
2015-06-07 18:18:59 +00:00
// Datacenter for this node
Datacenter string
2015-06-07 19:14:41 +00:00
// Node name
2015-07-03 23:04:24 +00:00
Name string
2015-06-07 18:18:59 +00:00
2016-01-27 19:06:48 +00:00
// HTTPAddr is the address on which the Nomad client is listening for http
// requests
2016-01-26 22:31:52 +00:00
HTTPAddr string
2016-10-26 18:13:53 +00:00
// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
TLSEnabled bool
2015-06-07 18:18:59 +00:00
// Attributes is an arbitrary set of key/value
// data that can be used for constraints. Examples
2015-08-28 08:30:47 +00:00
// include "kernel.name=linux", "arch=386", "driver.docker=1",
2015-07-03 23:04:24 +00:00
// "docker.runtime=1.8.3"
2015-07-06 20:01:10 +00:00
Attributes map [ string ] string
2015-06-07 18:18:59 +00:00
// Resources is the available resources on the client.
// For example 'cpu=2' 'memory=2048'
2015-07-06 20:01:10 +00:00
Resources * Resources
2015-06-07 18:18:59 +00:00
2015-07-03 23:57:48 +00:00
// Reserved is the set of resources that are reserved,
// and should be subtracted from the total resources for
// the purposes of scheduling. This may be provide certain
// high-watermark tolerances or because of external schedulers
// consuming resources.
Reserved * Resources
2015-06-07 18:18:59 +00:00
// Links are used to 'link' this client to external
// systems. For example 'consul=foo.dc1' 'aws=i-83212'
// 'ami=ami-123'
2015-07-06 20:01:10 +00:00
Links map [ string ] string
2015-06-07 18:18:59 +00:00
// Meta is used to associate arbitrary metadata with this
// client. This is opaque to Nomad.
Meta map [ string ] string
2015-07-04 00:37:01 +00:00
// NodeClass is an opaque identifier used to group nodes
// together for the purpose of determining scheduling pressure.
NodeClass string
2016-01-21 01:30:02 +00:00
// ComputedClass is a unique id that identifies nodes with a common set of
// attributes and capabilities.
2016-01-30 01:46:44 +00:00
ComputedClass string
2016-01-21 01:30:02 +00:00
2018-02-23 18:42:43 +00:00
// COMPAT: Remove in Nomad 0.9
2015-09-07 02:47:02 +00:00
// Drain is controlled by the servers, and not the client.
// If true, no jobs will be scheduled to this node, and existing
2018-01-24 00:47:00 +00:00
// allocations will be drained. Superceded by DrainStrategy in Nomad
// 0.8 but kept for backward compat.
2015-09-07 02:47:02 +00:00
Drain bool
2018-01-24 00:47:00 +00:00
// DrainStrategy determines the node's draining behavior. Will be nil
// when Drain=false.
DrainStrategy * DrainStrategy
// SchedulingEligibility determines whether this node will receive new
// placements.
SchedulingEligibility string
2015-07-03 23:04:24 +00:00
// Status of this node
Status string
2015-07-04 00:50:54 +00:00
2015-08-15 20:08:06 +00:00
// StatusDescription is meant to provide more human useful information
StatusDescription string
2016-07-12 17:29:23 +00:00
// StatusUpdatedAt is the time stamp at which the state of the node was
// updated
StatusUpdatedAt int64
2018-03-14 00:59:37 +00:00
// Events is the most recent set of events generated for the node,
2018-03-12 01:00:13 +00:00
// retaining only MaxRetainedNodeEvents number at a time
2018-03-14 00:59:37 +00:00
Events [ ] * NodeEvent
2018-03-01 22:17:33 +00:00
2018-01-25 16:30:15 +00:00
// Drivers is a map of driver names to current driver information
Drivers map [ string ] * DriverInfo
2015-07-04 00:50:54 +00:00
// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
2015-06-07 18:18:59 +00:00
}
2018-01-24 00:47:00 +00:00
// Ready returns true if the node is ready for running allocations
2016-09-24 04:15:50 +00:00
func ( n * Node ) Ready ( ) bool {
2018-01-24 00:47:00 +00:00
// Drain is checked directly to support pre-0.8 Node data
return n . Status == NodeStatusReady && ! n . Drain && n . SchedulingEligibility == NodeSchedulingEligible
2016-09-24 04:15:50 +00:00
}
2018-03-29 23:33:11 +00:00
// Canonicalize fills in SchedulingEligibility when it is empty: a draining
// node becomes ineligible, any other node eligible. A nil node, or one whose
// eligibility is already set, is left untouched.
func (n *Node) Canonicalize() {
	// COMPAT Remove in 0.10
	// In v0.8.0 we introduced scheduling eligibility, so we need to set it for
	// upgrading nodes.
	if n == nil || n.SchedulingEligibility != "" {
		return
	}

	n.SchedulingEligibility = NodeSchedulingEligible
	if n.Drain {
		n.SchedulingEligibility = NodeSchedulingIneligible
	}
}
2016-02-10 21:44:53 +00:00
func ( n * Node ) Copy ( ) * Node {
2016-02-11 01:54:43 +00:00
if n == nil {
2016-02-10 21:44:53 +00:00
return nil
}
2016-02-11 01:54:43 +00:00
nn := new ( Node )
* nn = * n
2017-01-18 23:55:14 +00:00
nn . Attributes = helper . CopyMapStringString ( nn . Attributes )
2016-02-11 01:54:43 +00:00
nn . Resources = nn . Resources . Copy ( )
nn . Reserved = nn . Reserved . Copy ( )
2017-01-18 23:55:14 +00:00
nn . Links = helper . CopyMapStringString ( nn . Links )
nn . Meta = helper . CopyMapStringString ( nn . Meta )
2018-03-14 00:59:37 +00:00
nn . Events = copyNodeEvents ( n . Events )
2018-02-23 18:42:43 +00:00
nn . DrainStrategy = nn . DrainStrategy . Copy ( )
2018-04-16 22:02:00 +00:00
nn . Drivers = copyNodeDrivers ( n . Drivers )
2016-02-11 01:54:43 +00:00
return nn
2016-02-10 21:44:53 +00:00
}
2018-03-14 00:52:12 +00:00
// copyNodeEvents is a helper to copy a list of NodeEvent's
func copyNodeEvents ( events [ ] * NodeEvent ) [ ] * NodeEvent {
l := len ( events )
if l == 0 {
return nil
}
c := make ( [ ] * NodeEvent , l )
for i , event := range events {
c [ i ] = event . Copy ( )
}
return c
2018-03-08 14:34:08 +00:00
}
2018-04-16 22:02:00 +00:00
// copyNodeDrivers returns a new map holding a Copy of every DriverInfo under
// its original key. An empty or nil input yields nil.
func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
	if len(drivers) == 0 {
		return nil
	}

	out := make(map[string]*DriverInfo, len(drivers))
	for name := range drivers {
		out[name] = drivers[name].Copy()
	}
	return out
}
2015-08-23 00:17:13 +00:00
// TerminalStatus reports whether the node's current status is terminal and
// will no longer transition. Only the down status is terminal.
func (n *Node) TerminalStatus() bool {
	return n.Status == NodeStatusDown
}
2015-09-06 22:34:28 +00:00
// Stub returns a summarized version of the node
func ( n * Node ) Stub ( ) * NodeListStub {
2018-01-11 07:17:58 +00:00
addr , _ , _ := net . SplitHostPort ( n . HTTPAddr )
2015-09-06 22:34:28 +00:00
return & NodeListStub {
2018-01-24 00:47:00 +00:00
Address : addr ,
ID : n . ID ,
Datacenter : n . Datacenter ,
Name : n . Name ,
NodeClass : n . NodeClass ,
Version : n . Attributes [ "nomad.version" ] ,
Drain : n . Drain ,
SchedulingEligibility : n . SchedulingEligibility ,
Status : n . Status ,
StatusDescription : n . StatusDescription ,
2018-05-09 15:21:54 +00:00
Drivers : n . Drivers ,
2018-01-24 00:47:00 +00:00
CreateIndex : n . CreateIndex ,
ModifyIndex : n . ModifyIndex ,
2015-09-06 22:34:28 +00:00
}
}
// NodeListStub is used to return a subset of job information
// for the job list
type NodeListStub struct {
2018-01-24 00:47:00 +00:00
Address string
ID string
Datacenter string
Name string
NodeClass string
Version string
Drain bool
SchedulingEligibility string
Status string
StatusDescription string
2018-05-09 15:21:54 +00:00
Drivers map [ string ] * DriverInfo
2018-01-24 00:47:00 +00:00
CreateIndex uint64
ModifyIndex uint64
2015-09-06 22:34:28 +00:00
}
2017-06-09 17:29:41 +00:00
// Networks defined for a task on the Resources struct.
type Networks []*NetworkResource

// Port looks up the port with the given label and returns the IP of the
// network that owns it along with the port value. Within each network the
// reserved ports are searched before the dynamic ones. When no port matches,
// the empty string and zero are returned.
func (ns Networks) Port(label string) (string, int) {
	for _, nw := range ns {
		for _, ports := range [2][]Port{nw.ReservedPorts, nw.DynamicPorts} {
			for i := range ports {
				if ports[i].Label == label {
					return nw.IP, ports[i].Value
				}
			}
		}
	}
	return "", 0
}
2015-06-07 18:18:59 +00:00
// Resources is used to define the resources available
// on a client
type Resources struct {
2015-09-23 18:14:32 +00:00
CPU int
2017-02-22 20:30:05 +00:00
MemoryMB int
DiskMB int
2015-07-03 23:57:48 +00:00
IOPS int
2017-06-09 17:29:41 +00:00
Networks Networks
2015-06-07 18:18:59 +00:00
}
2016-08-11 07:20:53 +00:00
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024).
	BytesInMegabyte = 1024 * 1024
)
2017-11-13 17:05:30 +00:00
// DefaultResources is a small resources object that contains the
// default resources requests that we will provide to an object.
2017-11-13 17:32:52 +00:00
// --- THIS FUNCTION IS REPLICATED IN api/resources.go and should
2017-11-13 17:05:30 +00:00
// be kept in sync.
2016-02-02 20:00:26 +00:00
func DefaultResources ( ) * Resources {
2017-11-13 17:05:30 +00:00
return & Resources {
CPU : 100 ,
MemoryMB : 300 ,
IOPS : 0 ,
}
}
// MinResources is a small resources object that contains the
// absolute minimum resources that we will provide to an object.
2017-11-13 17:32:52 +00:00
// This should not be confused with the defaults which are
// provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN
2017-11-13 17:05:30 +00:00
// api/resources.go and should be kept in sync.
func MinResources ( ) * Resources {
2016-02-02 20:00:26 +00:00
return & Resources {
2018-01-02 14:56:02 +00:00
CPU : 20 ,
2016-02-02 20:00:26 +00:00
MemoryMB : 10 ,
2016-02-03 00:13:25 +00:00
IOPS : 0 ,
2016-02-02 20:00:26 +00:00
}
}
2016-08-11 07:20:53 +00:00
// DiskInBytes returns the amount of disk resources in bytes.
//
// DiskMB is widened to int64 before the multiplication so the product is
// computed in 64 bits; multiplying in the platform-sized int first would
// overflow on 32-bit builds once DiskMB reaches 2048.
func (r *Resources) DiskInBytes() int64 {
	return int64(r.DiskMB) * BytesInMegabyte
}
2016-02-02 20:07:16 +00:00
// Merge merges this resource with another resource.
2016-02-02 20:00:26 +00:00
func ( r * Resources ) Merge ( other * Resources ) {
if other . CPU != 0 {
r . CPU = other . CPU
}
if other . MemoryMB != 0 {
r . MemoryMB = other . MemoryMB
}
if other . DiskMB != 0 {
r . DiskMB = other . DiskMB
}
if other . IOPS != 0 {
r . IOPS = other . IOPS
}
if len ( other . Networks ) != 0 {
r . Networks = other . Networks
}
}
2016-07-20 23:07:15 +00:00
func ( r * Resources ) Canonicalize ( ) {
2016-07-18 23:17:38 +00:00
// Ensure that an empty and nil slices are treated the same to avoid scheduling
// problems since we use reflect DeepEquals.
if len ( r . Networks ) == 0 {
r . Networks = nil
}
for _ , n := range r . Networks {
2016-07-20 23:07:15 +00:00
n . Canonicalize ( )
2016-07-18 23:17:38 +00:00
}
}
2016-02-02 20:07:16 +00:00
// MeetsMinResources returns an error if the resources specified are less than
// the minimum allowed.
2017-11-13 17:05:30 +00:00
// This is based on the minimums defined in the Resources type
2016-02-02 20:00:26 +00:00
func ( r * Resources ) MeetsMinResources ( ) error {
var mErr multierror . Error
2017-11-13 17:05:30 +00:00
minResources := MinResources ( )
if r . CPU < minResources . CPU {
2017-11-13 17:51:19 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "minimum CPU value is %d; got %d" , minResources . CPU , r . CPU ) )
2016-02-02 20:00:26 +00:00
}
2017-11-13 17:05:30 +00:00
if r . MemoryMB < minResources . MemoryMB {
2017-11-29 00:44:33 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "minimum MemoryMB value is %d; got %d" , minResources . MemoryMB , r . MemoryMB ) )
2016-02-02 20:00:26 +00:00
}
2017-11-13 17:05:30 +00:00
if r . IOPS < minResources . IOPS {
2017-11-29 00:44:33 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "minimum IOPS value is %d; got %d" , minResources . IOPS , r . IOPS ) )
2016-02-02 20:00:26 +00:00
}
2016-02-02 20:07:16 +00:00
for i , n := range r . Networks {
if err := n . MeetsMinResources ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "network resource at index %d failed: %v" , i , err ) )
}
}
2016-02-02 20:00:26 +00:00
return mErr . ErrorOrNil ( )
}
2015-09-13 21:30:45 +00:00
// Copy returns a deep copy of the resources
func ( r * Resources ) Copy ( ) * Resources {
2016-02-11 01:54:43 +00:00
if r == nil {
return nil
}
2015-09-13 21:30:45 +00:00
newR := new ( Resources )
* newR = * r
2016-03-21 23:29:21 +00:00
if r . Networks != nil {
n := len ( r . Networks )
newR . Networks = make ( [ ] * NetworkResource , n )
for i := 0 ; i < n ; i ++ {
newR . Networks [ i ] = r . Networks [ i ] . Copy ( )
}
2015-09-13 21:30:45 +00:00
}
return newR
}
2015-09-13 22:04:36 +00:00
// NetIndex finds the matching net index using device name
2015-09-13 00:04:09 +00:00
func ( r * Resources ) NetIndex ( n * NetworkResource ) int {
2015-08-05 00:23:42 +00:00
for idx , net := range r . Networks {
2015-09-13 22:04:36 +00:00
if net . Device == n . Device {
2015-09-12 23:21:57 +00:00
return idx
}
}
return - 1
}
2015-08-05 00:32:57 +00:00
// Superset checks if one set of resources is a superset
2015-09-13 21:59:34 +00:00
// of another. This ignores network resources, and the NetworkIndex
// should be used for that.
2015-09-14 01:38:11 +00:00
func ( r * Resources ) Superset ( other * Resources ) ( bool , string ) {
2015-08-05 00:32:57 +00:00
if r . CPU < other . CPU {
2017-11-29 04:15:32 +00:00
return false , "cpu"
2015-08-05 00:32:57 +00:00
}
if r . MemoryMB < other . MemoryMB {
2017-11-29 04:15:32 +00:00
return false , "memory"
2015-08-05 00:32:57 +00:00
}
if r . DiskMB < other . DiskMB {
2017-11-29 04:15:32 +00:00
return false , "disk"
2015-08-05 00:32:57 +00:00
}
if r . IOPS < other . IOPS {
2017-11-29 04:15:32 +00:00
return false , "iops"
2015-08-05 00:32:57 +00:00
}
2015-09-14 01:38:11 +00:00
return true , ""
2015-08-05 00:32:57 +00:00
}
2015-08-05 00:41:02 +00:00
// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func ( r * Resources ) Add ( delta * Resources ) error {
if delta == nil {
return nil
}
r . CPU += delta . CPU
r . MemoryMB += delta . MemoryMB
r . DiskMB += delta . DiskMB
r . IOPS += delta . IOPS
2015-09-12 23:33:41 +00:00
for _ , n := range delta . Networks {
// Find the matching interface by IP or CIDR
2015-09-13 00:04:09 +00:00
idx := r . NetIndex ( n )
2015-08-05 00:41:02 +00:00
if idx == - 1 {
2015-09-13 23:40:53 +00:00
r . Networks = append ( r . Networks , n . Copy ( ) )
2015-09-12 23:33:41 +00:00
} else {
r . Networks [ idx ] . Add ( n )
2015-08-05 00:41:02 +00:00
}
}
return nil
}
2015-09-15 00:43:42 +00:00
// GoString formats the pointed-to Resources value with %#v, prefixed by "*".
func (r *Resources) GoString() string {
	val := *r
	return fmt.Sprintf("*%#v", val)
}
2015-11-14 02:09:42 +00:00
// Port pairs a port label with its port value.
type Port struct {
	Label string
	Value int
}
2015-09-24 06:56:25 +00:00
// NetworkResource is used to represent available network
2015-07-03 23:57:48 +00:00
// resources
2015-06-07 18:18:59 +00:00
type NetworkResource struct {
2015-11-14 02:09:42 +00:00
Device string // Name of the device
CIDR string // CIDR block of addresses
2017-06-09 17:29:41 +00:00
IP string // Host IP address
2015-11-14 02:09:42 +00:00
MBits int // Throughput
2017-06-09 17:29:41 +00:00
ReservedPorts [ ] Port // Host Reserved ports
DynamicPorts [ ] Port // Host Dynamically assigned ports
2015-07-03 23:57:48 +00:00
}
2018-02-27 17:21:06 +00:00
// Equals reports whether nr and other have the same device, CIDR, IP and
// MBits, and the same reserved and dynamic ports in the same order.
func (nr *NetworkResource) Equals(other *NetworkResource) bool {
	if nr.Device != other.Device ||
		nr.CIDR != other.CIDR ||
		nr.IP != other.IP ||
		nr.MBits != other.MBits {
		return false
	}

	// Lengths are compared first, so indexing other's slices below is safe.
	if len(nr.ReservedPorts) != len(other.ReservedPorts) ||
		len(nr.DynamicPorts) != len(other.DynamicPorts) {
		return false
	}

	for i := range nr.ReservedPorts {
		if nr.ReservedPorts[i] != other.ReservedPorts[i] {
			return false
		}
	}
	for i := range nr.DynamicPorts {
		if nr.DynamicPorts[i] != other.DynamicPorts[i] {
			return false
		}
	}
	return true
}
2016-07-20 23:07:15 +00:00
func ( n * NetworkResource ) Canonicalize ( ) {
2016-07-18 23:17:38 +00:00
// Ensure that an empty and nil slices are treated the same to avoid scheduling
// problems since we use reflect DeepEquals.
if len ( n . ReservedPorts ) == 0 {
n . ReservedPorts = nil
}
if len ( n . DynamicPorts ) == 0 {
n . DynamicPorts = nil
}
}
2016-02-02 20:07:16 +00:00
// MeetsMinResources returns an error if the resources specified are less than
// the minimum allowed; for a network that means MBits below 1.
func (n *NetworkResource) MeetsMinResources() error {
	var result multierror.Error
	if n.MBits < 1 {
		err := fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)
		result.Errors = append(result.Errors, err)
	}
	return result.ErrorOrNil()
}
2015-09-13 21:30:45 +00:00
// Copy returns a deep copy of the network resource
func ( n * NetworkResource ) Copy ( ) * NetworkResource {
2016-02-11 01:54:43 +00:00
if n == nil {
return nil
}
2015-09-13 21:30:45 +00:00
newR := new ( NetworkResource )
* newR = * n
2015-09-14 01:38:11 +00:00
if n . ReservedPorts != nil {
2015-11-14 02:09:42 +00:00
newR . ReservedPorts = make ( [ ] Port , len ( n . ReservedPorts ) )
2015-09-14 01:38:11 +00:00
copy ( newR . ReservedPorts , n . ReservedPorts )
}
2015-11-24 00:32:30 +00:00
if n . DynamicPorts != nil {
newR . DynamicPorts = make ( [ ] Port , len ( n . DynamicPorts ) )
copy ( newR . DynamicPorts , n . DynamicPorts )
}
2015-09-13 21:30:45 +00:00
return newR
}
2015-08-05 00:41:02 +00:00
// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func ( n * NetworkResource ) Add ( delta * NetworkResource ) {
if len ( delta . ReservedPorts ) > 0 {
n . ReservedPorts = append ( n . ReservedPorts , delta . ReservedPorts ... )
}
n . MBits += delta . MBits
2015-09-22 20:33:16 +00:00
n . DynamicPorts = append ( n . DynamicPorts , delta . DynamicPorts ... )
2015-08-05 00:41:02 +00:00
}
2015-09-15 01:27:37 +00:00
// GoString renders the dereferenced network resource in Go syntax (%#v),
// prefixed with "*" to show that the value was reached through a pointer.
func (n *NetworkResource) GoString() string {
	repr := fmt.Sprintf("%#v", *n)
	return "*" + repr
}
2017-06-09 17:29:41 +00:00
// PortLabels returns a map of port labels to their assigned host ports.
func ( n * NetworkResource ) PortLabels ( ) map [ string ] int {
num := len ( n . ReservedPorts ) + len ( n . DynamicPorts )
labelValues := make ( map [ string ] int , num )
for _ , port := range n . ReservedPorts {
labelValues [ port . Label ] = port . Value
}
for _ , port := range n . DynamicPorts {
labelValues [ port . Label ] = port . Value
2015-11-15 09:13:42 +00:00
}
return labelValues
}
2015-07-03 23:57:48 +00:00
// Job types accepted in Job.Type.
const (
2015-08-15 19:38:58 +00:00
// JobTypeCore is reserved for internal system tasks and is
// always handled by the CoreScheduler.
JobTypeCore = "_core"
2015-07-03 23:57:48 +00:00
// JobTypeService is the type of long-lived service jobs.
JobTypeService = "service"
// JobTypeBatch is the type of jobs that run and then terminate. Periodic
// and parameterized jobs must be of this type.
JobTypeBatch = "batch"
2015-10-14 23:43:06 +00:00
// JobTypeSystem is the type of system jobs. Task groups of a system job
// may not have a count greater than 1.
JobTypeSystem = "system"
2015-07-03 23:57:48 +00:00
)
// Job status values.
const (
2016-01-08 04:08:14 +00:00
JobStatusPending = "pending" // Pending means the job is waiting on scheduling
JobStatusRunning = "running" // Running means the job has non-terminal allocations
JobStatusDead = "dead" // Dead means all evaluations and allocations are terminal
2015-07-03 23:57:48 +00:00
)
2015-08-06 18:48:44 +00:00
// Job priority bounds and version-history limits.
const (
// JobMinPriority is the minimum allowed priority
JobMinPriority = 1
// JobDefaultPriority is the default priority if one is
// not specified.
JobDefaultPriority = 50
// JobMaxPriority is the maximum allowed priority
JobMaxPriority = 100
2015-08-15 22:15:00 +00:00
// CoreJobPriority is twice JobMaxPriority so that it is higher than any
// user specified job and therefore gets priority. This is important
// for the system to remain healthy.
CoreJobPriority = JobMaxPriority * 2
2017-04-13 20:54:57 +00:00
2017-04-13 21:54:22 +00:00
// JobTrackedVersions is the number of historic job versions that are
2017-04-13 20:54:57 +00:00
// kept.
2017-04-13 21:54:22 +00:00
JobTrackedVersions = 6
2015-08-06 18:48:44 +00:00
)
2015-07-03 23:57:48 +00:00
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG), however, is the unit of
// scheduling.
type Job struct {
2017-04-15 03:54:30 +00:00
// Stop marks whether the user has stopped the job. A stopped job will
// have all created allocations stopped and acts as a way to stop a job
// without purging it from the system. This allows existing allocs to be
// queried and the job to be inspected as it is being killed.
Stop bool
2015-09-15 17:46:10 +00:00
// Region is the Nomad region that handles scheduling this job
Region string
2017-09-07 23:56:15 +00:00
// Namespace is the namespace the job is submitted into. Canonicalize
// fills in DefaultNamespace when it is empty.
Namespace string
2015-09-15 17:46:10 +00:00
// ID is a unique identifier for the job per region. It can be
// specified hierarchically like LineOfBiz/OrgName/Team/Project.
// Validate rejects IDs that are empty or contain a space.
2015-07-23 22:15:48 +00:00
ID string
2015-12-18 20:26:28 +00:00
// ParentID is the unique identifier of the job that spawned this job.
ParentID string
2015-07-03 23:57:48 +00:00
// Name is the logical name of the job used to refer to it. This is unique
// per region, but not unique globally.
Name string
// Type is used to control various behaviors about the job. Most jobs
// are service jobs, meaning they are expected to be long lived.
// Some jobs are batch oriented meaning they run and then terminate.
// This can be extended in the future to support custom schedulers.
// Validate accepts only the JobType* constants.
Type string
// Priority is used to control scheduling importance and if this job
// can preempt other jobs. Validate requires it to lie within
// [JobMinPriority, JobMaxPriority].
Priority int
// AllAtOnce is used to control if incremental scheduling of task groups
// is allowed or if we must do a gang scheduling of the entire job. This
// can slow down larger jobs if resources are not available.
2017-02-22 20:30:05 +00:00
AllAtOnce bool
2015-07-03 23:57:48 +00:00
2015-08-13 21:02:39 +00:00
// Datacenters contains all the datacenters this job is allowed to span.
// Validate requires at least one.
Datacenters [ ] string
2015-07-03 23:57:48 +00:00
// Constraints can be specified at a job level and apply to
// all the task groups and tasks.
Constraints [ ] * Constraint
// TaskGroups are the collections of task groups that this job needs
// to run. Each task group is an atomic unit of scheduling and placement.
TaskGroups [ ] * TaskGroup
2017-05-09 00:44:26 +00:00
// Update is the job-level update strategy.
// COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0.
2015-09-07 22:08:50 +00:00
Update UpdateStrategy
2015-12-01 00:51:56 +00:00
// Periodic is used to define the interval the job is run at. It is nil
// for jobs that are not periodic (see IsPeriodic).
2015-12-01 16:40:32 +00:00
Periodic * PeriodicConfig
2015-12-01 00:51:56 +00:00
2017-01-20 18:33:52 +00:00
// ParameterizedJob is used to specify the job as a parameterized job
// for dispatching. It is nil for jobs that are not parameterized (see
// IsParameterized).
ParameterizedJob * ParameterizedJobConfig
2016-11-23 22:56:50 +00:00
2016-12-14 20:50:08 +00:00
// Payload is the payload supplied when the job was dispatched.
// NOTE(review): Copy does not duplicate this slice, so copies share its
// backing array — treat it as read-only.
Payload [ ] byte
2016-11-26 02:04:55 +00:00
2015-07-03 23:57:48 +00:00
// Meta is used to associate arbitrary metadata with this
// job. This is opaque to Nomad.
Meta map [ string ] string
2016-08-10 18:51:19 +00:00
// VaultToken is the Vault token that proves the submitter of the job has
// access to the specified Vault policies. This field is only used to
// transfer the token and is not stored after Job submission.
2017-02-22 20:30:05 +00:00
VaultToken string
2016-08-10 18:51:19 +00:00
2015-07-03 23:57:48 +00:00
// Status is the job status (presumably one of the JobStatus* constants;
// verify against the code that sets it).
Status string
2015-07-04 00:50:54 +00:00
2015-08-15 20:08:06 +00:00
// StatusDescription is meant to provide more human useful information
StatusDescription string
2017-04-13 20:54:57 +00:00
// Stable marks a job as stable. Stability is only defined on "service" and
// "system" jobs. The stability of a job will be set automatically as part
// of a deployment and can be manually set via APIs.
Stable bool
2018-03-11 18:28:31 +00:00
// Version is a monotonically increasing version number that is incremented
2017-04-13 20:54:57 +00:00
// on each job register.
Version uint64
2017-06-30 02:08:25 +00:00
// SubmitTime is the time at which the job was submitted as a UnixNano in
// UTC. It is set by SetSubmitTime.
SubmitTime int64
2015-07-04 00:50:54 +00:00
// Raft Indexes
2016-01-12 17:50:33 +00:00
CreateIndex uint64
ModifyIndex uint64
JobModifyIndex uint64
2015-07-03 23:57:48 +00:00
}
2018-04-26 22:15:43 +00:00
// NamespacedID returns the job's ID together with its namespace, which is
// useful for logging.
func (j *Job) NamespacedID() *NamespacedID {
	nsID := new(NamespacedID)
	nsID.ID = j.ID
	nsID.Namespace = j.Namespace
	return nsID
}
2016-07-20 23:07:15 +00:00
// Canonicalize is used to canonicalize fields in the Job. This should be called
2017-07-07 02:08:51 +00:00
// when registering a Job. A set of warnings are returned if the job was changed
// in anyway that the user should be made aware of.
func ( j * Job ) Canonicalize ( ) ( warnings error ) {
2017-09-07 23:56:15 +00:00
if j == nil {
return nil
}
2017-07-07 02:08:51 +00:00
var mErr multierror . Error
2016-07-18 23:17:38 +00:00
// Ensure that an empty and nil map are treated the same to avoid scheduling
// problems since we use reflect DeepEquals.
if len ( j . Meta ) == 0 {
j . Meta = nil
}
2017-09-07 23:56:15 +00:00
// Ensure the job is in a namespace.
if j . Namespace == "" {
j . Namespace = DefaultNamespace
}
2015-12-18 20:17:13 +00:00
for _ , tg := range j . TaskGroups {
2016-07-20 23:07:15 +00:00
tg . Canonicalize ( j )
2015-12-18 20:17:13 +00:00
}
2016-11-23 22:56:50 +00:00
2017-01-20 18:33:52 +00:00
if j . ParameterizedJob != nil {
j . ParameterizedJob . Canonicalize ( )
2016-11-23 22:56:50 +00:00
}
2017-02-15 22:37:06 +00:00
if j . Periodic != nil {
j . Periodic . Canonicalize ( )
}
2017-05-10 20:48:53 +00:00
2017-07-07 02:08:51 +00:00
return mErr . ErrorOrNil ( )
2015-12-15 03:20:57 +00:00
}
2015-12-18 20:26:28 +00:00
// Copy returns a deep copy of the Job. It is expected that callers use recover.
// This job can panic if the deep copy failed as it uses reflection.
func ( j * Job ) Copy ( ) * Job {
2016-02-11 01:54:43 +00:00
if j == nil {
return nil
}
nj := new ( Job )
* nj = * j
2017-01-18 23:55:14 +00:00
nj . Datacenters = helper . CopySliceString ( nj . Datacenters )
2016-02-11 17:08:20 +00:00
nj . Constraints = CopySliceConstraints ( nj . Constraints )
2015-12-18 20:26:28 +00:00
2016-03-21 23:29:21 +00:00
if j . TaskGroups != nil {
tgs := make ( [ ] * TaskGroup , len ( nj . TaskGroups ) )
for i , tg := range nj . TaskGroups {
tgs [ i ] = tg . Copy ( )
}
nj . TaskGroups = tgs
2016-02-11 01:54:43 +00:00
}
nj . Periodic = nj . Periodic . Copy ( )
2017-01-18 23:55:14 +00:00
nj . Meta = helper . CopyMapStringString ( nj . Meta )
2017-01-20 18:33:52 +00:00
nj . ParameterizedJob = nj . ParameterizedJob . Copy ( )
2016-02-11 01:54:43 +00:00
return nj
2015-12-18 20:26:28 +00:00
}
2015-09-15 17:46:10 +00:00
// Validate is used to sanity check a job input
func ( j * Job ) Validate ( ) error {
var mErr multierror . Error
2017-02-06 19:48:28 +00:00
2015-09-15 17:46:10 +00:00
if j . Region == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job region" ) )
}
if j . ID == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job ID" ) )
2015-09-16 00:38:23 +00:00
} else if strings . Contains ( j . ID , " " ) {
mErr . Errors = append ( mErr . Errors , errors . New ( "Job ID contains a space" ) )
2015-09-15 17:46:10 +00:00
}
if j . Name == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job name" ) )
}
2017-09-07 23:56:15 +00:00
if j . Namespace == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Job must be in a namespace" ) )
}
2017-07-07 22:34:26 +00:00
switch j . Type {
case JobTypeCore , JobTypeService , JobTypeBatch , JobTypeSystem :
case "" :
2015-09-15 17:46:10 +00:00
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job type" ) )
2017-07-07 22:34:26 +00:00
default :
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Invalid job type: %q" , j . Type ) )
2015-09-15 17:46:10 +00:00
}
if j . Priority < JobMinPriority || j . Priority > JobMaxPriority {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Job priority must be between [%d, %d]" , JobMinPriority , JobMaxPriority ) )
}
if len ( j . Datacenters ) == 0 {
2015-09-25 19:27:31 +00:00
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job datacenters" ) )
2015-09-15 17:46:10 +00:00
}
if len ( j . TaskGroups ) == 0 {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing job task groups" ) )
}
2015-10-11 19:50:16 +00:00
for idx , constr := range j . Constraints {
if err := constr . Validate ( ) ; err != nil {
outer := fmt . Errorf ( "Constraint %d validation failed: %s" , idx + 1 , err )
mErr . Errors = append ( mErr . Errors , outer )
}
}
2015-09-15 17:46:10 +00:00
// Check for duplicate task groups
taskGroups := make ( map [ string ] int )
for idx , tg := range j . TaskGroups {
if tg . Name == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Job task group %d missing name" , idx + 1 ) )
} else if existing , ok := taskGroups [ tg . Name ] ; ok {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Job task group %d redefines '%s' from group %d" , idx + 1 , tg . Name , existing + 1 ) )
} else {
taskGroups [ tg . Name ] = idx
}
2015-10-16 21:15:01 +00:00
2016-07-13 19:50:08 +00:00
if j . Type == "system" && tg . Count > 1 {
2015-10-16 21:15:01 +00:00
mErr . Errors = append ( mErr . Errors ,
2016-07-20 13:23:35 +00:00
fmt . Errorf ( "Job task group %s has count %d. Count cannot exceed 1 with system scheduler" ,
tg . Name , tg . Count ) )
2015-10-16 21:15:01 +00:00
}
2015-09-15 18:23:03 +00:00
}
2015-09-15 17:46:10 +00:00
2015-09-15 18:23:03 +00:00
// Validate the task group
2016-07-20 13:23:35 +00:00
for _ , tg := range j . TaskGroups {
2017-07-07 02:08:51 +00:00
if err := tg . Validate ( j ) ; err != nil {
2017-03-03 23:00:39 +00:00
outer := fmt . Errorf ( "Task group %s validation failed: %v" , tg . Name , err )
2015-09-21 00:08:57 +00:00
mErr . Errors = append ( mErr . Errors , outer )
2015-09-15 17:46:10 +00:00
}
}
2015-12-01 00:51:56 +00:00
// Validate periodic is only used with batch jobs.
2016-07-13 19:50:08 +00:00
if j . IsPeriodic ( ) && j . Periodic . Enabled {
2015-12-04 23:10:08 +00:00
if j . Type != JobTypeBatch {
mErr . Errors = append ( mErr . Errors ,
fmt . Errorf ( "Periodic can only be used with %q scheduler" , JobTypeBatch ) )
}
if err := j . Periodic . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
2015-12-01 00:51:56 +00:00
}
2017-01-20 18:33:52 +00:00
if j . IsParameterized ( ) {
2016-12-16 23:20:12 +00:00
if j . Type != JobTypeBatch {
mErr . Errors = append ( mErr . Errors ,
2017-01-20 18:33:52 +00:00
fmt . Errorf ( "Parameterized job can only be used with %q scheduler" , JobTypeBatch ) )
2016-12-16 23:20:12 +00:00
}
2017-01-20 18:33:52 +00:00
if err := j . ParameterizedJob . Validate ( ) ; err != nil {
2016-11-23 22:56:50 +00:00
mErr . Errors = append ( mErr . Errors , err )
}
}
2015-09-15 17:46:10 +00:00
return mErr . ErrorOrNil ( )
}
2017-05-10 03:52:47 +00:00
// Warnings returns a list of warnings that may be from dubious settings or
// deprecation warnings.
func ( j * Job ) Warnings ( ) error {
var mErr multierror . Error
2017-07-13 20:10:45 +00:00
// Check the groups
for _ , tg := range j . TaskGroups {
if err := tg . Warnings ( j ) ; err != nil {
outer := fmt . Errorf ( "Group %q has warnings: %v" , tg . Name , err )
mErr . Errors = append ( mErr . Errors , outer )
}
}
2017-05-10 03:52:47 +00:00
return mErr . ErrorOrNil ( )
}
2015-08-23 23:49:48 +00:00
// LookupTaskGroup finds a task group by name
2015-08-30 02:14:47 +00:00
func ( j * Job ) LookupTaskGroup ( name string ) * TaskGroup {
2015-08-23 23:49:48 +00:00
for _ , tg := range j . TaskGroups {
if tg . Name == name {
return tg
}
}
return nil
}
2016-12-16 01:08:38 +00:00
// CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
// meta data for the task. When joining Job, Group and Task Meta, the precedence
// is by deepest scope (Task > Group > Job).
func ( j * Job ) CombinedTaskMeta ( groupName , taskName string ) map [ string ] string {
group := j . LookupTaskGroup ( groupName )
if group == nil {
return nil
}
task := group . LookupTask ( taskName )
if task == nil {
return nil
}
2017-01-18 23:55:14 +00:00
meta := helper . CopyMapStringString ( task . Meta )
2016-12-16 01:08:38 +00:00
if meta == nil {
meta = make ( map [ string ] string , len ( group . Meta ) + len ( j . Meta ) )
}
// Add the group specific meta
for k , v := range group . Meta {
if _ , ok := meta [ k ] ; ! ok {
meta [ k ] = v
}
}
// Add the job specific meta
for k , v := range j . Meta {
if _ , ok := meta [ k ] ; ! ok {
meta [ k ] = v
}
}
return meta
}
2017-04-19 17:54:03 +00:00
// Stopped reports whether the job is stopped. A nil job counts as stopped.
func (j *Job) Stopped() bool {
	if j == nil {
		return true
	}
	return j.Stop
}
2017-05-23 00:06:46 +00:00
// HasUpdateStrategy returns if any task group in the job has an update strategy
2017-05-22 17:58:34 +00:00
func ( j * Job ) HasUpdateStrategy ( ) bool {
for _ , tg := range j . TaskGroups {
if tg . Update != nil {
return true
}
}
return false
}
2015-09-06 22:34:28 +00:00
// Stub is used to return a summary of the job
2016-07-21 20:21:47 +00:00
func ( j * Job ) Stub ( summary * JobSummary ) * JobListStub {
2015-09-06 22:34:28 +00:00
return & JobListStub {
ID : j . ID ,
2016-01-07 22:43:55 +00:00
ParentID : j . ParentID ,
2015-09-06 22:34:28 +00:00
Name : j . Name ,
Type : j . Type ,
Priority : j . Priority ,
2017-04-16 00:05:52 +00:00
Periodic : j . IsPeriodic ( ) ,
ParameterizedJob : j . IsParameterized ( ) ,
2017-04-15 03:54:30 +00:00
Stop : j . Stop ,
2015-09-06 22:34:28 +00:00
Status : j . Status ,
StatusDescription : j . StatusDescription ,
CreateIndex : j . CreateIndex ,
ModifyIndex : j . ModifyIndex ,
2016-06-08 23:48:02 +00:00
JobModifyIndex : j . JobModifyIndex ,
2017-06-30 02:08:25 +00:00
SubmitTime : j . SubmitTime ,
2016-07-21 20:21:47 +00:00
JobSummary : summary ,
2015-09-06 22:34:28 +00:00
}
}
2015-12-01 16:40:32 +00:00
// IsPeriodic reports whether the job carries a periodic configuration. It
// does not look at whether that configuration is enabled.
func (j *Job) IsPeriodic() bool {
	periodic := j.Periodic
	return periodic != nil
}
2017-12-11 21:55:17 +00:00
// IsPeriodicActive reports whether the job is an active periodic job that
// will create child jobs: it must have an enabled periodic configuration, not
// be stopped, and not be parameterized.
func (j *Job) IsPeriodicActive() bool {
	if !j.IsPeriodic() || !j.Periodic.Enabled {
		return false
	}
	return !j.Stopped() && !j.IsParameterized()
}
2017-01-20 18:33:52 +00:00
// IsParameterized returns whether a job is parameterized job.
func ( j * Job ) IsParameterized ( ) bool {
return j . ParameterizedJob != nil
2016-11-23 22:56:50 +00:00
}
2016-08-17 00:50:14 +00:00
// VaultPolicies returns the set of Vault policies per task group, per task
2016-08-18 17:50:47 +00:00
func ( j * Job ) VaultPolicies ( ) map [ string ] map [ string ] * Vault {
policies := make ( map [ string ] map [ string ] * Vault , len ( j . TaskGroups ) )
2016-08-17 00:50:14 +00:00
for _ , tg := range j . TaskGroups {
2016-08-18 17:50:47 +00:00
tgPolicies := make ( map [ string ] * Vault , len ( tg . Tasks ) )
2016-08-17 00:50:14 +00:00
for _ , task := range tg . Tasks {
if task . Vault == nil {
continue
}
2016-08-18 17:50:47 +00:00
tgPolicies [ task . Name ] = task . Vault
2016-08-17 00:50:14 +00:00
}
2016-09-21 18:14:09 +00:00
if len ( tgPolicies ) != 0 {
policies [ tg . Name ] = tgPolicies
}
2016-08-17 00:50:14 +00:00
}
return policies
}
2016-10-20 20:55:35 +00:00
// RequiredSignals returns a mapping of task groups to tasks to their required
// set of signals
func ( j * Job ) RequiredSignals ( ) map [ string ] map [ string ] [ ] string {
signals := make ( map [ string ] map [ string ] [ ] string )
for _ , tg := range j . TaskGroups {
for _ , task := range tg . Tasks {
// Use this local one as a set
taskSignals := make ( map [ string ] struct { } )
// Check if the Vault change mode uses signals
if task . Vault != nil && task . Vault . ChangeMode == VaultChangeModeSignal {
taskSignals [ task . Vault . ChangeSignal ] = struct { } { }
}
2017-11-30 21:53:35 +00:00
// If a user has specified a KillSignal, add it to required signals
if task . KillSignal != "" {
taskSignals [ task . KillSignal ] = struct { } { }
}
2016-10-20 20:55:35 +00:00
// Check if any template change mode uses signals
for _ , t := range task . Templates {
if t . ChangeMode != TemplateChangeModeSignal {
continue
}
taskSignals [ t . ChangeSignal ] = struct { } { }
}
// Flatten and sort the signals
l := len ( taskSignals )
if l == 0 {
continue
}
flat := make ( [ ] string , 0 , l )
for sig := range taskSignals {
flat = append ( flat , sig )
}
sort . Strings ( flat )
tgSignals , ok := signals [ tg . Name ]
if ! ok {
tgSignals = make ( map [ string ] [ ] string )
signals [ tg . Name ] = tgSignals
}
tgSignals [ task . Name ] = flat
}
}
return signals
}
2017-05-23 00:02:20 +00:00
// SpecChanged determines if the functional specification has changed between
2017-06-21 20:14:03 +00:00
// two job versions.
2017-05-23 00:02:20 +00:00
func ( j * Job ) SpecChanged ( new * Job ) bool {
if j == nil {
return new != nil
}
2017-06-21 20:14:03 +00:00
// Create a copy of the new job
c := new . Copy ( )
2017-05-23 00:02:20 +00:00
// Update the new job so we can do a reflect
2017-06-21 20:14:03 +00:00
c . Status = j . Status
c . StatusDescription = j . StatusDescription
c . Stable = j . Stable
c . Version = j . Version
c . CreateIndex = j . CreateIndex
c . ModifyIndex = j . ModifyIndex
c . JobModifyIndex = j . JobModifyIndex
2017-06-30 02:08:25 +00:00
c . SubmitTime = j . SubmitTime
2017-05-23 00:02:20 +00:00
// Deep equals the jobs
2017-06-21 20:14:03 +00:00
return ! reflect . DeepEqual ( j , c )
2017-05-23 00:02:20 +00:00
}
2017-06-30 02:08:25 +00:00
// SetSubmitTime stamps the job with the current time, stored as nanoseconds
// since the Unix epoch taken from the UTC clock.
func (j *Job) SetSubmitTime() {
	now := time.Now().UTC()
	j.SubmitTime = now.UnixNano()
}
2015-09-06 22:34:28 +00:00
// JobListStub is used to return a subset of job information
// for the job list. It is produced by Job.Stub: Periodic and ParameterizedJob
// are booleans derived from Job.IsPeriodic and Job.IsParameterized,
// JobSummary is the summary passed to Stub, and the remaining fields are
// copied from the Job fields of the same name.
type JobListStub struct {
ID string
2016-01-07 22:43:55 +00:00
ParentID string
2015-09-06 22:34:28 +00:00
Name string
Type string
Priority int
2017-04-16 00:05:52 +00:00
Periodic bool
ParameterizedJob bool
2017-04-15 03:54:30 +00:00
Stop bool
2015-09-06 22:34:28 +00:00
Status string
StatusDescription string
2016-07-21 20:34:19 +00:00
JobSummary * JobSummary
2015-09-06 22:34:28 +00:00
CreateIndex uint64
ModifyIndex uint64
2016-06-08 23:48:02 +00:00
JobModifyIndex uint64
2017-06-30 02:08:25 +00:00
SubmitTime int64
2015-09-06 22:34:28 +00:00
}
2016-12-06 01:24:37 +00:00
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
2017-09-07 23:56:15 +00:00
// JobID is the ID of the job the summary is for
2016-12-06 01:24:37 +00:00
JobID string
2017-09-07 23:56:15 +00:00
// Namespace is the namespace of the job and its summary
Namespace string
2018-03-11 19:00:07 +00:00
// Summary contains the summary per task group for the Job, keyed by
// (presumably) the task group name.
2016-12-06 01:24:37 +00:00
Summary map [ string ] TaskGroupSummary
2016-12-15 00:58:54 +00:00
// Children contains a summary for the children of this job. It may be
// nil.
2016-12-06 01:24:37 +00:00
Children * JobChildrenSummary
// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
}
// Copy returns a new copy of the JobSummary. The Summary map and the Children
// summary are duplicated so the copy can be modified independently of the
// original. The copy's Summary map is always non-nil, even when the
// original's is nil, so callers may write to it directly. A nil receiver
// returns nil.
func (js *JobSummary) Copy() *JobSummary {
	if js == nil {
		return nil
	}

	newJobSummary := new(JobSummary)
	*newJobSummary = *js

	// TaskGroupSummary is a plain value type, so copying entries one by one
	// is enough to detach the new map from the old.
	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
	for k, v := range js.Summary {
		newTGSummary[k] = v
	}
	newJobSummary.Summary = newTGSummary
	newJobSummary.Children = newJobSummary.Children.Copy()
	return newJobSummary
}
// JobChildrenSummary contains the summary of children job statuses: one
// counter per job status (pending, running, dead).
type JobChildrenSummary struct {
2016-12-07 00:58:44 +00:00
Pending int64
Running int64
Dead int64
2016-12-06 01:24:37 +00:00
}
// Copy returns a freshly allocated duplicate of the JobChildrenSummary, or
// nil when the receiver is nil.
func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
	if jc == nil {
		return nil
	}

	dup := *jc
	return &dup
}
// TaskGroupSummary summarizes the state of all the allocations of a
// particular TaskGroup, as one counter per allocation state.
type TaskGroupSummary struct {
Queued int
Complete int
Failed int
Running int
Starting int
Lost int
}
2017-05-09 00:44:26 +00:00
// Valid values for UpdateStrategy.HealthCheck.
const (
// UpdateStrategyHealthCheck_Checks uses any registered health check state
// in combination with task states to determine if an allocation is healthy.
UpdateStrategyHealthCheck_Checks = "checks"
// UpdateStrategyHealthCheck_TaskStates uses the task states of an
// allocation to determine if the allocation is healthy.
UpdateStrategyHealthCheck_TaskStates = "task_states"
// UpdateStrategyHealthCheck_Manual allows the operator to manually signal
// to Nomad when an allocation is healthy. This allows more advanced health
// checking that is outside of the scope of Nomad.
UpdateStrategyHealthCheck_Manual = "manual"
)
2017-05-10 20:48:53 +00:00
var (
// DefaultUpdateStrategy provides a baseline that can be used to upgrade
2017-05-10 20:51:52 +00:00
// jobs with the old policy or for populating field defaults. It is a
// shared pointer: take a Copy before modifying it.
2017-05-10 20:48:53 +00:00
DefaultUpdateStrategy = & UpdateStrategy {
2018-03-23 17:56:00 +00:00
Stagger : 30 * time . Second ,
MaxParallel : 1 ,
HealthCheck : UpdateStrategyHealthCheck_Checks ,
MinHealthyTime : 10 * time . Second ,
HealthyDeadline : 5 * time . Minute ,
2018-04-10 23:00:52 +00:00
ProgressDeadline : 10 * time . Minute ,
2018-03-23 17:56:00 +00:00
AutoRevert : false ,
Canary : 0 ,
2017-05-10 20:48:53 +00:00
}
)
2015-09-07 22:08:50 +00:00
// UpdateStrategy is used to modify how updates are done. The constraints
// mentioned on the fields below are the ones enforced by Validate.
type UpdateStrategy struct {
2017-07-07 02:08:51 +00:00
// Stagger is used to determine the rate at which allocations are migrated
// due to down or draining nodes. Must be greater than zero.
2015-09-07 22:08:50 +00:00
Stagger time . Duration
// MaxParallel is how many updates can be done in parallel. Must be at
// least one.
2017-02-22 20:30:05 +00:00
MaxParallel int
2017-05-09 00:44:26 +00:00
// HealthCheck specifies the mechanism in which allocations are marked
// healthy or unhealthy as part of a deployment. Must be one of the
// UpdateStrategyHealthCheck_* constants.
HealthCheck string
// MinHealthyTime is the minimum time an allocation must be in the healthy
2018-03-11 17:37:22 +00:00
// state before it is marked as healthy, unblocking more allocations to be
2017-05-09 00:44:26 +00:00
// rolled. Must not be negative and must be less than HealthyDeadline.
MinHealthyTime time . Duration
// HealthyDeadline is the time in which an allocation must be marked as
2018-03-11 19:06:05 +00:00
// healthy before it is automatically transitioned to unhealthy. This time
2017-05-09 00:44:26 +00:00
// period doesn't count against the MinHealthyTime. Must be greater than
// zero and, when ProgressDeadline is non-zero, less than it.
HealthyDeadline time . Duration
2018-03-23 17:56:00 +00:00
// ProgressDeadline is the time in which an allocation as part of the
2018-04-10 18:42:13 +00:00
// deployment must transition to healthy. If no allocation becomes healthy
2018-03-23 17:56:00 +00:00
// after the deadline, the deployment is marked as failed. If the deadline
// is zero, the first failure causes the deployment to fail. Must not be
// negative.
ProgressDeadline time . Duration
2017-05-09 00:44:26 +00:00
// AutoRevert declares that if a deployment fails because of unhealthy
// allocations, there should be an attempt to auto-revert the job to a
// stable version.
AutoRevert bool
// Canary is the number of canaries to deploy when a change to the task
// group is detected. Must not be negative.
Canary int
}
func ( u * UpdateStrategy ) Copy ( ) * UpdateStrategy {
if u == nil {
return nil
}
copy := new ( UpdateStrategy )
* copy = * u
return copy
2015-09-07 22:08:50 +00:00
}
2017-05-09 00:44:26 +00:00
func ( u * UpdateStrategy ) Validate ( ) error {
if u == nil {
return nil
}
var mErr multierror . Error
switch u . HealthCheck {
case UpdateStrategyHealthCheck_Checks , UpdateStrategyHealthCheck_TaskStates , UpdateStrategyHealthCheck_Manual :
default :
multierror . Append ( & mErr , fmt . Errorf ( "Invalid health check given: %q" , u . HealthCheck ) )
}
2017-08-23 13:30:28 +00:00
if u . MaxParallel < 1 {
multierror . Append ( & mErr , fmt . Errorf ( "Max parallel can not be less than one: %d < 1" , u . MaxParallel ) )
2017-05-09 00:44:26 +00:00
}
if u . Canary < 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Canary count can not be less than zero: %d < 0" , u . Canary ) )
}
if u . MinHealthyTime < 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Minimum healthy time may not be less than zero: %v" , u . MinHealthyTime ) )
}
if u . HealthyDeadline <= 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Healthy deadline must be greater than zero: %v" , u . HealthyDeadline ) )
}
2018-03-23 17:56:00 +00:00
if u . ProgressDeadline < 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Progress deadline must be zero or greater: %v" , u . ProgressDeadline ) )
}
2017-07-25 23:30:53 +00:00
if u . MinHealthyTime >= u . HealthyDeadline {
multierror . Append ( & mErr , fmt . Errorf ( "Minimum healthy time must be less than healthy deadline: %v > %v" , u . MinHealthyTime , u . HealthyDeadline ) )
}
2018-04-06 21:44:04 +00:00
if u . ProgressDeadline != 0 && u . HealthyDeadline >= u . ProgressDeadline {
2018-03-23 17:56:00 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Healthy deadline must be less than progress deadline: %v > %v" , u . HealthyDeadline , u . ProgressDeadline ) )
}
2017-07-07 02:08:51 +00:00
if u . Stagger <= 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Stagger must be greater than zero: %v" , u . Stagger ) )
}
2017-05-09 00:44:26 +00:00
return mErr . ErrorOrNil ( )
2015-09-07 22:08:50 +00:00
}
2017-05-09 00:44:26 +00:00
// TODO(alexdadgar): Remove once no longer used by the scheduler.
2015-09-07 22:08:50 +00:00
// Rolling reports whether a rolling strategy should be used, which is the
// case when both Stagger and MaxParallel are greater than zero.
func (u *UpdateStrategy) Rolling() bool {
	if u.Stagger <= 0 {
		return false
	}
	return u.MaxParallel > 0
}
2015-12-01 00:51:56 +00:00
// Valid values for PeriodicConfig.SpecType.
const (
// PeriodicSpecCron is used for a cron spec.
PeriodicSpecCron = "cron"
2015-12-18 20:26:28 +00:00
// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
2016-05-15 16:41:34 +00:00
// separated list of unix timestamps (in seconds) at which to launch.
2015-12-16 21:46:09 +00:00
PeriodicSpecTest = "_internal_test"
2015-12-01 00:51:56 +00:00
)
// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
// Enabled determines if the job should be run periodically. Validate
// skips all checks when it is false.
Enabled bool
// Spec specifies the interval the job should be run as. It is parsed based
// on the SpecType.
Spec string
// SpecType defines the format of the spec: PeriodicSpecCron or
// PeriodicSpecTest.
SpecType string
2016-01-07 19:19:46 +00:00
// ProhibitOverlap enforces that spawned jobs do not run in parallel.
2017-02-22 20:30:05 +00:00
ProhibitOverlap bool
2017-02-15 22:37:06 +00:00
// TimeZone is the user specified string that determines the time zone to
// launch against. The time zones must be specified from IANA Time Zone
// database, such as "America/New_York".
// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
// Reference: https://www.iana.org/time-zones
2017-02-22 20:30:05 +00:00
TimeZone string
2017-02-15 22:37:06 +00:00
// location is the time zone to evaluate the launch time against. It is
// populated by Canonicalize and read through GetLocation, which falls
// back to UTC when it is nil.
location * time . Location
2015-12-01 00:51:56 +00:00
}
2016-02-11 01:54:43 +00:00
// Copy returns a freshly allocated duplicate of the periodic configuration,
// or nil when the receiver is nil. All fields are copied by assignment, so
// the loaded location pointer is shared between the original and the copy.
func (p *PeriodicConfig) Copy() *PeriodicConfig {
	if p == nil {
		return nil
	}

	dup := *p
	return &dup
}
2015-12-01 00:51:56 +00:00
func ( p * PeriodicConfig ) Validate ( ) error {
if ! p . Enabled {
return nil
}
2017-02-15 22:37:06 +00:00
var mErr multierror . Error
2015-12-01 00:56:44 +00:00
if p . Spec == "" {
2017-02-15 22:37:06 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Must specify a spec" ) )
}
// Check if we got a valid time zone
if p . TimeZone != "" {
if _ , err := time . LoadLocation ( p . TimeZone ) ; err != nil {
multierror . Append ( & mErr , fmt . Errorf ( "Invalid time zone %q: %v" , p . TimeZone , err ) )
}
2015-12-01 00:51:56 +00:00
}
switch p . SpecType {
case PeriodicSpecCron :
// Validate the cron spec
if _ , err := cronexpr . Parse ( p . Spec ) ; err != nil {
2017-02-15 22:37:06 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Invalid cron spec %q: %v" , p . Spec , err ) )
2015-12-01 00:51:56 +00:00
}
2015-12-18 20:26:28 +00:00
case PeriodicSpecTest :
// No-op
2015-12-01 00:51:56 +00:00
default :
2017-02-15 22:37:06 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Unknown periodic specification type %q" , p . SpecType ) )
2015-12-01 00:51:56 +00:00
}
2017-02-15 22:37:06 +00:00
return mErr . ErrorOrNil ( )
}
func ( p * PeriodicConfig ) Canonicalize ( ) {
// Load the location
l , err := time . LoadLocation ( p . TimeZone )
if err != nil {
2017-02-17 19:21:49 +00:00
p . location = time . UTC
2017-02-15 22:37:06 +00:00
}
p . location = l
2015-12-01 00:51:56 +00:00
}
2018-04-26 22:46:22 +00:00
// CronParseNext is a helper that parses the next time for the given expression
2018-04-26 22:15:43 +00:00
// but captures any panic that may occur in the underlying library.
2018-04-26 22:46:22 +00:00
func CronParseNext ( e * cronexpr . Expression , fromTime time . Time , spec string ) ( t time . Time , err error ) {
2018-04-26 20:57:45 +00:00
defer func ( ) {
if recover ( ) != nil {
t = time . Time { }
2018-04-26 22:15:43 +00:00
err = fmt . Errorf ( "failed parsing cron expression: %q" , spec )
2018-04-26 20:57:45 +00:00
}
} ( )
return e . Next ( fromTime ) , nil
}
2015-12-01 00:51:56 +00:00
// Next returns the closest time instant matching the spec that is after the
// passed time. If no matching instance exists, the zero value of time.Time is
// returned. The `time.Location` of the returned value matches that of the
// passed time.
2018-04-26 20:57:45 +00:00
func ( p * PeriodicConfig ) Next ( fromTime time . Time ) ( time . Time , error ) {
2015-12-01 00:51:56 +00:00
switch p . SpecType {
case PeriodicSpecCron :
if e , err := cronexpr . Parse ( p . Spec ) ; err == nil {
2018-04-26 22:46:22 +00:00
return CronParseNext ( e , fromTime , p . Spec )
2015-12-01 00:51:56 +00:00
}
2015-12-18 20:26:28 +00:00
case PeriodicSpecTest :
split := strings . Split ( p . Spec , "," )
if len ( split ) == 1 && split [ 0 ] == "" {
2018-04-26 20:57:45 +00:00
return time . Time { } , nil
2015-12-18 20:26:28 +00:00
}
// Parse the times
times := make ( [ ] time . Time , len ( split ) )
for i , s := range split {
unix , err := strconv . Atoi ( s )
if err != nil {
2018-04-26 20:57:45 +00:00
return time . Time { } , nil
2015-12-18 20:26:28 +00:00
}
2015-12-21 21:55:26 +00:00
times [ i ] = time . Unix ( int64 ( unix ) , 0 )
2015-12-18 20:26:28 +00:00
}
// Find the next match
for _ , next := range times {
if fromTime . Before ( next ) {
2018-04-26 20:57:45 +00:00
return next , nil
2015-12-18 20:26:28 +00:00
}
}
2015-12-01 00:51:56 +00:00
}
2018-04-26 20:57:45 +00:00
return time . Time { } , nil
2015-12-01 00:51:56 +00:00
}
2017-02-15 22:37:06 +00:00
// GetLocation reports the location used for determining the time zone to run
// the periodic job against, defaulting to UTC when none is set.
func (p *PeriodicConfig) GetLocation() *time.Location {
	if p.location == nil {
		// Jobs pre 0.5.5 will not have this
		return time.UTC
	}
	return p.location
}
2016-01-07 22:24:25 +00:00
// PeriodicLaunchSuffix is the string appended to the periodic jobs ID
// when launching derived instances of it.
const PeriodicLaunchSuffix = "/periodic-"
2015-12-19 01:51:30 +00:00
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	// ID of the periodic job.
	ID string

	// Namespace of the periodic job.
	Namespace string

	// Launch is the last launch time.
	Launch time.Time

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
2016-11-23 22:56:50 +00:00
const (
	// Payload requirement values accepted for a parameterized job.
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)
2017-01-20 18:33:52 +00:00
// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configures the payload requirements.
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher.
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher.
	MetaOptional []string
}
2017-01-20 18:33:52 +00:00
func ( d * ParameterizedJobConfig ) Validate ( ) error {
2016-11-23 22:56:50 +00:00
var mErr multierror . Error
2016-12-14 20:50:08 +00:00
switch d . Payload {
case DispatchPayloadOptional , DispatchPayloadRequired , DispatchPayloadForbidden :
2016-11-23 22:56:50 +00:00
default :
2016-12-14 20:50:08 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Unknown payload requirement: %q" , d . Payload ) )
2016-11-23 22:56:50 +00:00
}
// Check that the meta configurations are disjoint sets
2017-01-18 23:55:14 +00:00
disjoint , offending := helper . SliceSetDisjoint ( d . MetaRequired , d . MetaOptional )
2016-11-23 22:56:50 +00:00
if ! disjoint {
multierror . Append ( & mErr , fmt . Errorf ( "Required and optional meta keys should be disjoint. Following keys exist in both: %v" , offending ) )
}
return mErr . ErrorOrNil ( )
}
2017-01-20 18:33:52 +00:00
func ( d * ParameterizedJobConfig ) Canonicalize ( ) {
2016-12-14 20:50:08 +00:00
if d . Payload == "" {
d . Payload = DispatchPayloadOptional
2016-11-23 22:56:50 +00:00
}
}
2017-01-20 18:33:52 +00:00
func ( d * ParameterizedJobConfig ) Copy ( ) * ParameterizedJobConfig {
2016-11-23 22:56:50 +00:00
if d == nil {
return nil
}
2017-01-20 18:33:52 +00:00
nd := new ( ParameterizedJobConfig )
2016-11-23 22:56:50 +00:00
* nd = * d
2017-01-18 23:55:14 +00:00
nd . MetaOptional = helper . CopySliceString ( nd . MetaOptional )
nd . MetaRequired = helper . CopySliceString ( nd . MetaRequired )
2016-11-23 22:56:50 +00:00
return nd
}
2016-11-26 02:04:55 +00:00
// DispatchedID returns an ID appropriate for a job dispatched against a
2017-01-20 18:33:52 +00:00
// particular parameterized job
2016-11-26 04:02:18 +00:00
func DispatchedID ( templateID string , t time . Time ) string {
2017-09-29 16:58:48 +00:00
u := uuid . Generate ( ) [ : 8 ]
2017-01-20 18:33:52 +00:00
return fmt . Sprintf ( "%s%s%d-%s" , templateID , DispatchLaunchSuffix , t . Unix ( ) , u )
2016-11-26 02:04:55 +00:00
}
2017-01-26 05:06:16 +00:00
// DispatchPayloadConfig configures how a task gets its input from a job dispatch
type DispatchPayloadConfig struct {
	// File specifies a relative path to where the input data should be
	// written.
	File string
}
2017-01-26 05:06:16 +00:00
func ( d * DispatchPayloadConfig ) Copy ( ) * DispatchPayloadConfig {
2016-11-23 22:56:50 +00:00
if d == nil {
return nil
}
2017-01-26 05:06:16 +00:00
nd := new ( DispatchPayloadConfig )
2016-11-23 22:56:50 +00:00
* nd = * d
return nd
}
2017-01-26 05:06:16 +00:00
func ( d * DispatchPayloadConfig ) Validate ( ) error {
2016-12-18 23:48:30 +00:00
// Verify the destination doesn't escape
escaped , err := PathEscapesAllocDir ( "task/local/" , d . File )
if err != nil {
return fmt . Errorf ( "invalid destination path: %v" , err )
} else if escaped {
return fmt . Errorf ( "destination escapes allocation directory" )
}
return nil
}
2015-12-18 20:17:13 +00:00
var (
2018-01-31 22:33:00 +00:00
DefaultServiceJobRestartPolicy = RestartPolicy {
2016-02-02 23:08:07 +00:00
Delay : 15 * time . Second ,
Attempts : 2 ,
2018-01-31 22:33:00 +00:00
Interval : 30 * time . Minute ,
2018-01-31 16:39:11 +00:00
Mode : RestartPolicyModeFail ,
2015-12-18 20:17:13 +00:00
}
2018-01-31 22:33:00 +00:00
DefaultBatchJobRestartPolicy = RestartPolicy {
2016-02-02 23:08:07 +00:00
Delay : 15 * time . Second ,
2018-01-31 22:33:00 +00:00
Attempts : 3 ,
Interval : 24 * time . Hour ,
2018-01-31 16:39:11 +00:00
Mode : RestartPolicyModeFail ,
2015-12-18 20:17:13 +00:00
}
)
2018-01-14 15:03:08 +00:00
var (
2018-01-17 17:05:22 +00:00
DefaultServiceJobReschedulePolicy = ReschedulePolicy {
2018-02-22 23:43:07 +00:00
Delay : 30 * time . Second ,
DelayFunction : "exponential" ,
2018-03-13 15:06:26 +00:00
MaxDelay : 1 * time . Hour ,
2018-02-22 23:43:07 +00:00
Unlimited : true ,
2018-01-14 15:03:08 +00:00
}
2018-01-17 17:05:22 +00:00
DefaultBatchJobReschedulePolicy = ReschedulePolicy {
2018-02-22 23:43:07 +00:00
Attempts : 1 ,
Interval : 24 * time . Hour ,
Delay : 5 * time . Second ,
2018-03-26 19:45:09 +00:00
DelayFunction : "constant" ,
2018-01-14 15:03:08 +00:00
}
)
2015-12-18 20:17:13 +00:00
const (
	// RestartPolicyModeDelay causes an artificial delay till the next interval is
	// reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second

	// ReasonWithinPolicy describes restart events that are within policy
	ReasonWithinPolicy = "Restart within policy"
)
// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than Attempts
	// times in an interval.
	Mode string
}
2016-02-11 01:54:43 +00:00
// Copy returns a newly allocated duplicate of the restart policy, or nil when
// the receiver is nil.
func (r *RestartPolicy) Copy() *RestartPolicy {
	if r == nil {
		return nil
	}
	dup := *r
	return &dup
}
2015-10-30 22:51:39 +00:00
func ( r * RestartPolicy ) Validate ( ) error {
2017-02-13 23:27:36 +00:00
var mErr multierror . Error
2015-12-18 20:17:13 +00:00
switch r . Mode {
case RestartPolicyModeDelay , RestartPolicyModeFail :
default :
2017-02-13 23:27:36 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Unsupported restart mode: %q" , r . Mode ) )
2015-12-18 20:17:13 +00:00
}
2016-02-02 22:32:30 +00:00
// Check for ambiguous/confusing settings
if r . Attempts == 0 && r . Mode != RestartPolicyModeFail {
2017-02-13 23:27:36 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Restart policy %q with %d attempts is ambiguous" , r . Mode , r . Attempts ) )
2016-02-02 22:32:30 +00:00
}
2017-02-13 23:27:36 +00:00
if r . Interval . Nanoseconds ( ) < RestartPolicyMinInterval . Nanoseconds ( ) {
multierror . Append ( & mErr , fmt . Errorf ( "Interval can not be less than %v (got %v)" , RestartPolicyMinInterval , r . Interval ) )
2015-11-17 07:56:11 +00:00
}
2015-10-30 22:51:39 +00:00
if time . Duration ( r . Attempts ) * r . Delay > r . Interval {
2017-02-13 23:27:36 +00:00
multierror . Append ( & mErr ,
fmt . Errorf ( "Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v" , r . Attempts , r . Interval , r . Delay ) )
2015-10-30 22:51:39 +00:00
}
2017-02-13 23:27:36 +00:00
return mErr . ErrorOrNil ( )
2015-10-30 22:51:39 +00:00
}
func NewRestartPolicy ( jobType string ) * RestartPolicy {
2015-11-02 21:35:51 +00:00
switch jobType {
2015-11-06 20:38:25 +00:00
case JobTypeService , JobTypeSystem :
2018-01-31 22:33:00 +00:00
rp := DefaultServiceJobRestartPolicy
2015-11-03 01:00:17 +00:00
return & rp
2015-11-02 21:35:51 +00:00
case JobTypeBatch :
2018-01-31 22:33:00 +00:00
rp := DefaultBatchJobRestartPolicy
2015-11-03 01:00:17 +00:00
return & rp
2015-10-30 22:51:39 +00:00
}
2015-11-03 01:00:17 +00:00
return nil
2015-10-30 22:51:39 +00:00
}
2018-01-14 15:03:08 +00:00
const (
	// ReschedulePolicyMinInterval is the smallest Interval accepted by
	// ReschedulePolicy.Validate when attempts are limited.
	ReschedulePolicyMinInterval = 15 * time.Second

	// ReschedulePolicyMinDelay is the smallest Delay accepted by
	// ReschedulePolicy.Validate.
	ReschedulePolicyMinDelay = 5 * time.Second
)

// RescheduleDelayFunctions lists the accepted values for
// ReschedulePolicy.DelayFunction.
var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"}
2018-01-14 15:03:08 +00:00
// ReschedulePolicy configures how Tasks are rescheduled when they crash or fail.
type ReschedulePolicy struct {
	// Attempts limits the number of rescheduling attempts that can occur in
	// an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of reschedule
	// attempts.
	Interval time.Duration

	// Delay is a minimum duration to wait between reschedule attempts.
	// The delay function determines how much subsequent reschedule attempts
	// are delayed by.
	Delay time.Duration

	// DelayFunction determines how the delay progressively changes on
	// subsequent reschedule attempts. Valid values are "exponential",
	// "constant", and "fibonacci".
	DelayFunction string

	// MaxDelay is an upper bound on the delay.
	MaxDelay time.Duration

	// Unlimited allows infinite rescheduling attempts. Only allowed when
	// delay is set between reschedule attempts.
	Unlimited bool
}
// Copy returns a newly allocated duplicate of the reschedule policy, or nil
// when the receiver is nil.
func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
	if r == nil {
		return nil
	}
	dup := *r
	return &dup
}
2018-05-08 22:26:36 +00:00
// Enabled reports whether rescheduling can happen at all: the policy must be
// non-nil and either allow at least one attempt or be unlimited.
func (r *ReschedulePolicy) Enabled() bool {
	if r == nil {
		return false
	}
	return r.Unlimited || r.Attempts > 0
}
2018-02-22 23:43:07 +00:00
// Validate uses different criteria to validate the reschedule policy
// Delay must be a minimum of 5 seconds
2018-03-26 19:45:09 +00:00
// Delay Ceiling is ignored if Delay Function is "constant"
2018-02-22 23:43:07 +00:00
// Number of possible attempts is validated, given the interval, delay and delay function
2018-01-14 15:03:08 +00:00
func ( r * ReschedulePolicy ) Validate ( ) error {
2018-05-08 22:26:36 +00:00
if ! r . Enabled ( ) {
2018-02-22 23:43:07 +00:00
return nil
}
var mErr multierror . Error
// Check for ambiguous/confusing settings
2018-03-24 15:29:20 +00:00
if r . Attempts > 0 {
if r . Interval <= 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Interval must be a non zero value if Attempts > 0" ) )
}
if r . Unlimited {
multierror . Append ( & mErr , fmt . Errorf ( "Reschedule Policy with Attempts = %v, Interval = %v, " +
"and Unlimited = %v is ambiguous" , r . Attempts , r . Interval , r . Unlimited ) )
2018-03-26 18:30:09 +00:00
multierror . Append ( & mErr , errors . New ( "If Attempts >0, Unlimited cannot also be set to true" ) )
2018-03-24 15:29:20 +00:00
}
}
2018-02-22 23:43:07 +00:00
delayPreCheck := true
// Delay should be bigger than the default
if r . Delay . Nanoseconds ( ) < ReschedulePolicyMinDelay . Nanoseconds ( ) {
multierror . Append ( & mErr , fmt . Errorf ( "Delay cannot be less than %v (got %v)" , ReschedulePolicyMinDelay , r . Delay ) )
delayPreCheck = false
}
// Must use a valid delay function
if ! isValidDelayFunction ( r . DelayFunction ) {
multierror . Append ( & mErr , fmt . Errorf ( "Invalid delay function %q, must be one of %q" , r . DelayFunction , RescheduleDelayFunctions ) )
delayPreCheck = false
}
2018-03-13 15:06:26 +00:00
// Validate MaxDelay if not using linear delay progression
2018-03-26 19:45:09 +00:00
if r . DelayFunction != "constant" {
2018-03-13 15:06:26 +00:00
if r . MaxDelay . Nanoseconds ( ) < ReschedulePolicyMinDelay . Nanoseconds ( ) {
2018-03-24 15:29:20 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Max Delay cannot be less than %v (got %v)" , ReschedulePolicyMinDelay , r . Delay ) )
2018-02-22 23:43:07 +00:00
delayPreCheck = false
}
2018-03-13 15:06:26 +00:00
if r . MaxDelay < r . Delay {
2018-03-24 15:29:20 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Max Delay cannot be less than Delay %v (got %v)" , r . Delay , r . MaxDelay ) )
2018-02-22 23:43:07 +00:00
delayPreCheck = false
}
}
2018-03-06 03:46:54 +00:00
// Validate Interval and other delay parameters if attempts are limited
2018-02-22 23:43:07 +00:00
if ! r . Unlimited {
2018-01-17 17:05:22 +00:00
if r . Interval . Nanoseconds ( ) < ReschedulePolicyMinInterval . Nanoseconds ( ) {
2018-02-22 23:43:07 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Interval cannot be less than %v (got %v)" , ReschedulePolicyMinInterval , r . Interval ) )
}
if ! delayPreCheck {
// We can't cross validate the rest of the delay params if delayPreCheck fails, so return early
return mErr . ErrorOrNil ( )
2018-01-17 17:05:22 +00:00
}
2018-02-22 23:43:07 +00:00
crossValidationErr := r . validateDelayParams ( )
if crossValidationErr != nil {
multierror . Append ( & mErr , crossValidationErr )
}
}
return mErr . ErrorOrNil ( )
}
2018-01-14 15:03:08 +00:00
2018-02-22 23:43:07 +00:00
func isValidDelayFunction ( delayFunc string ) bool {
for _ , value := range RescheduleDelayFunctions {
if value == delayFunc {
return true
}
2018-01-14 15:03:08 +00:00
}
2018-02-22 23:43:07 +00:00
return false
}
func ( r * ReschedulePolicy ) validateDelayParams ( ) error {
ok , possibleAttempts , recommendedInterval := r . viableAttempts ( )
if ok {
return nil
}
var mErr multierror . Error
2018-03-26 19:45:09 +00:00
if r . DelayFunction == "constant" {
2018-02-22 23:43:07 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Nomad can only make %v attempts in %v with initial delay %v and " +
"delay function %q" , possibleAttempts , r . Interval , r . Delay , r . DelayFunction ) )
} else {
multierror . Append ( & mErr , fmt . Errorf ( "Nomad can only make %v attempts in %v with initial delay %v, " +
2018-03-13 15:06:26 +00:00
"delay function %q, and delay ceiling %v" , possibleAttempts , r . Interval , r . Delay , r . DelayFunction , r . MaxDelay ) )
2018-02-22 23:43:07 +00:00
}
2018-03-19 15:40:36 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Set the interval to at least %v to accommodate %v attempts" , recommendedInterval . Round ( time . Second ) , r . Attempts ) )
2018-02-22 23:43:07 +00:00
return mErr . ErrorOrNil ( )
}
func ( r * ReschedulePolicy ) viableAttempts ( ) ( bool , int , time . Duration ) {
var possibleAttempts int
var recommendedInterval time . Duration
valid := true
switch r . DelayFunction {
2018-03-26 19:45:09 +00:00
case "constant" :
2018-02-22 23:43:07 +00:00
recommendedInterval = time . Duration ( r . Attempts ) * r . Delay
if r . Interval < recommendedInterval {
possibleAttempts = int ( r . Interval / r . Delay )
valid = false
}
case "exponential" :
for i := 0 ; i < r . Attempts ; i ++ {
nextDelay := time . Duration ( math . Pow ( 2 , float64 ( i ) ) ) * r . Delay
2018-03-13 15:06:26 +00:00
if nextDelay > r . MaxDelay {
nextDelay = r . MaxDelay
2018-02-22 23:43:07 +00:00
recommendedInterval += nextDelay
} else {
recommendedInterval = nextDelay
}
if recommendedInterval < r . Interval {
possibleAttempts ++
}
}
if possibleAttempts < r . Attempts {
valid = false
}
case "fibonacci" :
var slots [ ] time . Duration
slots = append ( slots , r . Delay )
slots = append ( slots , r . Delay )
reachedCeiling := false
for i := 2 ; i < r . Attempts ; i ++ {
var nextDelay time . Duration
if reachedCeiling {
//switch to linear
2018-03-13 15:06:26 +00:00
nextDelay = slots [ i - 1 ] + r . MaxDelay
2018-02-22 23:43:07 +00:00
} else {
nextDelay = slots [ i - 1 ] + slots [ i - 2 ]
2018-03-13 15:06:26 +00:00
if nextDelay > r . MaxDelay {
nextDelay = r . MaxDelay
2018-02-22 23:43:07 +00:00
reachedCeiling = true
}
}
slots = append ( slots , nextDelay )
}
recommendedInterval = slots [ len ( slots ) - 1 ]
if r . Interval < recommendedInterval {
valid = false
// calculate possible attempts
for i := 0 ; i < len ( slots ) ; i ++ {
if slots [ i ] > r . Interval {
possibleAttempts = i
break
}
}
}
default :
return false , 0 , 0
}
if possibleAttempts < 0 { // can happen if delay is bigger than interval
possibleAttempts = 0
}
return valid , possibleAttempts , recommendedInterval
2018-01-14 15:03:08 +00:00
}
2018-03-11 18:50:50 +00:00
func NewReschedulePolicy ( jobType string ) * ReschedulePolicy {
2018-01-14 15:03:08 +00:00
switch jobType {
2018-01-17 17:05:22 +00:00
case JobTypeService :
rp := DefaultServiceJobReschedulePolicy
2018-01-14 15:03:08 +00:00
return & rp
case JobTypeBatch :
2018-01-17 17:05:22 +00:00
rp := DefaultBatchJobReschedulePolicy
2018-01-14 15:03:08 +00:00
return & rp
}
return nil
}
2018-01-24 00:47:00 +00:00
// Accepted values for MigrateStrategy.HealthCheck.
const (
	MigrateStrategyHealthChecks = "checks"
	MigrateStrategyHealthStates = "task_states"
)
// MigrateStrategy holds the migration settings of a task group.
type MigrateStrategy struct {
	// MaxParallel must not be negative.
	MaxParallel int

	// HealthCheck must be MigrateStrategyHealthChecks or
	// MigrateStrategyHealthStates; it may only be empty when MaxParallel
	// is zero.
	HealthCheck string

	// MinHealthyTime must not be negative and must not exceed
	// HealthyDeadline.
	MinHealthyTime time.Duration

	// HealthyDeadline must not be negative.
	HealthyDeadline time.Duration
}
// DefaultMigrateStrategy returns a newly allocated migrate strategy with the
// default settings. It is used for backwards compat with pre-0.8 Allocations
// that lack an update strategy.
//
// This function should match its counterpart in api/tasks.go
func DefaultMigrateStrategy() *MigrateStrategy {
	strategy := new(MigrateStrategy)
	strategy.MaxParallel = 1
	strategy.HealthCheck = MigrateStrategyHealthChecks
	strategy.MinHealthyTime = 10 * time.Second
	strategy.HealthyDeadline = 5 * time.Minute
	return strategy
}
// Validate checks the migrate strategy: MaxParallel, MinHealthyTime and
// HealthyDeadline must not be negative, HealthCheck must be a known value
// (or empty when MaxParallel is zero), and MinHealthyTime must not exceed
// HealthyDeadline. All violations are collected into the returned error.
func (m *MigrateStrategy) Validate() error {
	var mErr multierror.Error

	if m.MaxParallel < 0 {
		multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel))
	}

	if hc := m.HealthCheck; hc == MigrateStrategyHealthChecks || hc == MigrateStrategyHealthStates {
		// ok
	} else if hc == "" {
		// An empty health check is only tolerated when nothing migrates.
		if m.MaxParallel > 0 {
			multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck"))
		}
	} else {
		multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck))
	}

	if m.MinHealthyTime < 0 {
		multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime))
	}

	if m.HealthyDeadline < 0 {
		multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline))
	}

	if m.MinHealthyTime > m.HealthyDeadline {
		multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline"))
	}

	return mErr.ErrorOrNil()
}
2015-07-03 23:57:48 +00:00
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group support running
// in many replicas using the same configuration..
type TaskGroup struct {
// Name of the task group
Name string
// Count is the number of replicas of this task group that should
// be scheduled.
Count int
2017-05-09 00:44:26 +00:00
// Update is used to control the update strategy for this task group
Update * UpdateStrategy
2018-01-24 00:47:00 +00:00
// Migrate is used to control the migration strategy for this task group
Migrate * MigrateStrategy
2015-07-03 23:57:48 +00:00
// Constraints can be specified at a task group level and apply to
// all the tasks contained.
Constraints [ ] * Constraint
2015-10-30 22:51:39 +00:00
//RestartPolicy of a TaskGroup
RestartPolicy * RestartPolicy
2015-07-03 23:57:48 +00:00
// Tasks are the collection of tasks that this task group needs to run
Tasks [ ] * Task
2016-09-14 22:43:42 +00:00
// EphemeralDisk is the disk resources that the task group requests
EphemeralDisk * EphemeralDisk
2016-08-24 18:51:15 +00:00
2015-07-03 23:57:48 +00:00
// Meta is used to associate arbitrary metadata with this
// task group. This is opaque to Nomad.
Meta map [ string ] string
2018-01-14 15:03:08 +00:00
2018-01-15 23:27:55 +00:00
// ReschedulePolicy is used to configure how the scheduler should
// retry failed allocations.
2018-01-14 15:03:08 +00:00
ReschedulePolicy * ReschedulePolicy
2015-07-03 23:57:48 +00:00
}
2016-02-04 05:22:18 +00:00
func ( tg * TaskGroup ) Copy ( ) * TaskGroup {
2016-02-11 01:54:43 +00:00
if tg == nil {
return nil
}
ntg := new ( TaskGroup )
* ntg = * tg
2017-05-09 00:44:26 +00:00
ntg . Update = ntg . Update . Copy ( )
2016-02-11 17:08:20 +00:00
ntg . Constraints = CopySliceConstraints ( ntg . Constraints )
2016-02-11 01:54:43 +00:00
ntg . RestartPolicy = ntg . RestartPolicy . Copy ( )
2018-01-14 15:03:08 +00:00
ntg . ReschedulePolicy = ntg . ReschedulePolicy . Copy ( )
2016-02-11 01:54:43 +00:00
2016-03-21 23:29:21 +00:00
if tg . Tasks != nil {
tasks := make ( [ ] * Task , len ( ntg . Tasks ) )
for i , t := range ntg . Tasks {
tasks [ i ] = t . Copy ( )
}
ntg . Tasks = tasks
2016-02-04 05:22:18 +00:00
}
2017-01-18 23:55:14 +00:00
ntg . Meta = helper . CopyMapStringString ( ntg . Meta )
2016-08-26 19:24:47 +00:00
2016-09-14 22:43:42 +00:00
if tg . EphemeralDisk != nil {
ntg . EphemeralDisk = tg . EphemeralDisk . Copy ( )
2016-08-26 19:24:47 +00:00
}
2016-02-11 01:54:43 +00:00
return ntg
2016-02-04 05:22:18 +00:00
}
2016-07-20 23:07:15 +00:00
// Canonicalize is used to canonicalize fields in the TaskGroup.
func ( tg * TaskGroup ) Canonicalize ( job * Job ) {
2016-07-18 23:17:38 +00:00
// Ensure that an empty and nil map are treated the same to avoid scheduling
// problems since we use reflect DeepEquals.
if len ( tg . Meta ) == 0 {
tg . Meta = nil
}
2015-12-18 20:17:13 +00:00
// Set the default restart policy.
if tg . RestartPolicy == nil {
tg . RestartPolicy = NewRestartPolicy ( job . Type )
}
2018-01-14 15:03:08 +00:00
if tg . ReschedulePolicy == nil {
2018-03-11 18:50:50 +00:00
tg . ReschedulePolicy = NewReschedulePolicy ( job . Type )
2018-01-14 15:03:08 +00:00
}
2018-03-30 00:42:58 +00:00
// Canonicalize Migrate for service jobs
if job . Type == JobTypeService && tg . Migrate == nil {
tg . Migrate = DefaultMigrateStrategy ( )
}
2016-11-02 22:07:22 +00:00
// Set a default ephemeral disk object if the user has not requested for one
if tg . EphemeralDisk == nil {
tg . EphemeralDisk = DefaultEphemeralDisk ( )
}
2015-11-27 03:26:00 +00:00
for _ , task := range tg . Tasks {
2016-07-20 23:07:15 +00:00
task . Canonicalize ( job , tg )
2015-11-27 03:26:00 +00:00
}
2016-11-08 23:24:51 +00:00
// Add up the disk resources to EphemeralDisk. This is done so that users
// are not required to move their disk attribute from resources to
// EphemeralDisk section of the job spec in Nomad 0.5
// COMPAT 0.4.1 -> 0.5
// Remove in 0.6
var diskMB int
for _ , task := range tg . Tasks {
diskMB += task . Resources . DiskMB
}
if diskMB > 0 {
tg . EphemeralDisk . SizeMB = diskMB
}
2015-11-27 03:26:00 +00:00
}
2015-09-15 17:46:10 +00:00
// Validate is used to sanity check a task group
2017-07-07 02:08:51 +00:00
func ( tg * TaskGroup ) Validate ( j * Job ) error {
2015-09-15 17:46:10 +00:00
var mErr multierror . Error
if tg . Name == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing task group name" ) )
}
2016-03-17 18:02:59 +00:00
if tg . Count < 0 {
mErr . Errors = append ( mErr . Errors , errors . New ( "Task group count can't be negative" ) )
2015-09-15 17:46:10 +00:00
}
if len ( tg . Tasks ) == 0 {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing tasks for task group" ) )
}
2015-10-11 19:50:16 +00:00
for idx , constr := range tg . Constraints {
if err := constr . Validate ( ) ; err != nil {
outer := fmt . Errorf ( "Constraint %d validation failed: %s" , idx + 1 , err )
mErr . Errors = append ( mErr . Errors , outer )
}
}
2015-09-15 17:46:10 +00:00
2015-11-11 23:10:39 +00:00
if tg . RestartPolicy != nil {
if err := tg . RestartPolicy . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
} else {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Task Group %v should have a restart policy" , tg . Name ) )
2015-10-30 22:51:39 +00:00
}
2018-04-11 19:56:20 +00:00
if j . Type == JobTypeSystem {
if tg . ReschedulePolicy != nil {
2018-04-11 22:07:14 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "System jobs should not have a reschedule policy" ) )
2018-04-11 19:56:20 +00:00
}
} else {
2018-02-22 23:43:07 +00:00
if tg . ReschedulePolicy != nil {
if err := tg . ReschedulePolicy . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
} else {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Task Group %v should have a reschedule policy" , tg . Name ) )
2018-01-14 15:03:08 +00:00
}
}
2016-09-14 22:43:42 +00:00
if tg . EphemeralDisk != nil {
if err := tg . EphemeralDisk . Validate ( ) ; err != nil {
2016-08-24 18:51:15 +00:00
mErr . Errors = append ( mErr . Errors , err )
}
} else {
2016-09-28 00:57:05 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Task Group %v should have an ephemeral disk object" , tg . Name ) )
2016-08-24 18:51:15 +00:00
}
2017-05-09 00:44:26 +00:00
// Validate the update strategy
if u := tg . Update ; u != nil {
2017-07-07 02:08:51 +00:00
switch j . Type {
case JobTypeService , JobTypeSystem :
default :
2018-03-29 18:13:50 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Job type %q does not allow update block" , j . Type ) )
2017-07-07 02:08:51 +00:00
}
2017-05-09 00:44:26 +00:00
if err := u . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
}
2018-01-24 00:47:00 +00:00
// Validate the migration strategy
switch j . Type {
case JobTypeService :
2018-02-23 18:42:43 +00:00
if tg . Migrate != nil {
if err := tg . Migrate . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
2018-01-24 00:47:00 +00:00
}
default :
if tg . Migrate != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Job type %q does not allow migrate block" , j . Type ) )
}
}
2017-07-07 23:17:05 +00:00
// Check for duplicate tasks, that there is only leader task if any,
// and no duplicated static ports
2015-09-15 17:46:10 +00:00
tasks := make ( map [ string ] int )
2017-07-07 23:17:05 +00:00
staticPorts := make ( map [ int ] string )
2017-02-11 00:57:47 +00:00
leaderTasks := 0
2015-09-15 17:46:10 +00:00
for idx , task := range tg . Tasks {
if task . Name == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Task %d missing name" , idx + 1 ) )
} else if existing , ok := tasks [ task . Name ] ; ok {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Task %d redefines '%s' from task %d" , idx + 1 , task . Name , existing + 1 ) )
} else {
tasks [ task . Name ] = idx
}
2017-02-11 00:57:47 +00:00
if task . Leader {
leaderTasks ++
}
2017-07-07 23:17:05 +00:00
if task . Resources == nil {
continue
}
for _ , net := range task . Resources . Networks {
for _ , port := range net . ReservedPorts {
if other , ok := staticPorts [ port . Value ] ; ok {
2017-07-07 23:58:20 +00:00
err := fmt . Errorf ( "Static port %d already reserved by %s" , port . Value , other )
2017-07-07 23:17:05 +00:00
mErr . Errors = append ( mErr . Errors , err )
} else {
2017-07-07 23:58:20 +00:00
staticPorts [ port . Value ] = fmt . Sprintf ( "%s:%s" , task . Name , port . Label )
2017-07-07 23:17:05 +00:00
}
}
}
2017-02-11 00:57:47 +00:00
}
if leaderTasks > 1 {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Only one task may be marked as leader" ) )
2015-09-15 18:23:03 +00:00
}
2015-09-15 17:46:10 +00:00
2015-09-15 18:23:03 +00:00
// Validate the tasks
2016-07-20 13:23:35 +00:00
for _ , task := range tg . Tasks {
2016-09-14 22:43:42 +00:00
if err := task . Validate ( tg . EphemeralDisk ) ; err != nil {
2017-03-03 23:00:39 +00:00
outer := fmt . Errorf ( "Task %s validation failed: %v" , task . Name , err )
2015-09-21 00:08:57 +00:00
mErr . Errors = append ( mErr . Errors , outer )
2015-09-15 17:46:10 +00:00
}
}
return mErr . ErrorOrNil ( )
}
2017-07-13 20:10:45 +00:00
// Warnings returns a list of warnings that may be from dubious settings or
// deprecation warnings.
func ( tg * TaskGroup ) Warnings ( j * Job ) error {
var mErr multierror . Error
// Validate the update strategy
if u := tg . Update ; u != nil {
// Check the counts are appropriate
if u . MaxParallel > tg . Count {
mErr . Errors = append ( mErr . Errors ,
fmt . Errorf ( "Update max parallel count is greater than task group count (%d > %d). " +
2017-07-17 22:35:14 +00:00
"A destructive change would result in the simultaneous replacement of all allocations." , u . MaxParallel , tg . Count ) )
2017-07-13 20:10:45 +00:00
}
}
return mErr . ErrorOrNil ( )
}
2015-09-07 19:23:34 +00:00
// LookupTask finds a task by name, returning the first task in the group
// whose Name matches, or nil when there is none.
func (tg *TaskGroup) LookupTask(name string) *Task {
	tasks := tg.Tasks
	for i := range tasks {
		if tasks[i].Name == name {
			return tasks[i]
		}
	}
	return nil
}
2015-09-15 00:43:42 +00:00
// GoString renders the task group in Go syntax, prefixed with "*" to mark
// that the receiver is a pointer.
func (tg *TaskGroup) GoString() string {
	group := *tg
	return fmt.Sprintf("*%#v", group)
}
2017-10-13 21:36:02 +00:00
// CombinedResources returns the combined resources for the task group: the
// ephemeral disk size plus the resources of every task, accumulated with
// Resources.Add.
func (tg *TaskGroup) CombinedResources() *Resources {
	total := &Resources{DiskMB: tg.EphemeralDisk.SizeMB}
	tasks := tg.Tasks
	for i := range tasks {
		total.Add(tasks[i].Resources)
	}
	return total
}
2017-08-25 00:18:06 +00:00
// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
	// Limit: restart task after this many unhealthy intervals.
	Limit int

	// Grace is the time to give tasks after starting to get healthy.
	Grace time.Duration

	// IgnoreWarnings, if true, treats checks in `warning` as passing.
	IgnoreWarnings bool
}
// Copy returns a newly allocated duplicate of the check restart settings, or
// nil when the receiver is nil.
func (c *CheckRestart) Copy() *CheckRestart {
	if c == nil {
		return nil
	}
	dup := *c
	return &dup
}
func ( c * CheckRestart ) Validate ( ) error {
if c == nil {
return nil
}
2017-09-15 22:12:47 +00:00
var mErr multierror . Error
2017-08-25 00:18:06 +00:00
if c . Limit < 0 {
2017-09-15 22:12:47 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "limit must be greater than or equal to 0 but found %d" , c . Limit ) )
2017-08-25 00:18:06 +00:00
}
if c . Grace < 0 {
2017-09-15 22:12:47 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "grace period must be greater than or equal to 0 but found %d" , c . Grace ) )
2017-08-25 00:18:06 +00:00
}
2017-09-15 22:12:47 +00:00
return mErr . ErrorOrNil ( )
2017-08-25 00:18:06 +00:00
}
2015-11-17 21:36:59 +00:00
// Check types accepted by ServiceCheck.validate, together with the lower
// bounds it enforces on a check's Interval and Timeout.
const (
ServiceCheckHTTP = "http"
ServiceCheckTCP = "tcp"
ServiceCheckScript = "script"
2018-05-02 23:49:47 +00:00
ServiceCheckGRPC = "grpc"
2016-06-14 01:19:40 +00:00
// minCheckInterval is the minimum check interval permitted. Consul
// currently has its MinInterval set to 1s. Mirror that here for
// consistency.
minCheckInterval = 1 * time . Second
// minCheckTimeout is the minimum check timeout permitted.
// ServiceCheck.validate applies it to every check type, not only to
// script checks.
minCheckTimeout = 1 * time . Second
2015-11-17 21:36:59 +00:00
)
2015-11-17 07:20:35 +00:00
// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
2017-08-25 00:18:06 +00:00
Name string // Name of the check; Canonicalize derives one from the service name when empty
Type string // Type of the check - http, tcp, script or grpc
Command string // Command is the command to run for script checks
2018-03-11 17:40:28 +00:00
Args [ ] string // Args is a list of arguments for script checks
2017-08-25 00:18:06 +00:00
Path string // Path of the health check url for http type check; must be relative
Protocol string // Protocol to use if check is http, defaults to http
PortLabel string // The port to use for checks that require one (tcp/http/grpc)
2017-12-05 19:39:42 +00:00
AddressMode string // 'host' to use host ip:port or 'driver' to use driver's; 'auto' is rejected for checks
2017-08-25 00:18:06 +00:00
Interval time . Duration // Interval of the check; required, at least minCheckInterval
Timeout time . Duration // Timeout of the response from the check before consul fails the check; required, at least minCheckTimeout
InitialStatus string // Initial status of the check; empty or one of the Consul health states
TLSSkipVerify bool // Skip TLS verification when Protocol=https
Method string // HTTP Method to use (GET by default)
Header map [ string ] [ ] string // HTTP Headers for Consul to set when making HTTP checks
CheckRestart * CheckRestart // If and when a task should be restarted based on checks
2018-05-03 22:18:12 +00:00
GRPCService string // Service for GRPC checks
2018-05-02 23:49:47 +00:00
GRPCUseTLS bool // Whether or not to use TLS for GRPC checks
2015-11-17 07:20:35 +00:00
}
2016-02-11 01:54:43 +00:00
func ( sc * ServiceCheck ) Copy ( ) * ServiceCheck {
if sc == nil {
return nil
}
nsc := new ( ServiceCheck )
* nsc = * sc
2017-08-15 23:13:05 +00:00
nsc . Args = helper . CopySliceString ( sc . Args )
nsc . Header = helper . CopyMapStringSliceString ( sc . Header )
2017-08-25 00:18:06 +00:00
nsc . CheckRestart = sc . CheckRestart . Copy ( )
2016-02-11 01:54:43 +00:00
return nsc
}
2016-07-20 23:07:15 +00:00
func ( sc * ServiceCheck ) Canonicalize ( serviceName string ) {
2017-08-15 23:13:05 +00:00
// Ensure empty maps/slices are treated as null to avoid scheduling
// issues when using DeepEquals.
2016-07-18 23:17:38 +00:00
if len ( sc . Args ) == 0 {
sc . Args = nil
}
2017-08-15 23:13:05 +00:00
if len ( sc . Header ) == 0 {
sc . Header = nil
} else {
for k , v := range sc . Header {
if len ( v ) == 0 {
sc . Header [ k ] = nil
}
}
}
2016-07-18 23:17:38 +00:00
if sc . Name == "" {
sc . Name = fmt . Sprintf ( "service: %q check" , serviceName )
}
}
2016-06-14 01:17:43 +00:00
// validate a Service's ServiceCheck
func ( sc * ServiceCheck ) validate ( ) error {
2017-12-05 19:39:42 +00:00
// Validate Type
2016-06-14 01:19:40 +00:00
switch strings . ToLower ( sc . Type ) {
2018-05-03 22:18:12 +00:00
case ServiceCheckGRPC :
2016-06-14 01:19:40 +00:00
case ServiceCheckTCP :
case ServiceCheckHTTP :
if sc . Path == "" {
return fmt . Errorf ( "http type must have a valid http path" )
}
2017-12-21 09:32:12 +00:00
url , err := url . Parse ( sc . Path )
if err != nil {
return fmt . Errorf ( "http type must have a valid http path" )
}
if url . IsAbs ( ) {
return fmt . Errorf ( "http type must have a relative http path" )
}
2016-06-14 01:55:15 +00:00
2016-06-14 01:19:40 +00:00
case ServiceCheckScript :
if sc . Command == "" {
return fmt . Errorf ( "script type must have a valid script path" )
}
2018-05-02 23:49:47 +00:00
2016-06-14 01:19:40 +00:00
default :
return fmt . Errorf ( ` invalid type (%+q), must be one of "http", "tcp", or "script" type ` , sc . Type )
2016-01-08 02:03:44 +00:00
}
2017-12-05 19:39:42 +00:00
// Validate interval and timeout
2016-11-01 23:02:16 +00:00
if sc . Interval == 0 {
2016-11-01 23:05:34 +00:00
return fmt . Errorf ( "missing required value interval. Interval cannot be less than %v" , minCheckInterval )
2016-11-01 23:02:16 +00:00
} else if sc . Interval < minCheckInterval {
2016-11-01 23:05:34 +00:00
return fmt . Errorf ( "interval (%v) cannot be lower than %v" , sc . Interval , minCheckInterval )
2015-11-17 22:25:23 +00:00
}
2016-06-14 01:28:27 +00:00
2017-08-14 17:23:52 +00:00
if sc . Timeout == 0 {
return fmt . Errorf ( "missing required value timeout. Timeout cannot be less than %v" , minCheckInterval )
} else if sc . Timeout < minCheckTimeout {
return fmt . Errorf ( "timeout (%v) is lower than required minimum timeout %v" , sc . Timeout , minCheckInterval )
}
2017-12-05 19:39:42 +00:00
// Validate InitialStatus
2016-08-16 19:05:15 +00:00
switch sc . InitialStatus {
case "" :
case api . HealthPassing :
case api . HealthWarning :
case api . HealthCritical :
default :
2017-02-28 00:00:19 +00:00
return fmt . Errorf ( ` invalid initial check state (%s), must be one of %q, %q, %q or empty ` , sc . InitialStatus , api . HealthPassing , api . HealthWarning , api . HealthCritical )
2016-08-16 19:05:15 +00:00
}
2017-12-05 19:39:42 +00:00
// Validate AddressMode
switch sc . AddressMode {
case "" , AddressModeHost , AddressModeDriver :
// Ok
case AddressModeAuto :
return fmt . Errorf ( "invalid address_mode %q - %s only valid for services" , sc . AddressMode , AddressModeAuto )
default :
return fmt . Errorf ( "invalid address_mode %q" , sc . AddressMode )
}
2017-08-25 00:18:06 +00:00
return sc . CheckRestart . Validate ( )
2015-11-17 21:36:59 +00:00
}
2016-04-19 02:38:47 +00:00
// RequiresPort returns whether the service check requires the task has a port.
func ( sc * ServiceCheck ) RequiresPort ( ) bool {
switch sc . Type {
2018-05-03 22:18:12 +00:00
case ServiceCheckGRPC , ServiceCheckHTTP , ServiceCheckTCP :
2016-04-19 02:38:47 +00:00
return true
default :
return false
}
}
2017-09-14 16:58:35 +00:00
// TriggersRestarts returns true if this check should be watched and trigger a restart
2017-08-26 05:40:18 +00:00
// on failure.
2017-09-14 16:58:35 +00:00
func ( sc * ServiceCheck ) TriggersRestarts ( ) bool {
2017-08-26 05:40:18 +00:00
return sc . CheckRestart != nil && sc . CheckRestart . Limit > 0
}
2017-04-19 04:28:25 +00:00
// Hash all ServiceCheck fields and the check's corresponding service ID to
// create an identifier. The identifier is not guaranteed to be unique as if
// the PortLabel is blank, the Service's PortLabel will be used after Hash is
// called.
2015-12-14 23:57:56 +00:00
func ( sc * ServiceCheck ) Hash ( serviceID string ) string {
2015-11-21 20:34:01 +00:00
h := sha1 . New ( )
2015-12-14 23:57:56 +00:00
io . WriteString ( h , serviceID )
2015-11-21 20:34:01 +00:00
io . WriteString ( h , sc . Name )
io . WriteString ( h , sc . Type )
2016-03-28 21:05:12 +00:00
io . WriteString ( h , sc . Command )
2016-03-24 17:06:40 +00:00
io . WriteString ( h , strings . Join ( sc . Args , "" ) )
2015-11-21 20:34:01 +00:00
io . WriteString ( h , sc . Path )
io . WriteString ( h , sc . Protocol )
2016-07-08 21:09:27 +00:00
io . WriteString ( h , sc . PortLabel )
2015-11-21 20:34:01 +00:00
io . WriteString ( h , sc . Interval . String ( ) )
io . WriteString ( h , sc . Timeout . String ( ) )
2017-08-15 23:13:05 +00:00
io . WriteString ( h , sc . Method )
2017-04-19 04:28:25 +00:00
// Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6
if sc . TLSSkipVerify {
io . WriteString ( h , "true" )
}
2017-08-15 23:13:05 +00:00
// Since map iteration order isn't stable we need to write k/v pairs to
// a slice and sort it before hashing.
if len ( sc . Header ) > 0 {
headers := make ( [ ] string , 0 , len ( sc . Header ) )
for k , v := range sc . Header {
headers = append ( headers , k + strings . Join ( v , "" ) )
}
sort . Strings ( headers )
io . WriteString ( h , strings . Join ( headers , "" ) )
}
2017-12-05 19:39:42 +00:00
// Only include AddressMode if set to maintain ID stability with Nomad <0.7.1
if len ( sc . AddressMode ) > 0 {
io . WriteString ( h , sc . AddressMode )
}
2018-05-02 23:49:47 +00:00
// Only include GRPC if set to maintain ID stability with Nomad <0.8.4
2018-05-03 22:18:12 +00:00
if sc . GRPCService != "" {
io . WriteString ( h , sc . GRPCService )
2018-05-02 23:49:47 +00:00
}
if sc . GRPCUseTLS {
io . WriteString ( h , "true" )
}
2015-11-21 20:34:01 +00:00
return fmt . Sprintf ( "%x" , h . Sum ( nil ) )
}
2017-06-09 17:29:41 +00:00
// Address modes accepted in the AddressMode field of services and checks.
// Service.Validate accepts all three (or an empty value), while
// ServiceCheck.validate rejects AddressModeAuto.
const (
AddressModeAuto = "auto"
AddressModeHost = "host"
2017-06-21 20:43:59 +00:00
AddressModeDriver = "driver"
2017-06-09 17:29:41 +00:00
)
2016-06-12 23:36:49 +00:00
// Service represents a Consul service definition in Nomad
type Service struct {
2016-06-10 03:00:32 +00:00
// Name of the service registered with Consul. Consul defaults the
// Name to ServiceID if not specified. The Name if specified is used
// as one of the seed values when generating a Consul ServiceID.
// Canonicalize interpolates job, task group and task names into it.
Name string
// PortLabel is either the numeric port number or the `host:port`.
// To specify the port number using the host's Consul Advertise
// address, specify an empty host in the PortLabel (e.g. `:port`).
2017-02-22 20:30:05 +00:00
PortLabel string
2017-06-09 17:29:41 +00:00
// AddressMode specifies whether or not to use the host ip:port for
// this service. Validate accepts an empty value or one of
// AddressModeAuto, AddressModeHost and AddressModeDriver.
AddressMode string
2018-04-19 22:12:23 +00:00
Tags [ ] string // List of tags for the service
CanaryTags [ ] string // List of tags for the service when it is a canary
Checks [ ] * ServiceCheck // List of checks associated with the service
2015-11-17 07:20:35 +00:00
}
2016-06-12 23:36:49 +00:00
func ( s * Service ) Copy ( ) * Service {
2016-02-11 01:54:43 +00:00
if s == nil {
return nil
}
2016-06-12 23:36:49 +00:00
ns := new ( Service )
2016-02-11 01:54:43 +00:00
* ns = * s
2017-01-18 23:55:14 +00:00
ns . Tags = helper . CopySliceString ( ns . Tags )
2018-04-19 22:12:23 +00:00
ns . CanaryTags = helper . CopySliceString ( ns . CanaryTags )
2016-02-11 01:54:43 +00:00
2016-03-21 23:29:21 +00:00
if s . Checks != nil {
checks := make ( [ ] * ServiceCheck , len ( ns . Checks ) )
2016-02-11 17:08:20 +00:00
for i , c := range ns . Checks {
checks [ i ] = c . Copy ( )
}
2016-03-21 23:29:21 +00:00
ns . Checks = checks
2016-02-11 01:54:43 +00:00
}
2016-03-21 23:29:21 +00:00
2016-02-11 01:54:43 +00:00
return ns
}
2016-07-20 23:07:15 +00:00
// Canonicalize interpolates values of Job, Task Group and Task in the Service
2015-12-11 00:04:04 +00:00
// Name. This also generates check names, service id and check ids.
2016-07-20 23:07:15 +00:00
func ( s * Service ) Canonicalize ( job string , taskGroup string , task string ) {
2016-07-18 23:17:38 +00:00
// Ensure empty lists are treated as null to avoid scheduler issues when
// using DeepEquals
if len ( s . Tags ) == 0 {
s . Tags = nil
}
2018-04-19 22:12:23 +00:00
if len ( s . CanaryTags ) == 0 {
s . CanaryTags = nil
}
2016-07-18 23:17:38 +00:00
if len ( s . Checks ) == 0 {
s . Checks = nil
}
2015-12-11 00:04:04 +00:00
s . Name = args . ReplaceEnv ( s . Name , map [ string ] string {
"JOB" : job ,
"TASKGROUP" : taskGroup ,
"TASK" : task ,
"BASE" : fmt . Sprintf ( "%s-%s-%s" , job , taskGroup , task ) ,
} ,
)
for _ , check := range s . Checks {
2016-07-20 23:07:15 +00:00
check . Canonicalize ( s . Name )
2015-12-11 00:04:04 +00:00
}
}
// Validate checks if the Check definition is valid
2016-06-12 23:36:49 +00:00
func ( s * Service ) Validate ( ) error {
2015-11-17 21:36:59 +00:00
var mErr multierror . Error
2016-02-05 22:42:35 +00:00
2016-10-24 19:13:47 +00:00
// Ensure the service name is valid per the below RFCs but make an exception
2017-11-15 21:35:43 +00:00
// for our interpolation syntax by first stripping any environment variables from the name
serviceNameStripped := args . ReplaceEnvWithPlaceHolder ( s . Name , "ENV-VAR" )
if err := s . ValidateName ( serviceNameStripped ) ; err != nil {
2017-11-17 14:44:18 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q" , s . Name ) )
2016-02-05 22:42:35 +00:00
}
2017-06-09 17:29:41 +00:00
switch s . AddressMode {
2017-06-20 19:26:52 +00:00
case "" , AddressModeAuto , AddressModeHost , AddressModeDriver :
2017-06-09 17:29:41 +00:00
// OK
default :
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "service address_mode must be %q, %q, or %q; not %q" , AddressModeAuto , AddressModeHost , AddressModeDriver , s . AddressMode ) )
}
2015-11-17 21:36:59 +00:00
for _ , c := range s . Checks {
2017-12-19 00:18:42 +00:00
if s . PortLabel == "" && c . PortLabel == "" && c . RequiresPort ( ) {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "check %s invalid: check requires a port but neither check nor service %+q have a port" , c . Name , s . Name ) )
2016-04-15 08:50:55 +00:00
continue
}
2016-06-14 01:19:40 +00:00
2016-06-14 01:17:43 +00:00
if err := c . validate ( ) ; err != nil {
2016-06-14 01:19:40 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "check %s invalid: %v" , c . Name , err ) )
2015-11-17 21:36:59 +00:00
}
}
2017-08-25 00:18:06 +00:00
2015-11-17 21:36:59 +00:00
return mErr . ErrorOrNil ( )
}
2016-10-24 19:13:47 +00:00
// ValidateName checks if the services Name is valid and should be called after
// the name has been interpolated
func ( s * Service ) ValidateName ( name string ) error {
// Ensure the service name is valid per RFC-952 §1
// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
// (https://tools.ietf.org/html/rfc1123), and RFC-2782
// (https://tools.ietf.org/html/rfc2782).
re := regexp . MustCompile ( ` ^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-] { 0,61}[a-z0-9])$ ` )
if ! re . MatchString ( name ) {
2017-03-08 14:10:30 +00:00
return fmt . Errorf ( "service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q" , name )
2016-10-24 19:13:47 +00:00
}
return nil
}
2017-12-08 19:50:14 +00:00
// Hash returns a base32 encoded hash of a Service's contents excluding checks
// as they're hashed independently.
2018-04-23 23:34:53 +00:00
func ( s * Service ) Hash ( allocID , taskName string , canary bool ) string {
2017-12-08 19:50:14 +00:00
h := sha1 . New ( )
io . WriteString ( h , allocID )
io . WriteString ( h , taskName )
io . WriteString ( h , s . Name )
io . WriteString ( h , s . PortLabel )
io . WriteString ( h , s . AddressMode )
for _ , tag := range s . Tags {
io . WriteString ( h , tag )
}
2018-04-19 22:12:23 +00:00
for _ , tag := range s . CanaryTags {
io . WriteString ( h , tag )
}
2017-12-08 19:50:14 +00:00
2018-04-23 23:34:53 +00:00
// Vary ID on whether or not CanaryTags will be used
if canary {
2018-04-26 17:42:34 +00:00
h . Write ( [ ] byte ( "Canary" ) )
2018-04-23 23:34:53 +00:00
}
2017-12-08 19:50:14 +00:00
// Base32 is used for encoding the hash as sha1 hashes can always be
// encoded without padding, only 4 bytes larger than base64, and saves
// 8 bytes vs hex. Since these hashes are used in Consul URLs it's nice
// to have a reasonably compact URL-safe representation.
2017-12-08 23:54:04 +00:00
return b32 . EncodeToString ( h . Sum ( nil ) )
2017-12-08 19:50:14 +00:00
}
2015-12-23 00:10:30 +00:00
const (
// DefaultKillTimeout is the default timeout between signaling a task it
// will be killed and killing it. Task.Canonicalize applies it when a
// task's KillTimeout is zero.
DefaultKillTimeout = 5 * time . Second
)
2016-02-05 07:28:01 +00:00
// LogConfig provides configuration for log rotation
type LogConfig struct {
2017-02-22 20:30:05 +00:00
// MaxFiles is the number of log files; Validate requires at least 1.
MaxFiles int
// MaxFileSizeMB is the size of a log file in MB; Validate requires at
// least 1.
MaxFileSizeMB int
2016-02-05 07:28:01 +00:00
}
2016-07-19 06:37:54 +00:00
// DefaultLogConfig returns the default LogConfig values.
2016-02-11 00:44:31 +00:00
func DefaultLogConfig ( ) * LogConfig {
return & LogConfig {
MaxFiles : 10 ,
MaxFileSizeMB : 10 ,
}
}
2016-02-11 22:44:35 +00:00
// Validate returns an error if the log config specified are less than
2016-02-05 07:28:01 +00:00
// the minimum allowed.
2016-02-11 22:44:35 +00:00
func ( l * LogConfig ) Validate ( ) error {
2016-02-05 07:28:01 +00:00
var mErr multierror . Error
2016-02-11 22:44:35 +00:00
if l . MaxFiles < 1 {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "minimum number of files is 1; got %d" , l . MaxFiles ) )
2016-02-05 07:28:01 +00:00
}
2016-02-11 22:44:35 +00:00
if l . MaxFileSizeMB < 1 {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "minimum file size is 1MB; got %d" , l . MaxFileSizeMB ) )
2016-02-05 07:28:01 +00:00
}
return mErr . ErrorOrNil ( )
}
2015-07-03 23:57:48 +00:00
// Task is a single process typically that is executed as part of a task group.
type Task struct {
// Name of the task. Validate requires it to be set and free of slashes.
Name string
// Driver is used to control which driver is used
Driver string
2016-03-23 11:57:31 +00:00
// User is used to determine which user will run the task. It defaults to
// the same user the Nomad client is being run as.
User string
2015-07-03 23:57:48 +00:00
// Config is provided to the driver to initialize
2015-11-14 02:09:42 +00:00
Config map [ string ] interface { }
2015-07-03 23:57:48 +00:00
2015-09-30 16:18:43 +00:00
// Map of environment variables to be used by the driver
Env map [ string ] string
2015-11-17 07:20:35 +00:00
// List of service definitions exposed by the Task
2016-06-12 23:36:49 +00:00
Services [ ] * Service
2015-11-17 06:37:09 +00:00
2016-08-09 22:23:44 +00:00
// Vault is used to define the set of Vault policies that this task should
// have access to.
Vault * Vault
2016-09-23 22:39:52 +00:00
// Templates are the set of templates to be rendered for the task.
Templates [ ] * Template
2015-07-03 23:57:48 +00:00
// Constraints can be specified at a task level and apply only to
// the particular task.
Constraints [ ] * Constraint
// Resources is the resources needed by this task
Resources * Resources
2017-01-26 05:06:16 +00:00
// DispatchPayload configures how the task retrieves its input from a dispatch
DispatchPayload * DispatchPayloadConfig
2016-11-23 22:56:50 +00:00
2015-07-03 23:57:48 +00:00
// Meta is used to associate arbitrary metadata with this
// task. This is opaque to Nomad.
Meta map [ string ] string
2015-12-23 00:10:30 +00:00
// KillTimeout is the time between signaling a task that it will be
// killed and killing it. Canonicalize sets it to DefaultKillTimeout
// when zero.
2017-02-22 20:30:05 +00:00
KillTimeout time . Duration
2016-02-05 07:28:01 +00:00
// LogConfig provides configuration for log rotation
2017-02-22 20:30:05 +00:00
LogConfig * LogConfig
2016-03-14 05:29:07 +00:00
// Artifacts is a list of artifacts to download and extract before running
// the task.
2016-03-14 18:13:43 +00:00
Artifacts [ ] * TaskArtifact
2017-02-11 00:57:47 +00:00
// Leader marks the task as the leader within the group. When the leader
// task exits, other tasks will be gracefully terminated.
Leader bool
2017-08-17 00:54:11 +00:00
// ShutdownDelay is the duration of the delay between deregistering a
// task from Consul and sending it a signal to shutdown. See #2441
ShutdownDelay time . Duration
2017-11-30 21:53:35 +00:00
2017-12-07 15:45:21 +00:00
// KillSignal is the kill signal to use for the task. This is an optional
// specification and defaults to SIGINT
2017-11-30 21:53:35 +00:00
KillSignal string
2016-03-14 18:13:43 +00:00
}
2016-02-10 21:44:53 +00:00
func ( t * Task ) Copy ( ) * Task {
2016-02-11 01:54:43 +00:00
if t == nil {
2016-02-10 21:44:53 +00:00
return nil
}
2016-02-11 01:54:43 +00:00
nt := new ( Task )
* nt = * t
2017-01-18 23:55:14 +00:00
nt . Env = helper . CopyMapStringString ( nt . Env )
2016-02-11 01:54:43 +00:00
2016-06-12 23:36:49 +00:00
if t . Services != nil {
services := make ( [ ] * Service , len ( nt . Services ) )
for i , s := range nt . Services {
2016-03-21 23:29:21 +00:00
services [ i ] = s . Copy ( )
}
2016-06-12 23:36:49 +00:00
nt . Services = services
2016-02-11 01:54:43 +00:00
}
2016-03-21 23:29:21 +00:00
2016-02-11 17:08:20 +00:00
nt . Constraints = CopySliceConstraints ( nt . Constraints )
2016-02-11 01:54:43 +00:00
2016-08-09 22:23:44 +00:00
nt . Vault = nt . Vault . Copy ( )
2016-02-11 01:54:43 +00:00
nt . Resources = nt . Resources . Copy ( )
2017-01-18 23:55:14 +00:00
nt . Meta = helper . CopyMapStringString ( nt . Meta )
2017-01-26 05:06:16 +00:00
nt . DispatchPayload = nt . DispatchPayload . Copy ( )
2016-02-11 01:54:43 +00:00
2016-03-21 23:29:21 +00:00
if t . Artifacts != nil {
2016-03-22 00:23:04 +00:00
artifacts := make ( [ ] * TaskArtifact , 0 , len ( t . Artifacts ) )
2016-03-21 23:29:21 +00:00
for _ , a := range nt . Artifacts {
artifacts = append ( artifacts , a . Copy ( ) )
}
nt . Artifacts = artifacts
2016-03-14 22:46:06 +00:00
}
2016-02-11 01:54:43 +00:00
if i , err := copystructure . Copy ( nt . Config ) ; err != nil {
2017-07-18 00:18:12 +00:00
panic ( err . Error ( ) )
2017-07-18 00:53:21 +00:00
} else {
2016-02-11 01:54:43 +00:00
nt . Config = i . ( map [ string ] interface { } )
}
2016-09-23 22:39:52 +00:00
if t . Templates != nil {
templates := make ( [ ] * Template , len ( t . Templates ) )
for i , tmpl := range nt . Templates {
templates [ i ] = tmpl . Copy ( )
}
nt . Templates = templates
}
2016-02-11 01:54:43 +00:00
return nt
2016-02-10 21:44:53 +00:00
}
2016-07-20 23:07:15 +00:00
// Canonicalize canonicalizes fields in the task.
func ( t * Task ) Canonicalize ( job * Job , tg * TaskGroup ) {
2016-07-18 23:17:38 +00:00
// Ensure that an empty and nil map are treated the same to avoid scheduling
// problems since we use reflect DeepEquals.
if len ( t . Meta ) == 0 {
t . Meta = nil
}
if len ( t . Config ) == 0 {
t . Config = nil
}
if len ( t . Env ) == 0 {
t . Env = nil
}
2016-07-20 23:07:15 +00:00
for _ , service := range t . Services {
service . Canonicalize ( job . Name , tg . Name , t . Name )
}
2016-10-26 20:21:09 +00:00
// If Resources are nil initialize them to defaults, otherwise canonicalize
2016-10-26 17:36:41 +00:00
if t . Resources == nil {
t . Resources = DefaultResources ( )
} else {
2016-07-20 23:43:20 +00:00
t . Resources . Canonicalize ( )
}
2015-12-23 00:10:30 +00:00
// Set the default timeout if it is not specified.
if t . KillTimeout == 0 {
t . KillTimeout = DefaultKillTimeout
}
2016-10-25 18:09:22 +00:00
if t . Vault != nil {
t . Vault . Canonicalize ( )
}
for _ , template := range t . Templates {
template . Canonicalize ( )
}
2015-12-18 20:17:13 +00:00
}
2015-09-15 00:43:42 +00:00
// GoString renders the dereferenced task in Go syntax, prefixed with "*" to
// signal that the receiver is a pointer.
func (t *Task) GoString() string {
	value := *t
	return fmt.Sprintf("*%#v", value)
}
2016-03-16 03:21:52 +00:00
// Validate is used to sanity check a task
2016-09-14 22:43:42 +00:00
func ( t * Task ) Validate ( ephemeralDisk * EphemeralDisk ) error {
2016-03-14 22:46:06 +00:00
var mErr multierror . Error
if t . Name == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing task name" ) )
}
2016-05-28 00:17:10 +00:00
if strings . ContainsAny ( t . Name , ` /\ ` ) {
2016-05-28 00:43:20 +00:00
// We enforce this so that when creating the directory on disk it will
// not have any slashes.
2016-11-01 23:05:34 +00:00
mErr . Errors = append ( mErr . Errors , errors . New ( "Task name cannot include slashes" ) )
2016-05-28 00:17:10 +00:00
}
2016-03-14 22:46:06 +00:00
if t . Driver == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing task driver" ) )
}
2017-08-17 00:54:11 +00:00
if t . KillTimeout < 0 {
2016-03-14 22:46:06 +00:00
mErr . Errors = append ( mErr . Errors , errors . New ( "KillTimeout must be a positive value" ) )
}
2017-08-17 00:54:11 +00:00
if t . ShutdownDelay < 0 {
mErr . Errors = append ( mErr . Errors , errors . New ( "ShutdownDelay must be a positive value" ) )
}
2016-03-14 22:46:06 +00:00
// Validate the resources.
if t . Resources == nil {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing task resources" ) )
2016-10-26 17:36:41 +00:00
} else {
if err := t . Resources . MeetsMinResources ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
2016-03-14 22:46:06 +00:00
2016-10-26 17:36:41 +00:00
// Ensure the task isn't asking for disk resources
2016-08-25 18:53:09 +00:00
if t . Resources . DiskMB > 0 {
mErr . Errors = append ( mErr . Errors , errors . New ( "Task can't ask for disk resources, they have to be specified at the task group level." ) )
}
}
2016-03-14 22:46:06 +00:00
// Validate the log config
if t . LogConfig == nil {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing Log Config" ) )
} else if err := t . LogConfig . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
}
for idx , constr := range t . Constraints {
if err := constr . Validate ( ) ; err != nil {
outer := fmt . Errorf ( "Constraint %d validation failed: %s" , idx + 1 , err )
mErr . Errors = append ( mErr . Errors , outer )
}
2017-03-12 00:23:24 +00:00
switch constr . Operand {
case ConstraintDistinctHosts , ConstraintDistinctProperty :
outer := fmt . Errorf ( "Constraint %d has disallowed Operand at task level: %s" , idx + 1 , constr . Operand )
mErr . Errors = append ( mErr . Errors , outer )
}
2016-03-14 22:46:06 +00:00
}
2016-04-19 02:38:47 +00:00
// Validate Services
if err := validateServices ( t ) ; err != nil {
mErr . Errors = append ( mErr . Errors , err )
2016-03-14 22:46:06 +00:00
}
2016-09-14 22:43:42 +00:00
if t . LogConfig != nil && ephemeralDisk != nil {
2016-03-14 22:46:06 +00:00
logUsage := ( t . LogConfig . MaxFiles * t . LogConfig . MaxFileSizeMB )
2016-09-14 22:43:42 +00:00
if ephemeralDisk . SizeMB <= logUsage {
2016-03-14 22:46:06 +00:00
mErr . Errors = append ( mErr . Errors ,
2016-03-22 17:27:14 +00:00
fmt . Errorf ( "log storage (%d MB) must be less than requested disk capacity (%d MB)" ,
2016-09-14 22:43:42 +00:00
logUsage , ephemeralDisk . SizeMB ) )
2016-03-14 22:46:06 +00:00
}
}
for idx , artifact := range t . Artifacts {
if err := artifact . Validate ( ) ; err != nil {
outer := fmt . Errorf ( "Artifact %d validation failed: %v" , idx + 1 , err )
mErr . Errors = append ( mErr . Errors , outer )
}
}
2016-08-09 22:23:44 +00:00
if t . Vault != nil {
if err := t . Vault . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Vault validation failed: %v" , err ) )
}
}
2016-10-10 22:19:00 +00:00
destinations := make ( map [ string ] int , len ( t . Templates ) )
2016-09-23 22:39:52 +00:00
for idx , tmpl := range t . Templates {
if err := tmpl . Validate ( ) ; err != nil {
outer := fmt . Errorf ( "Template %d validation failed: %s" , idx + 1 , err )
mErr . Errors = append ( mErr . Errors , outer )
}
2016-10-10 22:19:00 +00:00
if other , ok := destinations [ tmpl . DestPath ] ; ok {
outer := fmt . Errorf ( "Template %d has same destination as %d" , idx + 1 , other )
mErr . Errors = append ( mErr . Errors , outer )
} else {
destinations [ tmpl . DestPath ] = idx + 1
}
2016-09-23 22:39:52 +00:00
}
2017-01-26 05:06:16 +00:00
// Validate the dispatch payload block if there
if t . DispatchPayload != nil {
if err := t . DispatchPayload . Validate ( ) ; err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Dispatch Payload validation failed: %v" , err ) )
2016-12-18 23:48:30 +00:00
}
}
2016-03-14 22:46:06 +00:00
return mErr . ErrorOrNil ( )
}
2016-04-19 02:38:47 +00:00
// validateServices takes a task and validates the services within it are valid
// and reference ports that exist.
func validateServices ( t * Task ) error {
var mErr multierror . Error
2018-03-12 18:26:37 +00:00
// Ensure that services don't ask for nonexistent ports and their names are
2016-05-02 20:40:49 +00:00
// unique.
2017-12-08 21:49:57 +00:00
servicePorts := make ( map [ string ] map [ string ] struct { } )
addServicePort := func ( label , service string ) {
if _ , ok := servicePorts [ label ] ; ! ok {
servicePorts [ label ] = map [ string ] struct { } { }
}
servicePorts [ label ] [ service ] = struct { } { }
}
2016-05-02 20:40:49 +00:00
knownServices := make ( map [ string ] struct { } )
2016-06-12 23:36:49 +00:00
for i , service := range t . Services {
2016-04-19 02:38:47 +00:00
if err := service . Validate ( ) ; err != nil {
2016-06-14 01:19:40 +00:00
outer := fmt . Errorf ( "service[%d] %+q validation failed: %s" , i , service . Name , err )
2016-04-19 02:38:47 +00:00
mErr . Errors = append ( mErr . Errors , outer )
}
2017-01-17 20:26:07 +00:00
// Ensure that services with the same name are not being registered for
// the same port
if _ , ok := knownServices [ service . Name + service . PortLabel ] ; ok {
2016-05-02 20:40:49 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "service %q is duplicate" , service . Name ) )
}
2017-01-17 20:26:07 +00:00
knownServices [ service . Name + service . PortLabel ] = struct { } { }
2016-04-19 02:38:47 +00:00
if service . PortLabel != "" {
2017-12-08 06:09:37 +00:00
if service . AddressMode == "driver" {
// Numeric port labels are valid for address_mode=driver
_ , err := strconv . Atoi ( service . PortLabel )
if err != nil {
// Not a numeric port label, add it to list to check
2017-12-08 21:49:57 +00:00
addServicePort ( service . PortLabel , service . Name )
2017-12-08 06:09:37 +00:00
}
2017-12-05 19:39:42 +00:00
} else {
2017-12-08 21:49:57 +00:00
addServicePort ( service . PortLabel , service . Name )
2017-12-05 19:39:42 +00:00
}
2016-04-19 02:38:47 +00:00
}
2016-05-03 20:16:02 +00:00
2017-12-08 21:49:57 +00:00
// Ensure that check names are unique and have valid ports
2016-05-03 20:16:02 +00:00
knownChecks := make ( map [ string ] struct { } )
for _ , check := range service . Checks {
if _ , ok := knownChecks [ check . Name ] ; ok {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "check %q is duplicate" , check . Name ) )
}
knownChecks [ check . Name ] = struct { } { }
2017-12-08 21:49:57 +00:00
if ! check . RequiresPort ( ) {
// No need to continue validating check if it doesn't need a port
continue
}
effectivePort := check . PortLabel
if effectivePort == "" {
// Inherits from service
effectivePort = service . PortLabel
}
if effectivePort == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "check %q is missing a port" , check . Name ) )
continue
}
isNumeric := false
portNumber , err := strconv . Atoi ( effectivePort )
if err == nil {
isNumeric = true
}
// Numeric ports are fine for address_mode = "driver"
if check . AddressMode == "driver" && isNumeric {
if portNumber <= 0 {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "check %q has invalid numeric port %d" , check . Name , portNumber ) )
}
continue
}
if isNumeric {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( ` check %q cannot use a numeric port %d without setting address_mode="driver" ` , check . Name , portNumber ) )
continue
}
// PortLabel must exist, report errors by its parent service
addServicePort ( effectivePort , service . Name )
2016-05-03 20:16:02 +00:00
}
2016-04-19 02:38:47 +00:00
}
// Get the set of port labels.
portLabels := make ( map [ string ] struct { } )
if t . Resources != nil {
for _ , network := range t . Resources . Networks {
2017-06-09 17:29:41 +00:00
ports := network . PortLabels ( )
2017-09-26 22:26:33 +00:00
for portLabel := range ports {
2016-04-19 02:38:47 +00:00
portLabels [ portLabel ] = struct { } { }
}
}
}
2017-12-19 00:18:42 +00:00
// Iterate over a sorted list of keys to make error listings stable
keys := make ( [ ] string , 0 , len ( servicePorts ) )
for p := range servicePorts {
keys = append ( keys , p )
}
sort . Strings ( keys )
2016-04-19 02:38:47 +00:00
// Ensure all ports referenced in services exist.
2017-12-19 00:18:42 +00:00
for _ , servicePort := range keys {
services := servicePorts [ servicePort ]
2016-04-19 02:38:47 +00:00
_ , ok := portLabels [ servicePort ]
if ! ok {
2017-12-08 21:49:57 +00:00
names := make ( [ ] string , 0 , len ( services ) )
for name := range services {
names = append ( names , name )
}
// Keep order deterministic
sort . Strings ( names )
joined := strings . Join ( names , ", " )
2016-04-19 02:38:47 +00:00
err := fmt . Errorf ( "port label %q referenced by services %v does not exist" , servicePort , joined )
mErr . Errors = append ( mErr . Errors , err )
}
}
2017-06-09 17:29:41 +00:00
// Ensure address mode is valid
2016-04-19 02:38:47 +00:00
return mErr . ErrorOrNil ( )
}
2016-09-23 22:39:52 +00:00
// Change modes accepted in Template.ChangeMode. Template.Validate rejects any
// other value with TemplateChangeModeInvalidError.
const (
// TemplateChangeModeNoop marks that no action should be taken if the
// template is re-rendered.
TemplateChangeModeNoop = "noop"
// TemplateChangeModeSignal marks that the task should be signaled if the
// template is re-rendered. Template.Validate requires ChangeSignal to be set
// for this mode and rejects it for templates that have Envvars enabled.
TemplateChangeModeSignal = "signal"
// TemplateChangeModeRestart marks that the task should be restarted if the
// template is re-rendered. DefaultTemplate uses this mode.
TemplateChangeModeRestart = "restart"
)
var (
// TemplateChangeModeInvalidError is the error reported by Template.Validate
// when ChangeMode is not one of the TemplateChangeMode* constants.
TemplateChangeModeInvalidError = errors . New ( "Invalid change mode. Must be one of the following: noop, signal, restart" )
)
// Template represents a template configuration to be rendered for a given
// task. The constraints mentioned on the fields are enforced by Validate.
type Template struct {
2016-10-11 19:31:40 +00:00
// SourcePath is the path to the template to be rendered. Either SourcePath
// or EmbeddedTmpl must be set.
2017-02-22 20:30:05 +00:00
SourcePath string
2016-09-23 22:39:52 +00:00
// DestPath is the path to where the template should be rendered. It must be
// set and must not escape the allocation directory.
2017-02-22 20:30:05 +00:00
DestPath string
2016-09-23 22:39:52 +00:00
2016-10-03 19:42:18 +00:00
// EmbeddedTmpl stores the raw template. This is useful for smaller templates
2018-01-08 11:30:00 +00:00
// where they are embedded in the job file rather than sent as an artifact.
2017-02-22 20:30:05 +00:00
EmbeddedTmpl string
2016-09-23 22:39:52 +00:00
// ChangeMode indicates what should be done if the template is re-rendered.
// It must be one of the TemplateChangeMode* constants.
2017-02-22 20:30:05 +00:00
ChangeMode string
2016-09-23 22:39:52 +00:00
2016-10-03 19:42:18 +00:00
// ChangeSignal is the signal that should be sent if the change mode
2016-09-23 22:39:52 +00:00
// requires it. Canonicalize upper-cases a non-empty value.
2017-02-22 20:30:05 +00:00
ChangeSignal string
2016-09-23 22:39:52 +00:00
// Splay is used to avoid coordinated restarts of processes by applying a
// random wait between 0 and the given splay value before signalling the
// application of a change. Negative values are rejected.
2017-02-22 20:30:05 +00:00
Splay time . Duration
2017-02-01 04:00:33 +00:00
// Perms is the permission the file should be written out with. When set it
// must parse as an octal number that fits in 12 bits.
2017-02-22 20:30:05 +00:00
Perms string
2017-02-21 00:43:28 +00:00
// LeftDelim and RightDelim are optional configurations to control what
// delimiter is utilized when parsing the template.
2017-02-22 20:30:05 +00:00
LeftDelim string
RightDelim string
2017-05-13 00:07:54 +00:00
// Envvars enables exposing the template as environment variables
// instead of as a file. The template must be of the form:
//
// VAR_NAME_1={{ key service/my-key }}
// VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
//
// Lines will be split on the initial "=" with the first part being the
// key name and the second part the value.
// Empty lines and lines starting with # will be ignored, but to avoid
// escaping issues #s within lines will not be treated as comments.
// Envvars cannot be combined with the signal change mode.
Envvars bool
2017-08-01 21:14:08 +00:00
// VaultGrace is the grace duration between lease renewal and reacquiring a
// secret. If the lease of a secret is less than the grace, a new secret is
// acquired. Negative values are rejected.
VaultGrace time . Duration
2016-09-23 22:39:52 +00:00
}
2016-09-26 22:23:26 +00:00
// DefaultTemplate returns a default template.
func DefaultTemplate ( ) * Template {
return & Template {
ChangeMode : TemplateChangeModeRestart ,
Splay : 5 * time . Second ,
2017-02-01 04:00:33 +00:00
Perms : "0644" ,
2016-09-26 22:23:26 +00:00
}
}
2016-09-23 22:39:52 +00:00
// Copy returns a copy of the template. All fields are plain values, so a
// struct assignment is sufficient. A nil receiver yields nil.
func (t *Template) Copy() *Template {
	if t == nil {
		return nil
	}
	dup := *t
	return &dup
}
2016-10-25 18:09:22 +00:00
// Canonicalize normalizes the template in place by upper-casing a non-empty
// ChangeSignal.
func (t *Template) Canonicalize() {
	if len(t.ChangeSignal) == 0 {
		return
	}
	t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
}
2016-09-23 22:39:52 +00:00
func ( t * Template ) Validate ( ) error {
var mErr multierror . Error
// Verify we have something to render
2016-10-03 19:42:18 +00:00
if t . SourcePath == "" && t . EmbeddedTmpl == "" {
2016-10-05 20:41:29 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Must specify a source path or have an embedded template" ) )
2016-09-23 22:39:52 +00:00
}
// Verify we can render somewhere
if t . DestPath == "" {
multierror . Append ( & mErr , fmt . Errorf ( "Must specify a destination for the template" ) )
}
// Verify the destination doesn't escape
2016-12-18 23:48:30 +00:00
escaped , err := PathEscapesAllocDir ( "task" , t . DestPath )
2016-09-23 22:39:52 +00:00
if err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "invalid destination path: %v" , err ) )
} else if escaped {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "destination escapes allocation directory" ) )
}
// Verify a proper change mode
switch t . ChangeMode {
case TemplateChangeModeNoop , TemplateChangeModeRestart :
case TemplateChangeModeSignal :
2016-10-03 19:42:18 +00:00
if t . ChangeSignal == "" {
2016-09-23 22:39:52 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Must specify signal value when change mode is signal" ) )
}
2017-07-25 23:34:41 +00:00
if t . Envvars {
multierror . Append ( & mErr , fmt . Errorf ( "cannot use signals with env var templates" ) )
}
2016-09-23 22:39:52 +00:00
default :
multierror . Append ( & mErr , TemplateChangeModeInvalidError )
}
// Verify the splay is positive
if t . Splay < 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Must specify positive splay value" ) )
}
2017-02-01 04:00:33 +00:00
// Verify the permissions
if t . Perms != "" {
if _ , err := strconv . ParseUint ( t . Perms , 8 , 12 ) ; err != nil {
multierror . Append ( & mErr , fmt . Errorf ( "Failed to parse %q as octal: %v" , t . Perms , err ) )
}
}
2017-08-01 21:14:08 +00:00
if t . VaultGrace . Nanoseconds ( ) < 0 {
multierror . Append ( & mErr , fmt . Errorf ( "Vault grace must be greater than zero: %v < 0" , t . VaultGrace ) )
}
2016-09-23 22:39:52 +00:00
return mErr . ErrorOrNil ( )
}
2015-11-12 23:28:22 +00:00
// Set of possible states for a task, as stored in TaskState.State.
const (
TaskStatePending = "pending" // The task is waiting to be run.
TaskStateRunning = "running" // The task is currently running.
TaskStateDead = "dead" // Terminal state of task; required by TaskState.Successful.
)
// TaskState tracks the current state of a task and the events that caused
2016-05-15 16:41:34 +00:00
// state transitions.
2015-11-12 23:28:22 +00:00
type TaskState struct {
// State is the current state of the task (one of the TaskState* constants).
State string
2016-10-21 00:27:16 +00:00
// Failed marks a task as having failed.
Failed bool
2017-07-03 04:49:56 +00:00
// Restarts is the number of times the task has restarted.
Restarts uint64
2017-07-07 06:04:32 +00:00
// LastRestart is the time the task last restarted. It is updated each time the
// task restarts.
LastRestart time . Time
2017-03-31 22:57:10 +00:00
// StartedAt is the time the task is started. It is updated each time the
// task starts.
StartedAt time . Time
2018-03-11 19:06:05 +00:00
// FinishedAt is the time at which the task transitioned to dead and will
2017-03-31 22:57:10 +00:00
// not be started again.
FinishedAt time . Time
2016-05-15 16:41:34 +00:00
// Events is the series of task events that transitioned the state of the
2015-11-12 23:28:22 +00:00
// task. Successful inspects the last entry.
Events [ ] * TaskEvent
}
2016-02-02 01:47:53 +00:00
func ( ts * TaskState ) Copy ( ) * TaskState {
2016-02-11 01:54:43 +00:00
if ts == nil {
return nil
}
2016-02-02 01:47:53 +00:00
copy := new ( TaskState )
2017-07-03 04:49:56 +00:00
* copy = * ts
2016-03-21 23:29:21 +00:00
if ts . Events != nil {
copy . Events = make ( [ ] * TaskEvent , len ( ts . Events ) )
for i , e := range ts . Events {
copy . Events [ i ] = e . Copy ( )
}
2016-02-02 01:47:53 +00:00
}
return copy
}
2018-01-04 22:20:32 +00:00
// Successful returns whether a task finished successfully. This doesn't really
// have meaning on a non-batch allocation because a service and system
// allocation should not finish.
2016-05-25 00:23:18 +00:00
func ( ts * TaskState ) Successful ( ) bool {
l := len ( ts . Events )
if ts . State != TaskStateDead || l == 0 {
return false
}
e := ts . Events [ l - 1 ]
if e . Type != TaskTerminated {
return false
}
return e . ExitCode == 0
}
2015-11-12 23:28:22 +00:00
// Task event types, used as the value of TaskEvent.Type.
const (
2016-10-10 21:49:37 +00:00
// TaskSetupFailure indicates that the task could not be started due to a
// setup failure.
TaskSetupFailure = "Setup Failure"
2016-03-24 17:55:14 +00:00
// TaskDriverFailure indicates that the task could not be started due to a
2015-11-12 23:28:22 +00:00
// failure in the driver.
2015-11-14 22:13:32 +00:00
TaskDriverFailure = "Driver Failure"
2015-11-12 23:28:22 +00:00
2016-03-24 17:55:14 +00:00
// TaskReceived signals that the task has been pulled by the client at the
2016-02-19 22:49:43 +00:00
// given timestamp.
TaskReceived = "Received"
2016-03-24 17:55:14 +00:00
// TaskFailedValidation indicates the task was invalid and as such was not
// run.
TaskFailedValidation = "Failed Validation"
// TaskStarted signals that the task was started and its timestamp can be
2015-11-12 23:28:22 +00:00
// used to determine the running length of the task.
2015-11-14 22:13:32 +00:00
TaskStarted = "Started"
2015-11-12 23:28:22 +00:00
2016-03-24 17:55:14 +00:00
// TaskTerminated indicates that the task was started and exited.
2015-11-14 22:13:32 +00:00
TaskTerminated = "Terminated"
2015-11-12 23:28:22 +00:00
2016-07-21 22:49:54 +00:00
// TaskKilling indicates a kill signal has been sent to the task.
TaskKilling = "Killing"
2016-03-24 17:55:14 +00:00
// TaskKilled indicates a user has killed the task.
2015-11-14 22:13:32 +00:00
TaskKilled = "Killed"
2016-02-29 00:56:05 +00:00
2016-10-05 22:11:09 +00:00
// TaskRestarting indicates that task terminated and is being restarted.
2016-02-29 00:56:05 +00:00
TaskRestarting = "Restarting"
// TaskNotRestarting indicates that the task has failed and is not being
// restarted because it has exceeded its restart policy.
2016-03-24 22:43:55 +00:00
TaskNotRestarting = "Not Restarting"
2016-03-15 17:53:20 +00:00
2016-10-05 22:11:09 +00:00
// TaskRestartSignal indicates that the task has been signalled to be
// restarted.
TaskRestartSignal = "Restart Signaled"
// TaskSignaling indicates that the task is being signalled.
TaskSignaling = "Signaling"
2016-03-24 17:55:14 +00:00
// TaskDownloadingArtifacts means the task is downloading the artifacts
2016-03-15 17:53:20 +00:00
// specified in the task.
TaskDownloadingArtifacts = "Downloading Artifacts"
// TaskArtifactDownloadFailed indicates that downloading the artifacts
// failed.
TaskArtifactDownloadFailed = "Failed Artifact Download"
2016-08-11 07:20:53 +00:00
2017-02-27 19:40:24 +00:00
// TaskBuildingTaskDir indicates that the task directory/chroot is being
2017-02-10 01:40:13 +00:00
// built.
TaskBuildingTaskDir = "Building Task Directory"
// TaskSetup indicates the task runner is setting up the task environment.
TaskSetup = "Task Setup"
2016-08-11 07:20:53 +00:00
// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
// exceeded the requested disk resources.
TaskDiskExceeded = "Disk Resources Exceeded"
// TaskSiblingFailed indicates that a sibling task in the task group has
// failed.
2017-02-11 01:55:19 +00:00
TaskSiblingFailed = "Sibling Task Failed"
2016-11-30 00:39:36 +00:00
2016-12-20 19:51:09 +00:00
// TaskDriverMessage is an informational event message emitted by
// drivers such as when they're performing a long running action like
// downloading an image.
TaskDriverMessage = "Driver"
2017-02-11 01:55:19 +00:00
// TaskLeaderDead indicates that the leader task within the task group has
// finished.
TaskLeaderDead = "Leader Task Dead"
2015-11-12 23:28:22 +00:00
)
// TaskEvent is an event that affects the state of a task and contains meta-data
// appropriate to the event's type. The Set* methods keep the deprecated typed
// fields and the Details map in sync.
type TaskEvent struct {
2015-11-14 22:13:32 +00:00
Type string
2015-11-12 23:28:22 +00:00
Time int64 // Unix Nanosecond timestamp
2017-11-03 14:34:30 +00:00
Message string // A possible message explaining the termination of the task.
// DisplayMessage is a human friendly message about the event. It is filled
// in by PopulateEventDisplayMessage when empty.
DisplayMessage string
// Details is a map with annotated info about the event, keyed by the names
// given in the deprecation notes below.
Details map [ string ] string
// DEPRECATION NOTICE: The following fields are deprecated and will be removed
// in a future release. Field values are available in the Details map.
// FailsTask marks whether this event fails the task.
// Deprecated, use Details["fails_task"] to access this.
2016-10-21 00:27:16 +00:00
FailsTask bool
2016-03-24 22:43:55 +00:00
// Restart fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["restart_reason"] to access this.
2016-03-24 22:43:55 +00:00
RestartReason string
2016-10-10 21:49:37 +00:00
// Setup Failure fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["setup_error"] to access this.
2016-10-10 21:49:37 +00:00
SetupError string
2015-11-12 23:28:22 +00:00
// Driver Failure fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["driver_error"] to access this.
2016-05-15 16:41:34 +00:00
DriverError string // A driver error occurred while starting the task.
2015-11-12 23:28:22 +00:00
// Task Terminated Fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["exit_code"] to access this.
ExitCode int // The exit code of the task.
// Deprecated, use Details["signal"] to access this.
Signal int // The signal that terminated the task.
2015-11-14 06:07:13 +00:00
2016-07-21 22:49:54 +00:00
// Killing fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["kill_timeout"] to access this.
2016-07-21 22:49:54 +00:00
KillTimeout time . Duration
2015-11-14 06:07:13 +00:00
// Task Killed Fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["kill_error"] to access this.
2015-11-14 06:07:13 +00:00
KillError string // Error killing the task.
2016-02-29 00:56:05 +00:00
2016-10-05 20:41:29 +00:00
// KillReason is the reason the task was killed.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["kill_reason"] to access this.
2016-10-05 20:41:29 +00:00
KillReason string
2016-02-29 00:56:05 +00:00
// TaskRestarting fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["start_delay"] to access this.
2016-02-29 00:56:05 +00:00
StartDelay int64 // The sleep period before restarting the task, in nanoseconds.
2016-03-15 17:53:20 +00:00
// Artifact Download fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["download_error"] to access this.
2016-03-15 17:53:20 +00:00
DownloadError string // Error downloading artifacts
2016-03-24 17:55:14 +00:00
// Validation fields.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["validation_error"] to access this.
2016-03-24 17:55:14 +00:00
ValidationError string // Validation error
2016-08-11 07:20:53 +00:00
// The maximum allowed task disk size.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["disk_limit"] to access this.
2016-08-11 07:20:53 +00:00
DiskLimit int64
// Name of the sibling task that caused termination of the task that
// the TaskEvent refers to.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["failed_sibling"] to access this.
2016-08-11 07:20:53 +00:00
FailedSibling string
2016-09-14 20:30:01 +00:00
2016-09-15 01:27:13 +00:00
// VaultError is the error from token renewal.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["vault_renewal_error"] to access this.
2016-09-15 01:27:13 +00:00
VaultError string
2016-10-05 20:41:29 +00:00
// TaskSignalReason indicates the reason the task is being signalled.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["task_signal_reason"] to access this.
2016-10-05 20:41:29 +00:00
TaskSignalReason string
// TaskSignal is the signal that was sent to the task.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["task_signal"] to access this.
2016-10-05 20:41:29 +00:00
TaskSignal string
2016-11-30 00:39:36 +00:00
2016-12-20 19:51:09 +00:00
// DriverMessage indicates a driver action being taken.
2017-11-03 14:34:30 +00:00
// Deprecated, use Details["driver_message"] to access this.
2016-12-20 19:51:09 +00:00
DriverMessage string
2017-08-08 04:26:04 +00:00
// GenericSource is the source of a message.
2017-11-03 14:34:30 +00:00
// Deprecated, is redundant with event type.
2017-08-08 04:26:04 +00:00
GenericSource string
2017-10-31 15:35:14 +00:00
}
func ( event * TaskEvent ) PopulateEventDisplayMessage ( ) {
// Build up the description based on the event type.
2017-11-03 14:34:30 +00:00
if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why?
return
}
if event . DisplayMessage != "" {
2017-10-31 15:35:14 +00:00
return
}
2017-11-03 14:34:30 +00:00
2017-10-31 15:35:14 +00:00
var desc string
switch event . Type {
case TaskSetup :
desc = event . Message
case TaskStarted :
desc = "Task started by client"
case TaskReceived :
desc = "Task received by client"
case TaskFailedValidation :
if event . ValidationError != "" {
desc = event . ValidationError
} else {
desc = "Validation of task failed"
}
case TaskSetupFailure :
if event . SetupError != "" {
desc = event . SetupError
} else {
desc = "Task setup failed"
}
case TaskDriverFailure :
if event . DriverError != "" {
desc = event . DriverError
} else {
desc = "Failed to start task"
}
case TaskDownloadingArtifacts :
desc = "Client is downloading artifacts"
case TaskArtifactDownloadFailed :
if event . DownloadError != "" {
desc = event . DownloadError
} else {
desc = "Failed to download artifacts"
}
case TaskKilling :
if event . KillReason != "" {
2018-03-13 22:09:03 +00:00
desc = event . KillReason
2017-10-31 15:35:14 +00:00
} else if event . KillTimeout != 0 {
desc = fmt . Sprintf ( "Sent interrupt. Waiting %v before force killing" , event . KillTimeout )
} else {
desc = "Sent interrupt"
}
case TaskKilled :
if event . KillError != "" {
desc = event . KillError
} else {
desc = "Task successfully killed"
}
case TaskTerminated :
var parts [ ] string
parts = append ( parts , fmt . Sprintf ( "Exit Code: %d" , event . ExitCode ) )
if event . Signal != 0 {
parts = append ( parts , fmt . Sprintf ( "Signal: %d" , event . Signal ) )
}
if event . Message != "" {
parts = append ( parts , fmt . Sprintf ( "Exit Message: %q" , event . Message ) )
}
desc = strings . Join ( parts , ", " )
case TaskRestarting :
in := fmt . Sprintf ( "Task restarting in %v" , time . Duration ( event . StartDelay ) )
if event . RestartReason != "" && event . RestartReason != ReasonWithinPolicy {
desc = fmt . Sprintf ( "%s - %s" , event . RestartReason , in )
} else {
desc = in
}
case TaskNotRestarting :
if event . RestartReason != "" {
desc = event . RestartReason
} else {
desc = "Task exceeded restart policy"
}
case TaskSiblingFailed :
if event . FailedSibling != "" {
desc = fmt . Sprintf ( "Task's sibling %q failed" , event . FailedSibling )
} else {
desc = "Task's sibling failed"
}
case TaskSignaling :
sig := event . TaskSignal
reason := event . TaskSignalReason
if sig == "" && reason == "" {
desc = "Task being sent a signal"
} else if sig == "" {
desc = reason
} else if reason == "" {
desc = fmt . Sprintf ( "Task being sent signal %v" , sig )
} else {
desc = fmt . Sprintf ( "Task being sent signal %v: %v" , sig , reason )
}
case TaskRestartSignal :
if event . RestartReason != "" {
desc = event . RestartReason
} else {
desc = "Task signaled to restart"
}
case TaskDriverMessage :
desc = event . DriverMessage
case TaskLeaderDead :
desc = "Leader Task in Group dead"
default :
2017-11-13 17:14:57 +00:00
desc = event . Message
2017-10-31 15:35:14 +00:00
}
event . DisplayMessage = desc
2016-02-29 00:56:05 +00:00
}
func ( te * TaskEvent ) GoString ( ) string {
2017-09-13 06:15:46 +00:00
return fmt . Sprintf ( "%v - %v" , te . Time , te . Type )
2015-11-14 06:07:13 +00:00
}
2017-02-10 01:40:13 +00:00
// SetMessage sets the message of TaskEvent
func ( te * TaskEvent ) SetMessage ( msg string ) * TaskEvent {
te . Message = msg
2017-10-31 15:35:14 +00:00
te . Details [ "message" ] = msg
2017-02-10 01:40:13 +00:00
return te
}
2016-02-02 01:47:53 +00:00
func ( te * TaskEvent ) Copy ( ) * TaskEvent {
2016-02-11 01:54:43 +00:00
if te == nil {
return nil
}
2016-02-02 01:47:53 +00:00
copy := new ( TaskEvent )
* copy = * te
return copy
}
2015-11-14 22:13:32 +00:00
func NewTaskEvent ( event string ) * TaskEvent {
2015-11-14 06:07:13 +00:00
return & TaskEvent {
2017-10-31 15:35:14 +00:00
Type : event ,
Time : time . Now ( ) . UnixNano ( ) ,
Details : make ( map [ string ] string ) ,
2015-11-14 06:07:13 +00:00
}
}
2017-08-07 21:13:05 +00:00
// SetSetupError is used to store an error that occurred while setting up the
2016-10-18 18:23:27 +00:00
// task
2016-10-10 21:49:37 +00:00
func ( e * TaskEvent ) SetSetupError ( err error ) * TaskEvent {
if err != nil {
e . SetupError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "setup_error" ] = err . Error ( )
2016-10-10 21:49:37 +00:00
}
return e
}
2016-10-21 00:27:16 +00:00
func ( e * TaskEvent ) SetFailsTask ( ) * TaskEvent {
e . FailsTask = true
2017-10-31 15:35:14 +00:00
e . Details [ "fails_task" ] = "true"
2016-10-21 00:27:16 +00:00
return e
}
2015-11-14 06:07:13 +00:00
func ( e * TaskEvent ) SetDriverError ( err error ) * TaskEvent {
if err != nil {
e . DriverError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "driver_error" ] = err . Error ( )
2015-11-14 06:07:13 +00:00
}
return e
}
func ( e * TaskEvent ) SetExitCode ( c int ) * TaskEvent {
e . ExitCode = c
2017-10-31 15:35:14 +00:00
e . Details [ "exit_code" ] = fmt . Sprintf ( "%d" , c )
2015-11-14 06:07:13 +00:00
return e
}
func ( e * TaskEvent ) SetSignal ( s int ) * TaskEvent {
e . Signal = s
2017-10-31 15:35:14 +00:00
e . Details [ "signal" ] = fmt . Sprintf ( "%d" , s )
2015-11-14 06:07:13 +00:00
return e
}
2015-11-16 22:46:18 +00:00
func ( e * TaskEvent ) SetExitMessage ( err error ) * TaskEvent {
if err != nil {
e . Message = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "exit_message" ] = err . Error ( )
2015-11-16 22:46:18 +00:00
}
2015-11-14 06:07:13 +00:00
return e
}
func ( e * TaskEvent ) SetKillError ( err error ) * TaskEvent {
if err != nil {
e . KillError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "kill_error" ] = err . Error ( )
2015-11-14 06:07:13 +00:00
}
return e
2015-11-12 23:28:22 +00:00
}
2016-10-05 20:41:29 +00:00
func ( e * TaskEvent ) SetKillReason ( r string ) * TaskEvent {
e . KillReason = r
2017-10-31 15:35:14 +00:00
e . Details [ "kill_reason" ] = r
2016-10-05 20:41:29 +00:00
return e
}
2016-02-29 00:56:05 +00:00
func ( e * TaskEvent ) SetRestartDelay ( delay time . Duration ) * TaskEvent {
e . StartDelay = int64 ( delay )
2017-10-31 15:35:14 +00:00
e . Details [ "start_delay" ] = fmt . Sprintf ( "%d" , delay )
2016-02-29 00:56:05 +00:00
return e
}
2016-03-24 22:43:55 +00:00
func ( e * TaskEvent ) SetRestartReason ( reason string ) * TaskEvent {
e . RestartReason = reason
2017-10-31 15:35:14 +00:00
e . Details [ "restart_reason" ] = reason
2016-03-24 22:43:55 +00:00
return e
}
2016-10-05 20:41:29 +00:00
func ( e * TaskEvent ) SetTaskSignalReason ( r string ) * TaskEvent {
e . TaskSignalReason = r
2017-10-31 15:35:14 +00:00
e . Details [ "task_signal_reason" ] = r
2016-10-05 20:41:29 +00:00
return e
}
func ( e * TaskEvent ) SetTaskSignal ( s os . Signal ) * TaskEvent {
e . TaskSignal = s . String ( )
2017-10-31 15:35:14 +00:00
e . Details [ "task_signal" ] = s . String ( )
2016-10-05 20:41:29 +00:00
return e
}
2016-03-15 17:53:20 +00:00
func ( e * TaskEvent ) SetDownloadError ( err error ) * TaskEvent {
if err != nil {
e . DownloadError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "download_error" ] = err . Error ( )
2016-03-15 17:53:20 +00:00
}
return e
}
2016-03-24 17:55:14 +00:00
func ( e * TaskEvent ) SetValidationError ( err error ) * TaskEvent {
if err != nil {
e . ValidationError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "validation_error" ] = err . Error ( )
2016-03-24 17:55:14 +00:00
}
return e
}
2016-07-21 22:49:54 +00:00
func ( e * TaskEvent ) SetKillTimeout ( timeout time . Duration ) * TaskEvent {
e . KillTimeout = timeout
2017-10-31 15:35:14 +00:00
e . Details [ "kill_timeout" ] = timeout . String ( )
2016-07-21 22:49:54 +00:00
return e
}
2016-08-11 07:20:53 +00:00
func ( e * TaskEvent ) SetDiskLimit ( limit int64 ) * TaskEvent {
e . DiskLimit = limit
2017-10-31 15:35:14 +00:00
e . Details [ "disk_limit" ] = fmt . Sprintf ( "%d" , limit )
2016-08-11 07:20:53 +00:00
return e
}
func ( e * TaskEvent ) SetFailedSibling ( sibling string ) * TaskEvent {
e . FailedSibling = sibling
2017-10-31 15:35:14 +00:00
e . Details [ "failed_sibling" ] = sibling
2016-08-11 07:20:53 +00:00
return e
}
2016-09-14 20:30:01 +00:00
func ( e * TaskEvent ) SetVaultRenewalError ( err error ) * TaskEvent {
if err != nil {
2016-09-15 01:27:13 +00:00
e . VaultError = err . Error ( )
2017-10-31 15:35:14 +00:00
e . Details [ "vault_renewal_error" ] = err . Error ( )
2016-09-14 20:30:01 +00:00
}
return e
}
2016-12-20 19:51:09 +00:00
func ( e * TaskEvent ) SetDriverMessage ( m string ) * TaskEvent {
e . DriverMessage = m
2017-10-31 15:35:14 +00:00
e . Details [ "driver_message" ] = m
2016-11-30 00:39:36 +00:00
return e
}
2016-03-14 22:46:06 +00:00
// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
// GetterSource is the source to download an artifact using go-getter.
// Validate requires it to be set.
2017-02-22 20:30:05 +00:00
GetterSource string
2016-02-02 20:00:26 +00:00
2016-03-14 22:46:06 +00:00
// GetterOptions are options to use when downloading the artifact using
// go-getter. Validate checks a "checksum" option, if present, for the form
// "type:value" with a hex value of the right length for md5, sha1, sha256
// or sha512.
2017-02-22 20:30:05 +00:00
GetterOptions map [ string ] string
2016-03-18 19:01:46 +00:00
2017-07-06 03:44:49 +00:00
// GetterMode is the go-getter.ClientMode for fetching resources.
// Defaults to "any" but can be set to "file" or "dir". The default is
// filled in by Validate when the field is empty.
GetterMode string
2016-03-18 19:01:46 +00:00
// RelativeDest is the download destination given relative to the task's
// directory. Validate rejects destinations that escape the allocation
// directory.
2017-02-22 20:30:05 +00:00
RelativeDest string
2016-03-14 22:46:06 +00:00
}
2016-02-02 20:00:26 +00:00
2016-03-14 22:46:06 +00:00
func ( ta * TaskArtifact ) Copy ( ) * TaskArtifact {
if ta == nil {
return nil
2016-02-05 07:28:01 +00:00
}
2016-03-14 22:46:06 +00:00
nta := new ( TaskArtifact )
* nta = * ta
2017-01-18 23:55:14 +00:00
nta . GetterOptions = helper . CopyMapStringString ( ta . GetterOptions )
2016-03-14 22:46:06 +00:00
return nta
}
2016-02-05 07:28:01 +00:00
2016-03-18 19:01:46 +00:00
// GoString renders the artifact with the %+v verb, i.e. with field names.
func (ta *TaskArtifact) GoString() string {
	repr := fmt.Sprintf("%+v", ta)
	return repr
}
2016-10-03 21:58:44 +00:00
// PathEscapesAllocDir returns if the given path escapes the allocation
// directory. The prefix allows adding a prefix if the path will be joined, for
// example a "task/local" prefix may be provided if the path will be joined
// against that prefix.
//
// The check is purely lexical: prefix and path are joined onto a placeholder
// allocation directory and the cleaned result is compared against it. No
// filesystem access takes place and symlinks are not resolved.
func PathEscapesAllocDir(prefix, path string) (bool, error) {
	// Placeholder allocation directory to resolve the path against.
	alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
	if err != nil {
		return false, err
	}
	abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
	if err != nil {
		return false, err
	}
	rel, err := filepath.Rel(alloc, abs)
	if err != nil {
		return false, err
	}

	// The path escapes only when the first element of rel is "..". A bare
	// string-prefix test would also flag entries inside the allocation
	// directory whose name merely starts with two dots, such as "..data".
	parentPrefix := ".." + string(filepath.Separator)
	return rel == ".." || strings.HasPrefix(rel, parentPrefix), nil
}
func ( ta * TaskArtifact ) Validate ( ) error {
// Verify the source
var mErr multierror . Error
if ta . GetterSource == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "source must be specified" ) )
}
2017-07-06 03:44:49 +00:00
switch ta . GetterMode {
case "" :
// Default to any
ta . GetterMode = GetterModeAny
case GetterModeAny , GetterModeFile , GetterModeDir :
// Ok
default :
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "invalid artifact mode %q; must be one of: %s, %s, %s" ,
ta . GetterMode , GetterModeAny , GetterModeFile , GetterModeDir ) )
}
2016-12-18 23:48:30 +00:00
escaped , err := PathEscapesAllocDir ( "task" , ta . RelativeDest )
2016-09-23 22:39:52 +00:00
if err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "invalid destination path: %v" , err ) )
} else if escaped {
2016-12-19 00:32:14 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "destination escapes allocation directory" ) )
2016-03-15 02:55:30 +00:00
}
2016-03-14 22:46:06 +00:00
// Verify the checksum
if check , ok := ta . GetterOptions [ "checksum" ] ; ok {
check = strings . TrimSpace ( check )
if check == "" {
2016-11-01 23:05:34 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "checksum value cannot be empty" ) )
2016-03-15 22:05:36 +00:00
return mErr . ErrorOrNil ( )
2015-10-11 19:50:16 +00:00
}
2015-11-17 21:36:59 +00:00
2016-03-14 22:46:06 +00:00
parts := strings . Split ( check , ":" )
if l := len ( parts ) ; l != 2 {
2016-03-15 22:05:36 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( ` checksum must be given as "type:value"; got %q ` , check ) )
return mErr . ErrorOrNil ( )
2015-11-17 21:36:59 +00:00
}
2016-02-11 20:30:47 +00:00
2016-03-14 22:46:06 +00:00
checksumVal := parts [ 1 ]
checksumBytes , err := hex . DecodeString ( checksumVal )
if err != nil {
2016-03-15 22:05:36 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "invalid checksum: %v" , err ) )
return mErr . ErrorOrNil ( )
2016-03-14 22:46:06 +00:00
}
checksumType := parts [ 0 ]
expectedLength := 0
switch checksumType {
case "md5" :
expectedLength = md5 . Size
case "sha1" :
expectedLength = sha1 . Size
case "sha256" :
expectedLength = sha256 . Size
case "sha512" :
expectedLength = sha512 . Size
default :
2016-03-15 22:05:36 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "unsupported checksum type: %s" , checksumType ) )
return mErr . ErrorOrNil ( )
2016-03-14 22:46:06 +00:00
}
if len ( checksumBytes ) != expectedLength {
2016-03-15 22:05:36 +00:00
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "invalid %s checksum: %v" , checksumType , checksumVal ) )
return mErr . ErrorOrNil ( )
2016-02-12 06:33:41 +00:00
}
2016-02-11 20:30:47 +00:00
}
2016-03-14 22:46:06 +00:00
2016-03-15 22:05:36 +00:00
return mErr . ErrorOrNil ( )
2015-09-15 17:46:10 +00:00
}
2015-10-26 20:47:56 +00:00
// Special constraint operands that Constraint.Validate gives dedicated
// handling.
const (
2017-03-07 22:20:02 +00:00
// ConstraintDistinctProperty: a non-empty RTarget must parse as a uint64.
ConstraintDistinctProperty = "distinct_property"
// ConstraintDistinctHosts: the only operand that does not require an LTarget.
ConstraintDistinctHosts = "distinct_hosts"
// ConstraintRegex: RTarget must compile as a regular expression.
ConstraintRegex = "regexp"
// ConstraintVersion: RTarget must parse as a version constraint.
ConstraintVersion = "version"
// ConstraintSetContains: RTarget must not be empty.
ConstraintSetContains = "set_contains"
2015-10-26 20:47:56 +00:00
)
2015-10-27 21:31:14 +00:00
// Constraint is used to restrict placement options.
2015-07-03 23:57:48 +00:00
type Constraint struct {
LTarget string // Left-hand target
RTarget string // Right-hand target
Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
2016-01-26 21:45:03 +00:00
str string // Memoized result of String; computed on first call and ignored by Equal
2015-06-07 18:18:59 +00:00
}
2016-09-01 21:23:40 +00:00
// Equal checks if two constraints are equal. Constraints are equal when their
// LTarget, RTarget and Operand match; the memoized string is not compared.
// Two nil constraints are equal, and a nil constraint never equals a non-nil
// one.
func (c *Constraint) Equal(o *Constraint) bool {
	// Guard against nil on either side instead of dereferencing it.
	if c == nil || o == nil {
		return c == o
	}
	return c.LTarget == o.LTarget &&
		c.RTarget == o.RTarget &&
		c.Operand == o.Operand
}
2016-02-11 01:54:43 +00:00
// Copy returns a newly allocated Constraint holding the same field values as
// c. A nil receiver yields nil.
func (c *Constraint) Copy() *Constraint {
	if c == nil {
		return nil
	}
	dup := *c
	return &dup
}
2015-08-14 04:46:33 +00:00
func ( c * Constraint ) String ( ) string {
2016-01-26 21:45:03 +00:00
if c . str != "" {
return c . str
}
c . str = fmt . Sprintf ( "%s %s %s" , c . LTarget , c . Operand , c . RTarget )
return c . str
2015-08-14 04:46:33 +00:00
}
2015-10-11 19:50:16 +00:00
func ( c * Constraint ) Validate ( ) error {
var mErr multierror . Error
if c . Operand == "" {
mErr . Errors = append ( mErr . Errors , errors . New ( "Missing constraint operand" ) )
}
2017-07-31 23:44:17 +00:00
// requireLtarget specifies whether the constraint requires an LTarget to be
// provided.
requireLtarget := true
2015-10-11 19:50:16 +00:00
// Perform additional validation based on operand
switch c . Operand {
2017-07-31 23:44:17 +00:00
case ConstraintDistinctHosts :
requireLtarget = false
case ConstraintSetContains :
if c . RTarget == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Set contains constraint requires an RTarget" ) )
}
2015-10-26 20:47:56 +00:00
case ConstraintRegex :
2015-10-11 19:50:16 +00:00
if _ , err := regexp . Compile ( c . RTarget ) ; err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Regular expression failed to compile: %v" , err ) )
}
2015-10-26 20:47:56 +00:00
case ConstraintVersion :
2015-10-11 19:50:16 +00:00
if _ , err := version . NewConstraint ( c . RTarget ) ; err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Version constraint is invalid: %v" , err ) )
}
2017-07-31 23:44:17 +00:00
case ConstraintDistinctProperty :
// If a count is set, make sure it is convertible to a uint64
if c . RTarget != "" {
count , err := strconv . ParseUint ( c . RTarget , 10 , 64 )
if err != nil {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Failed to convert RTarget %q to uint64: %v" , c . RTarget , err ) )
} else if count < 1 {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Distinct Property must have an allowed count of 1 or greater: %d < 1" , count ) )
}
}
case "=" , "==" , "is" , "!=" , "not" , "<" , "<=" , ">" , ">=" :
if c . RTarget == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Operator %q requires an RTarget" , c . Operand ) )
}
default :
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "Unknown constraint type %q" , c . Operand ) )
2015-10-11 19:50:16 +00:00
}
2017-07-31 23:44:17 +00:00
// Ensure we have an LTarget for the constraints that need one
if requireLtarget && c . LTarget == "" {
mErr . Errors = append ( mErr . Errors , fmt . Errorf ( "No LTarget provided but is required by constraint" ) )
}
2015-10-11 19:50:16 +00:00
return mErr . ErrorOrNil ( )
}
2016-09-14 22:43:42 +00:00
// EphemeralDisk is an ephemeral disk object.
type EphemeralDisk struct {
	// Sticky indicates whether the allocation is sticky to a node.
	Sticky bool

	// SizeMB is the size of the local disk.
	SizeMB int

	// Migrate determines if the Nomad client should migrate the allocation
	// dir for sticky allocations.
	Migrate bool
}
2016-09-14 22:43:42 +00:00
// DefaultEphemeralDisk returns a EphemeralDisk with default configurations
func DefaultEphemeralDisk ( ) * EphemeralDisk {
return & EphemeralDisk {
SizeMB : 300 ,
2016-08-24 18:51:15 +00:00
}
}
2016-09-14 22:43:42 +00:00
// Validate validates EphemeralDisk
func ( d * EphemeralDisk ) Validate ( ) error {
if d . SizeMB < 10 {
return fmt . Errorf ( "minimum DiskMB value is 10; got %d" , d . SizeMB )
2016-08-24 18:51:15 +00:00
}
return nil
}
2016-09-14 22:43:42 +00:00
// Copy copies the EphemeralDisk struct and returns a new one
func ( d * EphemeralDisk ) Copy ( ) * EphemeralDisk {
ld := new ( EphemeralDisk )
2016-08-26 19:24:47 +00:00
* ld = * d
return ld
}
2018-04-03 21:29:22 +00:00
var (
// VaultUnrecoverableError is a compiled pattern for text of the form "Code:"
// followed by whitespace and one of 400, 403 or 404. It matches unrecoverable
// errors returned by a Vault server.
VaultUnrecoverableError = regexp . MustCompile ( ` Code:\s+40(0|3|4) ` )
)
2016-10-11 22:25:49 +00:00
// Change modes describing what happens to a task when a new Vault token is
// retrieved.
const (
	// VaultChangeModeNoop takes no action when a new token is retrieved.
	VaultChangeModeNoop = "noop"

	// VaultChangeModeSignal signals the task when a new token is retrieved.
	VaultChangeModeSignal = "signal"

	// VaultChangeModeRestart restarts the task when a new token is retrieved.
	VaultChangeModeRestart = "restart"
)
2017-08-07 21:13:05 +00:00
// Vault stores the set of permissions a task needs access to from Vault.
type Vault struct {
	// Policies is the set of policies that the task needs access to.
	Policies []string

	// Env marks whether the Vault Token should be exposed as an environment
	// variable.
	Env bool

	// ChangeMode is used to configure the task's behavior when the Vault
	// token changes because the original token could not be renewed in time.
	ChangeMode string

	// ChangeSignal is the signal sent to the task when a new token is
	// retrieved. This is only valid when using the signal change mode.
	ChangeSignal string
}
2016-10-17 18:41:22 +00:00
// DefaultVaultBlock returns a Vault block with Env enabled and the restart
// change mode selected; the remaining fields are left at their zero values.
func DefaultVaultBlock() *Vault {
	block := new(Vault)
	block.Env = true
	block.ChangeMode = VaultChangeModeRestart
	return block
}
2016-08-09 22:23:44 +00:00
// Copy returns a copy of this Vault block. A nil receiver yields nil. The
// Policies slice is duplicated so that modifying the copy's policies cannot
// alter the original; a nil Policies slice stays nil.
func (v *Vault) Copy() *Vault {
	if v == nil {
		return nil
	}

	nv := new(Vault)
	*nv = *v

	// The struct assignment above only copies the slice header, which would
	// leave both blocks sharing one backing array.
	if v.Policies != nil {
		nv.Policies = make([]string, len(v.Policies))
		copy(nv.Policies, v.Policies)
	}
	return nv
}
2016-10-25 18:09:22 +00:00
// Canonicalize upper-cases ChangeSignal in place. An empty signal remains
// empty.
func (v *Vault) Canonicalize() {
	v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
}
2016-08-09 22:23:44 +00:00
// Validate returns if the Vault block is valid.
func ( v * Vault ) Validate ( ) error {
if v == nil {
return nil
}
2017-02-13 18:51:29 +00:00
var mErr multierror . Error
2016-08-09 22:23:44 +00:00
if len ( v . Policies ) == 0 {
2017-02-13 18:51:29 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Policy list cannot be empty" ) )
}
for _ , p := range v . Policies {
if p == "root" {
2017-08-07 21:13:05 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Can not specify \"root\" policy" ) )
2017-02-13 18:51:29 +00:00
}
2016-08-09 22:23:44 +00:00
}
2016-10-11 22:25:49 +00:00
switch v . ChangeMode {
case VaultChangeModeSignal :
if v . ChangeSignal == "" {
2017-02-13 18:51:29 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Signal must be specified when using change mode %q" , VaultChangeModeSignal ) )
2016-10-11 22:25:49 +00:00
}
case VaultChangeModeNoop , VaultChangeModeRestart :
default :
2017-02-13 18:51:29 +00:00
multierror . Append ( & mErr , fmt . Errorf ( "Unknown change mode %q" , v . ChangeMode ) )
2016-10-11 22:25:49 +00:00
}
2017-02-13 18:51:29 +00:00
return mErr . ErrorOrNil ( )
2016-08-09 22:23:44 +00:00
}
2017-04-21 23:37:10 +00:00
const (
	// DeploymentStatuses are the various states a deployment can be in.
	DeploymentStatusRunning    = "running"
	DeploymentStatusPaused     = "paused"
	DeploymentStatusFailed     = "failed"
	DeploymentStatusSuccessful = "successful"
	DeploymentStatusCancelled  = "cancelled"

	// DeploymentStatusDescriptions are the various descriptions of the states
	// a deployment can be in.
	DeploymentStatusDescriptionRunning               = "Deployment is running"
	DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion"
	DeploymentStatusDescriptionPaused                = "Deployment is paused"
	DeploymentStatusDescriptionSuccessful            = "Deployment completed successfully"
	DeploymentStatusDescriptionStoppedJob            = "Cancelled because job is stopped"
	DeploymentStatusDescriptionNewerJob              = "Cancelled due to newer version of job"
	DeploymentStatusDescriptionFailedAllocations     = "Failed due to unhealthy allocations"
	DeploymentStatusDescriptionProgressDeadline      = "Failed due to progress deadline"
	DeploymentStatusDescriptionFailedByUser          = "Deployment marked as failed"
)
2017-06-28 19:58:05 +00:00
// DeploymentStatusDescriptionRollback builds the status description of a
// deployment that is rolling back to an older job: baseDescription followed
// by a note naming the job version being rolled back to.
func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string {
	const format = "%s - rolling back to job version %d"
	return fmt.Sprintf(format, baseDescription, jobVersion)
}
2017-11-03 20:33:34 +00:00
// DeploymentStatusDescriptionRollbackNoop builds the status description of a
// deployment for which rolling back is not possible because the stable job
// version has the same specification as the current job.
func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string {
	const format = "%s - not rolling back to stable job version %d as current job has same specification"
	return fmt.Sprintf(format, baseDescription, jobVersion)
}
2017-08-12 22:50:51 +00:00
// DeploymentStatusDescriptionNoRollbackTarget builds the status description
// of a deployment when auto-revert is desired but there is no target to roll
// back to.
func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string {
	const format = "%s - no stable job version to auto revert to"
	return fmt.Sprintf(format, baseDescription)
}
2017-04-21 23:37:10 +00:00
// Deployment is the object that represents a job deployment which is used to
2017-08-07 21:13:05 +00:00
// transition a job between versions.
2017-04-21 23:37:10 +00:00
type Deployment struct {
// ID is a generated UUID for the deployment
ID string
2017-09-07 23:56:15 +00:00
// Namespace is the namespace the deployment is created in
Namespace string
2017-04-21 23:37:10 +00:00
// JobID is the job the deployment is created for
JobID string
// JobVersion is the version of the job at which the deployment is tracking
JobVersion uint64
// JobModifyIndex is the modify index of the job at which the deployment is tracking
JobModifyIndex uint64
// JobCreateIndex is the create index of the job which the deployment is
// tracking. It is needed so that if the job gets stopped and reran we can
// present the correct list of deployments for the job and not old ones.
JobCreateIndex uint64
// TaskGroups is the set of task groups effected by the deployment and their
// current deployment status.
TaskGroups map [ string ] * DeploymentState
// The status of the deployment
Status string
// StatusDescription allows a human readable description of the deployment
// status.
StatusDescription string
CreateIndex uint64
ModifyIndex uint64
}
2017-05-23 00:06:46 +00:00
// NewDeployment creates a new deployment given the job.
2017-05-22 17:58:34 +00:00
func NewDeployment ( job * Job ) * Deployment {
return & Deployment {
2017-09-29 16:58:48 +00:00
ID : uuid . Generate ( ) ,
2017-09-07 23:56:15 +00:00
Namespace : job . Namespace ,
2017-06-30 17:59:19 +00:00
JobID : job . ID ,
JobVersion : job . Version ,
JobModifyIndex : job . ModifyIndex ,
JobCreateIndex : job . CreateIndex ,
Status : DeploymentStatusRunning ,
StatusDescription : DeploymentStatusDescriptionRunning ,
TaskGroups : make ( map [ string ] * DeploymentState , len ( job . TaskGroups ) ) ,
2017-05-22 17:58:34 +00:00
}
}
2017-04-24 21:49:23 +00:00
func ( d * Deployment ) Copy ( ) * Deployment {
2017-07-05 19:55:51 +00:00
if d == nil {
return nil
}
2017-04-24 21:49:23 +00:00
c := & Deployment { }
* c = * d
c . TaskGroups = nil
2017-05-10 22:26:00 +00:00
if l := len ( d . TaskGroups ) ; d . TaskGroups != nil {
2017-04-24 21:49:23 +00:00
c . TaskGroups = make ( map [ string ] * DeploymentState , l )
for tg , s := range d . TaskGroups {
c . TaskGroups [ tg ] = s . Copy ( )
}
}
return c
}
// Active returns whether the deployment is active or terminal. Running and
// paused deployments are active; every other status is terminal.
func (d *Deployment) Active() bool {
	return d.Status == DeploymentStatusRunning || d.Status == DeploymentStatusPaused
}
2017-07-06 00:13:45 +00:00
// GetID is a helper for getting the ID when the object may be nil; a nil
// deployment yields the empty string.
func (d *Deployment) GetID() string {
	if d != nil {
		return d.ID
	}
	return ""
}
2017-07-06 04:26:04 +00:00
// HasPlacedCanaries returns whether the deployment has placed canaries, i.e.
// whether any task group lists at least one placed canary. A nil deployment
// has none.
func (d *Deployment) HasPlacedCanaries() bool {
	if d == nil {
		return false
	}

	// Ranging over an empty or nil map is a no-op, so no length check is
	// needed before the loop.
	for _, state := range d.TaskGroups {
		if len(state.PlacedCanaries) > 0 {
			return true
		}
	}
	return false
}
2017-07-07 06:30:46 +00:00
// RequiresPromotion returns whether the deployment requires promotion to
// continue. Only a running deployment can require promotion, and it does so
// when some task group wants canaries but has not been promoted yet.
func (d *Deployment) RequiresPromotion() bool {
	if d == nil {
		return false
	}
	if d.Status != DeploymentStatusRunning {
		return false
	}

	for _, state := range d.TaskGroups {
		if !state.Promoted && state.DesiredCanaries > 0 {
			return true
		}
	}
	return false
}
2017-06-06 21:08:46 +00:00
func ( d * Deployment ) GoString ( ) string {
2017-06-29 05:00:18 +00:00
base := fmt . Sprintf ( "Deployment ID %q for job %q has status %q (%v):" , d . ID , d . JobID , d . Status , d . StatusDescription )
2017-06-06 21:08:46 +00:00
for group , state := range d . TaskGroups {
base += fmt . Sprintf ( "\nTask Group %q has state:\n%#v" , group , state )
}
return base
}
2017-04-21 23:37:10 +00:00
// DeploymentState tracks the state of a deployment for a given task group.
type DeploymentState struct {
	// AutoRevert marks whether the task group has indicated the job should be
	// reverted on failure.
	AutoRevert bool

	// ProgressDeadline is the deadline, expressed as a duration, by which an
	// allocation must transition to healthy before the deployment is
	// considered failed.
	ProgressDeadline time.Duration

	// RequireProgressBy is the time by which an allocation must transition
	// to healthy before the deployment is considered failed.
	RequireProgressBy time.Time

	// Promoted marks whether the canaries have been promoted.
	Promoted bool

	// PlacedCanaries is the set of placed canary allocations.
	PlacedCanaries []string

	// DesiredCanaries is the number of canaries that should be created.
	DesiredCanaries int

	// DesiredTotal is the total number of allocations that should be created
	// as part of the deployment.
	DesiredTotal int

	// PlacedAllocs is the number of allocations that have been placed.
	PlacedAllocs int

	// HealthyAllocs is the number of allocations that have been marked
	// healthy.
	HealthyAllocs int

	// UnhealthyAllocs is the number of allocations that have been marked as
	// unhealthy.
	UnhealthyAllocs int
}
2017-06-06 21:08:46 +00:00
func ( d * DeploymentState ) GoString ( ) string {
2017-07-05 19:55:51 +00:00
base := fmt . Sprintf ( "\tDesired Total: %d" , d . DesiredTotal )
base += fmt . Sprintf ( "\n\tDesired Canaries: %d" , d . DesiredCanaries )
base += fmt . Sprintf ( "\n\tPlaced Canaries: %#v" , d . PlacedCanaries )
base += fmt . Sprintf ( "\n\tPromoted: %v" , d . Promoted )
base += fmt . Sprintf ( "\n\tPlaced: %d" , d . PlacedAllocs )
base += fmt . Sprintf ( "\n\tHealthy: %d" , d . HealthyAllocs )
base += fmt . Sprintf ( "\n\tUnhealthy: %d" , d . UnhealthyAllocs )
base += fmt . Sprintf ( "\n\tAutoRevert: %v" , d . AutoRevert )
2017-06-06 21:08:46 +00:00
return base
}
2017-04-24 21:49:23 +00:00
func ( d * DeploymentState ) Copy ( ) * DeploymentState {
c := & DeploymentState { }
* c = * d
2017-07-04 20:31:01 +00:00
c . PlacedCanaries = helper . CopySliceString ( d . PlacedCanaries )
2017-04-24 21:49:23 +00:00
return c
}
2017-05-11 19:49:04 +00:00
// DeploymentStatusUpdate is used to update the status of a given deployment.
type DeploymentStatusUpdate struct {
	// DeploymentID is the ID of the deployment to update.
	DeploymentID string

	// Status is the new status of the deployment.
	Status string

	// StatusDescription is the new status description of the deployment.
	StatusDescription string
}
2018-01-17 17:05:22 +00:00
// RescheduleTracker encapsulates previous reschedule events
2018-01-14 15:03:08 +00:00
type RescheduleTracker struct {
2018-01-17 17:05:22 +00:00
Events [ ] * RescheduleEvent
}
2018-01-17 17:44:06 +00:00
// Copy returns a copy of the tracker in which every event has been copied as
// well. A nil receiver yields nil. The copy's Events slice is always non-nil,
// even when the original's is nil.
func (rt *RescheduleTracker) Copy() *RescheduleTracker {
	if rt == nil {
		return nil
	}

	dup := *rt
	events := make([]*RescheduleEvent, len(rt.Events))
	for i, ev := range rt.Events {
		events[i] = ev.Copy()
	}
	dup.Events = events
	return &dup
}
2018-01-17 17:05:22 +00:00
// RescheduleEvent is used to keep track of previous attempts at rescheduling
// an allocation.
type RescheduleEvent struct {
	// RescheduleTime is the timestamp of a reschedule attempt.
	RescheduleTime int64

	// PrevAllocID is the ID of the previous allocation being restarted.
	PrevAllocID string

	// PrevNodeID is the node ID of the previous allocation.
	PrevNodeID string

	// Delay is the reschedule delay associated with the attempt.
	Delay time.Duration
}
2018-03-02 00:20:09 +00:00
func NewRescheduleEvent ( rescheduleTime int64 , prevAllocID string , prevNodeID string , delay time . Duration ) * RescheduleEvent {
2018-01-19 21:20:00 +00:00
return & RescheduleEvent { RescheduleTime : rescheduleTime ,
PrevAllocID : prevAllocID ,
2018-03-02 00:20:09 +00:00
PrevNodeID : prevNodeID ,
Delay : delay }
2018-01-19 21:20:00 +00:00
}
2018-01-17 17:44:06 +00:00
func ( re * RescheduleEvent ) Copy ( ) * RescheduleEvent {
if re == nil {
2018-01-14 15:03:08 +00:00
return nil
}
2018-01-17 17:05:22 +00:00
copy := new ( RescheduleEvent )
2018-01-17 17:44:06 +00:00
* copy = * re
2018-01-14 15:03:08 +00:00
return copy
}
2018-02-23 01:38:44 +00:00
// DesiredTransition is used to mark an allocation as having a desired state
// transition. This information can be used by the scheduler to make the
// correct decision.
type DesiredTransition struct {
	// Migrate is used to indicate that this allocation should be stopped and
	// migrated to another node.
	Migrate *bool

	// Reschedule is used to indicate that this allocation is eligible to be
	// rescheduled. Most allocations are automatically eligible for
	// rescheduling, so this field is only required when an allocation is not
	// automatically eligible. An example is an allocation that is part of a
	// deployment.
	Reschedule *bool

	// ForceReschedule is used to indicate that this allocation must be
	// rescheduled. This field is only used when operators want to force a
	// placement even if a failed allocation is not eligible to be rescheduled.
	ForceReschedule *bool
}
// Merge merges the two desired transitions, preferring the values from the
// passed in object.
2018-02-23 01:38:44 +00:00
func ( d * DesiredTransition ) Merge ( o * DesiredTransition ) {
2018-02-21 18:58:04 +00:00
if o . Migrate != nil {
d . Migrate = o . Migrate
}
2018-04-07 00:23:35 +00:00
if o . Reschedule != nil {
d . Reschedule = o . Reschedule
}
2018-05-08 22:26:36 +00:00
if o . ForceReschedule != nil {
d . ForceReschedule = o . ForceReschedule
}
2018-02-21 18:58:04 +00:00
}
2018-02-23 01:38:44 +00:00
// ShouldMigrate returns whether the transition object dictates a migration.
func ( d * DesiredTransition ) ShouldMigrate ( ) bool {
2018-02-21 18:58:04 +00:00
return d . Migrate != nil && * d . Migrate
}
2018-04-07 00:23:35 +00:00
// ShouldReschedule returns whether the transition object dictates a
// rescheduling. A nil transition, or one whose Reschedule field is unset,
// dictates none; the nil guard mirrors ShouldForceReschedule so all three
// predicates are safe to call on a nil receiver.
func (d *DesiredTransition) ShouldReschedule() bool {
	if d == nil {
		return false
	}
	return d.Reschedule != nil && *d.Reschedule
}
2018-05-08 22:26:36 +00:00
// ShouldForceReschedule returns whether the transition object dictates a
// forced rescheduling. A nil transition, or one whose ForceReschedule field
// is unset, dictates none.
func (d *DesiredTransition) ShouldForceReschedule() bool {
	return d != nil && d.ForceReschedule != nil && *d.ForceReschedule
}
2015-07-04 00:11:53 +00:00
// Desired statuses of an allocation.
const (
	// AllocDesiredStatusRun means the allocation should run.
	AllocDesiredStatusRun = "run"
	// AllocDesiredStatusStop means the allocation should stop.
	AllocDesiredStatusStop = "stop"
	// AllocDesiredStatusEvict means the allocation should stop, and was
	// evicted.
	AllocDesiredStatusEvict = "evict"
)
// Client statuses of an allocation.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
// ID of the allocation (UUID)
ID string
2017-09-07 23:56:15 +00:00
// Namespace is the namespace the allocation is created in
Namespace string
2015-08-15 20:08:06 +00:00
// ID of the evaluation that generated this allocation
EvalID string
2015-08-11 23:34:06 +00:00
// Name is a logical name of the allocation.
Name string
2015-07-04 00:11:53 +00:00
// NodeID is the node this is being placed on
NodeID string
// Job is the parent job of the task group being allocated.
// This is copied at allocation time to avoid issues if the job
// definition is updated.
2015-07-23 22:15:48 +00:00
JobID string
Job * Job
2015-07-04 00:11:53 +00:00
2015-08-23 23:30:57 +00:00
// TaskGroup is the name of the task group that should be run
TaskGroup string
2015-09-12 23:22:18 +00:00
// Resources is the total set of resources allocated as part
2015-07-04 00:11:53 +00:00
// of this allocation of the task group.
Resources * Resources
2016-08-29 19:49:52 +00:00
// SharedResources are the resources that are shared by all the tasks in an
// allocation
SharedResources * Resources
2015-09-12 23:22:18 +00:00
// TaskResources is the set of resources allocated to each
// task. These should sum to the total Resources.
TaskResources map [ string ] * Resources
2015-07-04 00:37:01 +00:00
// Metrics associated with this allocation
Metrics * AllocMetric
2015-08-25 23:18:37 +00:00
// Desired Status of the allocation on the client
DesiredStatus string
2015-07-04 00:50:54 +00:00
2015-08-25 23:18:37 +00:00
// DesiredStatusDescription is meant to provide more human useful information
DesiredDescription string
2018-02-23 01:38:44 +00:00
// DesiredTransition is used to indicate that a state transition
2018-02-21 18:58:04 +00:00
// is desired for a given reason.
2018-02-23 01:38:44 +00:00
DesiredTransition DesiredTransition
2018-02-21 18:58:04 +00:00
2015-08-25 23:18:37 +00:00
// Status of the allocation on the client
ClientStatus string
// ClientStatusDescription is meant to provide more human useful information
2015-08-25 23:26:34 +00:00
ClientDescription string
2015-08-15 20:08:06 +00:00
2015-11-12 23:28:22 +00:00
// TaskStates stores the state of each task,
TaskStates map [ string ] * TaskState
2016-08-16 17:49:45 +00:00
// PreviousAllocation is the allocation that this allocation is replacing
PreviousAllocation string
2018-01-24 20:56:57 +00:00
// NextAllocation is the allocation that this allocation is being replaced by
NextAllocation string
2017-04-21 23:37:10 +00:00
// DeploymentID identifies an allocation as being created from a
// particular deployment
DeploymentID string
2017-05-11 18:03:15 +00:00
// DeploymentStatus captures the status of the allocation as part of the
// given deployment
DeploymentStatus * AllocDeploymentStatus
2017-04-21 23:37:10 +00:00
2018-03-02 00:20:09 +00:00
// RescheduleTrackers captures details of previous reschedule attempts of the allocation
RescheduleTracker * RescheduleTracker
// FollowupEvalID captures a follow up evaluation created to handle a failed allocation
// that can be rescheduled in the future
FollowupEvalID string
2015-07-04 00:50:54 +00:00
// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
2016-02-02 02:15:12 +00:00
// AllocModifyIndex is not updated when the client updates allocations. This
// lets the client pull only the allocs updated by the server.
2016-02-01 21:57:35 +00:00
AllocModifyIndex uint64
2016-02-09 05:58:05 +00:00
// CreateTime is the time the allocation has finished scheduling and been
// verified by the plan applier.
CreateTime int64
2017-10-25 18:06:25 +00:00
// ModifyTime is the time the allocation was last updated.
ModifyTime int64
2015-07-04 00:11:53 +00:00
}
2017-05-31 18:34:46 +00:00
// Index returns the index of the allocation. If the allocation is from a task
// group with count greater than 1, there will be multiple allocations for it.
//
// The index is parsed from Name by skipping len(JobID)+len(TaskGroup)+2
// leading bytes and dropping the final byte (presumably a name shaped like
// "<job>.<group>[<index>]" - verify against the code that builds Name). Names
// too short to carry an index, and indexes that fail to parse, yield 0.
func (a *Allocation) Index() uint {
	nameLen := len(a.Name)
	start := len(a.JobID) + len(a.TaskGroup) + 2
	if nameLen <= 3 || nameLen <= start {
		return 0
	}

	// A parse failure leaves idx at 0, which is the fallback index.
	idx, _ := strconv.Atoi(a.Name[start : nameLen-1])
	return uint(idx)
}
2016-02-02 01:47:53 +00:00
func ( a * Allocation ) Copy ( ) * Allocation {
2017-05-17 18:07:06 +00:00
return a . copyImpl ( true )
}
// CopySkipJob provides a copy of the allocation but doesn't deep copy the
// job; the copy keeps pointing at the same Job as the original.
func (a *Allocation) CopySkipJob() *Allocation {
	const copyJob = false
	return a.copyImpl(copyJob)
}
func ( a * Allocation ) copyImpl ( job bool ) * Allocation {
2016-02-11 01:54:43 +00:00
if a == nil {
return nil
}
na := new ( Allocation )
* na = * a
2017-05-17 18:07:06 +00:00
if job {
na . Job = na . Job . Copy ( )
}
2016-02-11 01:54:43 +00:00
na . Resources = na . Resources . Copy ( )
2016-08-29 19:49:52 +00:00
na . SharedResources = na . SharedResources . Copy ( )
2016-02-11 01:54:43 +00:00
2016-03-21 23:29:21 +00:00
if a . TaskResources != nil {
tr := make ( map [ string ] * Resources , len ( na . TaskResources ) )
for task , resource := range na . TaskResources {
tr [ task ] = resource . Copy ( )
}
na . TaskResources = tr
2016-02-11 01:54:43 +00:00
}
na . Metrics = na . Metrics . Copy ( )
2017-05-11 18:03:15 +00:00
na . DeploymentStatus = na . DeploymentStatus . Copy ( )
2016-02-11 01:54:43 +00:00
2016-03-21 23:29:21 +00:00
if a . TaskStates != nil {
ts := make ( map [ string ] * TaskState , len ( na . TaskStates ) )
for task , state := range na . TaskStates {
ts [ task ] = state . Copy ( )
}
na . TaskStates = ts
2016-02-11 01:54:43 +00:00
}
2018-01-14 15:03:08 +00:00
2018-01-17 17:44:06 +00:00
na . RescheduleTracker = a . RescheduleTracker . Copy ( )
2016-02-11 01:54:43 +00:00
return na
2015-07-04 00:11:53 +00:00
}
2015-12-15 03:20:57 +00:00
// TerminalStatus returns if the desired or actual status is terminal and
// will no longer transition.
2015-08-15 23:07:22 +00:00
func ( a * Allocation ) TerminalStatus ( ) bool {
2015-12-16 22:34:17 +00:00
// First check the desired state and if that isn't terminal, check client
// state.
2015-08-25 23:18:37 +00:00
switch a . DesiredStatus {
2016-07-13 19:20:46 +00:00
case AllocDesiredStatusStop , AllocDesiredStatusEvict :
2015-08-15 23:07:22 +00:00
return true
2016-04-08 21:22:06 +00:00
default :
}
2018-04-04 19:38:15 +00:00
return a . ClientTerminalStatus ( )
}
// ClientTerminalStatus returns if the client status is terminal and will no longer transition
func ( a * Allocation ) ClientTerminalStatus ( ) bool {
2016-04-08 21:22:06 +00:00
switch a . ClientStatus {
2016-08-03 22:45:42 +00:00
case AllocClientStatusComplete , AllocClientStatusFailed , AllocClientStatusLost :
2016-04-08 21:22:06 +00:00
return true
2015-12-16 22:34:17 +00:00
default :
return false
2015-08-15 23:07:22 +00:00
}
}
2018-01-14 15:03:08 +00:00
// ShouldReschedule returns if the allocation is eligible to be rescheduled according
2018-01-17 17:05:22 +00:00
// to its status and ReschedulePolicy given its failure time
func ( a * Allocation ) ShouldReschedule ( reschedulePolicy * ReschedulePolicy , failTime time . Time ) bool {
2018-01-14 15:03:08 +00:00
// First check the desired state
switch a . DesiredStatus {
case AllocDesiredStatusStop , AllocDesiredStatusEvict :
return false
default :
}
switch a . ClientStatus {
case AllocClientStatusFailed :
2018-01-17 17:05:22 +00:00
return a . RescheduleEligible ( reschedulePolicy , failTime )
2018-01-14 15:03:08 +00:00
default :
return false
}
}
2018-01-16 21:01:31 +00:00
// RescheduleEligible returns if the allocation is eligible to be rescheduled according
// to its ReschedulePolicy and the current state of its reschedule trackers
2018-01-17 17:05:22 +00:00
func ( a * Allocation ) RescheduleEligible ( reschedulePolicy * ReschedulePolicy , failTime time . Time ) bool {
2018-01-16 21:01:31 +00:00
if reschedulePolicy == nil {
return false
}
attempts := reschedulePolicy . Attempts
interval := reschedulePolicy . Interval
2018-03-02 00:20:09 +00:00
enabled := attempts > 0 || reschedulePolicy . Unlimited
if ! enabled {
2018-01-14 15:03:08 +00:00
return false
}
2018-03-24 15:29:20 +00:00
if reschedulePolicy . Unlimited {
return true
}
2018-03-26 18:30:09 +00:00
// Early return true if there are no attempts yet and the number of allowed attempts is > 0
2018-03-24 15:29:20 +00:00
if ( a . RescheduleTracker == nil || len ( a . RescheduleTracker . Events ) == 0 ) && attempts > 0 {
2018-01-14 15:03:08 +00:00
return true
}
attempted := 0
2018-01-17 17:05:22 +00:00
for j := len ( a . RescheduleTracker . Events ) - 1 ; j >= 0 ; j -- {
lastAttempt := a . RescheduleTracker . Events [ j ] . RescheduleTime
timeDiff := failTime . UTC ( ) . UnixNano ( ) - lastAttempt
2018-01-14 15:03:08 +00:00
if timeDiff < interval . Nanoseconds ( ) {
attempted += 1
}
}
return attempted < attempts
}
2018-03-02 00:20:09 +00:00
// LastEventTime is the time of the last task event in the allocation.
2018-03-29 02:22:25 +00:00
// It is used to determine allocation failure time. If the FinishedAt field
// is not set, the alloc's modify time is used
2018-03-02 00:20:09 +00:00
func ( a * Allocation ) LastEventTime ( ) time . Time {
var lastEventTime time . Time
if a . TaskStates != nil {
2018-03-29 19:05:56 +00:00
for _ , s := range a . TaskStates {
if lastEventTime . IsZero ( ) || s . FinishedAt . After ( lastEventTime ) {
lastEventTime = s . FinishedAt
2018-03-02 00:20:09 +00:00
}
}
}
2018-04-03 20:49:18 +00:00
2018-03-29 02:22:25 +00:00
if lastEventTime . IsZero ( ) {
return time . Unix ( 0 , a . ModifyTime ) . UTC ( )
}
2018-03-02 00:20:09 +00:00
return lastEventTime
}
2018-03-08 15:36:01 +00:00
// ReschedulePolicy returns the reschedule policy based on the task group. It
// looks the allocation's task group up in its job and returns nil when the
// group cannot be found.
func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
	if group := a.Job.LookupTaskGroup(a.TaskGroup); group != nil {
		return group.ReschedulePolicy
	}
	return nil
}
2018-03-08 00:02:22 +00:00
// NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
2018-03-02 00:20:09 +00:00
// and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules
2018-03-08 15:36:01 +00:00
func ( a * Allocation ) NextRescheduleTime ( ) ( time . Time , bool ) {
2018-03-02 00:20:09 +00:00
failTime := a . LastEventTime ( )
2018-03-08 15:36:01 +00:00
reschedulePolicy := a . ReschedulePolicy ( )
2018-03-29 14:28:52 +00:00
if a . DesiredStatus == AllocDesiredStatusStop || a . ClientStatus != AllocClientStatusFailed || failTime . IsZero ( ) || reschedulePolicy == nil {
2018-03-02 00:20:09 +00:00
return time . Time { } , false
}
2018-03-08 15:36:01 +00:00
nextDelay := a . NextDelay ( )
2018-03-02 00:20:09 +00:00
nextRescheduleTime := failTime . Add ( nextDelay )
rescheduleEligible := reschedulePolicy . Unlimited || ( reschedulePolicy . Attempts > 0 && a . RescheduleTracker == nil )
if reschedulePolicy . Attempts > 0 && a . RescheduleTracker != nil && a . RescheduleTracker . Events != nil {
// Check for eligibility based on the interval if max attempts is set
attempted := 0
for j := len ( a . RescheduleTracker . Events ) - 1 ; j >= 0 ; j -- {
lastAttempt := a . RescheduleTracker . Events [ j ] . RescheduleTime
timeDiff := failTime . UTC ( ) . UnixNano ( ) - lastAttempt
if timeDiff < reschedulePolicy . Interval . Nanoseconds ( ) {
attempted += 1
}
}
rescheduleEligible = attempted < reschedulePolicy . Attempts && nextDelay < reschedulePolicy . Interval
}
return nextRescheduleTime , rescheduleEligible
}
2018-03-08 00:02:22 +00:00
// NextDelay returns a duration after which the allocation can be rescheduled.
// It is calculated according to the delay function and previous reschedule attempts.
2018-03-08 15:36:01 +00:00
func ( a * Allocation ) NextDelay ( ) time . Duration {
policy := a . ReschedulePolicy ( )
2018-03-02 00:20:09 +00:00
delayDur := policy . Delay
if a . RescheduleTracker == nil || a . RescheduleTracker . Events == nil || len ( a . RescheduleTracker . Events ) == 0 {
return delayDur
}
events := a . RescheduleTracker . Events
switch policy . DelayFunction {
case "exponential" :
delayDur = a . RescheduleTracker . Events [ len ( a . RescheduleTracker . Events ) - 1 ] . Delay * 2
case "fibonacci" :
if len ( events ) >= 2 {
fibN1Delay := events [ len ( events ) - 1 ] . Delay
fibN2Delay := events [ len ( events ) - 2 ] . Delay
// Handle reset of delay ceiling which should cause
// a new series to start
2018-03-13 15:06:26 +00:00
if fibN2Delay == policy . MaxDelay && fibN1Delay == policy . Delay {
2018-03-02 00:20:09 +00:00
delayDur = fibN1Delay
} else {
delayDur = fibN1Delay + fibN2Delay
}
}
default :
return delayDur
}
2018-03-13 15:06:26 +00:00
if policy . MaxDelay > 0 && delayDur > policy . MaxDelay {
delayDur = policy . MaxDelay
2018-03-02 00:20:09 +00:00
// check if delay needs to be reset
2018-03-08 00:02:22 +00:00
lastRescheduleEvent := a . RescheduleTracker . Events [ len ( a . RescheduleTracker . Events ) - 1 ]
timeDiff := a . LastEventTime ( ) . UTC ( ) . UnixNano ( ) - lastRescheduleEvent . RescheduleTime
if timeDiff > delayDur . Nanoseconds ( ) {
delayDur = policy . Delay
2018-03-02 00:20:09 +00:00
}
2018-03-08 00:02:22 +00:00
2018-03-02 00:20:09 +00:00
}
return delayDur
}
2016-08-22 16:34:24 +00:00
// Terminated reports whether the allocation is in a terminal state on a
// client, i.e. its client status is failed, complete or lost.
func (a *Allocation) Terminated() bool {
	switch a.ClientStatus {
	case AllocClientStatusFailed, AllocClientStatusComplete, AllocClientStatusLost:
		return true
	}
	return false
}
2016-05-25 00:23:18 +00:00
// RanSuccessfully returns whether the client has ran the allocation and all
2018-01-04 22:20:32 +00:00
// tasks finished successfully. Critically this function returns whether the
// allocation has ran to completion and not just that the alloc has converged to
// its desired state. That is to say that a batch allocation must have finished
// with exit code 0 on all task groups. This doesn't really have meaning on a
// non-batch allocation because a service and system allocation should not
// finish.
2016-05-25 00:23:18 +00:00
func ( a * Allocation ) RanSuccessfully ( ) bool {
2018-01-04 22:20:32 +00:00
// Handle the case the client hasn't started the allocation.
if len ( a . TaskStates ) == 0 {
return false
}
2018-01-14 15:03:08 +00:00
// Check to see if all the tasks finished successfully in the allocation
2018-01-04 22:20:32 +00:00
allSuccess := true
for _ , state := range a . TaskStates {
allSuccess = allSuccess && state . Successful ( )
}
return allSuccess
2016-05-25 00:23:18 +00:00
}
2016-10-03 16:59:57 +00:00
// ShouldMigrate returns if the allocation needs data migration
func ( a * Allocation ) ShouldMigrate ( ) bool {
2017-10-12 00:04:09 +00:00
if a . PreviousAllocation == "" {
return false
}
2016-10-03 16:59:57 +00:00
if a . DesiredStatus == AllocDesiredStatusStop || a . DesiredStatus == AllocDesiredStatusEvict {
return false
}
2016-10-19 18:12:25 +00:00
tg := a . Job . LookupTaskGroup ( a . TaskGroup )
// if the task group is nil or the ephemeral disk block isn't present then
// we won't migrate
if tg == nil || tg . EphemeralDisk == nil {
return false
}
// We won't migrate any data is the user hasn't enabled migration or the
// disk is not marked as sticky
if ! tg . EphemeralDisk . Migrate || ! tg . EphemeralDisk . Sticky {
2016-10-03 16:59:57 +00:00
return false
}
return true
}
2017-11-18 15:30:50 +00:00
// SetEventDisplayMessage populates the display message if its not already set,
// a temporary fix to handle old allocations that don't have it.
// This method will be removed in a future release.
2017-11-17 20:53:26 +00:00
func ( a * Allocation ) SetEventDisplayMessages ( ) {
setDisplayMsg ( a . TaskStates )
}
2017-06-02 23:11:29 +00:00
// Stub returns a list stub for the allocation
func ( a * Allocation ) Stub ( ) * AllocListStub {
return & AllocListStub {
ID : a . ID ,
EvalID : a . EvalID ,
Name : a . Name ,
NodeID : a . NodeID ,
JobID : a . JobID ,
2017-07-07 04:51:13 +00:00
JobVersion : a . Job . Version ,
2017-06-02 23:11:29 +00:00
TaskGroup : a . TaskGroup ,
DesiredStatus : a . DesiredStatus ,
DesiredDescription : a . DesiredDescription ,
ClientStatus : a . ClientStatus ,
ClientDescription : a . ClientDescription ,
2018-04-07 00:23:35 +00:00
DesiredTransition : a . DesiredTransition ,
2017-06-02 23:11:29 +00:00
TaskStates : a . TaskStates ,
2017-06-26 21:23:52 +00:00
DeploymentStatus : a . DeploymentStatus ,
2018-03-20 02:42:37 +00:00
FollowupEvalID : a . FollowupEvalID ,
2018-05-01 19:53:47 +00:00
RescheduleTracker : a . RescheduleTracker ,
2017-06-02 23:11:29 +00:00
CreateIndex : a . CreateIndex ,
ModifyIndex : a . ModifyIndex ,
CreateTime : a . CreateTime ,
2017-10-25 18:06:25 +00:00
ModifyTime : a . ModifyTime ,
2017-06-02 23:11:29 +00:00
}
}
2015-09-06 22:34:28 +00:00
// AllocListStub is used to return a subset of alloc information
type AllocListStub struct {
ID string
EvalID string
Name string
NodeID string
JobID string
2017-07-07 04:51:13 +00:00
JobVersion uint64
2015-09-06 22:34:28 +00:00
TaskGroup string
DesiredStatus string
DesiredDescription string
ClientStatus string
ClientDescription string
2018-04-07 00:23:35 +00:00
DesiredTransition DesiredTransition
2015-11-12 23:28:22 +00:00
TaskStates map [ string ] * TaskState
2017-06-26 21:23:52 +00:00
DeploymentStatus * AllocDeploymentStatus
2018-03-08 00:02:22 +00:00
FollowupEvalID string
2018-05-01 19:53:47 +00:00
RescheduleTracker * RescheduleTracker
2015-09-06 22:34:28 +00:00
CreateIndex uint64
ModifyIndex uint64
2016-02-09 05:58:05 +00:00
CreateTime int64
2017-10-25 18:06:25 +00:00
ModifyTime int64
2015-09-06 22:34:28 +00:00
}
2017-11-18 15:30:50 +00:00
// SetEventDisplayMessage populates the display message if its not already set,
// a temporary fix to handle old allocations that don't have it.
// This method will be removed in a future release.
2017-11-17 20:53:26 +00:00
func ( a * AllocListStub ) SetEventDisplayMessages ( ) {
setDisplayMsg ( a . TaskStates )
}
// setDisplayMsg calls PopulateEventDisplayMessage on every event of every
// task state in the given map. A nil map is a no-op.
func setDisplayMsg(taskStates map[string]*TaskState) {
	for _, state := range taskStates {
		for _, ev := range state.Events {
			ev.PopulateEventDisplayMessage()
		}
	}
}
2015-07-04 00:37:01 +00:00
// AllocMetric tracks various metrics gathered while attempting to make an
// allocation. They are used to debug a job, or to better understand the
// pressure within the system.
type AllocMetric struct {
	// NodesEvaluated counts the nodes that were evaluated.
	NodesEvaluated int

	// NodesFiltered counts the nodes filtered out due to a constraint.
	NodesFiltered int

	// NodesAvailable holds the number of nodes available for evaluation,
	// keyed by datacenter.
	NodesAvailable map[string]int

	// ClassFiltered counts the filtered nodes, keyed by node class.
	ClassFiltered map[string]int

	// ConstraintFiltered counts the filtering failures, keyed by the
	// constraint that caused them.
	ConstraintFiltered map[string]int

	// NodesExhausted counts the nodes skipped because at least one of
	// their resources was exhausted.
	NodesExhausted int

	// ClassExhausted counts the exhausted nodes, keyed by node class.
	ClassExhausted map[string]int

	// DimensionExhausted counts the exhausted nodes, keyed by dimension
	// or reason.
	DimensionExhausted map[string]int

	// QuotaExhausted lists the exhausted quota dimensions.
	QuotaExhausted []string

	// Scores holds the scores of the final few nodes remaining for
	// placement. The top score is typically selected.
	Scores map[string]float64

	// AllocationTime measures how long the allocation attempt took. This
	// can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures is the number of other allocations that were
	// coalesced into this failed allocation, which avoids creating many
	// failed allocations for a single task group.
	CoalescedFailures int
}
2016-02-11 01:54:43 +00:00
func ( a * AllocMetric ) Copy ( ) * AllocMetric {
if a == nil {
return nil
}
na := new ( AllocMetric )
* na = * a
2017-01-18 23:55:14 +00:00
na . NodesAvailable = helper . CopyMapStringInt ( na . NodesAvailable )
na . ClassFiltered = helper . CopyMapStringInt ( na . ClassFiltered )
na . ConstraintFiltered = helper . CopyMapStringInt ( na . ConstraintFiltered )
na . ClassExhausted = helper . CopyMapStringInt ( na . ClassExhausted )
na . DimensionExhausted = helper . CopyMapStringInt ( na . DimensionExhausted )
2017-10-13 21:36:02 +00:00
na . QuotaExhausted = helper . CopySliceString ( na . QuotaExhausted )
2017-01-18 23:55:14 +00:00
na . Scores = helper . CopyMapStringFloat64 ( na . Scores )
2016-02-11 01:54:43 +00:00
return na
}
2015-08-14 04:46:33 +00:00
// EvaluateNode records that one more node was evaluated.
func (a *AllocMetric) EvaluateNode() {
	a.NodesEvaluated++
}
// FilterNode records that a node was filtered. The per-class counter is
// bumped when the node is non-nil and has a node class, and the
// per-constraint counter is bumped when constraint is non-empty. The counter
// maps are created lazily.
func (a *AllocMetric) FilterNode(node *Node, constraint string) {
	a.NodesFiltered++

	if node != nil {
		if class := node.NodeClass; class != "" {
			if a.ClassFiltered == nil {
				a.ClassFiltered = map[string]int{}
			}
			a.ClassFiltered[class]++
		}
	}

	if constraint == "" {
		return
	}
	if a.ConstraintFiltered == nil {
		a.ConstraintFiltered = map[string]int{}
	}
	a.ConstraintFiltered[constraint]++
}
2015-09-13 23:48:01 +00:00
func ( a * AllocMetric ) ExhaustedNode ( node * Node , dimension string ) {
2015-08-14 04:46:33 +00:00
a . NodesExhausted += 1
if node != nil && node . NodeClass != "" {
if a . ClassExhausted == nil {
a . ClassExhausted = make ( map [ string ] int )
}
a . ClassExhausted [ node . NodeClass ] += 1
}
2015-09-13 23:48:01 +00:00
if dimension != "" {
2015-09-23 00:37:33 +00:00
if a . DimensionExhausted == nil {
a . DimensionExhausted = make ( map [ string ] int )
2015-09-13 23:48:01 +00:00
}
2015-09-23 00:37:33 +00:00
a . DimensionExhausted [ dimension ] += 1
2015-09-13 23:48:01 +00:00
}
2015-08-14 04:46:33 +00:00
}
2017-10-13 21:36:02 +00:00
// ExhaustQuota appends the given dimensions to QuotaExhausted. If the slice
// has not been allocated yet it is created with capacity for the dimensions,
// so it is non-nil afterwards even when dimensions is empty.
func (a *AllocMetric) ExhaustQuota(dimensions []string) {
	exhausted := a.QuotaExhausted
	if exhausted == nil {
		exhausted = make([]string, 0, len(dimensions))
	}
	a.QuotaExhausted = append(exhausted, dimensions...)
}
2015-08-16 16:57:30 +00:00
func ( a * AllocMetric ) ScoreNode ( node * Node , name string , score float64 ) {
2015-08-14 04:46:33 +00:00
if a . Scores == nil {
a . Scores = make ( map [ string ] float64 )
}
2015-08-16 16:57:30 +00:00
key := fmt . Sprintf ( "%s.%s" , node . ID , name )
a . Scores [ key ] = score
2015-08-14 04:46:33 +00:00
}
2017-05-11 18:03:15 +00:00
// AllocDeploymentStatus captures the status of the allocation as part of the
// deployment. This can include things like whether the allocation has been
// marked as healthy.
type AllocDeploymentStatus struct {
	// Healthy records whether the allocation has been marked healthy or
	// unhealthy as part of a deployment. It is nil while the allocation
	// has been marked neither healthy nor unhealthy.
	Healthy *bool

	// Timestamp is the time at which the health status was set.
	Timestamp time.Time

	// Canary marks whether the allocation is a canary. A canary that has
	// been promoted has this field set to false.
	Canary bool

	// ModifyIndex is the raft index at which the deployment status was
	// last changed.
	ModifyIndex uint64
}
2018-03-28 23:49:56 +00:00
// HasHealth reports whether the allocation has its health set. A nil
// receiver has no health.
func (a *AllocDeploymentStatus) HasHealth() bool {
	if a == nil {
		return false
	}
	return a.Healthy != nil
}
2017-06-06 21:08:46 +00:00
// IsHealthy reports whether the allocation is marked as healthy as part of a
// deployment. It is false for a nil receiver or when health is unset.
func (a *AllocDeploymentStatus) IsHealthy() bool {
	if a == nil || a.Healthy == nil {
		return false
	}
	return *a.Healthy
}
2017-06-26 21:23:52 +00:00
// IsUnhealthy reports whether the allocation is marked as unhealthy as part
// of a deployment. It is false for a nil receiver or when health is unset.
func (a *AllocDeploymentStatus) IsUnhealthy() bool {
	if a == nil || a.Healthy == nil {
		return false
	}
	return !*a.Healthy
}
2018-04-23 23:35:25 +00:00
// IsCanary reports whether the allocation is marked as a canary. It is false
// for a nil receiver.
func (a *AllocDeploymentStatus) IsCanary() bool {
	return a != nil && a.Canary
}
2017-05-11 18:03:15 +00:00
func ( a * AllocDeploymentStatus ) Copy ( ) * AllocDeploymentStatus {
if a == nil {
return nil
}
c := new ( AllocDeploymentStatus )
2017-06-06 21:08:46 +00:00
* c = * a
2017-05-11 18:03:15 +00:00
if a . Healthy != nil {
c . Healthy = helper . BoolToPtr ( * a . Healthy )
}
return c
}
2015-07-23 22:27:13 +00:00
// Evaluation status values stored in Evaluation.Status.
const (
	// EvalStatusBlocked is the status for which ShouldBlock returns true.
	EvalStatusBlocked = "blocked"

	// EvalStatusPending is the status for which ShouldEnqueue returns true.
	EvalStatusPending = "pending"

	// EvalStatusComplete, EvalStatusFailed and EvalStatusCancelled are the
	// statuses treated as terminal by TerminalStatus.
	EvalStatusComplete = "complete"
	EvalStatusFailed   = "failed"

	// Note that the identifier is spelled "Cancelled" while the value is
	// spelled "canceled".
	EvalStatusCancelled = "canceled"
)
2015-08-06 18:48:44 +00:00
// Evaluation trigger values stored in Evaluation.TriggeredBy, which gives
// some insight into why an evaluation was created.
const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerPeriodicJob   = "periodic-job"
	EvalTriggerNodeDrain     = "node-drain"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"

	// EvalTriggerRollingUpdate is set on evaluations created by
	// Evaluation.NextRollingEval.
	EvalTriggerRollingUpdate = "rolling-update"

	EvalTriggerDeploymentWatcher = "deployment-watcher"

	// EvalTriggerFailedFollowUp is set on evaluations created by
	// Evaluation.CreateFailedFollowUpEval.
	EvalTriggerFailedFollowUp = "failed-follow-up"

	EvalTriggerMaxPlans         = "max-plan-attempts"
	EvalTriggerRetryFailedAlloc = "alloc-failure"
)
// Core job identifiers.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations and
	// allocations. Evaluations in a terminal state whose corresponding
	// allocations are all terminal as well are periodically scanned for
	// and deleted from the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// Nodes in a terminal state are periodically scanned for, and those
	// with no corresponding allocations are deleted from the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs.
	// Garbage collectible jobs are periodically scanned for, and those
	// whose evaluations and allocations are all terminal are deleted from
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. Garbage collectible deployments are periodically
	// scanned for, and the terminal ones are deleted from the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable
	// objects.
	CoreJobForceGC = "force-gc"
)
2015-07-23 22:27:13 +00:00
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
2017-12-13 17:36:03 +00:00
// ID is a randomly generated UUID used for this evaluation. This
2015-07-23 22:27:13 +00:00
// is assigned upon the creation of the evaluation.
ID string
2017-09-07 23:56:15 +00:00
// Namespace is the namespace the evaluation is created in
Namespace string
2015-07-24 00:31:08 +00:00
// Priority is used to control scheduling importance and if this job
// can preempt other jobs.
Priority int
// Type is used to control which schedulers are available to handle
// this evaluation.
Type string
2015-07-24 05:30:08 +00:00
// TriggeredBy is used to give some insight into why this Eval
// was created. (Job change, node failure, alloc failure, etc).
TriggeredBy string
2015-09-24 06:56:25 +00:00
// JobID is the job this evaluation is scoped to. Evaluations cannot
2015-08-06 00:55:15 +00:00
// be run in parallel for a given JobID, so we serialize on this.
JobID string
2015-08-06 18:48:44 +00:00
// JobModifyIndex is the modify index of the job at the time
// the evaluation was created
JobModifyIndex uint64
2015-08-06 23:39:20 +00:00
// NodeID is the node that was affected triggering the evaluation.
NodeID string
// NodeModifyIndex is the modify index of the node at the time
// the evaluation was created
NodeModifyIndex uint64
2017-06-26 21:23:52 +00:00
// DeploymentID is the ID of the deployment that triggered the evaluation.
DeploymentID string
2015-07-23 22:27:13 +00:00
// Status of the evaluation
Status string
2015-08-15 20:08:06 +00:00
// StatusDescription is meant to provide more human useful information
StatusDescription string
2015-09-07 19:47:48 +00:00
// Wait is a minimum wait time for running the eval. This is used to
2018-03-02 00:20:09 +00:00
// support a rolling upgrade in versions prior to 0.7.0
// Deprecated
2015-09-07 19:47:48 +00:00
Wait time . Duration
2018-03-02 00:20:09 +00:00
// WaitUntil is the time when this eval should be run. This is used to
// supported delayed rescheduling of failed allocations
WaitUntil time . Time
2015-09-07 19:47:48 +00:00
// NextEval is the evaluation ID for the eval created to do a followup.
// This is used to support rolling upgrades, where we need a chain of evaluations.
NextEval string
// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
// This is used to support rolling upgrades, where we need a chain of evaluations.
PreviousEval string
2016-05-25 01:12:59 +00:00
// BlockedEval is the evaluation ID for a created blocked eval. A
2016-05-19 20:09:52 +00:00
// blocked eval will be created if all allocations could not be placed due
// to constraints or lacking resources.
2016-05-25 01:12:59 +00:00
BlockedEval string
2016-05-19 20:09:52 +00:00
2016-05-19 01:11:40 +00:00
// FailedTGAllocs are task groups which have allocations that could not be
// made, but the metrics are persisted so that the user can use the feedback
// to determine the cause.
FailedTGAllocs map [ string ] * AllocMetric
2016-05-15 16:41:34 +00:00
// ClassEligibility tracks computed node classes that have been explicitly
2016-01-30 01:46:44 +00:00
// marked as eligible or ineligible.
ClassEligibility map [ string ] bool
2016-01-28 21:43:48 +00:00
2017-10-13 21:36:02 +00:00
// QuotaLimitReached marks whether a quota limit was reached for the
// evaluation.
QuotaLimitReached string
2016-01-28 21:43:48 +00:00
// EscapedComputedClass marks whether the job has constraints that are not
// captured by computed node classes.
2016-01-30 01:46:44 +00:00
EscapedComputedClass bool
2016-01-28 21:43:48 +00:00
2016-05-05 18:21:58 +00:00
// AnnotatePlan triggers the scheduler to provide additional annotations
// during the evaluation. This should not be set during normal operations.
AnnotatePlan bool
2017-03-09 20:37:41 +00:00
// QueuedAllocations is the number of unplaced allocations at the time the
// evaluation was processed. The map is keyed by Task Group names.
QueuedAllocations map [ string ] int
2017-10-23 22:12:45 +00:00
// LeaderACL provides the ACL token to when issuing RPCs back to the
2017-10-23 22:04:00 +00:00
// leader. This will be a valid management token as long as the leader is
// active. This should not ever be exposed via the API.
LeaderACL string
2016-05-21 01:07:10 +00:00
// SnapshotIndex is the Raft index of the snapshot used to process the
// evaluation. As such it will only be set once it has gone through the
// scheduler.
SnapshotIndex uint64
2015-07-23 22:27:13 +00:00
// Raft Indexes
CreateIndex uint64
ModifyIndex uint64
}
2015-08-15 23:07:22 +00:00
// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func ( e * Evaluation ) TerminalStatus ( ) bool {
switch e . Status {
2016-01-31 00:16:13 +00:00
case EvalStatusComplete , EvalStatusFailed , EvalStatusCancelled :
2015-08-15 23:07:22 +00:00
return true
default :
return false
}
}
2015-08-14 00:11:20 +00:00
func ( e * Evaluation ) GoString ( ) string {
2017-09-07 23:56:15 +00:00
return fmt . Sprintf ( "<Eval %q JobID: %q Namespace: %q>" , e . ID , e . JobID , e . Namespace )
2015-08-14 00:11:20 +00:00
}
2015-08-15 21:16:40 +00:00
func ( e * Evaluation ) Copy ( ) * Evaluation {
2016-02-11 01:54:43 +00:00
if e == nil {
return nil
}
2015-08-15 21:16:40 +00:00
ne := new ( Evaluation )
* ne = * e
2016-05-19 01:11:40 +00:00
// Copy ClassEligibility
if e . ClassEligibility != nil {
classes := make ( map [ string ] bool , len ( e . ClassEligibility ) )
for class , elig := range e . ClassEligibility {
classes [ class ] = elig
}
ne . ClassEligibility = classes
}
// Copy FailedTGAllocs
if e . FailedTGAllocs != nil {
failedTGs := make ( map [ string ] * AllocMetric , len ( e . FailedTGAllocs ) )
for tg , metric := range e . FailedTGAllocs {
failedTGs [ tg ] = metric . Copy ( )
}
ne . FailedTGAllocs = failedTGs
}
2016-07-18 22:04:05 +00:00
// Copy queued allocations
if e . QueuedAllocations != nil {
queuedAllocations := make ( map [ string ] int , len ( e . QueuedAllocations ) )
for tg , num := range e . QueuedAllocations {
queuedAllocations [ tg ] = num
}
ne . QueuedAllocations = queuedAllocations
}
2015-08-15 21:16:40 +00:00
return ne
}
2016-01-29 23:31:32 +00:00
// ShouldEnqueue checks if a given evaluation should be enqueued into the
// eval_broker
2015-08-06 18:28:55 +00:00
func ( e * Evaluation ) ShouldEnqueue ( ) bool {
switch e . Status {
case EvalStatusPending :
return true
2016-01-31 00:16:13 +00:00
case EvalStatusComplete , EvalStatusFailed , EvalStatusBlocked , EvalStatusCancelled :
2016-01-29 23:31:32 +00:00
return false
default :
panic ( fmt . Sprintf ( "unhandled evaluation (%s) status %s" , e . ID , e . Status ) )
}
}
// ShouldBlock checks if a given evaluation should be entered into the blocked
// eval tracker.
func ( e * Evaluation ) ShouldBlock ( ) bool {
switch e . Status {
case EvalStatusBlocked :
return true
2016-01-31 00:16:13 +00:00
case EvalStatusComplete , EvalStatusFailed , EvalStatusPending , EvalStatusCancelled :
2015-08-06 18:28:55 +00:00
return false
default :
panic ( fmt . Sprintf ( "unhandled evaluation (%s) status %s" , e . ID , e . Status ) )
}
}
2015-08-11 23:34:06 +00:00
// MakePlan is used to make a plan from the given evaluation
// for a given Job
func ( e * Evaluation ) MakePlan ( j * Job ) * Plan {
p := & Plan {
EvalID : e . ID ,
2015-08-13 23:29:28 +00:00
Priority : e . Priority ,
2016-02-21 19:31:27 +00:00
Job : j ,
2015-08-25 23:52:56 +00:00
NodeUpdate : make ( map [ string ] [ ] * Allocation ) ,
2015-08-11 23:34:06 +00:00
NodeAllocation : make ( map [ string ] [ ] * Allocation ) ,
}
2015-08-13 23:29:28 +00:00
if j != nil {
p . AllAtOnce = j . AllAtOnce
}
2015-08-11 23:34:06 +00:00
return p
}
2015-09-07 22:08:50 +00:00
// NextRollingEval creates an evaluation to followup this eval for rolling updates
func ( e * Evaluation ) NextRollingEval ( wait time . Duration ) * Evaluation {
return & Evaluation {
2017-09-29 16:58:48 +00:00
ID : uuid . Generate ( ) ,
2017-09-07 23:56:15 +00:00
Namespace : e . Namespace ,
2015-09-07 22:08:50 +00:00
Priority : e . Priority ,
Type : e . Type ,
TriggeredBy : EvalTriggerRollingUpdate ,
JobID : e . JobID ,
JobModifyIndex : e . JobModifyIndex ,
Status : EvalStatusPending ,
Wait : wait ,
PreviousEval : e . ID ,
}
}
2016-05-25 01:12:59 +00:00
// CreateBlockedEval creates a blocked evaluation to followup this eval to place any
2016-05-15 16:41:34 +00:00
// failed allocations. It takes the classes marked explicitly eligible or
2017-10-13 21:36:02 +00:00
// ineligible, whether the job has escaped computed node classes and whether the
// quota limit was reached.
func ( e * Evaluation ) CreateBlockedEval ( classEligibility map [ string ] bool ,
escaped bool , quotaReached string ) * Evaluation {
2016-01-28 21:43:48 +00:00
return & Evaluation {
2017-09-29 16:58:48 +00:00
ID : uuid . Generate ( ) ,
2017-09-07 23:56:15 +00:00
Namespace : e . Namespace ,
2016-01-28 21:43:48 +00:00
Priority : e . Priority ,
Type : e . Type ,
TriggeredBy : e . TriggeredBy ,
JobID : e . JobID ,
JobModifyIndex : e . JobModifyIndex ,
Status : EvalStatusBlocked ,
PreviousEval : e . ID ,
2016-01-30 01:46:44 +00:00
ClassEligibility : classEligibility ,
2016-01-28 21:43:48 +00:00
EscapedComputedClass : escaped ,
2017-10-13 21:36:02 +00:00
QuotaLimitReached : quotaReached ,
2016-01-28 21:43:48 +00:00
}
}
2017-04-12 21:47:59 +00:00
// CreateFailedFollowUpEval creates a follow up evaluation when the current one
2017-08-07 21:13:05 +00:00
// has been marked as failed because it has hit the delivery limit and will not
2017-04-12 21:47:59 +00:00
// be retried by the eval_broker.
func ( e * Evaluation ) CreateFailedFollowUpEval ( wait time . Duration ) * Evaluation {
return & Evaluation {
2017-09-29 16:58:48 +00:00
ID : uuid . Generate ( ) ,
2017-09-07 23:56:15 +00:00
Namespace : e . Namespace ,
2017-04-12 21:47:59 +00:00
Priority : e . Priority ,
Type : e . Type ,
TriggeredBy : EvalTriggerFailedFollowUp ,
JobID : e . JobID ,
JobModifyIndex : e . JobModifyIndex ,
Status : EvalStatusPending ,
Wait : wait ,
PreviousEval : e . ID ,
}
}
2015-07-27 21:59:16 +00:00
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
2018-03-11 17:35:27 +00:00
// not been overcommitted before admitting the plan.
2015-07-27 21:59:16 +00:00
type Plan struct {
2015-07-29 00:49:45 +00:00
// EvalID is the evaluation ID this plan is associated with
EvalID string
2015-08-12 22:44:36 +00:00
// EvalToken is used to prevent a split-brain processing of
// an evaluation. There should only be a single scheduler running
// an Eval at a time, but this could be violated after a leadership
// transition. This unique token is used to reject plans that are
// being submitted from a different leader.
EvalToken string
2015-07-27 21:59:16 +00:00
// Priority is the priority of the upstream job
Priority int
2015-07-29 00:49:45 +00:00
// AllAtOnce is used to control if incremental scheduling of task groups
// is allowed or if we must do a gang scheduling of the entire job.
// If this is false, a plan may be partially applied. Otherwise, the
// entire plan must be able to make progress.
AllAtOnce bool
2016-02-21 19:31:27 +00:00
// Job is the parent job of all the allocations in the Plan.
// Since a Plan only involves a single Job, we can reduce the size
// of the plan by only including it once.
Job * Job
2015-08-25 23:52:56 +00:00
// NodeUpdate contains all the allocations for each node. For each node,
// this is a list of the allocations to update to either stop or evict.
NodeUpdate map [ string ] [ ] * Allocation
2015-07-29 00:49:45 +00:00
// NodeAllocation contains all the allocations for each node.
// The evicts must be considered prior to the allocations.
2015-08-04 23:32:46 +00:00
NodeAllocation map [ string ] [ ] * Allocation
2015-08-15 20:27:42 +00:00
2016-05-05 18:21:58 +00:00
// Annotations contains annotations by the scheduler to be used by operators
// to understand the decisions made by the scheduler.
Annotations * PlanAnnotations
2017-04-21 23:37:10 +00:00
2017-07-04 20:31:01 +00:00
// Deployment is the deployment created or updated by the scheduler that
// should be applied by the planner.
Deployment * Deployment
2017-05-11 19:49:04 +00:00
// DeploymentUpdates is a set of status updates to apply to the given
// deployments. This allows the scheduler to cancel any unneeded deployment
// because the job is stopped or the update block is removed.
DeploymentUpdates [ ] * DeploymentStatusUpdate
2015-07-27 21:59:16 +00:00
}
2016-08-03 22:45:42 +00:00
// AppendUpdate marks the allocation for eviction. The clientStatus of the
// allocation may be optionally set by passing in a non-empty value.
func ( p * Plan ) AppendUpdate ( alloc * Allocation , desiredStatus , desiredDesc , clientStatus string ) {
2015-08-26 00:06:06 +00:00
newAlloc := new ( Allocation )
* newAlloc = * alloc
2016-02-24 22:50:59 +00:00
// If the job is not set in the plan we are deregistering a job so we
// extract the job from the allocation.
if p . Job == nil && newAlloc . Job != nil {
p . Job = newAlloc . Job
}
// Normalize the job
newAlloc . Job = nil
2016-03-01 22:09:25 +00:00
// Strip the resources as it can be rebuilt.
newAlloc . Resources = nil
2016-08-03 22:45:42 +00:00
newAlloc . DesiredStatus = desiredStatus
newAlloc . DesiredDescription = desiredDesc
if clientStatus != "" {
newAlloc . ClientStatus = clientStatus
}
2015-08-13 21:02:39 +00:00
node := alloc . NodeID
2015-08-25 23:52:56 +00:00
existing := p . NodeUpdate [ node ]
2015-08-26 00:06:06 +00:00
p . NodeUpdate [ node ] = append ( existing , newAlloc )
2015-08-13 21:02:39 +00:00
}
2015-09-07 19:00:34 +00:00
func ( p * Plan ) PopUpdate ( alloc * Allocation ) {
existing := p . NodeUpdate [ alloc . NodeID ]
n := len ( existing )
if n > 0 && existing [ n - 1 ] . ID == alloc . ID {
existing = existing [ : n - 1 ]
2015-09-07 19:05:18 +00:00
if len ( existing ) > 0 {
p . NodeUpdate [ alloc . NodeID ] = existing
} else {
delete ( p . NodeUpdate , alloc . NodeID )
}
2015-09-07 19:00:34 +00:00
}
}
2015-08-13 21:02:39 +00:00
// AppendAlloc appends the allocation to the planned allocations of its node.
func (p *Plan) AppendAlloc(alloc *Allocation) {
	nodeID := alloc.NodeID
	p.NodeAllocation[nodeID] = append(p.NodeAllocation[nodeID], alloc)
}
2015-08-14 01:16:32 +00:00
// IsNoOp checks if this plan would do nothing
func ( p * Plan ) IsNoOp ( ) bool {
2017-05-18 19:36:04 +00:00
return len ( p . NodeUpdate ) == 0 &&
len ( p . NodeAllocation ) == 0 &&
2017-07-04 20:31:01 +00:00
p . Deployment == nil &&
2017-05-18 19:36:04 +00:00
len ( p . DeploymentUpdates ) == 0
2015-08-14 01:16:32 +00:00
}
2015-07-27 22:31:49 +00:00
// PlanResult is the result of a plan submitted to the leader. It reports which
// parts of the plan were committed and the indexes associated with the commit.
2015-07-27 21:59:16 +00:00
type PlanResult struct {
2015-08-25 23:52:56 +00:00
// NodeUpdate contains all the updates that were committed.
NodeUpdate map [ string ] [ ] * Allocation
2015-07-29 00:49:45 +00:00
// NodeAllocation contains all the allocations that were committed.
// FullCommit looks entries up here using the keys of Plan.NodeAllocation.
2015-08-04 23:32:46 +00:00
NodeAllocation map [ string ] [ ] * Allocation
2015-07-29 00:49:45 +00:00
2017-07-06 04:26:04 +00:00
// Deployment is the deployment that was committed.
Deployment * Deployment
2017-08-07 21:13:05 +00:00
// DeploymentUpdates is the set of deployment updates that were committed.
2017-07-06 04:26:04 +00:00
DeploymentUpdates [ ] * DeploymentStatusUpdate
2015-07-29 00:49:45 +00:00
// RefreshIndex is the index the worker should refresh state up to.
// This allows all evictions and allocations to be materialized.
// If any allocations were rejected due to stale data (node state,
// over committed) this can be used to force a worker refresh.
2015-07-28 23:36:15 +00:00
RefreshIndex uint64
2015-07-29 00:49:45 +00:00
// AllocIndex is the Raft index in which the evictions and
// allocations took place. This is used for the write index.
2015-07-27 22:31:49 +00:00
AllocIndex uint64
2015-07-27 21:59:16 +00:00
}
2015-08-26 00:36:52 +00:00
// IsNoOp checks if this plan result would do nothing
func ( p * PlanResult ) IsNoOp ( ) bool {
2017-07-06 04:26:04 +00:00
return len ( p . NodeUpdate ) == 0 && len ( p . NodeAllocation ) == 0 &&
len ( p . DeploymentUpdates ) == 0 && p . Deployment == nil
2015-08-26 00:36:52 +00:00
}
2015-08-13 22:17:24 +00:00
// FullCommit is used to check if all the allocations in a plan
// were committed as part of the result. It returns whether the counts
// match, followed by the number of expected (planned) allocations and the
// number of actual (committed) allocations.
//
// Only nodes present in plan.NodeAllocation are considered; entries in the
// result for other nodes do not contribute to the actual count.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	for name, allocList := range plan.NodeAllocation {
		// A node missing from the result yields a nil slice, which has
		// length zero, so no presence check is needed.
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}
2016-05-05 18:21:58 +00:00
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
// DesiredTGUpdates is the set of desired updates per task group.
// NOTE(review): the map key is presumably the task group name — confirm.
DesiredTGUpdates map [ string ] * DesiredUpdates
}
// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity. Every field is an unsigned
// count; GoString renders all of them on a single line.
type DesiredUpdates struct {
Ignore uint64
Place uint64
Migrate uint64
Stop uint64
InPlaceUpdate uint64
DestructiveUpdate uint64
2017-05-23 20:02:47 +00:00
Canary uint64
2016-05-05 18:21:58 +00:00
}
2017-07-07 23:49:08 +00:00
// GoString renders the counts as one line of "(label count)" groups in the
// order place, inplace, destructive, stop, migrate, ignore, canary. The
// method has the signature of fmt.GoStringer, which fmt uses for %#v.
func (d *DesiredUpdates) GoString() string {
	const layout = "(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)"
	return fmt.Sprintf(layout,
		d.Place, d.InPlaceUpdate, d.DestructiveUpdate,
		d.Stop, d.Migrate, d.Ignore, d.Canary)
}
2015-06-05 22:21:17 +00:00
// msgpackHandle is a shared handle for encoding/decoding of structs
2015-11-18 23:16:42 +00:00
var MsgpackHandle = func ( ) * codec . MsgpackHandle {
2015-11-16 22:25:19 +00:00
h := & codec . MsgpackHandle { RawToString : true }
// Sets the default type for decoding a map into a nil interface{}.
// This is necessary in particular because we store the driver configs as a
// nil interface{}.
h . MapType = reflect . TypeOf ( map [ string ] interface { } ( nil ) )
return h
} ( )
2015-06-05 22:21:17 +00:00
2017-04-28 20:18:04 +00:00
var (
// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
// structs. The pretty handle will add indents for easier human consumption.
// Both set HTMLCharsAsIs; only the pretty handle sets Indent (to 4).
JsonHandle = & codec . JsonHandle {
HTMLCharsAsIs : true ,
}
JsonHandlePretty = & codec . JsonHandle {
HTMLCharsAsIs : true ,
Indent : 4 ,
}
)
2018-01-19 00:51:49 +00:00
// TODO Figure out if we can remove this. This is our fork that is just way
// behind. I feel like its original purpose was to pin at a stable version but
// now we can accomplish this with vendoring.
2016-02-21 02:05:17 +00:00
var HashiMsgpackHandle = func ( ) * hcodec . MsgpackHandle {
h := & hcodec . MsgpackHandle { RawToString : true }
// Sets the default type for decoding a map into a nil interface{}.
// This is necessary in particular because we store the driver configs as a
// nil interface{}.
h . MapType = reflect . TypeOf ( map [ string ] interface { } ( nil ) )
return h
} ( )
2015-06-05 22:21:17 +00:00
// Decode is used to decode a MsgPack encoded object
func Decode ( buf [ ] byte , out interface { } ) error {
2015-11-18 23:16:42 +00:00
return codec . NewDecoder ( bytes . NewReader ( buf ) , MsgpackHandle ) . Decode ( out )
2015-06-05 22:21:17 +00:00
}
// Encode is used to encode a MsgPack object with type prefix
func Encode ( t MessageType , msg interface { } ) ( [ ] byte , error ) {
var buf bytes . Buffer
buf . WriteByte ( uint8 ( t ) )
2015-11-18 23:16:42 +00:00
err := codec . NewEncoder ( & buf , MsgpackHandle ) . Encode ( msg )
2015-06-05 22:21:17 +00:00
return buf . Bytes ( ) , err
}
2016-10-17 17:48:04 +00:00
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
// NOTE(review): what the map keys and the int values in Keys represent is not
// visible here — confirm against the code that fills this in.
type KeyringResponse struct {
Messages map [ string ] string
Keys map [ string ] int
NumNodes int
}
// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
// Key is the key the requested operation applies to.
Key string
}
2016-10-23 01:08:30 +00:00
// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal. Only the error's message is kept, as a string.
type RecoverableError struct {
// Err is the error message; Error returns it unchanged.
Err string
2017-03-27 23:27:24 +00:00
// Recoverable is the flag reported by IsRecoverable.
Recoverable bool
2016-10-23 01:08:30 +00:00
}
// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
2017-01-14 00:46:08 +00:00
func NewRecoverableError ( e error , recoverable bool ) error {
2016-10-23 01:08:30 +00:00
if e == nil {
return nil
}
return & RecoverableError {
Err : e . Error ( ) ,
2017-03-27 23:27:24 +00:00
Recoverable : recoverable ,
2016-10-23 01:08:30 +00:00
}
}
2017-03-27 22:37:15 +00:00
// WrapRecoverable wraps an existing error in a new RecoverableError with a new
// message. If the error was recoverable before the returned error is as well;
// otherwise it is unrecoverable.
func WrapRecoverable ( msg string , err error ) error {
2017-03-27 23:27:24 +00:00
return & RecoverableError { Err : msg , Recoverable : IsRecoverable ( err ) }
2017-03-27 22:37:15 +00:00
}
2016-10-23 01:08:30 +00:00
// Error returns the stored error message, satisfying the error interface.
func ( r * RecoverableError ) Error ( ) string {
return r . Err
}
2017-01-14 00:46:08 +00:00
2017-03-27 23:27:24 +00:00
// IsRecoverable returns the value of the Recoverable field. Together with
// Error it makes *RecoverableError satisfy the Recoverable interface.
func ( r * RecoverableError ) IsRecoverable ( ) bool {
return r . Recoverable
2017-03-24 22:26:05 +00:00
}
// Recoverable is an interface for errors to implement to indicate whether or
// not they are fatal or recoverable.
type Recoverable interface {
error
2017-03-27 23:27:24 +00:00
// IsRecoverable reports whether the error is recoverable.
IsRecoverable ( ) bool
2017-03-24 22:26:05 +00:00
}
2017-01-14 00:46:08 +00:00
// IsRecoverable returns true if error is a RecoverableError with
// Recoverable=true. Otherwise false is returned.
func IsRecoverable ( e error ) bool {
2017-03-24 22:26:05 +00:00
if re , ok := e . ( Recoverable ) ; ok {
2017-03-27 23:27:24 +00:00
return re . IsRecoverable ( )
2017-01-14 00:46:08 +00:00
}
return false
}
2017-08-07 21:21:00 +00:00
2018-03-13 22:09:03 +00:00
// WrappedServerError wraps an error and satisfies
// both the Recoverable and the ServerSideError interfaces.
type WrappedServerError struct {
2018-03-14 01:49:01 +00:00
// Err is the wrapped error; Error and IsRecoverable delegate to it.
Err error
2018-03-13 22:09:03 +00:00
}
// NewWrappedServerError is used to create a wrapped server side error.
//
// A nil e yields a nil error, matching NewRecoverableError. Wrapping nil
// would otherwise hand callers a non-nil error with nothing inside it, so a
// successful result would be reported as a failure.
func NewWrappedServerError(e error) error {
	if e == nil {
		// Return the literal nil so the error interface itself is nil.
		return nil
	}
	return &WrappedServerError{
		Err: e,
	}
}
// IsRecoverable reports whether the wrapped error is recoverable, as decided
// by the package-level IsRecoverable function.
func ( r * WrappedServerError ) IsRecoverable ( ) bool {
return IsRecoverable ( r . Err )
}
// Error returns the message of the wrapped error.
//
// A WrappedServerError that wraps nothing (nil receiver or nil Err) returns
// "<nil>" rather than panicking on the nil interface call.
func (r *WrappedServerError) Error() string {
	if r == nil || r.Err == nil {
		return "<nil>"
	}
	return r.Err.Error()
}
// IsServerSide always returns true: every WrappedServerError is a server side
// error. It provides the IsServerSide method of the ServerSideError interface.
func ( r * WrappedServerError ) IsServerSide ( ) bool {
return true
}
2018-03-13 23:25:41 +00:00
// ServerSideError is an interface for errors to implement to indicate
2018-03-14 01:49:01 +00:00
// errors occurring after the request makes it to a server.
2018-03-13 22:09:03 +00:00
type ServerSideError interface {
error
// IsServerSide reports whether the error is a server side error.
IsServerSide ( ) bool
}
// IsServerSide reports whether e implements ServerSideError and its
// IsServerSide method returns true. Any other error, including nil, is
// reported as not server side.
func IsServerSide(e error) bool {
	se, ok := e.(ServerSideError)
	return ok && se.IsServerSide()
}
2017-08-07 21:21:00 +00:00
// ACLPolicy is used to represent an ACL policy. Validate checks Name against
// validPolicyName, parses Rules with acl.Parse and limits the length of
// Description to maxPolicyDescriptionLength.
type ACLPolicy struct {
2017-08-08 00:10:04 +00:00
Name string // Unique name
Description string // Human readable
Rules string // HCL or JSON format
2017-08-30 16:58:42 +00:00
Hash [ ] byte // Set by SetHash from Name, Description and Rules
2017-08-08 00:10:04 +00:00
CreateIndex uint64
ModifyIndex uint64
}
2017-08-07 21:21:00 +00:00
2017-08-30 16:58:42 +00:00
// SetHash computes a 256bit Blake2 hash (32 bytes) over the user set fields
// of the policy, stores it in Hash and returns it. The fields are fed to the
// hash in the order Name, Description, Rules. It panics if the hash cannot
// be created.
func (a *ACLPolicy) SetHash() []byte {
	hasher, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields, in a fixed order.
	for _, field := range []string{a.Name, a.Description, a.Rules} {
		hasher.Write([]byte(field))
	}

	// Finalize, record and return the hash.
	a.Hash = hasher.Sum(nil)
	return a.Hash
}
2017-08-08 22:19:59 +00:00
func ( a * ACLPolicy ) Stub ( ) * ACLPolicyListStub {
return & ACLPolicyListStub {
Name : a . Name ,
Description : a . Description ,
2017-08-30 16:58:42 +00:00
Hash : a . Hash ,
2017-08-08 22:19:59 +00:00
CreateIndex : a . CreateIndex ,
ModifyIndex : a . ModifyIndex ,
}
}
2017-08-12 21:11:49 +00:00
// Validate checks the policy and returns every problem found, combined into
// one multierror, or nil when there is none. The checks run in this order:
// the name must match validPolicyName, the rules must parse with acl.Parse,
// and the description must not exceed maxPolicyDescriptionLength.
func (a *ACLPolicy) Validate() error {
	var result multierror.Error
	fail := func(format string, args ...interface{}) {
		result.Errors = append(result.Errors, fmt.Errorf(format, args...))
	}

	if !validPolicyName.MatchString(a.Name) {
		fail("invalid name '%s'", a.Name)
	}
	if _, err := acl.Parse(a.Rules); err != nil {
		fail("failed to parse rules: %v", err)
	}
	if len(a.Description) > maxPolicyDescriptionLength {
		fail("description longer than %d", maxPolicyDescriptionLength)
	}
	return result.ErrorOrNil()
}
2017-08-08 00:10:04 +00:00
// ACLPolicyListStub is used for listing ACL policies. It is built by
// ACLPolicy.Stub and carries every ACLPolicy field except Rules.
type ACLPolicyListStub struct {
Name string
Description string
2017-08-30 16:58:42 +00:00
Hash [ ] byte
2017-08-07 21:21:00 +00:00
CreateIndex uint64
ModifyIndex uint64
}
2017-08-08 00:10:04 +00:00
// ACLPolicyListRequest is used to request a list of policies. It has no
// fields of its own beyond the embedded QueryOptions.
type ACLPolicyListRequest struct {
QueryOptions
}
// ACLPolicySpecificRequest is used to query a specific policy
type ACLPolicySpecificRequest struct {
// Name is the name of the policy being queried.
Name string
QueryOptions
}
2017-08-20 22:21:59 +00:00
// ACLPolicySetRequest is used to query a set of policies
type ACLPolicySetRequest struct {
// Names lists the names of the policies being queried.
Names [ ] string
QueryOptions
}
2017-08-08 00:10:04 +00:00
// ACLPolicyListResponse is used for a list request
type ACLPolicyListResponse struct {
// Policies holds the listed policies in their stub form.
Policies [ ] * ACLPolicyListStub
QueryMeta
}
// SingleACLPolicyResponse is used to return a single policy
type SingleACLPolicyResponse struct {
// Policy is the returned policy.
// NOTE(review): presumably nil when no policy matched — confirm.
Policy * ACLPolicy
QueryMeta
}
2017-08-08 03:53:07 +00:00
2017-08-20 22:21:59 +00:00
// ACLPolicySetResponse is used to return a set of policies
type ACLPolicySetResponse struct {
// Policies holds the returned policies.
// NOTE(review): presumably keyed by policy name — confirm.
Policies map [ string ] * ACLPolicy
QueryMeta
}
2017-08-08 03:53:07 +00:00
// ACLPolicyDeleteRequest is used to delete a set of policies
type ACLPolicyDeleteRequest struct {
// Names lists the names of the policies to delete.
Names [ ] string
WriteRequest
}
2017-08-08 04:01:14 +00:00
// ACLPolicyUpsertRequest is used to upsert a set of policies
type ACLPolicyUpsertRequest struct {
// Policies holds the policies to upsert.
Policies [ ] * ACLPolicy
WriteRequest
}
2017-08-12 21:36:10 +00:00
// ACLToken represents a client token which is used to Authenticate.
// Validate enforces that a client token has at least one policy and that a
// management token has none.
type ACLToken struct {
2017-08-30 16:58:42 +00:00
AccessorID string // Public Accessor ID (UUID)
SecretID string // Secret ID, private (UUID)
Name string // Human friendly name
Type string // Client or Management
Policies [ ] string // Policies this token ties to
Global bool // Global or Region local
Hash [ ] byte // Set by SetHash from Name, Type, Policies and Global
2017-08-12 21:36:10 +00:00
CreateTime time . Time // Time of creation
CreateIndex uint64
ModifyIndex uint64
}
2017-08-20 01:29:04 +00:00
var (
// AnonymousACLToken is used when no SecretID is provided, and the
// request is made anonymously. It is a client token tied to the single
// policy named "anonymous" and is not global.
AnonymousACLToken = & ACLToken {
AccessorID : "anonymous" ,
Name : "Anonymous Token" ,
Type : ACLClientToken ,
Policies : [ ] string { "anonymous" } ,
Global : false ,
}
)
2017-08-12 23:08:00 +00:00
// ACLTokenListStub is the list form of an ACLToken, built by ACLToken.Stub.
// It carries every ACLToken field except SecretID.
type ACLTokenListStub struct {
AccessorID string
Name string
Type string
Policies [ ] string
Global bool
2017-08-30 16:58:42 +00:00
Hash [ ] byte
2017-08-12 23:08:00 +00:00
CreateTime time . Time
CreateIndex uint64
ModifyIndex uint64
}
2017-08-30 16:58:42 +00:00
// SetHash computes a 256bit Blake2 hash (32 bytes) over the user set fields
// of the token, stores it in Hash and returns it. The hash input is Name,
// Type, each entry of Policies in order, and then the literal "global" or
// "local" depending on Global. It panics if the hash cannot be created.
func (a *ACLToken) SetHash() []byte {
	hasher, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields.
	hasher.Write([]byte(a.Name))
	hasher.Write([]byte(a.Type))
	for i := range a.Policies {
		hasher.Write([]byte(a.Policies[i]))
	}

	scope := "local"
	if a.Global {
		scope = "global"
	}
	hasher.Write([]byte(scope))

	// Finalize, record and return the hash.
	a.Hash = hasher.Sum(nil)
	return a.Hash
}
2017-08-12 23:08:00 +00:00
func ( a * ACLToken ) Stub ( ) * ACLTokenListStub {
return & ACLTokenListStub {
AccessorID : a . AccessorID ,
Name : a . Name ,
Type : a . Type ,
Policies : a . Policies ,
Global : a . Global ,
2017-08-30 16:58:42 +00:00
Hash : a . Hash ,
2017-08-12 23:08:00 +00:00
CreateTime : a . CreateTime ,
CreateIndex : a . CreateIndex ,
ModifyIndex : a . ModifyIndex ,
}
}
2017-08-12 21:36:10 +00:00
// Validate sanity checks the token and returns every problem found, combined
// into one multierror, or nil when there is none. The name must not exceed
// maxTokenNameLength, a client token needs at least one policy, a management
// token must have none, and any other type is rejected.
func (a *ACLToken) Validate() error {
	var result multierror.Error
	add := func(err error) {
		result.Errors = append(result.Errors, err)
	}

	if len(a.Name) > maxTokenNameLength {
		add(fmt.Errorf("token name too long"))
	}

	policyCount := len(a.Policies)
	switch {
	case a.Type == ACLClientToken:
		if policyCount == 0 {
			add(fmt.Errorf("client token missing policies"))
		}
	case a.Type == ACLManagementToken:
		if policyCount != 0 {
			add(fmt.Errorf("management token cannot be associated with policies"))
		}
	default:
		add(fmt.Errorf("token type must be client or management"))
	}
	return result.ErrorOrNil()
}
2017-08-12 22:44:05 +00:00
2017-08-22 00:45:11 +00:00
// PolicySubset reports whether every entry of policies is one of the token's
// own policies. Management tokens are treated as a superset of all policies
// and always return true; an empty policies list is a subset of any token.
func (a *ACLToken) PolicySubset(policies []string) bool {
	// Hot-path the management tokens, superset of all policies.
	if a.Type == ACLManagementToken {
		return true
	}

	// Index the token's policies for constant-time membership checks.
	owned := make(map[string]bool, len(a.Policies))
	for i := range a.Policies {
		owned[a.Policies[i]] = true
	}

	for _, wanted := range policies {
		if !owned[wanted] {
			return false
		}
	}
	return true
}
2017-08-12 22:44:05 +00:00
// ACLTokenListRequest is used to request a list of tokens
type ACLTokenListRequest struct {
2017-08-13 23:32:46 +00:00
// NOTE(review): GlobalOnly presumably restricts the listing to global
// tokens — confirm against the endpoint that reads it.
GlobalOnly bool
2017-08-12 22:44:05 +00:00
QueryOptions
}
// ACLTokenSpecificRequest is used to query a specific token
type ACLTokenSpecificRequest struct {
// AccessorID is the accessor ID of the token being queried.
AccessorID string
QueryOptions
}
2017-08-20 22:21:59 +00:00
// ACLTokenSetRequest is used to query a set of tokens
type ACLTokenSetRequest struct {
// AccessorIDS lists the accessor IDs of the tokens being queried.
// NOTE(review): spelled AccessorIDS here but AccessorIDs on
// ACLTokenDeleteRequest.
AccessorIDS [ ] string
QueryOptions
}
2017-08-12 22:44:05 +00:00
// ACLTokenListResponse is used for a list request
type ACLTokenListResponse struct {
2017-08-12 23:08:00 +00:00
// Tokens holds the listed tokens in their stub form.
Tokens [ ] * ACLTokenListStub
2017-08-12 22:44:05 +00:00
QueryMeta
}
// SingleACLTokenResponse is used to return a single token
type SingleACLTokenResponse struct {
// Token is the returned token.
// NOTE(review): presumably nil when no token matched — confirm.
Token * ACLToken
QueryMeta
}
2017-08-20 22:21:59 +00:00
// ACLTokenSetResponse is used to return a set of tokens
type ACLTokenSetResponse struct {
Tokens map [ string ] * ACLToken // Keyed by Accessor ID
QueryMeta
}
2017-08-20 21:53:51 +00:00
// ResolveACLTokenRequest is used to resolve a specific token
type ResolveACLTokenRequest struct {
// SecretID is the secret ID of the token to resolve.
SecretID string
QueryOptions
}
// ResolveACLTokenResponse is used to return the token that a
// ResolveACLTokenRequest resolved to
type ResolveACLTokenResponse struct {
// Token is the resolved token.
// NOTE(review): presumably nil when the secret ID matched nothing — confirm.
Token * ACLToken
QueryMeta
}
2017-08-12 22:44:05 +00:00
// ACLTokenDeleteRequest is used to delete a set of tokens
type ACLTokenDeleteRequest struct {
// AccessorIDs lists the accessor IDs of the tokens to delete.
AccessorIDs [ ] string
WriteRequest
}
2017-08-21 01:19:26 +00:00
// ACLTokenBootstrapRequest is used to bootstrap ACLs. Token is filled in on
// the server side rather than supplied by the client.
type ACLTokenBootstrapRequest struct {
2017-09-10 23:03:30 +00:00
Token * ACLToken // Not client specifiable
ResetIndex uint64 // Reset index is used to clear the bootstrap token
2017-08-21 01:19:26 +00:00
WriteRequest
}
2017-08-12 22:44:05 +00:00
// ACLTokenUpsertRequest is used to upsert a set of tokens
type ACLTokenUpsertRequest struct {
// Tokens holds the tokens to upsert.
Tokens [ ] * ACLToken
WriteRequest
}
2017-08-12 23:29:11 +00:00
// ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
type ACLTokenUpsertResponse struct {
// Tokens holds the tokens returned for the upsert.
Tokens [ ] * ACLToken
WriteMeta
}