From 78d7c2bbeefff9299db328a6c85fe09d0f2b3746 Mon Sep 17 00:00:00 2001
From: Charlie Voiselle <464492+angrycub@users.noreply.github.com>
Date: Mon, 20 Nov 2023 14:06:51 -0500
Subject: [PATCH] backport of commit 659c0945fc35a2a0a999be7ce531beb55cfeeeab
 (#19004) (#19129)

[core] Honor job's namespace when checking `distinct_hosts` feasibility

Manual backport because of conflicts in upgrade-specific.mdx
---
 .changelog/19004.txt                          |  3 +
 scheduler/feasible.go                         |  7 +-
 scheduler/feasible_test.go                    | 16 +++-
 .../docs/job-specification/constraint.mdx     | 12 ++-
 .../content/docs/upgrade/upgrade-specific.mdx | 80 ++++++++++++++++---
 5 files changed, 98 insertions(+), 20 deletions(-)
 create mode 100644 .changelog/19004.txt

diff --git a/.changelog/19004.txt b/.changelog/19004.txt
new file mode 100644
index 000000000..e9bf238cd
--- /dev/null
+++ b/.changelog/19004.txt
@@ -0,0 +1,3 @@
+```release-note:breaking-change
+core: Honor job's namespace when checking `distinct_hosts` feasibility
+```
diff --git a/scheduler/feasible.go b/scheduler/feasible.go
index 60b312c98..bf4f5635b 100644
--- a/scheduler/feasible.go
+++ b/scheduler/feasible.go
@@ -625,11 +625,12 @@ func (iter *DistinctHostsIterator) satisfiesDistinctHosts(option *structs.Node)
 	// Skip the node if the task group has already been allocated on it.
 	for _, alloc := range proposed {
-		// If the job has a distinct_hosts constraint we only need an alloc
-		// collision on the JobID but if the constraint is on the TaskGroup then
+		// If the job has a distinct_hosts constraint we need an alloc collision
+		// on the Namespace,JobID but if the constraint is on the TaskGroup then
 		// we need both a job and TaskGroup collision.
-		jobCollision := alloc.JobID == iter.job.ID
+		jobCollision := alloc.JobID == iter.job.ID && alloc.Namespace == iter.job.Namespace
 		taskCollision := alloc.TaskGroup == iter.tg.Name
+
 		if iter.jobDistinctHosts && jobCollision || jobCollision && taskCollision {
 			return false
 		}
diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go
index 8a212bfaf..69cfa0b2e 100644
--- a/scheduler/feasible_test.go
+++ b/scheduler/feasible_test.go
@@ -1465,8 +1465,12 @@ func TestDistinctHostsIterator_JobDistinctHosts_Table(t *testing.T) {
 	na := make([]*structs.Allocation, len(js))
 	for i, j := range js {
 		allocID := uuid.Generate()
+		ns := structs.DefaultNamespace
+		if j.Namespace != "" {
+			ns = j.Namespace
+		}
 		na[i] = &structs.Allocation{
-			Namespace: structs.DefaultNamespace,
+			Namespace: ns,
 			TaskGroup: j.TaskGroups[0].Name,
 			JobID:     j.ID,
 			Job:       j,
@@ -1522,16 +1526,20 @@ func TestDistinctHostsIterator_JobDistinctHosts_Table(t *testing.T) {
 			j := job.Copy()
 			j.Constraints[0].RTarget = tc.RTarget
+			// This job has all the same identifiers as the first; however, it is
+			// placed in a different namespace to ensure that it doesn't interact
+			// with the feasibility of this placement.
 			oj := j.Copy()
-			oj.ID = "otherJob"
+			oj.Namespace = "different"
 			plan := ctx.Plan()
 			// Add allocations so that some of the nodes will be ineligible
 			// to receive the job when the distinct_hosts constraint
 			// is active. This will require the job be placed on n3.
 			//
-			// Another job is placed on all of the nodes to ensure that there
-			// are no unexpected interactions.
+			// Another job (oj) is placed on all of the nodes to ensure that
+			// there are no unexpected interactions between namespaces.
+			plan.NodeAllocation[n1.ID] = makeJobAllocs([]*structs.Job{j, oj})
 			plan.NodeAllocation[n2.ID] = makeJobAllocs([]*structs.Job{j, oj})
 			plan.NodeAllocation[n3.ID] = makeJobAllocs([]*structs.Job{oj})
diff --git a/website/content/docs/job-specification/constraint.mdx b/website/content/docs/job-specification/constraint.mdx
index 3f2f947f5..565adf7c8 100644
--- a/website/content/docs/job-specification/constraint.mdx
+++ b/website/content/docs/job-specification/constraint.mdx
@@ -22,10 +22,14 @@ filter on [attributes][interpolation] or [client metadata][client-meta].
 Additionally constraints may be specified at the [job][job], [group][group],
 or [task][task] levels for ultimate flexibility.
 
-~> **It is possible to define irreconcilable constraints in a job.**
-For example, because all [tasks within a group are scheduled on the same client node][group],
-specifying different [`${attr.unique.hostname}`][node-variables] constraints at
-the task level will cause a job to be unplaceable.
+
+
+**It is possible to define irreconcilable constraints in a job.**
+
+For example, specifying different [`${attr.unique.hostname}`][node-variables]
+constraints at the task level will cause a job to be unplaceable because all
+[tasks within a group are scheduled on the same client node][group].
+
 
 ```hcl
 job "docs" {
diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx
index 8fe6ae1a0..abc7f1504 100644
--- a/website/content/docs/upgrade/upgrade-specific.mdx
+++ b/website/content/docs/upgrade/upgrade-specific.mdx
@@ -13,6 +13,21 @@ upgrade. However, specific versions of Nomad may have more details provided for
 their upgrades as a result of new features or changed behavior. This page is
 used to document those details separately from the standard upgrade flow.
 
+## Nomad 1.6.4, 1.5.11
+
+#### The `distinct_hosts` Constraint Now Honors Namespaces
+
+Nomad 1.6.4 and 1.5.11 change the behavior of the [`distinct_hosts`][]
+constraint such that namespaces are taken into account when choosing feasible
+clients for allocation placement. The previous, less intuitive behavior was
+that **any** job with the same name running on a client, regardless of its
+namespace, caused that node to be considered infeasible.
+
+This change allows workloads that formerly did not colocate to be scheduled
+onto the same client when they are in different namespaces. To prevent this,
+consider using [node pools] and constraining the jobs with a
+[`distinct_property`][] constraint over [`${node.pool}`][node_attributes].
+
 ## Nomad 1.6.0
 
 #### Enterprise License Validation with BuildDate
@@ -213,7 +228,7 @@ allocation's task directory.
 In an effort to improve the resilience and security model of the Nomad Client,
 in 1.5.0 artifact downloads occur in a sub-process. Where possible, that
 sub-process is run as the `nobody` user, and on modern Linux systems will
-be isolated from the filesystem via the kernel's [landlock] capabilitiy.
+be isolated from the filesystem via the kernel's [landlock] capability.
 
 Operators are encouraged to ensure jobs making use of artifacts continue to
 work as expected. In particular, git-ssh users will need to make sure the system-wide
@@ -1795,43 +1810,90 @@ draining the node so no tasks are running on it. This can be verified by running
 state. Once that is done the client can be killed, the `data_dir` should be
 deleted and then Nomad 0.3.0 can be launched.
 
+[`allow_caps`]: /nomad/docs/drivers/docker#allow_caps
+[`cap_net_raw`]: https://security.stackexchange.com/a/128988
+[`consul.allow_unauthenticated`]: /nomad/docs/configuration/consul#allow_unauthenticated
+[`distinct_hosts`]: /nomad/docs/job-specification/constraint#distinct_hosts
+[`distinct_property`]: /nomad/docs/job-specification/constraint#distinct_property
+[`extra_hosts`]: /nomad/docs/drivers/docker#extra_hosts
+[`linux capabilities`]: https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities
+[`Local`]: /consul/docs/security/acl/acl-tokens#token-attributes
+[`log_file`]: /nomad/docs/configuration#log_file
+[`raft protocol`]: /nomad/docs/configuration/server#raft_protocol
+[`rejoin_after_leave`]: /nomad/docs/configuration/server#rejoin_after_leave
+[`sidecar_task.config`]: /nomad/docs/job-specification/sidecar_task#config
+[`template.disable_file_sandbox`]: /nomad/docs/configuration/client#template-parameters
+[`vault.allow_unauthenticated`]: /nomad/docs/configuration/vault#allow_unauthenticated
+[`vault.policies`]: /nomad/docs/job-specification/vault#policies
+[`vault.task_token_ttl`]: /nomad/docs/configuration/vault#task_token_ttl
+[`vault.token`]: /nomad/docs/configuration/vault#token
+[`volume create`]: /nomad/docs/commands/volume/create
+[`volume register`]: /nomad/docs/commands/volume/register
+[`volume`]: /nomad/docs/job-specification/volume
+[alloc_overlap]: https://github.com/hashicorp/nomad/issues/10440
+[allow_caps_exec]: /nomad/docs/drivers/exec#allow_caps
+[allow_caps_java]: /nomad/docs/drivers/java#allow_caps
+[anon_token]: /consul/docs/security/acl/acl-tokens#special-purpose-tokens
 [api_jobs_parse]: /nomad/api-docs/jobs#parse-job
-[artifacts]: /nomad/docs/job-specification/artifact
+[artifact_env]: /nomad/docs/configuration/client#set_environment_variables
+[artifact_fs_isolation]: /nomad/docs/configuration/client#disable_filesystem_isolation
 [artifact_params]: /nomad/docs/job-specification/artifact#artifact-parameters
-[cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
+[artifacts]: /nomad/docs/job-specification/artifact
+[cap_add_exec]: /nomad/docs/drivers/exec#cap_add
+[cap_drop_exec]: /nomad/docs/drivers/exec#cap_drop
 [cgroup_parent]: /nomad/docs/configuration/client#cgroup_parent
+[cgroups2]: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
 [client_artifact]: /nomad/docs/configuration/client#artifact-parameters
+[consul_acl]: https://github.com/hashicorp/consul/issues/7414
 [cores]: /nomad/docs/job-specification/resources#cores
+[cpu]: /nomad/docs/concepts/cpu
+[dangling_container_reconciliation]: /nomad/docs/drivers/docker#enabled
 [dangling-containers]: /nomad/docs/drivers/docker#dangling-containers
+[decompression_file_count_limit]: /nomad/docs/configuration/client#decompression_file_count_limit
+[decompression_size_limit]: /nomad/docs/configuration/client#decompression_size_limit
 [drain-api]: /nomad/api-docs/nodes#drain-node
 [drain-cli]: /nomad/docs/commands/node/drain
 [dst]: /nomad/docs/job-specification/periodic#daylight-saving-time
+[enterprise licensing]: /nomad/docs/enterprise/license
 [envoy_concurrency]: https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency
+[gh_10446]: https://github.com/hashicorp/nomad/pull/10446#issuecomment-1224833906
+[gh_issue]: https://github.com/hashicorp/nomad/issues/new/choose
+[gh-10875]: https://github.com/hashicorp/nomad/pull/10875
+[gh-11563]: https://github.com/hashicorp/nomad/issues/11563
 [gh-6787]: https://github.com/hashicorp/nomad/issues/6787
 [gh-8457]: https://github.com/hashicorp/nomad/issues/8457
 [gh-9148]: https://github.com/hashicorp/nomad/issues/9148
-[gh-10875]: https://github.com/hashicorp/nomad/pull/10875
-[gh-11563]: https://github.com/hashicorp/nomad/issues/11563
 [go-client]: https://pkg.go.dev/github.com/hashicorp/nomad/api#Client
+[hard_guide]: /nomad/docs/install/production/requirements#hardening-nomad
 [hcl2]: https://github.com/hashicorp/hcl2
+[kill_timeout]: /nomad/docs/job-specification/task#kill_timeout
+[landlock]: https://docs.kernel.org/userspace-api/landlock.html
 [limits]: /nomad/docs/configuration#limits
 [lxc]: /nomad/plugins/drivers/community/lxc
+[max_kill_timeout]: /nomad/docs/configuration/client#max_kill_timeout
 [migrate]: /nomad/docs/job-specification/migrate
+[Migrating to Using Workload Identity with Consul]: /nomad/docs/integrations/consul-integration#migrating-to-using-workload-identity-with-consul
+[Migrating to Using Workload Identity with Vault]: /nomad/docs/integrations/vault-integration#migrating-to-using-workload-identity-with-vault
+[no_net_raw]: /nomad/docs/upgrade/upgrade-specific#nomad-1-1-0-rc1-1-0-5-0-12-12
+[node drain]: /nomad/docs/upgrade#5-upgrade-clients
+[node pools]: /nomad/docs/concepts/node-pools
+[node_attributes]: /nomad/docs/runtime/interpolation#node-attributes
 [nvidia]: /nomad/plugins/devices/nvidia
+[pki]: /vault/docs/secrets/pki
 [plugin-block]: /nomad/docs/configuration/plugin
 [plugins]: /nomad/plugins/drivers/community
 [preemption-api]: /nomad/api-docs/operator#update-scheduler-configuration
 [preemption]: /nomad/docs/concepts/scheduling/preemption
 [proxy_concurrency]: /nomad/docs/job-specification/sidecar_task#proxy_concurrency
-[`sidecar_task.config`]: /nomad/docs/job-specification/sidecar_task#config
-[`raft_protocol`]: /nomad/docs/configuration/server#raft_protocol
-[`raft protocol`]: /nomad/docs/configuration/server#raft_protocol
-[`rejoin_after_leave`]: /nomad/docs/configuration/server#rejoin_after_leave
 [reserved]: /nomad/docs/configuration/client#reserved-parameters
 [task-config]: /nomad/docs/job-specification/task#config
+[template_gid]: /nomad/docs/job-specification/template#gid
+[template_uid]: /nomad/docs/job-specification/template#uid
 [tls-guide]: /nomad/tutorials/transport-security/security-enable-tls
 [tls-vault-guide]: /nomad/tutorials/integrate-vault/vault-pki-nomad
 [update]: /nomad/docs/job-specification/update
+[upgrade process]: /nomad/docs/upgrade#upgrade-process
+[Upgrading to Raft Protocol 3]: /nomad/docs/upgrade#upgrading-to-raft-protocol-3
 [validate]: /nomad/docs/commands/job/validate
 [vault_grace]: /nomad/docs/job-specification/template
 [node drain]: /nomad/docs/upgrade#5-upgrade-clients
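The upgrade note added in this patch recommends node pools together with a
`distinct_property` constraint over `${node.pool}` for workloads that should
keep not colocating after this change. A minimal sketch of that job-level
constraint follows; the job name, namespace, datacenter, image, and the
assumption that the cluster's clients are already divided into node pools are
illustrative and not part of this patch.

```hcl
# Hypothetical job spec illustrating the mitigation suggested in the new
# upgrade note. All names and values below are placeholders.
job "example" {
  namespace   = "prod"
  datacenters = ["dc1"]

  # Allow at most one allocation of this job per distinct node.pool value,
  # assuming clients are already grouped into node pools.
  constraint {
    operator  = "distinct_property"
    attribute = "${node.pool}"
  }

  group "app" {
    count = 2

    task "server" {
      driver = "docker"

      config {
        image = "nginx:1.25"
      }
    }
  }
}
```

If one allocation per pool is too strict, the optional `value` parameter of
`distinct_property` can be set to allow a fixed number of allocations to share
the same `node.pool` value.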