From 37834dffdadedca8190738e60f607f037af4b17c Mon Sep 17 00:00:00 2001 From: jmwilkinson Date: Thu, 2 Feb 2023 06:57:45 -0800 Subject: [PATCH] Allow wildcard datacenters to be specified in job file (#11170) Also allows for default value of `datacenters = ["*"]` --- .changelog/11170.txt | 7 +++ command/agent/command.go | 4 +- command/agent/command_test.go | 7 ++- command/assets/connect-short.nomad | 1 - command/assets/connect.nomad | 5 +- command/assets/example-short.nomad | 1 - command/assets/example.nomad | 5 +- command/testdata/example-short-bad.json | 2 +- nomad/node_endpoint.go | 2 +- nomad/structs/structs.go | 9 +++ nomad/structs/structs_test.go | 6 +- scheduler/generic_sched_test.go | 10 ++-- scheduler/util.go | 15 +++-- scheduler/util_test.go | 55 +++++++++++++------ .../content/docs/job-specification/job.mdx | 6 +- .../content/docs/upgrade/upgrade-specific.mdx | 14 +++++ 16 files changed, 103 insertions(+), 46 deletions(-) create mode 100644 .changelog/11170.txt diff --git a/.changelog/11170.txt b/.changelog/11170.txt new file mode 100644 index 000000000..84b2273dc --- /dev/null +++ b/.changelog/11170.txt @@ -0,0 +1,7 @@ +```release-note:breaking-change +config: the `datacenter` field for agent configuration no longer accepts the `*` character as part of the datacenter name +``` + +```release-note:improvement +jobspec: the `datacenters` field now accepts wildcards +``` diff --git a/command/agent/command.go b/command/agent/command.go index f46c44d92..a8b773871 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -315,8 +315,8 @@ func (c *Command) IsValidConfig(config, cmdConfig *Config) bool { } // Check that the datacenter name does not contain invalid characters - if strings.ContainsAny(config.Datacenter, "\000") { - c.Ui.Error("Datacenter contains invalid characters") + if strings.ContainsAny(config.Datacenter, "\000*") { + c.Ui.Error("Datacenter contains invalid characters (null or '*')") return false } diff --git a/command/agent/command_test.go b/command/agent/command_test.go index 1e313518b..860dd174d 100644 --- a/command/agent/command_test.go +++ b/command/agent/command_test.go @@ -148,7 +148,7 @@ func TestCommand_MetaConfigValidation(t *testing.T) { } } -func TestCommand_NullCharInDatacenter(t *testing.T) { +func TestCommand_InvalidCharInDatacenter(t *testing.T) { ci.Parallel(t) tmpDir := t.TempDir() @@ -157,6 +157,9 @@ func TestCommand_NullCharInDatacenter(t *testing.T) { "char-\\000-in-the-middle", "ends-with-\\000", "\\000-at-the-beginning", + "char-*-in-the-middle", + "ends-with-*", + "*-at-the-beginning", } for _, tc := range tcases { configFile := filepath.Join(tmpDir, "conf1.hcl") @@ -188,7 +191,7 @@ func TestCommand_NullCharInDatacenter(t *testing.T) { } out := ui.ErrorWriter.String() - exp := "Datacenter contains invalid characters" + exp := "Datacenter contains invalid characters (null or '*')" if !strings.Contains(out, exp) { t.Fatalf("expect to find %q\n\n%s", exp, out) } diff --git a/command/assets/connect-short.nomad b/command/assets/connect-short.nomad index 1e44069bf..91a1948be 100644 --- a/command/assets/connect-short.nomad +++ b/command/assets/connect-short.nomad @@ -1,5 +1,4 @@ job "countdash" { - datacenters = ["dc1"] group "api" { network { diff --git a/command/assets/connect.nomad b/command/assets/connect.nomad index 8ac0c8dd7..6f785e478 100644 --- a/command/assets/connect.nomad +++ b/command/assets/connect.nomad @@ -17,8 +17,9 @@ job "countdash" { # region = "global" # # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = ["dc1"] + # be considered when placing this task. This accepts wildcards and defaults + # allowing placement on all datacenters. + datacenters = ["*"] # The "type" parameter controls the type of job, which impacts the scheduler's # decision on placement. This configuration is optional and defaults to diff --git a/command/assets/example-short.nomad b/command/assets/example-short.nomad index 4bb349c81..80bae974d 100644 --- a/command/assets/example-short.nomad +++ b/command/assets/example-short.nomad @@ -1,5 +1,4 @@ job "example" { - datacenters = ["dc1"] group "cache" { network { diff --git a/command/assets/example.nomad b/command/assets/example.nomad index 82b1bc100..92419752d 100644 --- a/command/assets/example.nomad +++ b/command/assets/example.nomad @@ -17,8 +17,9 @@ job "example" { # region = "global" # # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = ["dc1"] + # be considered when placing this task. This accepts wildcards and defaults + # allowing placement on all datacenters. + datacenters = ["*"] # The "type" parameter controls the type of job, which impacts the scheduler's # decision on placement. This configuration is optional and defaults to diff --git a/command/testdata/example-short-bad.json b/command/testdata/example-short-bad.json index 81a65ceee..8f6fcdb6d 100644 --- a/command/testdata/example-short-bad.json +++ b/command/testdata/example-short-bad.json @@ -2,7 +2,7 @@ "Job": { "Region": null, "Namespace": null, - "ID": "example", + "ID": "bad example", "Name": "example", "Type": null, "Priority": null, diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 38990cf4e..732b90289 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1621,7 +1621,7 @@ func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string, // datacenter cardinality tends to be low so the check // shouldn't add much work. for _, dc := range job.Datacenters { - if dc == node.Datacenter { + if node.IsInDC(dc) { sysJobs = append(sysJobs, job) break } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index d6ae0f318..ba300873e 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -44,6 +44,7 @@ import ( psstructs "github.com/hashicorp/nomad/plugins/shared/structs" "github.com/miekg/dns" "github.com/mitchellh/copystructure" + "github.com/ryanuber/go-glob" "golang.org/x/crypto/blake2b" "golang.org/x/exp/maps" "golang.org/x/exp/slices" @@ -2294,6 +2295,10 @@ func (n *Node) ComparableResources() *ComparableResources { } } +func (n *Node) IsInDC(dc string) bool { + return glob.Glob(dc, n.Datacenter) +} + // Stub returns a summarized version of the node func (n *Node) Stub(fields *NodeStubFields) *NodeListStub { @@ -4364,6 +4369,10 @@ func (j *Job) Canonicalize() { j.Namespace = DefaultNamespace } + if len(j.Datacenters) == 0 { + j.Datacenters = []string{"*"} + } + for _, tg := range j.TaskGroups { tg.Canonicalize(j) } diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 584b7c8a6..e283058dc 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -58,7 +58,7 @@ func TestJob_Validate(t *testing.T) { Name: "my-job", Type: JobTypeService, Priority: 50, - Datacenters: []string{"dc1"}, + Datacenters: []string{"*"}, TaskGroups: []*TaskGroup{ { Name: "web", @@ -91,7 +91,7 @@ func TestJob_Validate(t *testing.T) { "group 3 missing name", "Task group web validation failed", ) - // test for empty datacenters + // test for invalid datacenters j = &Job{ Datacenters: []string{""}, } @@ -372,7 +372,7 @@ func testJob() *Job { Type: JobTypeService, Priority: 50, AllAtOnce: false, - Datacenters: []string{"dc1"}, + Datacenters: []string{"*"}, Constraints: []*Constraint{ { LTarget: "$attr.kernel.name", diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 4b99f9e75..9017e707d 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/exp/slices" @@ -756,7 +757,7 @@ func TestServiceSched_Spread(t *testing.T) { remaining := uint8(100 - start) // Create a job that uses spread over data center job := mock.Job() - job.Datacenters = []string{"dc1", "dc2"} + job.Datacenters = []string{"dc*"} job.TaskGroups[0].Count = 10 job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads, &structs.Spread{ @@ -1107,10 +1108,9 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) { t.Fatalf("bad: %#v", metrics) } - // Check the available nodes - if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 { - t.Fatalf("bad: %#v", metrics) - } + _, ok = metrics.NodesAvailable["dc1"] + must.False(t, ok, must.Sprintf( + "expected NodesAvailable metric to be unpopulated when there are no nodes")) // Check queued allocations queued := outEval.QueuedAllocations["web"] diff --git a/scheduler/util.go b/scheduler/util.go index 4e7d2bd43..cbc0536b4 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -361,10 +361,7 @@ func diffSystemAllocs( // mapping of each data center to the count of ready nodes. func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]struct{}, map[string]int, error) { // Index the DCs - dcMap := make(map[string]int, len(dcs)) - for _, dc := range dcs { - dcMap[dc] = 0 - } + dcMap := make(map[string]int) // Scan the nodes ws := memdb.NewWatchSet() @@ -386,11 +383,13 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]str notReady[node.ID] = struct{}{} continue } - if _, ok := dcMap[node.Datacenter]; !ok { - continue + for _, dc := range dcs { + if node.IsInDC(dc) { + out = append(out, node) + dcMap[node.Datacenter]++ + break + } } - out = append(out, node) - dcMap[node.Datacenter]++ } return out, notReady, dcMap, nil } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index f85e38ebc..475c3d672 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/hashicorp/nomad/ci" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -560,25 +561,47 @@ func TestReadyNodesInDCs(t *testing.T) { node3.Datacenter = "dc2" node3.Status = structs.NodeStatusDown node4 := mock.DrainNode() + node5 := mock.Node() + node5.Datacenter = "not-this-dc" - require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) - require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1001, node2)) - require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1002, node3)) - require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1003, node4)) + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) // dc1 ready + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1001, node2)) // dc2 ready + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1002, node3)) // dc2 not ready + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1003, node4)) // dc2 not ready + must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1004, node5)) // ready never match - nodes, notReady, dc, err := readyNodesInDCs(state, []string{"dc1", "dc2"}) - require.NoError(t, err) - require.Equal(t, 2, len(nodes)) - require.NotEqual(t, node3.ID, nodes[0].ID) - require.NotEqual(t, node3.ID, nodes[1].ID) + testCases := []struct { + name string + datacenters []string + expectReady []*structs.Node + expectNotReady map[string]struct{} + expectIndex map[string]int + }{ + { + name: "no wildcards", + datacenters: []string{"dc1", "dc2"}, + expectReady: []*structs.Node{node1, node2}, + expectNotReady: map[string]struct{}{node3.ID: struct{}{}, node4.ID: struct{}{}}, + expectIndex: map[string]int{"dc1": 1, "dc2": 1}, + }, + { + name: "with wildcard", + datacenters: []string{"dc*"}, + expectReady: []*structs.Node{node1, node2}, + expectNotReady: map[string]struct{}{node3.ID: struct{}{}, node4.ID: struct{}{}}, + expectIndex: map[string]int{"dc1": 1, "dc2": 1}, + }, + } - require.Contains(t, dc, "dc1") - require.Equal(t, 1, dc["dc1"]) - require.Contains(t, dc, "dc2") - require.Equal(t, 1, dc["dc2"]) - - require.Contains(t, notReady, node3.ID) - require.Contains(t, notReady, node4.ID) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ready, notReady, dcIndex, err := readyNodesInDCs(state, tc.datacenters) + must.NoError(t, err) + must.SliceContainsAll(t, tc.expectReady, ready, must.Sprint("expected ready to match")) + must.Eq(t, tc.expectNotReady, notReady, must.Sprint("expected not-ready to match")) + must.Eq(t, tc.expectIndex, dcIndex, must.Sprint("expected datacenter counts to match")) + }) + } } func TestRetryMax(t *testing.T) { diff --git a/website/content/docs/job-specification/job.mdx b/website/content/docs/job-specification/job.mdx index d1c66c49b..21cd7fb91 100644 --- a/website/content/docs/job-specification/job.mdx +++ b/website/content/docs/job-specification/job.mdx @@ -76,8 +76,10 @@ job "docs" { to define criteria for spreading allocations across a node attribute or metadata. See the [Nomad spread reference][spread] for more details. -- `datacenters` `(array: )` - A list of datacenters in the region which are eligible - for task placement. This must be provided, and does not have a default. +- `datacenters` `(array: ["*"])` - A list of datacenters in the region + which are eligible for task placement. This field allows wildcard globbing + through the use of `*` for multi-character matching. The default value is + `["*"]`, which allows the job to be placed in any available datacenter. - `group` ([Group][group]: <required>) - Specifies the start of a group of tasks. This can be provided multiple times to define additional diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index 445caa0eb..0626874ac 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -62,6 +62,20 @@ from the Nomad client by setting [`set_environment_variables`][artifact_env]. The use of filesystem isolation can be disabled in Client configuration by setting [`disable_filesystem_isolation`][artifact_fs_isolation]. +#### Datacenter Wildcards + +In Nomad 1.5.0, the +[`datacenters`][/nomad/docs/job-specification/job#datacenters] field for a job +accepts wildcards for multi-character matching. For example, `datacenters = +["dc*"]` will match all datacenters that start with `"dc"`. The default value +for `datacenters` is now `["*"]`, so the field can be omitted. + +The `*` character is no longer a legal character in the +[`datacenter`][/nomad/docs/configuration#datacenter] field for an agent +configuration. Before upgrading to Nomad 1.5.0, you should first ensure that +you've updated any jobs that currently have a `*` in their datacenter name and +then ensure that no agents have this character in their `datacenter` field name. + #### Server `rejoin_after_leave` (default: `false`) now enforced All Nomad versions prior to v1.5.0 have incorrectly ignored the Server