open-nomad/command/util_test.go

package command

import (
	"os"
	"testing"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/command/agent"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/testutil"
)

// testServer starts an in-process test agent named after the running test and
// registers its shutdown as a test cleanup.
//
// runClient is written to the agent config's Client.Enabled field before the
// optional cb callback runs, so cb sees that value and may change it. A nil cb
// is allowed.
//
// It returns the agent, an API client obtained from the agent, and the agent's
// HTTP address.
func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
	configure := func(cfg *agent.Config) {
		cfg.Client.Enabled = runClient

		// Caller-supplied tweaks are optional.
		if cb == nil {
			return
		}
		cb(cfg)
	}

	srv := agent.NewTestAgent(t, t.Name(), configure)
	t.Cleanup(func() { srv.Shutdown() })

	return srv, srv.Client(), srv.HTTPAddr()
}

// testClient starts a test agent under the given name, blocks in
// testutil.WaitForClient until the agent's client node (identified by its node
// ID and region) is seen, and registers the agent's shutdown as a test
// cleanup.
//
// cb, when non-nil, is invoked with the agent config so the caller can adjust
// it before the agent starts.
//
// It returns the agent, an API client obtained from the agent, and the agent's
// HTTP address.
func testClient(t *testing.T, name string, cb func(*agent.Config)) (*agent.TestAgent, *api.Client, string) {
	t.Logf("Starting client agent %s", name)

	ta := agent.NewTestAgent(t, name, func(cfg *agent.Config) {
		// Caller-supplied tweaks are optional.
		if cb == nil {
			return
		}
		cb(cfg)
	})
	t.Cleanup(func() { ta.Shutdown() })

	apiClient := ta.Client()

	servers := ta.GetConfig().Client.Servers
	t.Logf("Waiting for client %s to join server(s) %s", name, servers)

	node := ta.Agent.Client()
	testutil.WaitForClient(t, ta.Agent.RPC, node.NodeID(), node.Region())

	return ta, apiClient, ta.HTTPAddr()
}

// testJob builds a batch job fixture whose ID and name are both jobID, in
// region "global" and datacenter "dc1".
//
// The job has a single task group "group1" (count 1, 20 MB ephemeral disk)
// holding one "mock_driver" task "task1" configured with kill_after=1s,
// run_for=5s and exit_code=0, requesting 256 MB of memory and 100 CPU, with
// log config of 1 file of at most 2 MB.
func testJob(jobID string) *api.Job {
	resources := &api.Resources{
		MemoryMB: helper.IntToPtr(256),
		CPU:      helper.IntToPtr(100),
	}
	logs := &api.LogConfig{
		MaxFiles:      helper.IntToPtr(1),
		MaxFileSizeMB: helper.IntToPtr(2),
	}
	disk := &api.EphemeralDisk{
		SizeMB: helper.IntToPtr(20),
	}

	// Task definition.
	task := api.NewTask("task1", "mock_driver")
	task = task.SetConfig("kill_after", "1s")
	task = task.SetConfig("run_for", "5s")
	task = task.SetConfig("exit_code", 0)
	task = task.Require(resources)
	task = task.SetLogConfig(logs)

	// Single group wrapping the task.
	group := api.NewTaskGroup("group1", 1)
	group = group.AddTask(task)
	group = group.RequireDisk(disk)

	// Batch job wrapping the group.
	job := api.NewBatchJob(jobID, jobID, "global", 1)
	job = job.AddDatacenter("dc1")
	job = job.AddTaskGroup(group)

	return job
}

// testMultiRegionJob builds a service job fixture carrying a multiregion
// block.
//
// The job's ID and name are both jobID and it is placed in the given
// datacenter. region is handed to api.NewServiceJob, but the job's Region
// field is set to nil afterwards. The multiregion block always lists two
// regions, "east" (datacenter "east-1") and "west" (datacenter "west-1"),
// regardless of the arguments.
//
// The job has a single task group "group1" (count 1, 20 MB ephemeral disk)
// holding one "mock_driver" task "task1" configured with kill_after=10s,
// run_for=15s and exit_code=0, requesting 256 MB of memory and 100 CPU, with
// log config of 1 file of at most 2 MB.
func testMultiRegionJob(jobID, region, datacenter string) *api.Job {
	resources := &api.Resources{
		MemoryMB: helper.IntToPtr(256),
		CPU:      helper.IntToPtr(100),
	}
	logs := &api.LogConfig{
		MaxFiles:      helper.IntToPtr(1),
		MaxFileSizeMB: helper.IntToPtr(2),
	}
	disk := &api.EphemeralDisk{
		SizeMB: helper.IntToPtr(20),
	}

	// Task definition.
	task := api.NewTask("task1", "mock_driver")
	task = task.SetConfig("kill_after", "10s")
	task = task.SetConfig("run_for", "15s")
	task = task.SetConfig("exit_code", 0)
	task = task.Require(resources)
	task = task.SetLogConfig(logs)

	// Single group wrapping the task.
	group := api.NewTaskGroup("group1", 1)
	group = group.AddTask(task)
	group = group.RequireDisk(disk)

	// Service job wrapping the group.
	job := api.NewServiceJob(jobID, jobID, region, 1)
	job = job.AddDatacenter(datacenter)
	job = job.AddTaskGroup(group)

	// The top-level region is cleared; regions come from the multiregion
	// block below.
	job.Region = nil

	regions := []*api.MultiregionRegion{
		&api.MultiregionRegion{
			Name:        "east",
			Datacenters: []string{"east-1"},
		},
		&api.MultiregionRegion{
			Name:        "west",
			Datacenters: []string{"west-1"},
		},
	}
	job.Multiregion = &api.Multiregion{Regions: regions}

	return job
}

// setEnv sets the environment variable key to value for the remainder of the
// test and registers a cleanup that puts the environment back the way it was:
// the previous value is restored if the variable was set before the call,
// otherwise the variable is unset again.
//
// If the variable cannot be set (os.Setenv returns an error, e.g. for an
// empty key on Unix-like systems) the test is failed immediately instead of
// silently running against an environment it did not ask for. An error while
// restoring is reported as a test error.
//
// NOTE(review): the process environment is shared by all tests in the binary,
// so this is presumably unsafe to combine with t.Parallel — confirm at call
// sites.
func setEnv(t *testing.T, key, value string) {
	// Attribute failures to the calling test line, not to this helper.
	t.Helper()

	// Capture the prior state before overwriting it; ok distinguishes
	// "set to empty string" from "not set at all".
	initial, ok := os.LookupEnv(key)
	if err := os.Setenv(key, value); err != nil {
		t.Fatalf("setting environment variable %q: %v", key, err)
	}

	t.Cleanup(func() {
		var err error
		if ok {
			err = os.Setenv(key, initial)
		} else {
			err = os.Unsetenv(key)
		}
		if err != nil {
			t.Errorf("restoring environment variable %q: %v", key, err)
		}
	})
}
command: tests 2015-09-11 18:10:20 +00:00			`package command`

			`import (`
tests: ensure that tests restore env-var values (#11309) Fix a test corruption issue, where a test accidentally unsets the `NOMAD_LICENSE` environment variable, that's relied on by some tests. As a habit, tests should always restore the environment variable value on test completion. Golang 1.17 introduced [`t.Setenv`](https://pkg.go.dev/testing#T.Setenv) to address this issue. However, as 1.0.x and 1.1.x branches target golang 1.15 and 1.16, I opted to use a helper function to ease backports. 2021-10-13 21:26:56 +00:00			`"os"`
command: tests 2015-09-11 18:10:20 +00:00			`"testing"`

command: cli tests 2015-09-12 21:50:05 +00:00			`"github.com/hashicorp/nomad/api"`
Switch to in-process agent 2017-07-21 04:07:32 +00:00			`"github.com/hashicorp/nomad/command/agent"`
Added tests 2017-02-13 23:18:17 +00:00			`"github.com/hashicorp/nomad/helper"`
debug: Improve namespace and region support (#11269) * Include region and namespace in CLI output * Add region and prefix matching for server members * Add namespace and region API outputs to cluster metadata folder * Add region awareness to WaitForClient helper function * Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice * Refactor test client agent generation * Add tests for region * Add changelog 2021-10-12 20:58:41 +00:00			`"github.com/hashicorp/nomad/testutil"`
command: tests 2015-09-11 18:10:20 +00:00			`)`

Switch to in-process agent 2017-07-21 04:07:32 +00:00			`func testServer(t testing.T, runClient bool, cb func(agent.Config)) (agent.TestAgent, api.Client, string) {`
command: use testutil.TestServer in command tests 2015-09-12 23:12:56 +00:00			`// Make a new test server`
Standardize retrieving a free port into a helper package 2017-10-19 04:45:18 +00:00			`a := agent.NewTestAgent(t, t.Name(), func(config *agent.Config) {`
Switch to in-process agent 2017-07-21 04:07:32 +00:00			`config.Client.Enabled = runClient`

			`if cb != nil {`
			`cb(config)`
			`}`
			`})`
always shutdown test server on test cleanup 2020-06-25 16:44:19 +00:00			`t.Cleanup(func() { a.Shutdown() })`
command: cli tests 2015-09-12 21:50:05 +00:00
Switch to in-process agent 2017-07-21 04:07:32 +00:00			`c := a.Client()`
			`return a, c, a.HTTPAddr()`
command: tests 2015-09-11 18:10:20 +00:00			`}`
command: fix tests after job validation 2015-09-16 18:42:28 +00:00
debug: Improve namespace and region support (#11269) * Include region and namespace in CLI output * Add region and prefix matching for server members * Add namespace and region API outputs to cluster metadata folder * Add region awareness to WaitForClient helper function * Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice * Refactor test client agent generation * Add tests for region * Add changelog 2021-10-12 20:58:41 +00:00			`// testClient starts a new test client, blocks until it joins, and performs`
			`// cleanup after the test is complete.`
			`func testClient(t testing.T, name string, cb func(agent.Config)) (agent.TestAgent, api.Client, string) {`
Fix flaky `operator debug` test (#12501) We introduced a `pprof-interval` argument to `operator debug` in #11938, and unfortunately this has resulted in a lot of test flakes. The actual command in use is mostly fine (although I've fixed some quirks here), so what's really happened is that the change has revealed some existing issues in the tests. Summary of changes: * Make first pprof collection synchronous to preserve the existing behavior for the common case where the pprof interval matches the duration. * Clamp `operator debug` pprof timing to that of the command. The `pprof-duration` should be no more than `duration` and the `pprof-interval` should be no more than `pprof-duration`. Clamp the values rather than throwing errors, which could change the commands that existing users might already have in debugging scripts * Testing: remove test parallelism The `operator debug` tests that stand up servers can't be run in parallel, because we don't have a way of canceling the API calls for pprof. The agent will still be running the last pprof when we exit, and that breaks the next test that talks to that same agent. (Because you can only run one pprof at a time on any process!) We could split off each subtest into its own server, but this test suite is already very slow. In future work we should fix this "for real" by making the API call cancelable. * Testing: assert against unexpected errors in `operator debug` tests. If we assert there are no unexpected error outputs, it's easier for the developer to debug when something is going wrong with the tests because the error output will be presented as a failing test, rather than just a failing exit code check. Or worse, no failing exit code check! This also forces us to be explicit about which tests will return 0 exit codes but still emit (presumably ignorable) error outputs. 
Additional minor bug fixes (mostly in tests) and test refactorings: * Fix text alignment on pprof Duration in `operator debug` output * Remove "done" channel from `operator debug` event stream test. The goroutine we're blocking for here already tells us it's done by sending a value, so block on that instead of an extraneous channel * Event stream test timer should start at current time, not zero * Remove noise from `operator debug` test log output. The `t.Logf` calls already are picked out from the rest of the test output by being prefixed with the filename. * Remove explicit pprof args so we use the defaults clamped from duration/interval 2022-04-07 19:00:07 +00:00			`t.Logf("Starting client agent %s", name)`
debug: Improve namespace and region support (#11269) * Include region and namespace in CLI output * Add region and prefix matching for server members * Add namespace and region API outputs to cluster metadata folder * Add region awareness to WaitForClient helper function * Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice * Refactor test client agent generation * Add tests for region * Add changelog 2021-10-12 20:58:41 +00:00			`a := agent.NewTestAgent(t, name, func(config *agent.Config) {`
			`if cb != nil {`
			`cb(config)`
			`}`
			`})`
			`t.Cleanup(func() { a.Shutdown() })`

			`c := a.Client()`
Fix flaky `operator debug` test (#12501) We introduced a `pprof-interval` argument to `operator debug` in #11938, and unfortunately this has resulted in a lot of test flakes. The actual command in use is mostly fine (although I've fixed some quirks here), so what's really happened is that the change has revealed some existing issues in the tests. Summary of changes: * Make first pprof collection synchronous to preserve the existing behavior for the common case where the pprof interval matches the duration. * Clamp `operator debug` pprof timing to that of the command. The `pprof-duration` should be no more than `duration` and the `pprof-interval` should be no more than `pprof-duration`. Clamp the values rather than throwing errors, which could change the commands that existing users might already have in debugging scripts * Testing: remove test parallelism The `operator debug` tests that stand up servers can't be run in parallel, because we don't have a way of canceling the API calls for pprof. The agent will still be running the last pprof when we exit, and that breaks the next test that talks to that same agent. (Because you can only run one pprof at a time on any process!) We could split off each subtest into its own server, but this test suite is already very slow. In future work we should fix this "for real" by making the API call cancelable. * Testing: assert against unexpected errors in `operator debug` tests. If we assert there are no unexpected error outputs, it's easier for the developer to debug when something is going wrong with the tests because the error output will be presented as a failing test, rather than just a failing exit code check. Or worse, no failing exit code check! This also forces us to be explicit about which tests will return 0 exit codes but still emit (presumably ignorable) error outputs. 
Additional minor bug fixes (mostly in tests) and test refactorings: * Fix text alignment on pprof Duration in `operator debug` output * Remove "done" channel from `operator debug` event stream test. The goroutine we're blocking for here already tells us it's done by sending a value, so block on that instead of an extraneous channel * Event stream test timer should start at current time, not zero * Remove noise from `operator debug` test log output. The `t.Logf` calls already are picked out from the rest of the test output by being prefixed with the filename. * Remove explicit pprof args so we use the defaults clamped from duration/interval 2022-04-07 19:00:07 +00:00			`t.Logf("Waiting for client %s to join server(s) %s", name, a.GetConfig().Client.Servers)`
debug: Improve namespace and region support (#11269) * Include region and namespace in CLI output * Add region and prefix matching for server members * Add namespace and region API outputs to cluster metadata folder * Add region awareness to WaitForClient helper function * Add helper functions for SliceStringHasPrefix and StringHasPrefixInSlice * Refactor test client agent generation * Add tests for region * Add changelog 2021-10-12 20:58:41 +00:00			`testutil.WaitForClient(t, a.Agent.RPC, a.Agent.Client().NodeID(), a.Agent.Client().Region())`

			`return a, c, a.HTTPAddr()`
			`}`

command: fix tests after job validation 2015-09-16 18:42:28 +00:00			`func testJob(jobID string) *api.Job {`
Attempting to fix alloc status test 2016-08-22 16:35:25 +00:00			`task := api.NewTask("task1", "mock_driver").`
			`SetConfig("kill_after", "1s").`
			`SetConfig("run_for", "5s").`
			`SetConfig("exit_code", 0).`
Tests 2016-02-02 21:50:30 +00:00			`Require(&api.Resources{`
Added tests 2017-02-13 23:18:17 +00:00			`MemoryMB: helper.IntToPtr(256),`
			`CPU: helper.IntToPtr(100),`
Fixes 2016-02-19 23:49:32 +00:00			`}).`
Fixed some tests 2016-02-11 18:42:56 +00:00			`SetLogConfig(&api.LogConfig{`
Added tests 2017-02-13 23:18:17 +00:00			`MaxFiles: helper.IntToPtr(1),`
			`MaxFileSizeMB: helper.IntToPtr(2),`
Fixes 2016-02-19 23:49:32 +00:00			`})`
command: fix tests after job validation 2015-09-16 18:42:28 +00:00
			`group := api.NewTaskGroup("group1", 1).`
Fixed some more tests 2016-08-26 04:05:21 +00:00			`AddTask(task).`
Renaming LocalDisk to EphemeralDisk (#1710) Renaming LocalDisk to EphemeralDisk 2016-09-14 22:43:42 +00:00			`RequireDisk(&api.EphemeralDisk{`
Added tests 2017-02-13 23:18:17 +00:00			`SizeMB: helper.IntToPtr(20),`
Fixed some more tests 2016-08-26 04:05:21 +00:00			`})`
command: fix tests after job validation 2015-09-16 18:42:28 +00:00
backfill region from job hcl in jobUpdate and jobPlan endpoints - updated region in job metadata that gets persisted to nomad datastore - fixed many unrelated unit tests that used an invalid region value (they previously passed because hcl wasn't getting picked up and the job would default to global region) 2019-05-02 20:00:21 +00:00			`job := api.NewBatchJob(jobID, jobID, "global", 1).`
command: fix tests after job validation 2015-09-16 18:42:28 +00:00			`AddDatacenter("dc1").`
			`AddTaskGroup(group)`

			`return job`
			`}`
Multiregion deploy status and job status CLI 2020-06-15 14:05:31 +00:00
			`func testMultiRegionJob(jobID, region, datacenter string) *api.Job {`
			`task := api.NewTask("task1", "mock_driver").`
			`SetConfig("kill_after", "10s").`
			`SetConfig("run_for", "15s").`
			`SetConfig("exit_code", 0).`
			`Require(&api.Resources{`
			`MemoryMB: helper.IntToPtr(256),`
			`CPU: helper.IntToPtr(100),`
			`}).`
			`SetLogConfig(&api.LogConfig{`
			`MaxFiles: helper.IntToPtr(1),`
			`MaxFileSizeMB: helper.IntToPtr(2),`
			`})`

			`group := api.NewTaskGroup("group1", 1).`
			`AddTask(task).`
			`RequireDisk(&api.EphemeralDisk{`
			`SizeMB: helper.IntToPtr(20),`
			`})`

			`job := api.NewServiceJob(jobID, jobID, region, 1).AddDatacenter(datacenter).AddTaskGroup(group)`
Multiregion job registration Integration points for multiregion jobs to be registered in the enterprise version of Nomad: * hook in `Job.Register` for enterprise to send job to peer regions * remove monitoring from `nomad job run` and `nomad job stop` for multiregion jobs 2020-06-15 20:18:14 +00:00			`job.Region = nil`
Multiregion deploy status and job status CLI 2020-06-15 14:05:31 +00:00			`job.Multiregion = &api.Multiregion{`
			`Regions: []*api.MultiregionRegion{`
			`{`
			`Name: "east",`
			`Datacenters: []string{"east-1"},`
			`},`
			`{`
			`Name: "west",`
			`Datacenters: []string{"west-1"},`
			`},`
			`},`
			`}`

			`return job`
			`}`
tests: ensure that tests restore env-var values (#11309) Fix a test corruption issue, where a test accidentally unsets the `NOMAD_LICENSE` environment variable, that's relied on by some tests. As a habit, tests should always restore the environment variable value on test completion. Golang 1.17 introduced [`t.Setenv`](https://pkg.go.dev/testing#T.Setenv) to address this issue. However, as 1.0.x and 1.1.x branches target golang 1.15 and 1.16, I opted to use a helper function to ease backports. 2021-10-13 21:26:56 +00:00
			`// setEnv wraps os.Setenv(key, value) and restores the environment variable to initial value in test cleanup`
			`func setEnv(t *testing.T, key, value string) {`
			`initial, ok := os.LookupEnv(key)`
			`os.Setenv(key, value)`

			`t.Cleanup(func() {`
			`if ok {`
			`os.Setenv(key, initial)`
			`} else {`
			`os.Unsetenv(key)`
			`}`
			`})`
			`}`