open-nomad/e2e/operator_scheduler/operator_scheduler_test.go

package operator_scheduler

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

const jobBasic = "./input/basic.nomad"

// TestOperatorScheduler runs the Nomad Operator Scheduler suite of tests
// which focus on the behaviour of the /v1/operator/scheduler API.
func TestOperatorScheduler(t *testing.T) {
	// Wait until we have a usable cluster before running the tests.
	nomadClient := e2eutil.NomadClient(t)
	e2eutil.WaitForLeader(t, nomadClient)
	e2eutil.WaitForNodesReady(t, nomadClient, 1)

	// Run our test cases.
	t.Run("TestOperatorScheduler_ConfigPauseEvalBroker", testConfigPauseEvalBroker)
}

// testConfigPauseEvalBroker tests pausing and un-pausing the eval broker and
// ensures the correct behaviour is observed at each stage.
func testConfigPauseEvalBroker(t *testing.T) {
	nomadClient := e2eutil.NomadClient(t)

	// Generate our job ID which will be used for the entire test.
	jobID := "operator-scheduler-config-pause-eval-broker-" + uuid.Generate()[:8]
	jobIDs := []string{jobID}

	// Defer a cleanup function to remove the job. This will trigger if the
	// test fails, unless the cancel function is called.
	ctx, cancel := context.WithCancel(context.Background())
	defer e2eutil.CleanupJobsAndGCWithContext(t, ctx, &jobIDs)

	// Register the job and ensure the alloc reaches the running state before
	// safely moving on.
	allocStubs := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, jobBasic, jobID, "")
	require.Len(t, allocStubs, 1)
	e2eutil.WaitForAllocRunning(t, nomadClient, allocStubs[0].ID)

	// Get the current scheduler config object.
	schedulerConfig, _, err := nomadClient.Operator().SchedulerGetConfiguration(nil)
	require.NoError(t, err)
	require.NotNil(t, schedulerConfig.SchedulerConfig)

	// Set the eval broker to be paused.
	schedulerConfig.SchedulerConfig.PauseEvalBroker = true

	// Write the config back to Nomad.
	schedulerConfigUpdate, _, err := nomadClient.Operator().SchedulerSetConfiguration(
		schedulerConfig.SchedulerConfig, nil)
	require.NoError(t, err)
	require.True(t, schedulerConfigUpdate.Updated)

	// Perform a deregister call. The call will succeed and create an
	// evaluation. Do not use purge, so we can still check the job status
	// after the deregister happens.
	evalID, _, err := nomadClient.Jobs().Deregister(jobID, false, nil)
	require.NoError(t, err)
	require.NotEmpty(t, evalID)

	// Evaluation status is set to pending initially, so there isn't a great
	// way to ensure it doesn't transition to another status other than polling
	// for a long enough time to assume it won't change.
	timedFn := func() error {
		// 5 seconds should be more than enough time for an eval to change
		// status unless the broker is disabled.
		timer := time.NewTimer(5 * time.Second)
		defer timer.Stop()

		for {
			select {
			case <-timer.C:
				return nil
			default:
				evalInfo, _, err := nomadClient.Evaluations().Info(evalID, nil)
				if err != nil {
					return err
				}
				// Use the non-fatal assert so the closure can also return a
				// descriptive error for the caller to report.
				if !assert.Equal(t, "pending", evalInfo.Status) {
					return fmt.Errorf(`expected eval status "pending", got %q`, evalInfo.Status)
				}
				time.Sleep(1 * time.Second)
			}
		}
	}
	require.NoError(t, timedFn())
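
	// The evaluation remained pending for the whole wait period, which
	// suggests the paused broker never dispatched it to a scheduler.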

	// Set the eval broker to be un-paused.
	schedulerConfig.SchedulerConfig.PauseEvalBroker = false

	// Write the config back to Nomad.
	schedulerConfigUpdate, _, err = nomadClient.Operator().SchedulerSetConfiguration(
		schedulerConfig.SchedulerConfig, nil)
	require.NoError(t, err)
	require.True(t, schedulerConfigUpdate.Updated)
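
	// With the broker running again, the pending deregistration eval should
	// now be processed, allowing the job to reach a stopped state.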

	// Ensure the job is stopped, then run the garbage collection to clear out
	// all resources.
	e2eutil.WaitForJobStopped(t, nomadClient, jobID)
	_, err = e2eutil.Command("nomad", "system", "gc")
	require.NoError(t, err)

	// If we have reached this far, we do not need to run the cleanup function.
	cancel()
}