open-nomad/command/agent/operator_endpoint_test.go

524 lines
16 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package agent
import (
"bytes"
"crypto/sha256"
"encoding/base64"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path"
"path/filepath"
"strings"
"testing"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestHTTP_OperatorRaftConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, err := http.NewRequest(http.MethodGet, "/v1/operator/raft/configuration", body)
if err != nil {
t.Fatalf("err: %v", err)
}
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorRaftConfiguration(resp, req)
if err != nil {
t.Fatalf("err: %v", err)
}
if resp.Code != 200 {
t.Fatalf("bad code: %d", resp.Code)
}
out, ok := obj.(structs.RaftConfigurationResponse)
if !ok {
t.Fatalf("unexpected: %T", obj)
}
if len(out.Servers) != 1 ||
!out.Servers[0].Leader ||
!out.Servers[0].Voter {
t.Fatalf("bad: %v", out)
}
})
}
func TestHTTP_OperatorRaftPeer(t *testing.T) {
ci.Parallel(t)
assert := assert.New(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, err := http.NewRequest(http.MethodDelete, "/v1/operator/raft/peer?address=nope", body)
assert.Nil(err)
// If we get this error, it proves we sent the address all the
// way through.
resp := httptest.NewRecorder()
_, err = s.Server.OperatorRaftPeer(resp, req)
if err == nil || !strings.Contains(err.Error(),
"address \"nope\" was not found in the Raft configuration") {
t.Fatalf("err: %v", err)
}
})
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, err := http.NewRequest(http.MethodDelete, "/v1/operator/raft/peer?id=nope", body)
assert.Nil(err)
// If we get this error, it proves we sent the address all the
// way through.
resp := httptest.NewRecorder()
_, err = s.Server.OperatorRaftPeer(resp, req)
if err == nil || !strings.Contains(err.Error(),
"id \"nope\" was not found in the Raft configuration") {
t.Fatalf("err: %v", err)
}
})
}
func TestOperator_AutopilotGetConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, _ := http.NewRequest(http.MethodGet, "/v1/operator/autopilot/configuration", body)
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
if err != nil {
t.Fatalf("err: %v", err)
}
if resp.Code != http.StatusOK {
t.Fatalf("bad code: %d", resp.Code)
}
out, ok := obj.(api.AutopilotConfiguration)
if !ok {
t.Fatalf("unexpected: %T", obj)
}
if !out.CleanupDeadServers {
t.Fatalf("bad: %#v", out)
}
})
}
func TestOperator_AutopilotSetConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer([]byte(`{"CleanupDeadServers": false}`))
req, _ := http.NewRequest(http.MethodPut, "/v1/operator/autopilot/configuration", body)
resp := httptest.NewRecorder()
if _, err := s.Server.OperatorAutopilotConfiguration(resp, req); err != nil {
t.Fatalf("err: %v", err)
}
if resp.Code != 200 {
t.Fatalf("bad code: %d, %q", resp.Code, resp.Body.String())
}
args := structs.GenericRequest{
QueryOptions: structs.QueryOptions{
Region: s.Config.Region,
},
}
var reply structs.AutopilotConfig
if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
t.Fatalf("err: %v", err)
}
if reply.CleanupDeadServers {
t.Fatalf("bad: %#v", reply)
}
})
}
func TestOperator_AutopilotCASConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer([]byte(`{"CleanupDeadServers": false}`))
req, _ := http.NewRequest(http.MethodPut, "/v1/operator/autopilot/configuration", body)
resp := httptest.NewRecorder()
if _, err := s.Server.OperatorAutopilotConfiguration(resp, req); err != nil {
t.Fatalf("err: %v", err)
}
if resp.Code != 200 {
t.Fatalf("bad code: %d", resp.Code)
}
args := structs.GenericRequest{
QueryOptions: structs.QueryOptions{
Region: s.Config.Region,
},
}
var reply structs.AutopilotConfig
if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
t.Fatalf("err: %v", err)
}
if reply.CleanupDeadServers {
t.Fatalf("bad: %#v", reply)
}
// Create a CAS request, bad index
{
buf := bytes.NewBuffer([]byte(`{"CleanupDeadServers": true}`))
req, _ := http.NewRequest(http.MethodPut, fmt.Sprintf("/v1/operator/autopilot/configuration?cas=%d", reply.ModifyIndex-1), buf)
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
if err != nil {
t.Fatalf("err: %v", err)
}
if res := obj.(bool); res {
t.Fatalf("should NOT work")
}
}
// Create a CAS request, good index
{
buf := bytes.NewBuffer([]byte(`{"CleanupDeadServers": true}`))
req, _ := http.NewRequest(http.MethodPut, fmt.Sprintf("/v1/operator/autopilot/configuration?cas=%d", reply.ModifyIndex), buf)
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorAutopilotConfiguration(resp, req)
if err != nil {
t.Fatalf("err: %v", err)
}
if res := obj.(bool); !res {
t.Fatalf("should work")
}
}
// Verify the update
if err := s.RPC("Operator.AutopilotGetConfiguration", &args, &reply); err != nil {
t.Fatalf("err: %v", err)
}
if !reply.CleanupDeadServers {
t.Fatalf("bad: %#v", reply)
}
})
}
func TestOperator_ServerHealth(t *testing.T) {
ci.Parallel(t)
httpTest(t, func(c *Config) {
c.Server.RaftProtocol = 3
}, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, _ := http.NewRequest(http.MethodGet, "/v1/operator/autopilot/health", body)
f := func() error {
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorServerHealth(resp, req)
if err != nil {
return fmt.Errorf("failed to get operator server health: %w", err)
}
if code := resp.Code; code != 200 {
return fmt.Errorf("response code not 200, got: %d", code)
}
out := obj.(*api.OperatorHealthReply)
if n := len(out.Servers); n != 1 {
return fmt.Errorf("expected 1 server, got: %d", n)
}
s1, s2 := out.Servers[0].Name, s.server.LocalMember().Name
if s1 != s2 {
return fmt.Errorf("expected server names to match, got %s and %s", s1, s2)
}
if out.Servers[0].SerfStatus != "alive" {
return fmt.Errorf("expected serf status to be alive, got: %s", out.Servers[0].SerfStatus)
}
if out.FailureTolerance != 0 {
return fmt.Errorf("expected failure tolerance of 0, got: %d", out.FailureTolerance)
}
return nil
}
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(f),
wait.Timeout(10*time.Second),
wait.Gap(1*time.Second),
))
})
}
func TestOperator_ServerHealth_Unhealthy(t *testing.T) {
ci.Parallel(t)
httpTest(t, func(c *Config) {
c.Server.RaftProtocol = 3
c.Autopilot.LastContactThreshold = -1 * time.Second
}, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, _ := http.NewRequest(http.MethodGet, "/v1/operator/autopilot/health", body)
f := func() error {
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorServerHealth(resp, req)
if err != nil {
return fmt.Errorf("failed to get operator server health: %w", err)
}
if code := resp.Code; code != 429 {
return fmt.Errorf("expected code 429, got: %d", code)
}
out := obj.(*api.OperatorHealthReply)
if n := len(out.Servers); n != 1 {
return fmt.Errorf("expected 1 server, got: %d", n)
}
if out.Healthy {
return fmt.Errorf("expected server to be unhealthy")
}
s1, s2 := out.Servers[0].Name, s.server.LocalMember().Name
if s1 != s2 {
return fmt.Errorf("expected server names to match, got %s and %s", s1, s2)
}
return nil
}
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(f),
wait.Timeout(10*time.Second),
wait.Gap(1*time.Second),
))
})
}
func TestOperator_SchedulerGetConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer(nil)
req, _ := http.NewRequest(http.MethodGet, "/v1/operator/scheduler/configuration", body)
resp := httptest.NewRecorder()
obj, err := s.Server.OperatorSchedulerConfiguration(resp, req)
require.Nil(t, err)
require.Equal(t, 200, resp.Code)
out, ok := obj.(structs.SchedulerConfigurationResponse)
require.True(t, ok)
// Only system jobs can preempt other jobs by default.
require.True(t, out.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled)
require.False(t, out.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled)
require.False(t, out.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled)
require.False(t, out.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled)
require.False(t, out.SchedulerConfig.MemoryOversubscriptionEnabled)
require.False(t, out.SchedulerConfig.PauseEvalBroker)
})
}
func TestOperator_SchedulerSetConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
body := bytes.NewBuffer([]byte(`
{
"MemoryOversubscriptionEnabled": true,
"PauseEvalBroker": true,
"PreemptionConfig": {
"SystemSchedulerEnabled": true,
"ServiceSchedulerEnabled": true
}
}`))
req, _ := http.NewRequest(http.MethodPut, "/v1/operator/scheduler/configuration", body)
resp := httptest.NewRecorder()
setResp, err := s.Server.OperatorSchedulerConfiguration(resp, req)
require.Nil(t, err)
require.Equal(t, 200, resp.Code)
schedSetResp, ok := setResp.(structs.SchedulerSetConfigurationResponse)
require.True(t, ok)
require.NotZero(t, schedSetResp.Index)
args := structs.GenericRequest{
QueryOptions: structs.QueryOptions{
Region: s.Config.Region,
},
}
var reply structs.SchedulerConfigurationResponse
err = s.RPC("Operator.SchedulerGetConfiguration", &args, &reply)
require.Nil(t, err)
require.True(t, reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled)
require.False(t, reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled)
require.False(t, reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled)
require.True(t, reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled)
require.True(t, reply.SchedulerConfig.MemoryOversubscriptionEnabled)
require.True(t, reply.SchedulerConfig.PauseEvalBroker)
})
}
func TestOperator_SchedulerCASConfiguration(t *testing.T) {
ci.Parallel(t)
httpTest(t, nil, func(s *TestAgent) {
require := require.New(t)
body := bytes.NewBuffer([]byte(`{"PreemptionConfig": {
"SystemSchedulerEnabled": true,
"SysBatchSchedulerEnabled":true,
"BatchSchedulerEnabled":true
}}`))
req, _ := http.NewRequest(http.MethodPut, "/v1/operator/scheduler/configuration", body)
resp := httptest.NewRecorder()
setResp, err := s.Server.OperatorSchedulerConfiguration(resp, req)
require.Nil(err)
require.Equal(200, resp.Code)
schedSetResp, ok := setResp.(structs.SchedulerSetConfigurationResponse)
require.True(ok)
require.NotZero(schedSetResp.Index)
args := structs.GenericRequest{
QueryOptions: structs.QueryOptions{
Region: s.Config.Region,
},
}
var reply structs.SchedulerConfigurationResponse
if err := s.RPC("Operator.SchedulerGetConfiguration", &args, &reply); err != nil {
t.Fatalf("err: %v", err)
}
require.True(reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled)
require.True(reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled)
require.True(reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled)
require.False(reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled)
// Create a CAS request, bad index
{
buf := bytes.NewBuffer([]byte(`{"PreemptionConfig": {
"SystemSchedulerEnabled": false,
"BatchSchedulerEnabled":true
}}`))
req, _ := http.NewRequest(http.MethodPut, fmt.Sprintf("/v1/operator/scheduler/configuration?cas=%d", reply.QueryMeta.Index-1), buf)
resp := httptest.NewRecorder()
setResp, err := s.Server.OperatorSchedulerConfiguration(resp, req)
require.Nil(err)
// Verify that the response has Updated=false
schedSetResp, ok := setResp.(structs.SchedulerSetConfigurationResponse)
require.True(ok)
require.NotZero(schedSetResp.Index)
require.False(schedSetResp.Updated)
}
// Create a CAS request, good index
{
buf := bytes.NewBuffer([]byte(`{"PreemptionConfig": {
"SystemSchedulerEnabled": false,
"BatchSchedulerEnabled":false
}}`))
req, _ := http.NewRequest(http.MethodPut, fmt.Sprintf("/v1/operator/scheduler/configuration?cas=%d", reply.QueryMeta.Index), buf)
resp := httptest.NewRecorder()
setResp, err := s.Server.OperatorSchedulerConfiguration(resp, req)
require.Nil(err)
// Verify that the response has Updated=true
schedSetResp, ok := setResp.(structs.SchedulerSetConfigurationResponse)
require.True(ok)
require.NotZero(schedSetResp.Index)
require.True(schedSetResp.Updated)
}
// Verify the update
if err := s.RPC("Operator.SchedulerGetConfiguration", &args, &reply); err != nil {
t.Fatalf("err: %v", err)
}
require.False(reply.SchedulerConfig.PreemptionConfig.SystemSchedulerEnabled)
require.False(reply.SchedulerConfig.PreemptionConfig.SysBatchSchedulerEnabled)
require.False(reply.SchedulerConfig.PreemptionConfig.BatchSchedulerEnabled)
require.False(reply.SchedulerConfig.PreemptionConfig.ServiceSchedulerEnabled)
})
}
func TestOperator_SnapshotRequests(t *testing.T) {
ci.Parallel(t)
dir := t.TempDir()
snapshotPath := filepath.Join(dir, "snapshot.bin")
job := mock.Job()
// test snapshot generation
httpTest(t, func(c *Config) {
c.Server.BootstrapExpect = 1
c.DevMode = false
c.DataDir = path.Join(dir, "server")
c.AdvertiseAddrs.HTTP = "127.0.0.1"
c.AdvertiseAddrs.RPC = "127.0.0.1"
c.AdvertiseAddrs.Serf = "127.0.0.1"
// don't actually run the job
c.Client.Enabled = false
}, func(s *TestAgent) {
// make a simple update
jargs := structs.JobRegisterRequest{
Job: job,
WriteRequest: structs.WriteRequest{
Region: "global",
Namespace: structs.DefaultNamespace,
},
}
var jresp structs.JobRegisterResponse
err := s.Agent.RPC("Job.Register", &jargs, &jresp)
require.NoError(t, err)
// now actually snapshot
req, _ := http.NewRequest(http.MethodGet, "/v1/operator/snapshot", nil)
resp := httptest.NewRecorder()
_, err = s.Server.SnapshotRequest(resp, req)
require.NoError(t, err)
require.Equal(t, 200, resp.Code)
digest := resp.Header().Get("Digest")
require.NotEmpty(t, digest)
require.Contains(t, digest, "sha-256=")
hash := sha256.New()
f, err := os.Create(snapshotPath)
require.NoError(t, err)
defer f.Close()
_, err = io.Copy(io.MultiWriter(f, hash), resp.Body)
require.NoError(t, err)
expectedChecksum := "sha-256=" + base64.StdEncoding.EncodeToString(hash.Sum(nil))
require.Equal(t, digest, expectedChecksum)
})
// test snapshot restoration
httpTest(t, func(c *Config) {
c.Server.BootstrapExpect = 1
c.DevMode = false
c.DataDir = path.Join(dir, "server2")
c.AdvertiseAddrs.HTTP = "127.0.0.1"
c.AdvertiseAddrs.RPC = "127.0.0.1"
c.AdvertiseAddrs.Serf = "127.0.0.1"
// don't actually run the job
c.Client.Enabled = false
}, func(s *TestAgent) {
jobExists := func() bool {
// check job isn't present
req, _ := http.NewRequest(http.MethodGet, "/v1/job/"+job.ID, nil)
resp := httptest.NewRecorder()
j, _ := s.Server.jobCRUD(resp, req, job.ID)
return j != nil
}
// job doesn't get initially
require.False(t, jobExists())
// restrore and check if job exists after
f, err := os.Open(snapshotPath)
require.NoError(t, err)
defer f.Close()
req, _ := http.NewRequest(http.MethodPut, "/v1/operator/snapshot", f)
resp := httptest.NewRecorder()
_, err = s.Server.SnapshotRequest(resp, req)
require.NoError(t, err)
require.Equal(t, 200, resp.Code)
require.True(t, jobExists())
})
}