621 lines
15 KiB
Go
621 lines
15 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
package api
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sort"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/hashicorp/nomad/api/internal/testutil"
|
|
"github.com/shoenig/test/must"
|
|
"github.com/shoenig/test/wait"
|
|
)
|
|
|
|
func queryNodeList(t *testing.T, nodes *Nodes) ([]*NodeListStub, *QueryMeta) {
|
|
t.Helper()
|
|
var (
|
|
nodeListStub []*NodeListStub
|
|
queryMeta *QueryMeta
|
|
err error
|
|
)
|
|
|
|
f := func() error {
|
|
nodeListStub, queryMeta, err = nodes.List(nil)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to list nodes: %w", err)
|
|
}
|
|
if len(nodeListStub) == 0 {
|
|
return fmt.Errorf("no nodes yet")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
must.Wait(t, wait.InitialSuccess(
|
|
wait.ErrorFunc(f),
|
|
wait.Timeout(10*time.Second),
|
|
wait.Gap(1*time.Second),
|
|
))
|
|
|
|
return nodeListStub, queryMeta
|
|
}
|
|
|
|
func oneNodeFromNodeList(t *testing.T, nodes *Nodes) *NodeListStub {
|
|
nodeListStub, _ := queryNodeList(t, nodes)
|
|
must.Len(t, 1, nodeListStub, must.Sprint("expected 1 node"))
|
|
return nodeListStub[0]
|
|
}
|
|
|
|
func TestNodes_List(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
nodeListStub, queryMeta := queryNodeList(t, nodes)
|
|
must.Len(t, 1, nodeListStub)
|
|
|
|
// Check that we got valid QueryMeta.
|
|
assertQueryMeta(t, queryMeta)
|
|
}
|
|
|
|
func TestNodes_PrefixList(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Get the node ID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// Find node based on four character prefix
|
|
out, qm, err := nodes.PrefixList(nodeID[:4])
|
|
must.NoError(t, err)
|
|
must.Len(t, 1, out, must.Sprint("expected only 1 node"))
|
|
|
|
// Check that we got valid QueryMeta.
|
|
assertQueryMeta(t, qm)
|
|
}
|
|
|
|
// TestNodes_List_Resources asserts that ?resources=true includes allocated and
|
|
// reserved resources in the response.
|
|
func TestNodes_List_Resources(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
node := oneNodeFromNodeList(t, nodes)
|
|
|
|
// By default resources should *not* be included
|
|
must.Nil(t, node.NodeResources)
|
|
must.Nil(t, node.ReservedResources)
|
|
|
|
qo := &QueryOptions{
|
|
Params: map[string]string{"resources": "true"},
|
|
}
|
|
|
|
out, _, err := nodes.List(qo)
|
|
must.NoError(t, err)
|
|
must.NotNil(t, out[0].NodeResources)
|
|
must.NotNil(t, out[0].ReservedResources)
|
|
}
|
|
|
|
func TestNodes_Info(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
startTime := time.Now().Unix()
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Retrieving a nonexistent node returns error
|
|
_, _, infoErr := nodes.Info("12345678-abcd-efab-cdef-123456789abc", nil)
|
|
must.ErrorContains(t, infoErr, "not found")
|
|
|
|
// Get the node ID and DC
|
|
node := oneNodeFromNodeList(t, nodes)
|
|
nodeID, dc := node.ID, node.Datacenter
|
|
|
|
// Querying for existing nodes returns properly
|
|
result, qm, err := nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
|
|
assertQueryMeta(t, qm)
|
|
|
|
// Check that the result is what we expect
|
|
must.Eq(t, nodeID, result.ID)
|
|
must.Eq(t, dc, result.Datacenter)
|
|
|
|
must.Eq(t, 20000, result.NodeResources.MinDynamicPort)
|
|
must.Eq(t, 32000, result.NodeResources.MaxDynamicPort)
|
|
|
|
// Check that the StatusUpdatedAt field is being populated correctly
|
|
must.Less(t, result.StatusUpdatedAt, startTime)
|
|
|
|
// check we have at least one event
|
|
must.GreaterEq(t, 1, len(result.Events))
|
|
}
|
|
|
|
func TestNodes_NoSecretID(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Get the node ID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// perform a raw http call and make sure that:
|
|
// - "ID" to make sure that raw decoding is working correctly
|
|
// - "SecretID" to make sure it's not present
|
|
resp := make(map[string]interface{})
|
|
_, err := c.query("/v1/node/"+nodeID, &resp, nil)
|
|
must.NoError(t, err)
|
|
must.Eq(t, nodeID, resp["ID"].(string))
|
|
must.Eq(t, "", resp["SecretID"])
|
|
}
|
|
|
|
func TestNodes_ToggleDrain(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Wait for node registration and get the ID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// Check for drain mode
|
|
out, _, err := nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.False(t, out.Drain)
|
|
must.Nil(t, out.LastDrain)
|
|
|
|
// Toggle it on
|
|
timeBeforeDrain := time.Now().Add(-1 * time.Second)
|
|
spec := &DrainSpec{
|
|
Deadline: 10 * time.Second,
|
|
}
|
|
drainMeta := map[string]string{
|
|
"reason": "this node needs to go",
|
|
}
|
|
drainOut, err := nodes.UpdateDrainOpts(nodeID, &DrainOptions{
|
|
DrainSpec: spec,
|
|
MarkEligible: false,
|
|
Meta: drainMeta,
|
|
}, nil)
|
|
must.NoError(t, err)
|
|
assertWriteMeta(t, &drainOut.WriteMeta)
|
|
|
|
// Drain may have completed before we can check, use event stream
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
streamCh, err := c.EventStream().Stream(ctx, map[Topic][]string{
|
|
TopicNode: {nodeID},
|
|
}, 0, nil)
|
|
must.NoError(t, err)
|
|
|
|
// we expect to see the node change to Drain:true and then back to Drain:false+ineligible
|
|
var sawDraining, sawDrainComplete uint64
|
|
for sawDrainComplete == 0 {
|
|
select {
|
|
case events := <-streamCh:
|
|
must.NoError(t, events.Err)
|
|
for _, e := range events.Events {
|
|
node, err := e.Node()
|
|
must.NoError(t, err)
|
|
must.Eq(t, node.DrainStrategy != nil, node.Drain)
|
|
must.True(t, !node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible"
|
|
if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
|
|
must.NotNil(t, node.LastDrain)
|
|
must.Eq(t, DrainStatusDraining, node.LastDrain.Status)
|
|
now := time.Now()
|
|
must.False(t, node.LastDrain.StartedAt.Before(timeBeforeDrain))
|
|
must.False(t, node.LastDrain.StartedAt.After(now))
|
|
must.Eq(t, drainMeta, node.LastDrain.Meta)
|
|
sawDraining = node.ModifyIndex
|
|
} else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible {
|
|
must.NotNil(t, node.LastDrain)
|
|
must.Eq(t, DrainStatusComplete, node.LastDrain.Status)
|
|
must.True(t, !node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt))
|
|
must.Eq(t, drainMeta, node.LastDrain.Meta)
|
|
sawDrainComplete = node.ModifyIndex
|
|
}
|
|
}
|
|
case <-time.After(5 * time.Second):
|
|
must.Unreachable(t, must.Sprint("waiting on stream event that never happened"))
|
|
}
|
|
}
|
|
|
|
// Toggle off again
|
|
drainOut, err = nodes.UpdateDrain(nodeID, nil, true, nil)
|
|
must.NoError(t, err)
|
|
assertWriteMeta(t, &drainOut.WriteMeta)
|
|
|
|
// Check again
|
|
out, _, err = nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.False(t, out.Drain)
|
|
must.Nil(t, out.DrainStrategy)
|
|
must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
|
|
}
|
|
|
|
func TestNodes_ToggleEligibility(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Get node ID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// Check for eligibility
|
|
out, _, err := nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
|
|
|
|
// Toggle it off
|
|
eligOut, err := nodes.ToggleEligibility(nodeID, false, nil)
|
|
must.NoError(t, err)
|
|
assertWriteMeta(t, &eligOut.WriteMeta)
|
|
|
|
// Check again
|
|
out, _, err = nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.Eq(t, NodeSchedulingIneligible, out.SchedulingEligibility)
|
|
|
|
// Toggle on
|
|
eligOut, err = nodes.ToggleEligibility(nodeID, true, nil)
|
|
must.NoError(t, err)
|
|
assertWriteMeta(t, &eligOut.WriteMeta)
|
|
|
|
// Check again
|
|
out, _, err = nodes.Info(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility)
|
|
must.Nil(t, out.DrainStrategy)
|
|
}
|
|
|
|
func TestNodes_Allocations(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Looking up by a nonexistent node returns nothing. We
|
|
// don't check the index here because it's possible the node
|
|
// has already registered, in which case we will get a non-
|
|
// zero result anyways.
|
|
allocations, _, err := nodes.Allocations("nope", nil)
|
|
must.NoError(t, err)
|
|
must.Len(t, 0, allocations)
|
|
}
|
|
|
|
func TestNodes_ForceEvaluate(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Force-eval on a nonexistent node fails
|
|
_, _, err := nodes.ForceEvaluate("12345678-abcd-efab-cdef-123456789abc", nil)
|
|
must.ErrorContains(t, err, "not found")
|
|
|
|
// Wait for node registration and get the ID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// Try force-eval again. We don't check the WriteMeta because
|
|
// there are no allocations to process, so we would get an index
|
|
// of zero. Same goes for the eval ID.
|
|
_, _, err = nodes.ForceEvaluate(nodeID, nil)
|
|
must.NoError(t, err)
|
|
}
|
|
|
|
func TestNodes_Sort(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
nodes := []*NodeListStub{
|
|
{CreateIndex: 2},
|
|
{CreateIndex: 1},
|
|
{CreateIndex: 5},
|
|
}
|
|
sort.Sort(NodeIndexSort(nodes))
|
|
|
|
expect := []*NodeListStub{
|
|
{CreateIndex: 5},
|
|
{CreateIndex: 2},
|
|
{CreateIndex: 1},
|
|
}
|
|
must.Eq(t, expect, nodes)
|
|
}
|
|
|
|
// Unittest monitorDrainMultiplex when an error occurs
|
|
func TestNodes_MonitorDrain_Multiplex_Bad(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
ctx := context.Background()
|
|
multiplexCtx, cancel := context.WithCancel(ctx)
|
|
|
|
// monitorDrainMultiplex doesn't require anything on *Nodes, so we
|
|
// don't need to use a full Client
|
|
var nodeClient *Nodes
|
|
|
|
outCh := make(chan *MonitorMessage, 8)
|
|
nodeCh := make(chan *MonitorMessage, 1)
|
|
allocCh := make(chan *MonitorMessage, 8)
|
|
exitedCh := make(chan struct{})
|
|
go func() {
|
|
defer close(exitedCh)
|
|
nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
|
|
}()
|
|
|
|
// Fake an alloc update
|
|
msg := Messagef(0, "alloc update")
|
|
allocCh <- msg
|
|
must.Eq(t, msg, <-outCh)
|
|
|
|
// Fake a node update
|
|
msg = Messagef(0, "node update")
|
|
nodeCh <- msg
|
|
must.Eq(t, msg, <-outCh)
|
|
|
|
// Fake an error that should shut everything down
|
|
msg = Messagef(MonitorMsgLevelError, "fake error")
|
|
nodeCh <- msg
|
|
must.Eq(t, msg, <-outCh)
|
|
|
|
_, ok := <-exitedCh
|
|
must.False(t, ok)
|
|
|
|
_, ok = <-outCh
|
|
must.False(t, ok)
|
|
|
|
// Exiting should also cancel the context that would be passed to the
|
|
// node & alloc watchers
|
|
select {
|
|
case <-multiplexCtx.Done():
|
|
case <-time.After(100 * time.Millisecond):
|
|
must.Unreachable(t, must.Sprint("multiplex context was not cancelled"))
|
|
}
|
|
}
|
|
|
|
// Unittest monitorDrainMultiplex when drain finishes
|
|
func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
ctx := context.Background()
|
|
multiplexCtx, cancel := context.WithCancel(ctx)
|
|
|
|
// monitorDrainMultiplex doesn't require anything on *Nodes, so we
|
|
// don't need to use a full Client
|
|
var nodeClient *Nodes
|
|
|
|
outCh := make(chan *MonitorMessage, 8)
|
|
nodeCh := make(chan *MonitorMessage, 1)
|
|
allocCh := make(chan *MonitorMessage, 8)
|
|
exitedCh := make(chan struct{})
|
|
go func() {
|
|
defer close(exitedCh)
|
|
nodeClient.monitorDrainMultiplex(ctx, cancel, outCh, nodeCh, allocCh)
|
|
}()
|
|
|
|
// Fake a node updating and finishing
|
|
msg := Messagef(MonitorMsgLevelInfo, "node update")
|
|
nodeCh <- msg
|
|
close(nodeCh)
|
|
must.Eq(t, msg, <-outCh)
|
|
|
|
// Nothing else should have exited yet
|
|
select {
|
|
case badMsg, ok := <-outCh:
|
|
must.False(t, ok, must.Sprintf("unexpected output %v", badMsg))
|
|
must.Unreachable(t, must.Sprint("out channel closed unexpectedly"))
|
|
case <-exitedCh:
|
|
must.Unreachable(t, must.Sprint("multiplexer exited unexpectedly"))
|
|
case <-multiplexCtx.Done():
|
|
must.Unreachable(t, must.Sprint("multiplexer context canceled unexpectedly"))
|
|
case <-time.After(10 * time.Millisecond):
|
|
t.Logf("multiplexer still running as expected")
|
|
}
|
|
|
|
// Fake an alloc update coming in after the node monitor has finished
|
|
msg = Messagef(0, "alloc update")
|
|
allocCh <- msg
|
|
must.Eq(t, msg, <-outCh)
|
|
|
|
// Closing the allocCh should cause everything to exit
|
|
close(allocCh)
|
|
|
|
_, ok := <-exitedCh
|
|
must.False(t, ok)
|
|
|
|
_, ok = <-outCh
|
|
must.False(t, ok)
|
|
|
|
// Exiting should also cancel the context that would be passed to the
|
|
// node & alloc watchers
|
|
select {
|
|
case <-multiplexCtx.Done():
|
|
case <-time.After(100 * time.Millisecond):
|
|
must.Unreachable(t, must.Sprint("context was not cancelled"))
|
|
}
|
|
}
|
|
|
|
func TestNodes_DrainStrategy_Equal(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
// nil
|
|
var d *DrainStrategy
|
|
must.Equal(t, nil, d)
|
|
|
|
o := &DrainStrategy{}
|
|
must.NotEqual(t, d, o)
|
|
must.NotEqual(t, o, d)
|
|
|
|
d = &DrainStrategy{}
|
|
must.Equal(t, d, o)
|
|
must.Equal(t, o, d)
|
|
|
|
// ForceDeadline
|
|
d.ForceDeadline = time.Now()
|
|
must.NotEqual(t, d, o)
|
|
|
|
o.ForceDeadline = d.ForceDeadline
|
|
must.Equal(t, d, o)
|
|
|
|
// Deadline
|
|
d.Deadline = 1
|
|
must.NotEqual(t, d, o)
|
|
|
|
o.Deadline = 1
|
|
must.Equal(t, d, o)
|
|
|
|
// IgnoreSystemJobs
|
|
d.IgnoreSystemJobs = true
|
|
must.NotEqual(t, d, o)
|
|
|
|
o.IgnoreSystemJobs = true
|
|
must.Equal(t, d, o)
|
|
}
|
|
|
|
func TestNodes_Purge(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
|
|
c.DevMode = true
|
|
})
|
|
defer s.Stop()
|
|
nodes := c.Nodes()
|
|
|
|
// Purge on a nonexistent node fails.
|
|
_, _, err := c.Nodes().Purge("12345678-abcd-efab-cdef-123456789abc", nil)
|
|
must.ErrorContains(t, err, "not found")
|
|
|
|
// Wait for nodeID
|
|
nodeID := oneNodeFromNodeList(t, nodes).ID
|
|
|
|
// Perform the node purge and check the response objects.
|
|
out, meta, err := c.Nodes().Purge(nodeID, nil)
|
|
must.NoError(t, err)
|
|
must.NotNil(t, out)
|
|
|
|
// We can't use assertQueryMeta here, as the RPC response does not populate
|
|
// the known leader field.
|
|
must.Positive(t, meta.LastIndex)
|
|
}
|
|
|
|
func TestNodeStatValueFormatting(t *testing.T) {
|
|
testutil.Parallel(t)
|
|
|
|
cases := []struct {
|
|
expected string
|
|
value StatValue
|
|
}{
|
|
{
|
|
"true",
|
|
StatValue{BoolVal: pointerOf(true)},
|
|
},
|
|
{
|
|
"false",
|
|
StatValue{BoolVal: pointerOf(false)},
|
|
},
|
|
{
|
|
"myvalue",
|
|
StatValue{StringVal: pointerOf("myvalue")},
|
|
},
|
|
{
|
|
"2.718",
|
|
StatValue{
|
|
FloatNumeratorVal: float64ToPtr(2.718),
|
|
},
|
|
},
|
|
{
|
|
"2.718 / 3.14",
|
|
StatValue{
|
|
FloatNumeratorVal: float64ToPtr(2.718),
|
|
FloatDenominatorVal: float64ToPtr(3.14),
|
|
},
|
|
},
|
|
{
|
|
"2.718 MHz",
|
|
StatValue{
|
|
FloatNumeratorVal: float64ToPtr(2.718),
|
|
Unit: "MHz",
|
|
},
|
|
},
|
|
{
|
|
"2.718 / 3.14 MHz",
|
|
StatValue{
|
|
FloatNumeratorVal: float64ToPtr(2.718),
|
|
FloatDenominatorVal: float64ToPtr(3.14),
|
|
Unit: "MHz",
|
|
},
|
|
},
|
|
{
|
|
"2",
|
|
StatValue{
|
|
IntNumeratorVal: pointerOf(int64(2)),
|
|
},
|
|
},
|
|
{
|
|
"2 / 3",
|
|
StatValue{
|
|
IntNumeratorVal: pointerOf(int64(2)),
|
|
IntDenominatorVal: pointerOf(int64(3)),
|
|
},
|
|
},
|
|
{
|
|
"2 MHz",
|
|
StatValue{
|
|
IntNumeratorVal: pointerOf(int64(2)),
|
|
Unit: "MHz",
|
|
},
|
|
},
|
|
{
|
|
"2 / 3 MHz",
|
|
StatValue{
|
|
IntNumeratorVal: pointerOf(int64(2)),
|
|
IntDenominatorVal: pointerOf(int64(3)),
|
|
Unit: "MHz",
|
|
},
|
|
},
|
|
}
|
|
|
|
for i, c := range cases {
|
|
t.Run(fmt.Sprintf("case %d %v", i, c.expected), func(t *testing.T) {
|
|
formatted := c.value.String()
|
|
must.Eq(t, c.expected, formatted)
|
|
})
|
|
}
|
|
}
|