open-nomad/nomad/deploymentwatcher/deployments_watcher_test.go

package deploymentwatcher

import (
	"fmt"
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/assert"
	mocker "github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
)

func testDeploymentWatcher(t *testing.T, qps float64, batchDur time.Duration) (*Watcher, *mockBackend) {
	m := newMockBackend(t)
	w := NewDeploymentsWatcher(testlog.HCLogger(t), m, qps, batchDur)
	return w, m
}

func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
	return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
}

// Tests that the watcher properly watches for deployments and reconciles them
func TestWatcher_WatchDeployments(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create three jobs
	j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
	require.Nil(m.state.UpsertJob(100, j1))
	require.Nil(m.state.UpsertJob(101, j2))
	require.Nil(m.state.UpsertJob(102, j3))

	// Create three deployments all running
	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
	d1.JobID = j1.ID
	d2.JobID = j2.ID
	d3.JobID = j3.ID

	// Upsert the first deployment
	require.Nil(m.state.UpsertDeployment(103, d1))

	// Next list 3
	block1 := make(chan time.Time)
	go func() {
		<-block1
		require.Nil(m.state.UpsertDeployment(104, d2))
		require.Nil(m.state.UpsertDeployment(105, d3))
	}()

	//// Next list 3 but have one be terminal
	block2 := make(chan time.Time)
	d3terminal := d3.Copy()
	d3terminal.Status = structs.DeploymentStatusFailed
	go func() {
		<-block2
		require.Nil(m.state.UpsertDeployment(106, d3terminal))
	}()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "1 deployment returned") })

	close(block1)
	testutil.WaitForResult(func() (bool, error) { return 3 == watchersCount(w), nil },
		func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned") })

	close(block2)
	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
		func(err error) { require.Equal(3, watchersCount(w), "3 deployment returned - 1 terminal") })
}

// Tests that calls against an unknown deployment fail
func TestWatcher_UnknownDeployment(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)
	w.SetEnabled(true, m.state)

	// The expected error is that it should be an unknown deployment
	dID := uuid.Generate()
	expected := fmt.Sprintf("unknown deployment %q", dID)

	// Request setting the health against an unknown deployment
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:         dID,
		HealthyAllocationIDs: []string{uuid.Generate()},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	if assert.NotNil(err, "should have error for unknown deployment") {
		require.Contains(err.Error(), expected)
	}

	// Request promoting against an unknown deployment
	req2 := &structs.DeploymentPromoteRequest{
		DeploymentID: dID,
		All:          true,
	}
	err = w.PromoteDeployment(req2, &resp)
	if assert.NotNil(err, "should have error for unknown deployment") {
		require.Contains(err.Error(), expected)
	}

	// Request pausing against an unknown deployment
	req3 := &structs.DeploymentPauseRequest{
		DeploymentID: dID,
		Pause:        true,
	}
	err = w.PauseDeployment(req3, &resp)
	if assert.NotNil(err, "should have error for unknown deployment") {
		require.Contains(err.Error(), expected)
	}

	// Request failing against an unknown deployment
	req4 := &structs.DeploymentFailRequest{
		DeploymentID: dID,
	}
	err = w.FailDeployment(req4, &resp)
	if assert.NotNil(err, "should have error for unknown deployment") {
		require.Contains(err.Error(), expected)
	}
}

// Test setting an unknown allocation's health
func TestWatcher_SetAllocHealth_Unknown(t *testing.T) {
	t.Parallel()
	assert := assert.New(t)
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentAllocHealth
	a := mock.Alloc()
	matchConfig := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Healthy:      []string{a.ID},
		Eval:         true,
	}
	matcher := matchDeploymentAllocHealthRequest(matchConfig)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:         d.ID,
		HealthyAllocationIDs: []string{a.ID},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	if assert.NotNil(err, "Set health of unknown allocation") {
		require.Contains(err.Error(), "unknown")
	}
	require.Equal(1, watchersCount(w), "Deployment should still be active")
}

// Test setting allocation health
func TestWatcher_SetAllocHealth_Healthy(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we get a call to UpsertDeploymentAllocHealth
	matchConfig := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Healthy:      []string{a.ID},
		Eval:         true,
	}
	matcher := matchDeploymentAllocHealthRequest(matchConfig)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:         d.ID,
		HealthyAllocationIDs: []string{a.ID},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	require.Nil(err, "SetAllocHealth")
	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher))
}

// Test setting allocation unhealthy
func TestWatcher_SetAllocHealth_Unhealthy(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we get a call to UpsertDeploymentAllocHealth
	matchConfig := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Unhealthy:    []string{a.ID},
		Eval:         true,
		DeploymentUpdate: &structs.DeploymentStatusUpdate{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusFailed,
			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
		},
	}
	matcher := matchDeploymentAllocHealthRequest(matchConfig)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:           d.ID,
		UnhealthyAllocationIDs: []string{a.ID},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	require.Nil(err, "SetAllocHealth")

	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
}

// Test setting allocation unhealthy and that there should be a rollback
func TestWatcher_SetAllocHealth_Unhealthy_Rollback(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.AutoRevert = true
	j.TaskGroups[0].Update.ProgressDeadline = 0
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].AutoRevert = true
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// Upsert the job again to get a new version
	j2 := j.Copy()
	j2.Stable = false
	// Modify the job to make its specification different
	j2.Meta["foo"] = "bar"

	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")

	// require that we get a call to UpsertDeploymentAllocHealth
	matchConfig := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Unhealthy:    []string{a.ID},
		Eval:         true,
		DeploymentUpdate: &structs.DeploymentStatusUpdate{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusFailed,
			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
		},
		JobVersion: helper.Uint64ToPtr(0),
	}
	matcher := matchDeploymentAllocHealthRequest(matchConfig)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:           d.ID,
		UnhealthyAllocationIDs: []string{a.ID},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	require.Nil(err, "SetAllocHealth")

	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
}

// Test setting allocation unhealthy on job with identical spec and there should be no rollback
func TestWatcher_SetAllocHealth_Unhealthy_NoRollback(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.AutoRevert = true
	j.TaskGroups[0].Update.ProgressDeadline = 0
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].AutoRevert = true
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// Upsert the job again to get a new version
	j2 := j.Copy()
	j2.Stable = false

	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")

	// require that we get a call to UpsertDeploymentAllocHealth
	matchConfig := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Unhealthy:    []string{a.ID},
		Eval:         true,
		DeploymentUpdate: &structs.DeploymentStatusUpdate{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusFailed,
			StatusDescription: structs.DeploymentStatusDescriptionFailedAllocations,
		},
		JobVersion: nil,
	}
	matcher := matchDeploymentAllocHealthRequest(matchConfig)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:           d.ID,
		UnhealthyAllocationIDs: []string{a.ID},
	}
	var resp structs.DeploymentUpdateResponse
	err := w.SetAllocHealth(req, &resp)
	require.Nil(err, "SetAllocHealth")

	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
	m.AssertNumberOfCalls(t, "UpdateDeploymentAllocHealth", 1)
}

// Test promoting a deployment
func TestWatcher_PromoteDeployment_HealthyCanaries(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, canary alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.Canary = 1
	j.TaskGroups[0].Update.ProgressDeadline = 0
	d := mock.Deployment()
	d.JobID = j.ID
	a := mock.Alloc()
	d.TaskGroups[a.TaskGroup].DesiredCanaries = 1
	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
	a.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy: helper.BoolToPtr(true),
	}
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we get a call to UpsertDeploymentPromotion
	matchConfig := &matchDeploymentPromoteRequestConfig{
		Promotion: &structs.DeploymentPromoteRequest{
			DeploymentID: d.ID,
			All:          true,
		},
		Eval: true,
	}
	matcher := matchDeploymentPromoteRequest(matchConfig)
	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)

	// We may get an update for the desired transition.
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PromoteDeployment
	req := &structs.DeploymentPromoteRequest{
		DeploymentID: d.ID,
		All:          true,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PromoteDeployment(req, &resp)
	require.Nil(err, "PromoteDeployment")
	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
}

// Test promoting a deployment with unhealthy canaries
func TestWatcher_PromoteDeployment_UnhealthyCanaries(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job, canary alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.Canary = 2
	j.TaskGroups[0].Update.ProgressDeadline = 0
	d := mock.Deployment()
	d.JobID = j.ID
	a := mock.Alloc()
	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID}
	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we get a call to UpsertDeploymentPromotion
	matchConfig := &matchDeploymentPromoteRequestConfig{
		Promotion: &structs.DeploymentPromoteRequest{
			DeploymentID: d.ID,
			All:          true,
		},
		Eval: true,
	}
	matcher := matchDeploymentPromoteRequest(matchConfig)
	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call SetAllocHealth
	req := &structs.DeploymentPromoteRequest{
		DeploymentID: d.ID,
		All:          true,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PromoteDeployment(req, &resp)
	if assert.NotNil(t, err, "PromoteDeployment") {
		// 0/2 because the old version has been stopped but the canary isn't marked healthy yet
		require.Contains(err.Error(), `Task group "web" has 0/2 healthy allocations`, "Should error because canary isn't marked healthy")
	}

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher))
}

func TestWatcher_AutoPromoteDeployment(t *testing.T) {
	t.Parallel()
	w, m := defaultTestDeploymentWatcher(t)
	now := time.Now()

	// Create 1 UpdateStrategy, 1 job (1 TaskGroup), 2 canaries, and 1 deployment
	upd := structs.DefaultUpdateStrategy.Copy()
	upd.AutoPromote = true
	upd.MaxParallel = 2
	upd.Canary = 2
	upd.ProgressDeadline = 5 * time.Second

	j := mock.Job()
	j.TaskGroups[0].Update = upd

	d := mock.Deployment()
	d.JobID = j.ID
	// This is created in scheduler.computeGroup at runtime, where properties from the
	// UpdateStrategy are copied in
	d.TaskGroups = map[string]*structs.DeploymentState{
		"web": {
			AutoPromote:      upd.AutoPromote,
			AutoRevert:       upd.AutoRevert,
			ProgressDeadline: upd.ProgressDeadline,
			DesiredTotal:     2,
		},
	}

	alloc := func() *structs.Allocation {
		a := mock.Alloc()
		a.DeploymentID = d.ID
		a.CreateTime = now.UnixNano()
		a.ModifyTime = now.UnixNano()
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary: true,
		}
		return a
	}

	a := alloc()
	b := alloc()

	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID, b.ID}
	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
	require.NoError(t, m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.NoError(t, m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, b}), "UpsertAllocs")

	// =============================================================
	// Support method calls
	matchConfig0 := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
		Eval:              true,
	}
	matcher0 := matchDeploymentStatusUpdateRequest(matchConfig0)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher0)).Return(nil)

	matchConfig1 := &matchDeploymentAllocHealthRequestConfig{
		DeploymentID: d.ID,
		Healthy:      []string{a.ID, b.ID},
		Eval:         true,
	}
	matcher1 := matchDeploymentAllocHealthRequest(matchConfig1)
	m.On("UpdateDeploymentAllocHealth", mocker.MatchedBy(matcher1)).Return(nil)

	matchConfig2 := &matchDeploymentPromoteRequestConfig{
		Promotion: &structs.DeploymentPromoteRequest{
			DeploymentID: d.ID,
			All:          true,
		},
		Eval: true,
	}
	matcher2 := matchDeploymentPromoteRequest(matchConfig2)
	m.On("UpdateDeploymentPromotion", mocker.MatchedBy(matcher2)).Return(nil)
	// =============================================================

	// Start the deployment
	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == len(w.watchers), nil },
		func(err error) { require.Equal(t, 1, len(w.watchers), "Should have 1 deployment") })

	// Mark the canaries healthy
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:         d.ID,
		HealthyAllocationIDs: []string{a.ID, b.ID},
	}
	var resp structs.DeploymentUpdateResponse
	// Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in
	// state.UpdateDeploymentAllocHealth via a raft shim?
	err := w.SetAllocHealth(req, &resp)
	require.NoError(t, err)

	ws := memdb.NewWatchSet()

	testutil.WaitForResult(
		func() (bool, error) {
			ds, _ := m.state.DeploymentsByJobID(ws, j.Namespace, j.ID, true)
			d = ds[0]
			return 2 == d.TaskGroups["web"].HealthyAllocs, nil
		},
		func(err error) { require.NoError(t, err) },
	)

	require.Equal(t, 1, len(w.watchers), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentPromotion", mocker.MatchedBy(matcher2))

	require.Equal(t, "running", d.Status)
	require.True(t, d.TaskGroups["web"].Promoted)

	a1, _ := m.state.AllocByID(ws, a.ID)
	require.False(t, a1.DeploymentStatus.Canary)
	require.Equal(t, "pending", a1.ClientStatus)
	require.Equal(t, "run", a1.DesiredStatus)

	b1, _ := m.state.AllocByID(ws, b.ID)
	require.False(t, b1.DeploymentStatus.Canary)
}

// Test pausing a deployment that is running
func TestWatcher_PauseDeployment_Pause_Running(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentStatusUpdate
	matchConfig := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusPaused,
		StatusDescription: structs.DeploymentStatusDescriptionPaused,
	}
	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PauseDeployment
	req := &structs.DeploymentPauseRequest{
		DeploymentID: d.ID,
		Pause:        true,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PauseDeployment(req, &resp)
	require.Nil(err, "PauseDeployment")

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
}

// Test pausing a deployment that is paused
func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	d.Status = structs.DeploymentStatusPaused
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentStatusUpdate
	matchConfig := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusPaused,
		StatusDescription: structs.DeploymentStatusDescriptionPaused,
	}
	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PauseDeployment
	req := &structs.DeploymentPauseRequest{
		DeploymentID: d.ID,
		Pause:        true,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PauseDeployment(req, &resp)
	require.Nil(err, "PauseDeployment")

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
}

// Test unpausing a deployment that is paused
func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	d.Status = structs.DeploymentStatusPaused
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentStatusUpdate
	matchConfig := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusRunning,
		StatusDescription: structs.DeploymentStatusDescriptionRunning,
		Eval:              true,
	}
	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PauseDeployment
	req := &structs.DeploymentPauseRequest{
		DeploymentID: d.ID,
		Pause:        false,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PauseDeployment(req, &resp)
	require.Nil(err, "PauseDeployment")

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
}

// Test unpausing a deployment that is running
func TestWatcher_PauseDeployment_Unpause_Running(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentStatusUpdate
	matchConfig := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusRunning,
		StatusDescription: structs.DeploymentStatusDescriptionRunning,
		Eval:              true,
	}
	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PauseDeployment
	req := &structs.DeploymentPauseRequest{
		DeploymentID: d.ID,
		Pause:        false,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.PauseDeployment(req, &resp)
	require.Nil(err, "PauseDeployment")

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
}

// Test failing a deployment that is running
func TestWatcher_FailDeployment_Running(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := defaultTestDeploymentWatcher(t)

	// Create a job and a deployment
	j := mock.Job()
	d := mock.Deployment()
	d.JobID = j.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// require that we get a call to UpsertDeploymentStatusUpdate
	matchConfig := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionFailedByUser,
		Eval:              true,
	}
	matcher := matchDeploymentStatusUpdateRequest(matchConfig)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(matcher)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Call PauseDeployment
	req := &structs.DeploymentFailRequest{
		DeploymentID: d.ID,
	}
	var resp structs.DeploymentUpdateResponse
	err := w.FailDeployment(req, &resp)
	require.Nil(err, "FailDeployment")

	require.Equal(1, watchersCount(w), "Deployment should still be active")
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(matcher))
}

// Tests that the watcher properly watches for allocation changes and takes the
// proper actions
func TestDeploymentWatcher_Watch_NoProgressDeadline(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.AutoRevert = true
	j.TaskGroups[0].Update.ProgressDeadline = 0
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].AutoRevert = true
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// Upsert the job again to get a new version
	j2 := j.Copy()
	// Modify the job to make its specification different
	j2.Meta["foo"] = "bar"
	j2.Stable = false
	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")

	// require that we will get a update allocation call only once. This will
	// verify that the watcher is batching allocation changes
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	// require that we get a call to UpsertDeploymentStatusUpdate
	c := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
		JobVersion:        helper.Uint64ToPtr(0),
		Eval:              true,
	}
	m2 := matchDeploymentStatusUpdateRequest(c)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Update the allocs health to healthy which should create an evaluation
	for i := 0; i < 5; i++ {
		req := &structs.ApplyDeploymentAllocHealthRequest{
			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
				DeploymentID:         d.ID,
				HealthyAllocationIDs: []string{a.ID},
			},
		}
		require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
	}

	// Wait for there to be one eval
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 1 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	// Update the allocs health to unhealthy which should create a job rollback,
	// status update and eval
	req2 := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
			DeploymentID:           d.ID,
			UnhealthyAllocationIDs: []string{a.ID},
		},
	}
	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")

	// Wait for there to be one eval
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))

	// After we upsert the job version will go to 2. So use this to require the
	// original call happened.
	c2 := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionRollback(structs.DeploymentStatusDescriptionFailedAllocations, 0),
		JobVersion:        helper.Uint64ToPtr(2),
		Eval:              true,
	}
	m3 := matchDeploymentStatusUpdateRequest(c2)
	m.AssertCalled(t, "UpdateDeploymentStatus", mocker.MatchedBy(m3))
	testutil.WaitForResult(func() (bool, error) { return 0 == watchersCount(w), nil },
		func(err error) { require.Equal(0, watchersCount(w), "Should have no deployment") })
}

func TestDeploymentWatcher_Watch_ProgressDeadline(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
	a := mock.Alloc()
	now := time.Now()
	a.CreateTime = now.UnixNano()
	a.ModifyTime = now.UnixNano()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we get a call to UpsertDeploymentStatusUpdate
	c := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionProgressDeadline,
		Eval:              true,
	}
	m2 := matchDeploymentStatusUpdateRequest(c)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Update the alloc to be unhealthy and require that nothing happens.
	a2 := a.Copy()
	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy:   helper.BoolToPtr(false),
		Timestamp: now,
	}
	require.Nil(m.state.UpdateAllocsFromClient(100, []*structs.Allocation{a2}))

	// Wait for the deployment to be failed
	testutil.WaitForResult(func() (bool, error) {
		d, err := m.state.DeploymentByID(nil, d.ID)
		if err != nil {
			return false, err
		}

		return d.Status == structs.DeploymentStatusFailed, fmt.Errorf("bad status %q", d.Status)
	}, func(err error) {
		t.Fatal(err)
	})

	// require there are is only one evaluation
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 1 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})
}

// Test that progress deadline handling works when there are multiple groups
func TestDeploymentWatcher_ProgressCutoff(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Count = 1
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
	j.TaskGroups = append(j.TaskGroups, j.TaskGroups[0].Copy())
	j.TaskGroups[1].Name = "foo"
	j.TaskGroups[1].Update.ProgressDeadline = 1 * time.Second
	j.Stable = true

	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].DesiredTotal = 1
	d.TaskGroups["foo"] = d.TaskGroups["web"].Copy()
	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
	d.TaskGroups["foo"].ProgressDeadline = 1 * time.Second

	a := mock.Alloc()
	now := time.Now()
	a.CreateTime = now.UnixNano()
	a.ModifyTime = now.UnixNano()
	a.DeploymentID = d.ID

	a2 := mock.Alloc()
	a2.TaskGroup = "foo"
	a2.CreateTime = now.UnixNano()
	a2.ModifyTime = now.UnixNano()
	a2.DeploymentID = d.ID

	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a, a2}), "UpsertAllocs")

	// We may get an update for the desired transition.
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	watcher, err := w.getOrCreateWatcher(d.ID)
	require.NoError(err)
	require.NotNil(watcher)

	d1, err := m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)

	done := watcher.doneGroups(d1)
	require.Contains(done, "web")
	require.False(done["web"])
	require.Contains(done, "foo")
	require.False(done["foo"])

	cutoff1 := watcher.getDeploymentProgressCutoff(d1)
	require.False(cutoff1.IsZero())

	// Update the first allocation to be healthy
	a3 := a.Copy()
	a3.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a3}), "UpsertAllocs")

	// Get the updated deployment
	d2, err := m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)

	done = watcher.doneGroups(d2)
	require.Contains(done, "web")
	require.True(done["web"])
	require.Contains(done, "foo")
	require.False(done["foo"])

	cutoff2 := watcher.getDeploymentProgressCutoff(d2)
	require.False(cutoff2.IsZero())
	require.True(cutoff1.UnixNano() < cutoff2.UnixNano())

	// Update the second allocation to be healthy
	a4 := a2.Copy()
	a4.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a4}), "UpsertAllocs")

	// Get the updated deployment
	d3, err := m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)

	done = watcher.doneGroups(d3)
	require.Contains(done, "web")
	require.True(done["web"])
	require.Contains(done, "foo")
	require.True(done["foo"])

	cutoff3 := watcher.getDeploymentProgressCutoff(d2)
	require.True(cutoff3.IsZero())
}

// Test that we will allow the progress deadline to be reached when the canaries
// are healthy but we haven't promoted
func TestDeploymentWatcher_Watch_ProgressDeadline_Canaries(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.Canary = 1
	j.TaskGroups[0].Update.MaxParallel = 1
	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
	j.Stable = true
	d := mock.Deployment()
	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	d.JobID = j.ID
	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
	d.TaskGroups["web"].DesiredCanaries = 1
	a := mock.Alloc()
	now := time.Now()
	a.CreateTime = now.UnixNano()
	a.ModifyTime = now.UnixNano()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// require that we will get a createEvaluation call only once. This will
	// verify that the watcher is batching allocation changes
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Update the alloc to be unhealthy and require that nothing happens.
	a2 := a.Copy()
	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy:   helper.BoolToPtr(true),
		Timestamp: now,
	}
	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))

	// Wait for the deployment to cross the deadline
	dout, err := m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)
	require.NotNil(dout)
	state := dout.TaskGroups["web"]
	require.NotNil(state)
	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))

	// Require the deployment is still running
	dout, err = m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)
	require.NotNil(dout)
	require.Equal(structs.DeploymentStatusRunning, dout.Status)
	require.Equal(structs.DeploymentStatusDescriptionRunningNeedsPromotion, dout.StatusDescription)

	// require there are is only one evaluation
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 1 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})
}

// Test that a promoted deployment with alloc healthy updates create
// evals to move the deployment forward
func TestDeploymentWatcher_PromotedCanary_UpdatedAllocs(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Count = 2
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.Canary = 1
	j.TaskGroups[0].Update.MaxParallel = 1
	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
	j.Stable = true

	d := mock.Deployment()
	d.TaskGroups["web"].DesiredTotal = 2
	d.TaskGroups["web"].DesiredCanaries = 1
	d.TaskGroups["web"].HealthyAllocs = 1
	d.StatusDescription = structs.DeploymentStatusDescriptionRunning
	d.JobID = j.ID
	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond
	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(50 * time.Millisecond)

	a := mock.Alloc()
	now := time.Now()
	a.CreateTime = now.UnixNano()
	a.ModifyTime = now.UnixNano()
	a.DeploymentID = d.ID
	a.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy:   helper.BoolToPtr(true),
		Timestamp: now,
	}
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Twice()

	// Create another alloc
	a2 := a.Copy()
	a2.ID = uuid.Generate()
	now = time.Now()
	a2.CreateTime = now.UnixNano()
	a2.ModifyTime = now.UnixNano()
	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy:   helper.BoolToPtr(true),
		Timestamp: now,
	}
	d.TaskGroups["web"].RequireProgressBy = time.Now().Add(2 * time.Second)
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	// Wait until batch eval period passes before updating another alloc
	time.Sleep(1 * time.Second)
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")

	// Wait for the deployment to cross the deadline
	dout, err := m.state.DeploymentByID(nil, d.ID)
	require.NoError(err)
	require.NotNil(dout)
	state := dout.TaskGroups["web"]
	require.NotNil(state)
	time.Sleep(state.RequireProgressBy.Add(time.Second).Sub(now))

	// There should be two evals
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("Got %d evals; want 2", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})
}

// Test scenario where deployment initially has no progress deadline
// After the deployment is updated, a failed alloc's DesiredTransition should be set
func TestDeploymentWatcher_Watch_StartWithoutProgressDeadline(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.ProgressDeadline = 500 * time.Millisecond
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID

	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	a := mock.Alloc()
	a.CreateTime = time.Now().UnixNano()
	a.DeploymentID = d.ID

	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	d.TaskGroups["web"].ProgressDeadline = 500 * time.Millisecond
	// Update the deployment with a progress deadline
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")

	// Match on DesiredTransition set to Reschedule for the failed alloc
	m1 := matchUpdateAllocDesiredTransitionReschedule([]string{a.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Update the alloc to be unhealthy
	a2 := a.Copy()
	a2.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy:   helper.BoolToPtr(false),
		Timestamp: time.Now(),
	}
	require.Nil(m.state.UpdateAllocsFromClient(m.nextIndex(), []*structs.Allocation{a2}))

	// Wait for the alloc's DesiredState to set reschedule
	testutil.WaitForResult(func() (bool, error) {
		a, err := m.state.AllocByID(nil, a.ID)
		if err != nil {
			return false, err
		}
		dt := a.DesiredTransition
		shouldReschedule := dt.Reschedule != nil && *dt.Reschedule
		return shouldReschedule, fmt.Errorf("Desired Transition Reschedule should be set but got %v", shouldReschedule)
	}, func(err error) {
		t.Fatal(err)
	})
}

// Tests that the watcher fails rollback when the spec hasn't changed
func TestDeploymentWatcher_RollbackFailed(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Millisecond)

	// Create a job, alloc, and a deployment
	j := mock.Job()
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.MaxParallel = 2
	j.TaskGroups[0].Update.AutoRevert = true
	j.TaskGroups[0].Update.ProgressDeadline = 0
	j.Stable = true
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].AutoRevert = true
	a := mock.Alloc()
	a.DeploymentID = d.ID
	require.Nil(m.state.UpsertJob(m.nextIndex(), j), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a}), "UpsertAllocs")

	// Upsert the job again to get a new version
	j2 := j.Copy()
	// Modify the job to make its specification different
	j2.Stable = false
	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob2")

	// require that we will get a createEvaluation call only once. This will
	// verify that the watcher is batching allocation changes
	m1 := matchUpdateAllocDesiredTransitions([]string{d.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	// require that we get a call to UpsertDeploymentStatusUpdate with roll back failed as the status
	c := &matchDeploymentStatusUpdateConfig{
		DeploymentID:      d.ID,
		Status:            structs.DeploymentStatusFailed,
		StatusDescription: structs.DeploymentStatusDescriptionRollbackNoop(structs.DeploymentStatusDescriptionFailedAllocations, 0),
		JobVersion:        nil,
		Eval:              true,
	}
	m2 := matchDeploymentStatusUpdateRequest(c)
	m.On("UpdateDeploymentStatus", mocker.MatchedBy(m2)).Return(nil)

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 1 == watchersCount(w), nil },
		func(err error) { require.Equal(1, watchersCount(w), "Should have 1 deployment") })

	// Update the allocs health to healthy which should create an evaluation
	for i := 0; i < 5; i++ {
		req := &structs.ApplyDeploymentAllocHealthRequest{
			DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
				DeploymentID:         d.ID,
				HealthyAllocationIDs: []string{a.ID},
			},
		}
		require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")
	}

	// Wait for there to be one eval
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 1 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	// Update the allocs health to unhealthy which will cause attempting a rollback,
	// fail in that step, do status update and eval
	req2 := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
			DeploymentID:           d.ID,
			UnhealthyAllocationIDs: []string{a.ID},
		},
	}
	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")

	// Wait for there to be one eval
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals, err := m.state.EvalsByJob(ws, j.Namespace, j.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("Got %d evals; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))

	// verify that the job version hasn't changed after upsert
	m.state.JobByID(nil, structs.DefaultNamespace, j.ID)
	require.Equal(uint64(0), j.Version, "Expected job version 0 but got ", j.Version)
}

// Test allocation updates and evaluation creation is batched between watchers
func TestWatcher_BatchAllocUpdates(t *testing.T) {
	t.Parallel()
	require := require.New(t)
	w, m := testDeploymentWatcher(t, 1000.0, 1*time.Second)

	// Create a job, alloc, for two deployments
	j1 := mock.Job()
	j1.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j1.TaskGroups[0].Update.ProgressDeadline = 0
	d1 := mock.Deployment()
	d1.JobID = j1.ID
	a1 := mock.Alloc()
	a1.Job = j1
	a1.JobID = j1.ID
	a1.DeploymentID = d1.ID

	j2 := mock.Job()
	j2.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j2.TaskGroups[0].Update.ProgressDeadline = 0
	d2 := mock.Deployment()
	d2.JobID = j2.ID
	a2 := mock.Alloc()
	a2.Job = j2
	a2.JobID = j2.ID
	a2.DeploymentID = d2.ID

	require.Nil(m.state.UpsertJob(m.nextIndex(), j1), "UpsertJob")
	require.Nil(m.state.UpsertJob(m.nextIndex(), j2), "UpsertJob")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d1), "UpsertDeployment")
	require.Nil(m.state.UpsertDeployment(m.nextIndex(), d2), "UpsertDeployment")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a1}), "UpsertAllocs")
	require.Nil(m.state.UpsertAllocs(m.nextIndex(), []*structs.Allocation{a2}), "UpsertAllocs")

	// require that we will get a createEvaluation call only once and it contains
	// both deployments. This will verify that the watcher is batching
	// allocation changes
	m1 := matchUpdateAllocDesiredTransitions([]string{d1.ID, d2.ID})
	m.On("UpdateAllocDesiredTransition", mocker.MatchedBy(m1)).Return(nil).Once()

	w.SetEnabled(true, m.state)
	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
		func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") })

	// Update the allocs health to healthy which should create an evaluation
	req := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
			DeploymentID:         d1.ID,
			HealthyAllocationIDs: []string{a1.ID},
		},
	}
	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req), "UpsertDeploymentAllocHealth")

	req2 := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: structs.DeploymentAllocHealthRequest{
			DeploymentID:         d2.ID,
			HealthyAllocationIDs: []string{a2.ID},
		},
	}
	require.Nil(m.state.UpdateDeploymentAllocHealth(m.nextIndex(), req2), "UpsertDeploymentAllocHealth")

	// Wait for there to be one eval for each job
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		evals1, err := m.state.EvalsByJob(ws, j1.Namespace, j1.ID)
		if err != nil {
			return false, err
		}

		evals2, err := m.state.EvalsByJob(ws, j2.Namespace, j2.ID)
		if err != nil {
			return false, err
		}

		if l := len(evals1); l != 1 {
			return false, fmt.Errorf("Got %d evals for job %v; want 1", l, j1.ID)
		}

		if l := len(evals2); l != 1 {
			return false, fmt.Errorf("Got %d evals for job 2; want 1", l)
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	m.AssertCalled(t, "UpdateAllocDesiredTransition", mocker.MatchedBy(m1))
	testutil.WaitForResult(func() (bool, error) { return 2 == watchersCount(w), nil },
		func(err error) { require.Equal(2, watchersCount(w), "Should have 2 deployment") })
}

func watchersCount(w *Watcher) int {
	w.l.Lock()
	defer w.l.Unlock()

	return len(w.watchers)
}