services: un-mark group services as deregistered if restart hook runs (#16905)
* services: un-mark group services as deregistered if restart hook runs

  This PR may fix a bug where group services will never be deregistered if the group undergoes a task restart.

* e2e: add test case for restart and deregister group service
* cl: add cl
* e2e: add wait for service list call
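For context, the bug follows the usual guard-flag pattern: once the hook marks its services as deregistered, later deregistrations become no-ops, so the flag has to be reset whenever the services are registered again. A minimal, self-contained sketch of that pattern — the serviceHook type and its methods are hypothetical stand-ins, not Nomad's actual implementation:

```go
package main

import (
	"fmt"
	"sync"
)

// serviceHook mimics the guard-flag pattern described above: once
// deregistered is set, deregister becomes a no-op until the flag is
// cleared again on (re-)registration.
type serviceHook struct {
	mu           sync.Mutex
	deregistered bool
}

func (h *serviceHook) register() {
	h.mu.Lock()
	defer h.mu.Unlock()
	fmt.Println("register group services")
	// Clearing the flag here is the crux of the fix: without it, a task
	// restart that re-registers services leaves deregistered == true and
	// the final deregistration is skipped forever.
	h.deregistered = false
}

func (h *serviceHook) deregister() {
	h.mu.Lock()
	defer h.mu.Unlock()
	if h.deregistered {
		fmt.Println("skip: already marked deregistered")
		return
	}
	fmt.Println("deregister group services")
	h.deregistered = true
}

func main() {
	h := &serviceHook{}
	h.register()   // initial prerun
	h.deregister() // e.g. a shutdown-delay path marks services deregistered
	h.register()   // task restart registers the services again
	h.deregister() // only works because register cleared the flag
}
```

Clearing the flag in the deferred blocks of Prerun and PreTaskRestart, as the diff below does, is the equivalent step in the real hook.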
This commit is contained in:
parent 1633cab363
commit 753c17c9de
@@ -0,0 +1,3 @@
```release-note:bug
services: Fixed a bug preventing group service deregistrations after alloc restarts
```
@@ -119,12 +119,14 @@ func (h *groupServiceHook) Prerun() error {
 	defer func() {
 		// Mark prerun as true to unblock Updates
 		h.prerun = true
+		// Mark deregistered as false to allow de-registration
+		h.deregistered = false
 		h.mu.Unlock()
 	}()
 	return h.preRunLocked()
 }
 
-// caller must hold h.lock
+// caller must hold h.mu
 func (h *groupServiceHook) preRunLocked() error {
 	if len(h.services) == 0 {
 		return nil
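The updated comment points at the hook's mutex field (h.mu) and the common Go convention it relies on: the exported method takes the lock, while a ...Locked helper assumes the caller already holds it. A tiny illustration of that convention, using a hypothetical counter type rather than anything from Nomad:

```go
package main

import (
	"fmt"
	"sync"
)

// counter illustrates the "caller must hold c.mu" convention: Incr manages
// the lock, incrLocked relies on its caller having taken it already.
type counter struct {
	mu sync.Mutex
	n  int
}

func (c *counter) Incr() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.incrLocked()
}

// caller must hold c.mu
func (c *counter) incrLocked() {
	c.n++
}

func main() {
	c := &counter{}
	c.Incr()
	fmt.Println(c.n) // 1
}
```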
@@ -188,6 +190,8 @@ func (h *groupServiceHook) PreTaskRestart() error {
 	defer func() {
 		// Mark prerun as true to unblock Updates
 		h.prerun = true
+		// Mark deregistered as false to allow de-registration
+		h.deregistered = false
 		h.mu.Unlock()
 	}()
 
@@ -201,7 +205,7 @@ func (h *groupServiceHook) PreKill() {
 
 // implements the PreKill hook
 //
-// caller must hold h.lock
+// caller must hold h.mu
 func (h *groupServiceHook) preKillLocked() {
 	// If we have a shutdown delay deregister group services and then wait
 	// before continuing to kill tasks.
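The comment above describes the shutdown-delay flow: when a delay is configured, the group's services are removed from the catalog first, and only after the delay elapses (or the agent shuts down) do the tasks get killed, giving load balancers time to drain. A rough, self-contained sketch of that flow — the function and its parameters are illustrative assumptions, not Nomad's actual code:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// deregisterThenWait sketches the shutdown-delay behaviour described in the
// preKillLocked comment: deregister services, then wait out the delay (or a
// cancelled context) before the caller proceeds to kill tasks.
func deregisterThenWait(ctx context.Context, delay time.Duration, deregister func()) {
	if delay == 0 {
		// no delay configured: nothing to do before tasks are killed
		return
	}
	deregister()
	select {
	case <-ctx.Done(): // agent is shutting down; don't block the kill
	case <-time.After(delay):
	}
}

func main() {
	deregisterThenWait(context.Background(), 500*time.Millisecond, func() {
		fmt.Println("deregister group services")
	})
	fmt.Println("proceed to kill tasks")
}
```

Presumably it is a path like this that leaves the deregistered mark set, which the Prerun and PreTaskRestart changes above now clear again.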
@@ -0,0 +1,74 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package consul

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/shoenig/test/must"
	"github.com/shoenig/test/wait"
)

func testAllocRestart(t *testing.T) {
	nc := e2eutil.NomadClient(t)
	cc := e2eutil.ConsulClient(t).Catalog()

	const jobFile = "./input/alloc_restart.hcl"
	jobID := "alloc-restart-" + uuid.Short()
	jobIDs := []string{jobID}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	defer e2eutil.CleanupJobsAndGCWithContext(t, ctx, &jobIDs)

	// register our job
	err := e2eutil.Register(jobID, jobFile)
	must.NoError(t, err)

	// wait for our allocation to be running
	allocID := e2eutil.SingleAllocID(t, jobID, "", 0)
	e2eutil.WaitForAllocRunning(t, nc, allocID)

	// make sure our service is registered
	services, _, err := cc.Service("alloc-restart-http", "", nil)
	must.NoError(t, err)
	must.Len(t, 1, services)

	// restart the alloc
	stderr, err := e2eutil.Command("nomad", "alloc", "restart", allocID)
	must.NoError(t, err, must.Sprintf("stderr: %s", stderr))

	// wait for alloc running again
	e2eutil.WaitForAllocRunning(t, nc, allocID)

	// make sure our service is still registered
	services, _, err = cc.Service("alloc-restart-http", "", nil)
	must.NoError(t, err)
	must.Len(t, 1, services)

	err = e2eutil.StopJob(jobID)
	must.NoError(t, err)

	// make sure our service is no longer registered
	f := func() error {
		services, _, err = cc.Service("alloc-restart-http", "", nil)
		if err != nil {
			return err
		}
		if len(services) != 0 {
			return errors.New("expected empty services")
		}
		return nil
	}
	must.Wait(t, wait.InitialSuccess(
		wait.ErrorFunc(f),
		wait.Timeout(10*time.Second),
		wait.Gap(1*time.Second),
	))
}
@@ -0,0 +1,19 @@
package consul

import (
	"testing"

	"github.com/hashicorp/nomad/e2e/e2eutil"
)

func TestConsul(t *testing.T) {
	// todo: migrate the remaining consul tests

	nomad := e2eutil.NomadClient(t)

	e2eutil.WaitForLeader(t, nomad)
	e2eutil.WaitForNodesReady(t, nomad, 1)

	t.Run("testServiceReversion", testServiceReversion)
	t.Run("testAllocRestart", testAllocRestart)
}
@@ -0,0 +1,33 @@
job "alloc-restart" {
  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "group" {
    network {
      port "http" {
        to = 8080
      }
    }

    service {
      name = "alloc-restart-http"
      port = "http"
    }

    task "python" {
      driver = "raw_exec"

      config {
        command = "python3"
        args    = ["-m", "http.server", "8080", "--directory", "/tmp"]
      }

      resources {
        cpu    = 16
        memory = 32
      }
    }
  }
}
@@ -13,17 +13,6 @@ import (
 	"github.com/shoenig/test/must"
 )
 
-func TestConsul(t *testing.T) {
-	// todo: migrate the remaining consul tests
-
-	nomad := e2eutil.NomadClient(t)
-
-	e2eutil.WaitForLeader(t, nomad)
-	e2eutil.WaitForNodesReady(t, nomad, 1)
-
-	t.Run("testServiceReversion", testServiceReversion)
-}
-
 // testServiceReversion asserts we can
 // - submit a job with a service
 // - update that job and modify service