2023-04-10 15:36:59 +00:00
|
|
|
// Copyright (c) HashiCorp, Inc.
|
|
|
|
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
|
2019-08-14 22:02:00 +00:00
|
|
|
package allocrunner
|
|
|
|
|
|
|
|
import (
|
2021-12-13 19:54:53 +00:00
|
|
|
"context"
|
2019-08-14 22:02:00 +00:00
|
|
|
"sync"
|
2019-11-18 16:16:25 +00:00
|
|
|
"time"
|
2019-08-14 22:02:00 +00:00
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
"github.com/hashicorp/go-hclog"
|
2019-08-14 22:02:00 +00:00
|
|
|
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
|
2022-03-15 08:38:30 +00:00
|
|
|
"github.com/hashicorp/nomad/client/serviceregistration"
|
2022-03-21 09:29:57 +00:00
|
|
|
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
|
2019-11-18 18:04:01 +00:00
|
|
|
"github.com/hashicorp/nomad/client/taskenv"
|
2022-06-07 14:18:19 +00:00
|
|
|
"github.com/hashicorp/nomad/helper"
|
2019-08-14 22:02:00 +00:00
|
|
|
"github.com/hashicorp/nomad/nomad/structs"
|
|
|
|
)
|
|
|
|
|
2021-03-16 18:22:21 +00:00
|
|
|
// groupServiceHookName is the name reported by the group service hook and the
// name its logger is scoped under.
const groupServiceHookName = "group_services"
|
|
|
|
|
2019-08-14 22:02:00 +00:00
|
|
|
// groupServiceHook manages task group Consul service registration and
|
|
|
|
// deregistration.
|
|
|
|
type groupServiceHook struct {
|
2022-06-07 14:18:19 +00:00
|
|
|
allocID string
|
|
|
|
jobID string
|
|
|
|
group string
|
2022-09-22 14:38:21 +00:00
|
|
|
namespace string
|
2022-09-09 13:59:04 +00:00
|
|
|
restarter serviceregistration.WorkloadRestarter
|
2022-06-07 14:18:19 +00:00
|
|
|
prerun bool
|
|
|
|
deregistered bool
|
|
|
|
networkStatus structs.NetworkStatus
|
|
|
|
shutdownDelayCtx context.Context
|
2019-08-14 22:02:00 +00:00
|
|
|
|
2022-09-22 14:38:21 +00:00
|
|
|
// providerNamespace is the Nomad or Consul namespace in which service
|
2022-04-06 17:26:22 +00:00
|
|
|
// registrations will be made. This field may be updated.
|
2022-09-22 14:38:21 +00:00
|
|
|
providerNamespace string
|
2022-03-21 09:29:57 +00:00
|
|
|
|
|
|
|
// serviceRegWrapper is the handler wrapper that is used to perform service
|
|
|
|
// and check registration and deregistration.
|
|
|
|
serviceRegWrapper *wrapper.HandlerWrapper
|
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
logger hclog.Logger
|
2019-11-18 18:04:01 +00:00
|
|
|
|
|
|
|
// The following fields may be updated
|
|
|
|
canary bool
|
|
|
|
services []*structs.Service
|
|
|
|
networks structs.Networks
|
2020-10-15 19:32:21 +00:00
|
|
|
ports structs.AllocatedPorts
|
2019-11-18 18:04:01 +00:00
|
|
|
taskEnvBuilder *taskenv.Builder
|
2021-12-13 19:54:53 +00:00
|
|
|
delay time.Duration
|
2019-11-18 18:04:01 +00:00
|
|
|
|
|
|
|
// Since Update() may be called concurrently with any other hook all
|
|
|
|
// hook methods must be fully serialized
|
|
|
|
mu sync.Mutex
|
|
|
|
}
|
|
|
|
|
|
|
|
type groupServiceHookConfig struct {
|
2022-06-07 14:18:19 +00:00
|
|
|
alloc *structs.Allocation
|
2022-09-09 13:59:04 +00:00
|
|
|
restarter serviceregistration.WorkloadRestarter
|
2022-06-07 14:18:19 +00:00
|
|
|
taskEnvBuilder *taskenv.Builder
|
|
|
|
networkStatus structs.NetworkStatus
|
|
|
|
shutdownDelayCtx context.Context
|
2023-03-17 14:44:21 +00:00
|
|
|
logger hclog.Logger
|
2022-03-21 09:29:57 +00:00
|
|
|
|
2022-09-22 14:38:21 +00:00
|
|
|
// providerNamespace is the Nomad or Consul namespace in which service
|
2022-03-21 09:29:57 +00:00
|
|
|
// registrations will be made.
|
2022-09-22 14:38:21 +00:00
|
|
|
providerNamespace string
|
2022-03-21 09:29:57 +00:00
|
|
|
|
|
|
|
// serviceRegWrapper is the handler wrapper that is used to perform service
|
|
|
|
// and check registration and deregistration.
|
|
|
|
serviceRegWrapper *wrapper.HandlerWrapper
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|
|
|
|
|
2019-11-18 18:04:01 +00:00
|
|
|
func newGroupServiceHook(cfg groupServiceHookConfig) *groupServiceHook {
|
2019-11-18 16:16:25 +00:00
|
|
|
var shutdownDelay time.Duration
|
|
|
|
tg := cfg.alloc.Job.LookupTaskGroup(cfg.alloc.TaskGroup)
|
|
|
|
|
2019-12-04 16:06:01 +00:00
|
|
|
if tg.ShutdownDelay != nil {
|
2019-11-18 16:16:25 +00:00
|
|
|
shutdownDelay = *tg.ShutdownDelay
|
|
|
|
}
|
|
|
|
|
2019-08-14 22:02:00 +00:00
|
|
|
h := &groupServiceHook{
|
2022-06-07 14:18:19 +00:00
|
|
|
allocID: cfg.alloc.ID,
|
|
|
|
jobID: cfg.alloc.JobID,
|
|
|
|
group: cfg.alloc.TaskGroup,
|
2022-09-22 14:38:21 +00:00
|
|
|
namespace: cfg.alloc.Namespace,
|
2022-06-07 14:18:19 +00:00
|
|
|
restarter: cfg.restarter,
|
2022-09-22 14:38:21 +00:00
|
|
|
providerNamespace: cfg.providerNamespace,
|
2022-06-07 14:18:19 +00:00
|
|
|
taskEnvBuilder: cfg.taskEnvBuilder,
|
|
|
|
delay: shutdownDelay,
|
|
|
|
networkStatus: cfg.networkStatus,
|
|
|
|
logger: cfg.logger.Named(groupServiceHookName),
|
|
|
|
serviceRegWrapper: cfg.serviceRegWrapper,
|
|
|
|
services: tg.Services,
|
|
|
|
shutdownDelayCtx: cfg.shutdownDelayCtx,
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if cfg.alloc.AllocatedResources != nil {
|
|
|
|
h.networks = cfg.alloc.AllocatedResources.Shared.Networks
|
2020-10-15 19:32:21 +00:00
|
|
|
h.ports = cfg.alloc.AllocatedResources.Shared.Ports
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if cfg.alloc.DeploymentStatus != nil {
|
|
|
|
h.canary = cfg.alloc.DeploymentStatus.Canary
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|
2021-01-22 19:45:26 +00:00
|
|
|
|
2019-08-14 22:02:00 +00:00
|
|
|
return h
|
|
|
|
}
|
|
|
|
|
|
|
|
func (*groupServiceHook) Name() string {
|
2021-03-16 18:22:21 +00:00
|
|
|
return groupServiceHookName
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *groupServiceHook) Prerun() error {
|
|
|
|
h.mu.Lock()
|
|
|
|
defer func() {
|
|
|
|
// Mark prerun as true to unblock Updates
|
|
|
|
h.prerun = true
|
2023-04-24 19:24:51 +00:00
|
|
|
// Mark deregistered as false to allow de-registration
|
|
|
|
h.deregistered = false
|
2019-08-14 22:02:00 +00:00
|
|
|
h.mu.Unlock()
|
|
|
|
}()
|
2023-03-17 14:44:21 +00:00
|
|
|
return h.preRunLocked()
|
2021-01-21 16:36:00 +00:00
|
|
|
}
|
2019-11-18 18:04:01 +00:00
|
|
|
|
2023-04-24 19:24:51 +00:00
|
|
|
// caller must hold h.mu
|
2023-03-17 14:44:21 +00:00
|
|
|
func (h *groupServiceHook) preRunLocked() error {
|
2019-11-18 18:04:01 +00:00
|
|
|
if len(h.services) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
services := h.getWorkloadServicesLocked()
|
2022-03-21 09:29:57 +00:00
|
|
|
return h.serviceRegWrapper.RegisterWorkload(services)
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
// Update is run when a job submitter modifies service(s) (but not much else -
|
|
|
|
// otherwise a full alloc replacement would occur).
|
2019-08-14 22:02:00 +00:00
|
|
|
func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error {
|
|
|
|
h.mu.Lock()
|
|
|
|
defer h.mu.Unlock()
|
client: enable configuring enable_tag_override for services
Consul provides a feature of Service Definitions where the tags
associated with a service can be modified through the Catalog API,
overriding the value(s) configured in the agent's service configuration.
To enable this feature, the flag enable_tag_override must be configured
in the service definition.
Previously, Nomad did not allow configuring this flag, and thus the default
value of false was used. Now, it is configurable.
Because Nomad itself acts as a state machine around the the service definitions
of the tasks it manages, it's worth describing what happens when this feature
is enabled and why.
Consider the basic case where there is no Nomad, and your service is provided
to consul as a boring JSON file. The ultimate source of truth for the definition
of that service is the file, and is stored in the agent. Later, Consul performs
"anti-entropy" which synchronizes the Catalog (stored only the leaders). Then
with enable_tag_override=true, the tags field is available for "external"
modification through the Catalog API (rather than directly configuring the
service definition file, or using the Agent API). The important observation
is that if the service definition ever changes (i.e. the file is changed &
config reloaded OR the Agent API is used to modify the service), those
"external" tag values are thrown away, and the new service definition is
once again the source of truth.
In the Nomad case, Nomad itself is the source of truth over the Agent in
the same way the JSON file was the source of truth in the example above.
That means any time Nomad sets a new service definition, any externally
configured tags are going to be replaced. When does this happen? Only on
major lifecycle events, for example when a task is modified because of an
updated job spec from the 'nomad job run <existing>' command. Otherwise,
Nomad's periodic re-sync's with Consul will now no longer try to restore
the externally modified tag values (as long as enable_tag_override=true).
Fixes #2057
2020-02-07 21:22:19 +00:00
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
oldWorkloadServices := h.getWorkloadServicesLocked()
|
2019-11-18 18:04:01 +00:00
|
|
|
|
|
|
|
// Store new updated values out of request
|
|
|
|
canary := false
|
|
|
|
if req.Alloc.DeploymentStatus != nil {
|
|
|
|
canary = req.Alloc.DeploymentStatus.Canary
|
|
|
|
}
|
|
|
|
|
|
|
|
var networks structs.Networks
|
|
|
|
if req.Alloc.AllocatedResources != nil {
|
|
|
|
networks = req.Alloc.AllocatedResources.Shared.Networks
|
2020-10-15 19:32:21 +00:00
|
|
|
h.ports = req.Alloc.AllocatedResources.Shared.Ports
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
|
|
|
|
2020-04-05 21:30:25 +00:00
|
|
|
tg := req.Alloc.Job.LookupTaskGroup(h.group)
|
|
|
|
var shutdown time.Duration
|
|
|
|
if tg.ShutdownDelay != nil {
|
|
|
|
shutdown = *tg.ShutdownDelay
|
|
|
|
}
|
2020-04-06 15:33:04 +00:00
|
|
|
|
|
|
|
// Update group service hook fields
|
|
|
|
h.networks = networks
|
|
|
|
h.services = tg.Services
|
|
|
|
h.canary = canary
|
2020-04-05 21:30:25 +00:00
|
|
|
h.delay = shutdown
|
2020-04-06 15:33:04 +00:00
|
|
|
h.taskEnvBuilder.UpdateTask(req.Alloc, nil)
|
2020-04-05 21:30:25 +00:00
|
|
|
|
2022-04-06 17:26:22 +00:00
|
|
|
// An update may change the service provider, therefore we need to account
|
|
|
|
// for how namespaces work across providers also.
|
2022-09-22 14:38:21 +00:00
|
|
|
h.providerNamespace = req.Alloc.ServiceProviderNamespace()
|
2022-04-06 17:26:22 +00:00
|
|
|
|
2019-11-18 18:04:01 +00:00
|
|
|
// Create new task services struct with those new values
|
2023-03-17 14:44:21 +00:00
|
|
|
newWorkloadServices := h.getWorkloadServicesLocked()
|
2019-08-14 22:02:00 +00:00
|
|
|
|
|
|
|
if !h.prerun {
|
|
|
|
// Update called before Prerun. Update alloc and exit to allow
|
|
|
|
// Prerun to do initial registration.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-03-21 09:29:57 +00:00
|
|
|
return h.serviceRegWrapper.UpdateWorkload(oldWorkloadServices, newWorkloadServices)
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|
|
|
|
|
2021-01-21 16:36:00 +00:00
|
|
|
func (h *groupServiceHook) PreTaskRestart() error {
|
|
|
|
h.mu.Lock()
|
|
|
|
defer func() {
|
|
|
|
// Mark prerun as true to unblock Updates
|
|
|
|
h.prerun = true
|
2023-04-24 19:24:51 +00:00
|
|
|
// Mark deregistered as false to allow de-registration
|
|
|
|
h.deregistered = false
|
2021-01-21 16:36:00 +00:00
|
|
|
h.mu.Unlock()
|
|
|
|
}()
|
|
|
|
|
|
|
|
h.preKillLocked()
|
2023-03-17 14:44:21 +00:00
|
|
|
return h.preRunLocked()
|
2021-01-21 16:36:00 +00:00
|
|
|
}
|
|
|
|
|
2019-11-18 16:16:25 +00:00
|
|
|
func (h *groupServiceHook) PreKill() {
|
2023-03-17 14:44:21 +00:00
|
|
|
helper.WithLock(&h.mu, h.preKillLocked)
|
2021-01-21 16:36:00 +00:00
|
|
|
}
|
2019-11-18 16:16:25 +00:00
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
// implements the PreKill hook
|
|
|
|
//
|
2023-04-24 19:24:51 +00:00
|
|
|
// caller must hold h.mu
|
2021-01-21 16:36:00 +00:00
|
|
|
func (h *groupServiceHook) preKillLocked() {
|
2021-07-08 16:14:27 +00:00
|
|
|
// If we have a shutdown delay deregister group services and then wait
|
|
|
|
// before continuing to kill tasks.
|
2023-03-17 14:44:21 +00:00
|
|
|
h.deregisterLocked()
|
2019-11-18 16:16:25 +00:00
|
|
|
|
2019-12-04 16:06:01 +00:00
|
|
|
if h.delay == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-07-08 16:14:27 +00:00
|
|
|
h.logger.Debug("delay before killing tasks", "group", h.group, "shutdown_delay", h.delay)
|
2019-12-03 13:58:29 +00:00
|
|
|
|
2022-06-07 14:18:19 +00:00
|
|
|
timer, cancel := helper.NewSafeTimer(h.delay)
|
|
|
|
defer cancel()
|
|
|
|
|
2021-12-13 19:54:53 +00:00
|
|
|
select {
|
|
|
|
// Wait for specified shutdown_delay unless ignored
|
2021-07-08 16:14:27 +00:00
|
|
|
// This will block an agent from shutting down.
|
2022-06-07 14:18:19 +00:00
|
|
|
case <-timer.C:
|
2021-12-13 19:54:53 +00:00
|
|
|
case <-h.shutdownDelayCtx.Done():
|
|
|
|
}
|
2019-11-18 16:16:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *groupServiceHook) Postrun() error {
|
2023-03-17 14:44:21 +00:00
|
|
|
helper.WithLock(&h.mu, h.deregisterLocked)
|
2019-11-18 18:04:01 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
// deregisterLocked will deregister services from Consul/Nomad service provider.
|
|
|
|
//
|
|
|
|
// caller must hold h.lock
|
|
|
|
func (h *groupServiceHook) deregisterLocked() {
|
|
|
|
if h.deregistered {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-11-18 18:04:01 +00:00
|
|
|
if len(h.services) > 0 {
|
2023-03-17 14:44:21 +00:00
|
|
|
workloadServices := h.getWorkloadServicesLocked()
|
2022-03-21 09:29:57 +00:00
|
|
|
h.serviceRegWrapper.RemoveWorkload(workloadServices)
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
2023-03-17 14:44:21 +00:00
|
|
|
|
|
|
|
h.deregistered = true
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
|
|
|
|
2023-03-17 14:44:21 +00:00
|
|
|
// getWorkloadServicesLocked returns the set of workload services currently
|
|
|
|
// on the hook.
|
|
|
|
//
|
|
|
|
// caller must hold h.lock
|
|
|
|
func (h *groupServiceHook) getWorkloadServicesLocked() *serviceregistration.WorkloadServices {
|
2019-11-18 18:04:01 +00:00
|
|
|
// Interpolate with the task's environment
|
|
|
|
interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services)
|
|
|
|
|
2020-10-15 19:32:21 +00:00
|
|
|
var netStatus *structs.AllocNetworkStatus
|
2022-06-07 14:18:19 +00:00
|
|
|
if h.networkStatus != nil {
|
|
|
|
netStatus = h.networkStatus.NetworkStatus()
|
2020-10-15 19:32:21 +00:00
|
|
|
}
|
|
|
|
|
2022-09-22 14:38:21 +00:00
|
|
|
info := structs.AllocInfo{
|
|
|
|
AllocID: h.allocID,
|
|
|
|
JobID: h.jobID,
|
|
|
|
Group: h.group,
|
|
|
|
Namespace: h.namespace,
|
|
|
|
}
|
|
|
|
|
2019-11-18 18:04:01 +00:00
|
|
|
// Create task services struct with request's driver metadata
|
2022-03-15 08:38:30 +00:00
|
|
|
return &serviceregistration.WorkloadServices{
|
2022-09-22 14:38:21 +00:00
|
|
|
AllocInfo: info,
|
|
|
|
ProviderNamespace: h.providerNamespace,
|
|
|
|
Restarter: h.restarter,
|
|
|
|
Services: interpolatedServices,
|
|
|
|
Networks: h.networks,
|
|
|
|
NetworkStatus: netStatus,
|
|
|
|
Ports: h.ports,
|
|
|
|
Canary: h.canary,
|
2019-11-18 18:04:01 +00:00
|
|
|
}
|
2019-08-14 22:02:00 +00:00
|
|
|
}
|