Merge pull request #972 from hashicorp/scripts

Moving consul service to executor
This commit is contained in:
Diptanu Choudhury 2016-03-24 00:12:45 -07:00
commit 76343a3748
21 changed files with 760 additions and 897 deletions

View File

@ -30,10 +30,9 @@ type AllocStateUpdater func(alloc *structs.Allocation)
// AllocRunner is used to wrap an allocation and provide the execution context.
type AllocRunner struct {
config *config.Config
updater AllocStateUpdater
logger *log.Logger
consulService *ConsulService
config *config.Config
updater AllocStateUpdater
logger *log.Logger
alloc *structs.Allocation
allocClientStatus string // Explicit status of allocation. Set when there are failures
@ -76,20 +75,19 @@ type allocRunnerState struct {
// NewAllocRunner is used to create a new allocation context
func NewAllocRunner(logger *log.Logger, config *config.Config, updater AllocStateUpdater,
alloc *structs.Allocation, consulService *ConsulService) *AllocRunner {
alloc *structs.Allocation) *AllocRunner {
ar := &AllocRunner{
config: config,
updater: updater,
logger: logger,
alloc: alloc,
consulService: consulService,
dirtyCh: make(chan struct{}, 1),
tasks: make(map[string]*TaskRunner),
taskStates: copyTaskStates(alloc.TaskStates),
restored: make(map[string]struct{}),
updateCh: make(chan *structs.Allocation, 64),
destroyCh: make(chan struct{}),
waitCh: make(chan struct{}),
config: config,
updater: updater,
logger: logger,
alloc: alloc,
dirtyCh: make(chan struct{}, 1),
tasks: make(map[string]*TaskRunner),
taskStates: copyTaskStates(alloc.TaskStates),
restored: make(map[string]struct{}),
updateCh: make(chan *structs.Allocation, 64),
destroyCh: make(chan struct{}),
waitCh: make(chan struct{}),
}
return ar
}
@ -125,7 +123,7 @@ func (r *AllocRunner) RestoreState() error {
task := &structs.Task{Name: name}
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
task, r.consulService)
task)
r.tasks[name] = tr
// Skip tasks in terminal states.
@ -405,7 +403,7 @@ func (r *AllocRunner) Run() {
}
tr := NewTaskRunner(r.logger, r.config, r.setTaskState, r.ctx, r.Alloc(),
task.Copy(), r.consulService)
task.Copy())
r.tasks[task.Name] = tr
go tr.Run()
}

View File

@ -30,13 +30,12 @@ func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) {
conf.AllocDir = os.TempDir()
upd := &MockAllocStateUpdater{}
alloc := mock.Alloc()
consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
if !restarts {
*alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0}
alloc.Job.Type = structs.JobTypeBatch
}
ar := NewAllocRunner(logger, conf, upd.Update, alloc, consulClient)
ar := NewAllocRunner(logger, conf, upd.Update, alloc)
return upd, ar
}
@ -250,9 +249,8 @@ func TestAllocRunner_SaveRestoreState(t *testing.T) {
}
// Create a new alloc runner
consulClient, err := NewConsulService(&consulServiceConfig{ar.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
&structs.Allocation{ID: ar.alloc.ID}, consulClient)
&structs.Allocation{ID: ar.alloc.ID})
err = ar2.RestoreState()
if err != nil {
t.Fatalf("err: %v", err)
@ -322,9 +320,8 @@ func TestAllocRunner_SaveRestoreState_TerminalAlloc(t *testing.T) {
ar.destroy = true
// Create a new alloc runner
consulClient, err := NewConsulService(&consulServiceConfig{ar.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
ar2 := NewAllocRunner(ar.logger, ar.config, upd.Update,
&structs.Allocation{ID: ar.alloc.ID}, consulClient)
&structs.Allocation{ID: ar.alloc.ID})
err = ar2.RestoreState()
if err != nil {
t.Fatalf("err: %v", err)

View File

@ -15,6 +15,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/nomad"
@ -66,6 +67,10 @@ const (
// allocSyncRetryIntv is the interval on which we retry updating
// the status of the allocation
allocSyncRetryIntv = 5 * time.Second
// consulSyncInterval is the interval at which the client syncs with consul
// to remove services and checks which are no longer valid
consulSyncInterval = 15 * time.Second
)
// DefaultConfig returns the default configuration
@ -89,8 +94,6 @@ type Client struct {
logger *log.Logger
consulService *ConsulService
lastServer net.Addr
lastRPCTime time.Time
lastServerLock sync.Mutex
@ -111,6 +114,8 @@ type Client struct {
// allocUpdates stores allocations that need to be synced to the server.
allocUpdates chan *structs.Allocation
consulService *consul.ConsulService
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
@ -132,11 +137,6 @@ func NewClient(cfg *config.Config) (*Client, error) {
shutdownCh: make(chan struct{}),
}
// Setup the Consul Service
if err := c.setupConsulService(); err != nil {
return nil, fmt.Errorf("failed to create the consul service: %v", err)
}
// Initialize the client
if err := c.init(); err != nil {
return nil, fmt.Errorf("failed to initialize client: %v", err)
@ -167,14 +167,16 @@ func NewClient(cfg *config.Config) (*Client, error) {
// initialized.
c.configCopy = c.config.Copy()
// Start the consul service
go c.consulService.SyncWithConsul()
// Restore the state
if err := c.restoreState(); err != nil {
return nil, fmt.Errorf("failed to restore state: %v", err)
}
// Setup the consul client
if err := c.setupConsulClient(); err != nil {
return nil, fmt.Errorf("failed to create consul client: %v")
}
// Register and then start heartbeating to the servers.
go c.registerAndHeartbeat()
@ -187,31 +189,10 @@ func NewClient(cfg *config.Config) (*Client, error) {
// Start the client!
go c.run()
return c, nil
}
// Start the consul sync
go c.syncConsul()
func (c *Client) setupConsulService() error {
var consulService *ConsulService
var err error
addr := c.config.ReadDefault("consul.address", "127.0.0.1:8500")
token := c.config.Read("consul.token")
auth := c.config.Read("consul.auth")
enableSSL := c.config.ReadBoolDefault("consul.ssl", false)
verifySSL := c.config.ReadBoolDefault("consul.verifyssl", true)
consulServiceCfg := &consulServiceConfig{
logger: c.logger,
consulAddr: addr,
token: token,
auth: auth,
enableSSL: enableSSL,
verifySSL: verifySSL,
node: c.config.Node,
}
if consulService, err = NewConsulService(consulServiceCfg); err != nil {
return err
}
c.consulService = consulService
return nil
return c, nil
}
// init is used to initialize the client and perform any setup
@ -275,9 +256,6 @@ func (c *Client) Shutdown() error {
}
}
// Stop the consul service
c.consulService.ShutDown()
c.shutdown = true
close(c.shutdownCh)
c.connPool.Shutdown()
@ -445,7 +423,7 @@ func (c *Client) restoreState() error {
id := entry.Name()
alloc := &structs.Allocation{ID: id}
c.configLock.RLock()
ar := NewAllocRunner(c.logger, c.configCopy, c.updateAllocStatus, alloc, c.consulService)
ar := NewAllocRunner(c.logger, c.configCopy, c.updateAllocStatus, alloc)
c.configLock.RUnlock()
c.allocs[id] = ar
if err := ar.RestoreState(); err != nil {
@ -1174,7 +1152,7 @@ func (c *Client) updateAlloc(exist, update *structs.Allocation) error {
// addAlloc is invoked when we should add an allocation
func (c *Client) addAlloc(alloc *structs.Allocation) error {
c.configLock.RLock()
ar := NewAllocRunner(c.logger, c.configCopy, c.updateAllocStatus, alloc, c.consulService)
ar := NewAllocRunner(c.logger, c.configCopy, c.updateAllocStatus, alloc)
c.configLock.RUnlock()
go ar.Run()
@ -1184,3 +1162,55 @@ func (c *Client) addAlloc(alloc *structs.Allocation) error {
c.allocLock.Unlock()
return nil
}
// setupConsulClient creates a ConsulService
func (c *Client) setupConsulClient() error {
cfg := consul.ConsulConfig{
Addr: c.config.ReadDefault("consul.address", "127.0.0.1:8500"),
Token: c.config.Read("consul.token"),
Auth: c.config.Read("consul.auth"),
EnableSSL: c.config.ReadBoolDefault("consul.ssl", false),
VerifySSL: c.config.ReadBoolDefault("consul.verifyssl", true),
}
cs, err := consul.NewConsulService(&cfg, c.logger, "")
c.consulService = cs
return err
}
// syncConsul removes services of tasks which are no longer in running state
func (c *Client) syncConsul() {
sync := time.After(consulSyncInterval)
for {
select {
case <-sync:
var runningTasks []*structs.Task
// Get the existing allocs
c.allocLock.RLock()
allocs := make([]*AllocRunner, 0, len(c.allocs))
for _, ar := range c.allocs {
allocs = append(allocs, ar)
}
c.allocLock.RUnlock()
for _, ar := range allocs {
ar.taskStatusLock.RLock()
taskStates := copyTaskStates(ar.taskStates)
ar.taskStatusLock.RUnlock()
for taskName, taskState := range taskStates {
if taskState.State == structs.TaskStateRunning {
if tr, ok := ar.tasks[taskName]; ok {
runningTasks = append(runningTasks, tr.task)
}
}
}
}
if err := c.consulService.KeepServices(runningTasks); err != nil {
c.logger.Printf("[DEBUG] error removing services from non-running tasks: %v", err)
}
case <-c.shutdownCh:
c.logger.Printf("[INFO] client: shutting down consul sync")
return
}
}
}

View File

@ -1,421 +0,0 @@
package client
import (
"crypto/tls"
"fmt"
"log"
"net/http"
"net/url"
"strings"
"sync"
"time"
consul "github.com/hashicorp/consul/api"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/nomad/structs"
)
const (
syncInterval = 5 * time.Second
)
// consulApi is the interface which wraps the actual consul api client
type consulApi interface {
CheckRegister(check *consul.AgentCheckRegistration) error
CheckDeregister(checkID string) error
ServiceRegister(service *consul.AgentServiceRegistration) error
ServiceDeregister(ServiceID string) error
Services() (map[string]*consul.AgentService, error)
Checks() (map[string]*consul.AgentCheck, error)
}
// consulApiClient is the actual implementation of the consulApi which
// talks to the consul agent
type consulApiClient struct {
client *consul.Client
}
func (a *consulApiClient) CheckRegister(check *consul.AgentCheckRegistration) error {
return a.client.Agent().CheckRegister(check)
}
func (a *consulApiClient) CheckDeregister(checkID string) error {
return a.client.Agent().CheckDeregister(checkID)
}
func (a *consulApiClient) ServiceRegister(service *consul.AgentServiceRegistration) error {
return a.client.Agent().ServiceRegister(service)
}
func (a *consulApiClient) ServiceDeregister(serviceID string) error {
return a.client.Agent().ServiceDeregister(serviceID)
}
func (a *consulApiClient) Services() (map[string]*consul.AgentService, error) {
return a.client.Agent().Services()
}
func (a *consulApiClient) Checks() (map[string]*consul.AgentCheck, error) {
return a.client.Agent().Checks()
}
// trackedTask is a Task that we are tracking for changes in service and check
// definitions and keep them sycned with Consul Agent
type trackedTask struct {
task *structs.Task
alloc *structs.Allocation
}
// ConsulService is the service which tracks tasks and syncs the services and
// checks defined in them with Consul Agent
type ConsulService struct {
client consulApi
logger *log.Logger
shutdownCh chan struct{}
node *structs.Node
trackedTasks map[string]*trackedTask
serviceStates map[string]string
allocToService map[string][]string
trackedTaskLock sync.Mutex
}
type consulServiceConfig struct {
logger *log.Logger
consulAddr string
token string
auth string
enableSSL bool
verifySSL bool
node *structs.Node
}
// A factory method to create new consul service
func NewConsulService(config *consulServiceConfig) (*ConsulService, error) {
var err error
var c *consul.Client
cfg := consul.DefaultConfig()
cfg.Address = config.consulAddr
if config.token != "" {
cfg.Token = config.token
}
if config.auth != "" {
var username, password string
if strings.Contains(config.auth, ":") {
split := strings.SplitN(config.auth, ":", 2)
username = split[0]
password = split[1]
} else {
username = config.auth
}
cfg.HttpAuth = &consul.HttpBasicAuth{
Username: username,
Password: password,
}
}
if config.enableSSL {
cfg.Scheme = "https"
}
if config.enableSSL && !config.verifySSL {
cfg.HttpClient.Transport = &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true,
},
}
}
if c, err = consul.NewClient(cfg); err != nil {
return nil, err
}
consulService := ConsulService{
client: &consulApiClient{client: c},
logger: config.logger,
node: config.node,
trackedTasks: make(map[string]*trackedTask),
serviceStates: make(map[string]string),
allocToService: make(map[string][]string),
shutdownCh: make(chan struct{}),
}
return &consulService, nil
}
// Register starts tracking a task for changes to it's services and tasks and
// adds/removes services and checks associated with it.
func (c *ConsulService) Register(task *structs.Task, alloc *structs.Allocation) error {
var mErr multierror.Error
c.trackedTaskLock.Lock()
tt := &trackedTask{task: task, alloc: alloc}
c.trackedTasks[fmt.Sprintf("%s-%s", alloc.ID, task.Name)] = tt
// Delete any previously registered service as the same alloc is being
// re-registered.
for _, service := range c.allocToService[alloc.ID] {
delete(c.serviceStates, service)
}
c.trackedTaskLock.Unlock()
for _, service := range task.Services {
// Track the services this alloc is registering.
c.allocToService[alloc.ID] = append(c.allocToService[alloc.ID], service.Name)
c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name)
if err := c.registerService(service, task, alloc); err != nil {
mErr.Errors = append(mErr.Errors, err)
}
}
return mErr.ErrorOrNil()
}
// Deregister stops tracking a task for changes to it's services and checks and
// removes all the services and checks associated with the Task
func (c *ConsulService) Deregister(task *structs.Task, alloc *structs.Allocation) error {
var mErr multierror.Error
c.trackedTaskLock.Lock()
delete(c.trackedTasks, fmt.Sprintf("%s-%s", alloc.ID, task.Name))
delete(c.allocToService, alloc.ID)
c.trackedTaskLock.Unlock()
for _, service := range task.Services {
serviceID := alloc.Services[service.Name]
if serviceID == "" {
continue
}
c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name)
if err := c.deregisterService(serviceID); err != nil {
c.printLogMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name)
mErr.Errors = append(mErr.Errors, err)
}
}
return mErr.ErrorOrNil()
}
func (c *ConsulService) ShutDown() {
close(c.shutdownCh)
}
// SyncWithConsul is a long lived function that performs calls to sync
// checks and services periodically with Consul Agent
func (c *ConsulService) SyncWithConsul() {
sync := time.After(syncInterval)
for {
select {
case <-sync:
c.performSync()
sync = time.After(syncInterval)
case <-c.shutdownCh:
c.logger.Printf("[INFO] consul: shutting down consul service")
return
}
}
}
// performSync syncs checks and services with Consul and removed tracked
// services which are no longer present in tasks
func (c *ConsulService) performSync() {
// Get the list of the services and that Consul knows about
srvcs, err := c.client.Services()
if err != nil {
return
}
chks, err := c.client.Checks()
if err != nil {
return
}
// Filter the services and checks that isn't managed by consul
consulServices := c.filterConsulServices(srvcs)
consulChecks := c.filterConsulChecks(chks)
knownChecks := make(map[string]struct{})
knownServices := make(map[string]struct{})
c.trackedTaskLock.Lock()
tasks := make([]*trackedTask, 0, len(c.trackedTasks))
for _, trackedTask := range c.trackedTasks {
tasks = append(tasks, trackedTask)
}
c.trackedTaskLock.Unlock()
// Add services and checks which Consul doesn't know about
for _, trackedTask := range tasks {
for _, service := range trackedTask.task.Services {
serviceID := trackedTask.alloc.Services[service.Name]
// Add new services which Consul agent isn't aware of
knownServices[serviceID] = struct{}{}
if _, ok := consulServices[serviceID]; !ok {
c.printLogMessage("[INFO] consul: perform sync, registering service %s with consul.", service.Name)
c.registerService(service, trackedTask.task, trackedTask.alloc)
continue
}
// If a service has changed, re-register it with Consul agent
if service.Hash() != c.serviceStates[serviceID] {
c.printLogMessage("[INFO] consul: perform sync hash change, reregistering service %s with consul.", service.Name)
c.registerService(service, trackedTask.task, trackedTask.alloc)
continue
}
// Add new checks that Consul isn't aware of
for _, check := range service.Checks {
checkID := check.Hash(serviceID)
knownChecks[checkID] = struct{}{}
if _, ok := consulChecks[checkID]; !ok {
host, port := trackedTask.task.FindHostAndPortFor(service.PortLabel)
cr := c.makeCheck(serviceID, check, host, port)
if err := c.registerCheck(cr); err != nil {
c.printLogMessage("[DEBUG] consul: error registering check %q: %v", cr.ID, err)
}
}
}
}
}
// Remove services from the service tracker which no longer exists
for serviceID := range c.serviceStates {
if _, ok := knownServices[serviceID]; !ok {
delete(c.serviceStates, serviceID)
}
}
// Remove services that are not present anymore
for _, consulService := range consulServices {
if _, ok := knownServices[consulService.ID]; !ok {
delete(c.serviceStates, consulService.ID)
c.printLogMessage("[INFO] consul: perform sync, deregistering service %v with consul", consulService.Service)
c.deregisterService(consulService.ID)
}
}
// Remove checks that are not present anymore
for _, consulCheck := range consulChecks {
if _, ok := knownChecks[consulCheck.CheckID]; !ok {
c.deregisterCheck(consulCheck.CheckID)
}
}
}
// registerService registers a Service with Consul
func (c *ConsulService) registerService(service *structs.Service, task *structs.Task, alloc *structs.Allocation) error {
var mErr multierror.Error
host, port := task.FindHostAndPortFor(service.PortLabel)
if host == "" || port == 0 {
return fmt.Errorf("consul: the port:%q marked for registration of service: %q couldn't be found", service.PortLabel, service.Name)
}
serviceID := alloc.Services[service.Name]
c.serviceStates[serviceID] = service.Hash()
asr := &consul.AgentServiceRegistration{
ID: serviceID,
Name: service.Name,
Tags: service.Tags,
Port: port,
Address: host,
}
if err := c.client.ServiceRegister(asr); err != nil {
c.printLogMessage("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err)
mErr.Errors = append(mErr.Errors, err)
}
for _, check := range service.Checks {
cr := c.makeCheck(serviceID, check, host, port)
if err := c.registerCheck(cr); err != nil {
c.printLogMessage("[DEBUG] consul: error while registering check %v with consul: %v", check.Name, err)
mErr.Errors = append(mErr.Errors, err)
}
}
return mErr.ErrorOrNil()
}
// registerCheck registers a check with Consul
func (c *ConsulService) registerCheck(check *consul.AgentCheckRegistration) error {
c.printLogMessage("[INFO] consul: registering check with ID: %s for service: %s", check.ID, check.ServiceID)
return c.client.CheckRegister(check)
}
// deregisterCheck de-registers a check with a specific ID from Consul
func (c *ConsulService) deregisterCheck(checkID string) error {
c.printLogMessage("[INFO] consul: removing check with ID: %v", checkID)
return c.client.CheckDeregister(checkID)
}
// deregisterService de-registers a Service with a specific id from Consul
func (c *ConsulService) deregisterService(serviceID string) error {
delete(c.serviceStates, serviceID)
if err := c.client.ServiceDeregister(serviceID); err != nil {
return err
}
return nil
}
// makeCheck creates a Consul Check Registration struct
func (c *ConsulService) makeCheck(serviceID string, check *structs.ServiceCheck, ip string, port int) *consul.AgentCheckRegistration {
checkID := check.Hash(serviceID)
cr := &consul.AgentCheckRegistration{
ID: checkID,
Name: check.Name,
ServiceID: serviceID,
}
cr.Interval = check.Interval.String()
cr.Timeout = check.Timeout.String()
switch check.Type {
case structs.ServiceCheckHTTP:
if check.Protocol == "" {
check.Protocol = "http"
}
url := url.URL{
Scheme: check.Protocol,
Host: fmt.Sprintf("%s:%d", ip, port),
Path: check.Path,
}
cr.HTTP = url.String()
case structs.ServiceCheckTCP:
cr.TCP = fmt.Sprintf("%s:%d", ip, port)
case structs.ServiceCheckScript:
cr.Script = check.Script // TODO This needs to include the path of the alloc dir and based on driver types
}
return cr
}
// filterConsulServices prunes out all the service whose ids are not prefixed
// with nomad-
func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentService) map[string]*consul.AgentService {
nomadServices := make(map[string]*consul.AgentService)
delete(srvcs, "consul")
for _, srv := range srvcs {
if strings.HasPrefix(srv.ID, structs.NomadConsulPrefix) {
nomadServices[srv.ID] = srv
}
}
return nomadServices
}
// filterConsulChecks prunes out all the consul checks which do not have
// services with id prefixed with noamd-
func (c *ConsulService) filterConsulChecks(chks map[string]*consul.AgentCheck) map[string]*consul.AgentCheck {
nomadChecks := make(map[string]*consul.AgentCheck)
for _, chk := range chks {
if strings.HasPrefix(chk.ServiceID, structs.NomadConsulPrefix) {
nomadChecks[chk.CheckID] = chk
}
}
return nomadChecks
}
// printLogMessage prints log messages only when the node attributes have consul
// related information
func (c *ConsulService) printLogMessage(message string, v ...interface{}) {
if _, ok := c.node.Attributes["consul.version"]; ok {
c.logger.Println(fmt.Sprintf(message, v...))
}
}

355
client/consul/sync.go Normal file
View File

@ -0,0 +1,355 @@
package consul
import (
"crypto/tls"
"fmt"
"log"
"net/http"
"net/url"
"reflect"
"strings"
"sync"
"time"
consul "github.com/hashicorp/consul/api"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/nomad/structs"
)
// ConsulService allows syncing of services and checks with Consul
type ConsulService struct {
client *consul.Client
// task is the task whose services/checks are synced; set by SyncTask.
// NOTE(review): the client-level instance never calls SyncTask, so this
// can be nil — callers must not assume it is set.
task *structs.Task
// allocID scopes service IDs to an allocation ("" for the client-level instance).
allocID string
// trackedServices maps consul service IDs to their last-synced registrations.
trackedServices map[string]*consul.AgentService
// trackedChecks maps check IDs (check.Hash(serviceID)) to their Nomad definitions.
trackedChecks map[string]*structs.ServiceCheck
logger *log.Logger
// shutdownCh stops PeriodicSync; closed at most once, guarded by shutdownLock.
shutdownCh chan struct{}
shutdown bool
shutdownLock sync.Mutex
}
// ConsulConfig is the configuration used to create a new ConsulService client
type ConsulConfig struct {
Addr string
Token string
Auth string // "user:password" basic-auth credentials, or a bare username
EnableSSL bool
VerifySSL bool
}
const (
// The periodic time interval for syncing services and checks with Consul
syncInterval = 5 * time.Second
)
// NewConsulService builds a ConsulService from the given configuration.
// Zero-valued config fields fall back to the consul client defaults; allocID
// scopes the service IDs this instance will manage.
func NewConsulService(config *ConsulConfig, logger *log.Logger, allocID string) (*ConsulService, error) {
	cfg := consul.DefaultConfig()
	if config.Addr != "" {
		cfg.Address = config.Addr
	}
	if config.Token != "" {
		cfg.Token = config.Token
	}
	if config.Auth != "" {
		// Credentials are either "user:password" or a bare username.
		username := config.Auth
		var password string
		if idx := strings.Index(config.Auth, ":"); idx >= 0 {
			username, password = config.Auth[:idx], config.Auth[idx+1:]
		}
		cfg.HttpAuth = &consul.HttpBasicAuth{
			Username: username,
			Password: password,
		}
	}
	if config.EnableSSL {
		cfg.Scheme = "https"
		if !config.VerifySSL {
			// SSL enabled but verification explicitly disabled.
			cfg.HttpClient.Transport = &http.Transport{
				TLSClientConfig: &tls.Config{
					InsecureSkipVerify: true,
				},
			}
		}
	}

	client, err := consul.NewClient(cfg)
	if err != nil {
		return nil, err
	}
	return &ConsulService{
		client:          client,
		allocID:         allocID,
		logger:          logger,
		trackedServices: make(map[string]*consul.AgentService),
		trackedChecks:   make(map[string]*structs.ServiceCheck),
		shutdownCh:      make(chan struct{}),
	}, nil
}
// SyncTask syncs the services and checks defined in the given task with
// Consul: new or changed services are (re-)registered, new checks are
// registered, and anything previously tracked that is no longer part of the
// task is deregistered. Individual failures are accumulated and returned as
// a multierror; syncing continues past them.
func (c *ConsulService) SyncTask(task *structs.Task) error {
	var mErr multierror.Error
	c.task = task
	taskServices := make(map[string]*consul.AgentService)
	taskChecks := make(map[string]*structs.ServiceCheck)

	// Register services and checks that we don't know about or that changed.
	for _, service := range task.Services {
		srv, err := c.createService(service)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			continue
		}
		trackedService, ok := c.trackedServices[srv.ID]
		// Re-register when the service is new or its definition changed.
		if !ok || !reflect.DeepEqual(trackedService, srv) {
			if err := c.registerService(srv); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
		c.trackedServices[srv.ID] = srv
		taskServices[srv.ID] = srv

		for _, chk := range service.Checks {
			checkID := chk.Hash(srv.ID)
			if _, ok := c.trackedChecks[checkID]; !ok {
				if err := c.registerCheck(chk, srv); err != nil {
					mErr.Errors = append(mErr.Errors, err)
				}
			}
			c.trackedChecks[checkID] = chk
			taskChecks[checkID] = chk
		}
	}

	// Remove services that are not present anymore.
	for _, service := range c.trackedServices {
		if _, ok := taskServices[service.ID]; !ok {
			if err := c.deregisterService(service.ID); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
			delete(c.trackedServices, service.ID)
		}
	}

	// Remove the checks that are not present anymore.
	// (idiom: `for checkID := range`, not `for checkID, _ := range`)
	for checkID := range c.trackedChecks {
		if _, ok := taskChecks[checkID]; !ok {
			if err := c.deregisterCheck(checkID); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
			delete(c.trackedChecks, checkID)
		}
	}
	return mErr.ErrorOrNil()
}
// Shutdown stops the periodic syncing goroutine (at most once, guarded by
// shutdownLock) and then deregisters every service still being tracked,
// collecting any deregistration failures into a multierror.
func (c *ConsulService) Shutdown() error {
	c.shutdownLock.Lock()
	if !c.shutdown {
		c.shutdown = true
		close(c.shutdownCh)
	}
	c.shutdownLock.Unlock()

	var mErr multierror.Error
	for _, service := range c.trackedServices {
		if err := c.client.Agent().ServiceDeregister(service.ID); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}
	return mErr.ErrorOrNil()
}
// KeepServices removes services from consul which are not present in the list
// of tasks passed to it. Used by the client-level sync to garbage-collect
// registrations for tasks that are no longer running.
func (c *ConsulService) KeepServices(tasks []*structs.Task) error {
	var mErr multierror.Error

	// Index the service IDs belonging to the given tasks.
	// BUG FIX: the map must be allocated — the original declared a nil map
	// (`var services map[string]struct{}`) and writing to it panics.
	services := make(map[string]struct{})
	for _, task := range tasks {
		for _, service := range task.Services {
			// BUG FIX: use the task being iterated, not c.task — c.task is nil
			// on the client-level ConsulService (SyncTask is never called on
			// it), and would in any case name the wrong task here.
			services[service.ID(c.allocID, task.Name)] = struct{}{}
		}
	}

	// Get the services from Consul
	cServices, err := c.client.Agent().Services()
	if err != nil {
		return err
	}
	cServices = c.filterConsulServices(cServices)

	// Remove the services from consul which are not in any of the tasks
	for _, service := range cServices {
		if _, validService := services[service.ID]; !validService {
			if err := c.deregisterService(service.ID); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	}
	return mErr.ErrorOrNil()
}
// registerCheck registers a check definition with Consul
// The check ID is check.Hash(service.ID), the same ID SyncTask tracks it
// under, and the check is attached to the given agent service.
func (c *ConsulService) registerCheck(check *structs.ServiceCheck, service *consul.AgentService) error {
chkReg := consul.AgentCheckRegistration{
ID: check.Hash(service.ID),
Name: check.Name,
ServiceID: service.ID,
}
chkReg.Timeout = check.Timeout.String()
chkReg.Interval = check.Interval.String()
switch check.Type {
case structs.ServiceCheckHTTP:
if check.Protocol == "" {
// Default to plain http when the check does not specify a scheme.
check.Protocol = "http"
}
url := url.URL{
Scheme: check.Protocol,
Host: fmt.Sprintf("%s:%d", service.Address, service.Port),
Path: check.Path,
}
chkReg.HTTP = url.String()
case structs.ServiceCheckTCP:
chkReg.TCP = fmt.Sprintf("%s:%d", service.Address, service.Port)
case structs.ServiceCheckScript:
// Script checks are registered as TTL checks using the check interval as
// the TTL — presumably the executor updates the TTL after running the
// script; confirm against the executor implementation.
chkReg.TTL = check.Interval.String()
}
return c.client.Agent().CheckRegister(&chkReg)
}
// createService builds a consul.AgentService from a Nomad service definition,
// resolving the service's port label against the current task. Returns an
// error when the host or port cannot be resolved.
func (c *ConsulService) createService(service *structs.Service) (*consul.AgentService, error) {
	host, port := c.task.FindHostAndPortFor(service.PortLabel)
	switch {
	case host == "":
		return nil, fmt.Errorf("host for the service %q couldn't be found", service.Name)
	case port == 0:
		return nil, fmt.Errorf("port for the service %q couldn't be found", service.Name)
	}
	return &consul.AgentService{
		ID:      service.ID(c.allocID, c.task.Name),
		Service: service.Name,
		Tags:    service.Tags,
		Address: host,
		Port:    port,
	}, nil
}
// registerService registers the given agent service with the local Consul
// agent, copying its identifying fields into a registration request.
func (c *ConsulService) registerService(service *consul.AgentService) error {
	reg := &consul.AgentServiceRegistration{
		ID:      service.ID,
		Name:    service.Service,
		Tags:    service.Tags,
		Address: service.Address,
		Port:    service.Port,
	}
	return c.client.Agent().ServiceRegister(reg)
}
// deregisterService de-registers a service with the given ID from consul
// Thin wrapper over the agent API; the caller is responsible for removing the
// ID from trackedServices.
func (c *ConsulService) deregisterService(ID string) error {
return c.client.Agent().ServiceDeregister(ID)
}
// deregisterCheck de-registers a check with a given ID from Consul.
// Thin wrapper over the agent API; the caller is responsible for removing the
// ID from trackedChecks.
func (c *ConsulService) deregisterCheck(ID string) error {
return c.client.Agent().CheckDeregister(ID)
}
// PeriodicSync triggers periodic syncing of services and checks with Consul.
// This is a long lived go-routine which is stopped during shutdown via
// shutdownCh. Sync errors are logged and the loop continues.
func (c *ConsulService) PeriodicSync() {
	for {
		select {
		case <-time.After(syncInterval):
			if err := c.performSync(); err != nil {
				c.logger.Printf("[DEBUG] consul: error in syncing task %q: %v", c.task.Name, err)
			}
		case <-c.shutdownCh:
			c.logger.Printf("[INFO] consul: shutting down sync for task %q", c.task.Name)
			return
		}
	}
}
// performSync reconciles the services and checks we are tracking with what
// the local Consul agent actually has registered, re-registering anything the
// agent has lost. Individual registration failures are accumulated and
// returned as a multierror.
func (c *ConsulService) performSync() error {
	var mErr multierror.Error
	cServices, err := c.client.Agent().Services()
	if err != nil {
		return err
	}
	cChecks, err := c.client.Agent().Checks()
	if err != nil {
		return err
	}

	// Add services that consul doesn't have but we do.
	for serviceID, service := range c.trackedServices {
		if _, ok := cServices[serviceID]; !ok {
			if err := c.registerService(service); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	}

	// Add checks that consul doesn't have but we do.
	for checkID, check := range c.trackedChecks {
		if _, ok := cChecks[checkID]; ok {
			continue
		}
		// BUG FIX: the original resolved the owning service via the consul
		// check entry (`chk.ServiceID`), but `chk` is nil exactly when the
		// check is missing from consul — a guaranteed nil-pointer panic.
		// Recover the owning service by matching the check's hash against
		// each tracked service ID instead.
		for serviceID, service := range c.trackedServices {
			if check.Hash(serviceID) == checkID {
				if err := c.registerCheck(check, service); err != nil {
					mErr.Errors = append(mErr.Errors, err)
				}
				break
			}
		}
	}
	return mErr.ErrorOrNil()
}
// filterConsulServices returns only those agent services whose IDs carry the
// nomad-managed prefix, so we never touch registrations Nomad does not own.
func (c *ConsulService) filterConsulServices(srvcs map[string]*consul.AgentService) map[string]*consul.AgentService {
	filtered := make(map[string]*consul.AgentService)
	for _, srv := range srvcs {
		if !strings.HasPrefix(srv.ID, structs.NomadConsulPrefix) {
			continue
		}
		filtered[srv.ID] = srv
	}
	return filtered
}
// filterConsulChecks returns only those agent checks whose owning service ID
// carries the nomad-managed prefix, so checks owned by other systems are
// never touched.
func (c *ConsulService) filterConsulChecks(chks map[string]*consul.AgentCheck) map[string]*consul.AgentCheck {
	filtered := make(map[string]*consul.AgentCheck)
	for _, chk := range chks {
		if !strings.HasPrefix(chk.ServiceID, structs.NomadConsulPrefix) {
			continue
		}
		filtered[chk.CheckID] = chk
	}
	return filtered
}
// consulPresent reports whether a Consul agent is reachable, by querying the
// agent's self endpoint.
func (c *ConsulService) consulPresent() bool {
	if _, err := c.client.Agent().Self(); err != nil {
		return false
	}
	return true
}

162
client/consul/sync_test.go Normal file
View File

@ -0,0 +1,162 @@
package consul
import (
"fmt"
"log"
"os"
"reflect"
"testing"
"time"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/nomad/structs"
)
// Shared fixtures for the sync tests: one service with a TCP check and one
// bare service, both referencing port labels resolved by mockTask.
var (
logger = log.New(os.Stdout, "", log.LstdFlags)
// check1 is a TCP check attached to service1.
check1 = structs.ServiceCheck{
Name: "check-foo-1",
Type: structs.ServiceCheckTCP,
Interval: 30 * time.Second,
Timeout: 5 * time.Second,
}
// service1 carries one check and resolves via port label "port1".
service1 = structs.Service{
Name: "foo-1",
Tags: []string{"tag1", "tag2"},
PortLabel: "port1",
Checks: []*structs.ServiceCheck{
&check1,
},
}
// service2 has no checks and resolves via port label "port2".
service2 = structs.Service{
Name: "foo-2",
Tags: []string{"tag1", "tag2"},
PortLabel: "port2",
}
)
// TestConsulServiceRegisterServices verifies that SyncTask registers both of
// the mock task's services and the check of service1 with a live Consul
// agent. The test silently skips when no agent is reachable.
func TestConsulServiceRegisterServices(t *testing.T) {
allocID := "12"
cs, err := NewConsulService(&ConsulConfig{}, logger, allocID)
if err != nil {
t.Fatalf("Err: %v", err)
}
// Skipping the test if consul isn't present
if !cs.consulPresent() {
return
}
task := mockTask()
if err := cs.SyncTask(task); err != nil {
t.Fatalf("err: %v", err)
}
defer cs.Shutdown()
// Service IDs are scoped by alloc ID and task name.
service1ID := service1.ID(allocID, task.Name)
service2ID := service2.ID(allocID, task.Name)
if err := servicesPresent(t, []string{service1ID, service2ID}, cs); err != nil {
t.Fatalf("err : %v", err)
}
if err := checksPresent(t, []string{check1.Hash(service1ID)}, cs); err != nil {
t.Fatalf("err : %v", err)
}
}
// TestConsulServiceUpdateService checks that re-syncing a task after one of
// its services changed updates the registration in consul while keeping all
// services and checks present.
func TestConsulServiceUpdateService(t *testing.T) {
	allocID := "12"
	cs, err := NewConsulService(&ConsulConfig{}, logger, allocID)
	if err != nil {
		t.Fatalf("Err: %v", err)
	}
	// Skip (rather than silently pass via a bare return) if consul isn't
	// running locally, so the result is reported as skipped.
	if !cs.consulPresent() {
		t.Skip("consul is not present")
	}
	task := mockTask()
	if err := cs.SyncTask(task); err != nil {
		t.Fatalf("err: %v", err)
	}
	defer cs.Shutdown()

	// Update service defn 1
	newTags := []string{"tag3"}
	task.Services[0].Tags = newTags
	if err := cs.SyncTask(task); err != nil {
		t.Fatalf("err: %v", err)
	}
	// Make sure all the services and checks are still present
	service1ID := service1.ID(allocID, task.Name)
	service2ID := service2.ID(allocID, task.Name)
	if err := servicesPresent(t, []string{service1ID, service2ID}, cs); err != nil {
		t.Fatalf("err : %v", err)
	}
	if err := checksPresent(t, []string{check1.Hash(service1ID)}, cs); err != nil {
		t.Fatalf("err : %v", err)
	}
	// Check if service defn 1 has been updated. Guard against a missing
	// entry instead of dereferencing a nil *AgentService.
	services, err := cs.client.Agent().Services()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	srv, ok := services[service1ID]
	if !ok {
		t.Fatalf("service %q not found in consul", service1ID)
	}
	if !reflect.DeepEqual(srv.Tags, newTags) {
		t.Fatalf("expected tags: %v, actual: %v", newTags, srv.Tags)
	}
}
// servicesPresent returns a multi-error naming every ID in serviceIDs that is
// not currently registered with the consul agent.
func servicesPresent(t *testing.T, serviceIDs []string, consulService *ConsulService) error {
	registered, err := consulService.client.Agent().Services()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	var mErr multierror.Error
	for _, id := range serviceIDs {
		if _, ok := registered[id]; ok {
			continue
		}
		mErr.Errors = append(mErr.Errors, fmt.Errorf("service ID %q not synced", id))
	}
	return mErr.ErrorOrNil()
}
// checksPresent returns a multi-error naming every ID in checkIDs that is not
// currently registered with the consul agent.
func checksPresent(t *testing.T, checkIDs []string, consulService *ConsulService) error {
	registered, err := consulService.client.Agent().Checks()
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	var mErr multierror.Error
	for _, id := range checkIDs {
		if _, ok := registered[id]; ok {
			continue
		}
		mErr.Errors = append(mErr.Errors, fmt.Errorf("check ID %q not synced", id))
	}
	return mErr.ErrorOrNil()
}
// mockTask builds a task that advertises service1 and service2 on two dynamic
// ports of a single network resource.
func mockTask() *structs.Task {
	return &structs.Task{
		Name:     "foo",
		Services: []*structs.Service{&service1, &service2},
		Resources: &structs.Resources{
			Networks: []*structs.NetworkResource{
				{
					IP: "10.10.11.5",
					DynamicPorts: []structs.Port{
						{Label: "port1", Value: 20002},
						{Label: "port2", Value: 20003},
					},
				},
			},
		},
	}
}

View File

@ -1,364 +0,0 @@
package client
import (
"fmt"
consul "github.com/hashicorp/consul/api"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"log"
"os"
"reflect"
"testing"
"time"
)
// mockConsulApiClient is a fake consul API client that records how many times
// each register/deregister entry point was invoked, so tests can assert on
// the sync behavior without a real consul agent.
type mockConsulApiClient struct {
	serviceRegisterCallCount   int // times ServiceRegister was called
	checkRegisterCallCount     int // times CheckRegister was called
	checkDeregisterCallCount   int // times CheckDeregister was called
	serviceDeregisterCallCount int // times ServiceDeregister was called
}
// CheckRegister records the call and always succeeds.
func (a *mockConsulApiClient) CheckRegister(check *consul.AgentCheckRegistration) error {
	a.checkRegisterCallCount++
	return nil
}
// CheckDeregister records the call and always succeeds.
func (a *mockConsulApiClient) CheckDeregister(checkID string) error {
	a.checkDeregisterCallCount++
	return nil
}
// ServiceRegister records the call and always succeeds.
func (a *mockConsulApiClient) ServiceRegister(service *consul.AgentServiceRegistration) error {
	a.serviceRegisterCallCount++
	return nil
}
// ServiceDeregister records the call and always succeeds.
func (a *mockConsulApiClient) ServiceDeregister(serviceID string) error {
	a.serviceDeregisterCallCount++
	return nil
}
// Services reports that no services are registered.
func (a *mockConsulApiClient) Services() (map[string]*consul.AgentService, error) {
	return map[string]*consul.AgentService{}, nil
}
// Checks reports that no checks are registered.
func (a *mockConsulApiClient) Checks() (map[string]*consul.AgentCheck, error) {
	return map[string]*consul.AgentCheck{}, nil
}
// newConsulService builds a ConsulService whose API client is replaced by a
// fresh mockConsulApiClient, so tests never talk to a real agent.
func newConsulService() *ConsulService {
	testLogger := log.New(os.Stdout, "logger: ", log.Lshortfile)
	cs, _ := NewConsulService(&consulServiceConfig{testLogger, "", "", "", false, false, &structs.Node{}})
	cs.client = &mockConsulApiClient{}
	return cs
}
// newTask returns a bare redis task with one dynamic port ("db") and no
// services; tests append services as needed.
func newTask() *structs.Task {
	var services []*structs.Service
	return &structs.Task{
		Name:     "redis",
		Services: services,
		Resources: &structs.Resources{
			Networks: []*structs.NetworkResource{
				{
					IP: "10.10.0.1",
					// Keyed fields so the literal survives struct
					// field reordering (vet composites).
					DynamicPorts: []structs.Port{{Label: "db", Value: 20413}},
				},
			},
		},
	}
}
// TestConsul_MakeChecks verifies that http, https and tcp service checks are
// translated into the expected consul check definitions.
func TestConsul_MakeChecks(t *testing.T) {
	t.Parallel()
	service := &structs.Service{
		Name: "Bar",
		Checks: []*structs.ServiceCheck{
			{
				Type:     "http",
				Path:     "/foo/bar",
				Interval: 10 * time.Second,
				Timeout:  2 * time.Second,
			},
			{
				Type:     "http",
				Protocol: "https",
				Path:     "/foo/bar",
				Interval: 10 * time.Second,
				Timeout:  2 * time.Second,
			},
			{
				Type:     "tcp",
				Interval: 10 * time.Second,
				Timeout:  2 * time.Second,
			},
		},
	}

	c := newConsulService()
	serviceID := fmt.Sprintf("%s-1234", structs.NomadConsulPrefix)
	httpCheck := c.makeCheck(serviceID, service.Checks[0], "10.10.0.1", 8090)
	httpsCheck := c.makeCheck(serviceID, service.Checks[1], "10.10.0.1", 8090)
	tcpCheck := c.makeCheck(serviceID, service.Checks[2], "10.10.0.1", 8090)

	if httpCheck.HTTP != "http://10.10.0.1:8090/foo/bar" {
		t.Fatalf("Invalid http url for check: %v", httpCheck.HTTP)
	}
	if httpsCheck.HTTP != "https://10.10.0.1:8090/foo/bar" {
		t.Fatalf("Invalid http url for check: %v", httpsCheck.HTTP)
	}
	if tcpCheck.TCP != "10.10.0.1:8090" {
		t.Fatalf("Invalid tcp check: %v", tcpCheck.TCP)
	}
}
// TestConsul_InvalidPortLabelForService verifies that registering a service
// whose port label does not match any of the task's ports fails.
func TestConsul_InvalidPortLabelForService(t *testing.T) {
	t.Parallel()
	task := &structs.Task{
		Name:   "foo",
		Driver: "docker",
		Resources: &structs.Resources{
			CPU:      500,
			MemoryMB: 1024,
			DiskMB:   1024,
			IOPS:     10,
			Networks: []*structs.NetworkResource{
				{
					Device: "eth0",
					CIDR:   "255.255.0.0/16",
					MBits:  10,
					ReservedPorts: []structs.Port{
						{Label: "http", Value: 8080},
						{Label: "ssh", Value: 2026},
					},
				},
			},
		},
	}
	// "https" is not among the reserved port labels above, so registration
	// must be rejected.
	service := &structs.Service{
		Name:      "foo",
		Tags:      []string{"a", "b"},
		PortLabel: "https",
		Checks:    make([]*structs.ServiceCheck, 0),
	}

	c := newConsulService()
	if err := c.registerService(service, task, mock.Alloc()); err == nil {
		t.Fatalf("Service should be invalid")
	}
}
// TestConsul_Services_Deleted_From_Task checks that a service removed from a
// task is dropped from the tracked service states on the next sync pass.
func TestConsul_Services_Deleted_From_Task(t *testing.T) {
	t.Parallel()
	c := newConsulService()
	task := structs.Task{
		Name: "redis",
		Services: []*structs.Service{
			&structs.Service{
				Name:      "example-cache-redis",
				Tags:      []string{"global"},
				PortLabel: "db",
			},
		},
		Resources: &structs.Resources{
			Networks: []*structs.NetworkResource{
				{
					IP:           "10.10.0.1",
					DynamicPorts: []structs.Port{{"db", 20413}},
				},
			},
		},
	}
	// Registering the task starts tracking its single service.
	c.Register(&task, mock.Alloc())
	if len(c.serviceStates) != 1 {
		t.Fatalf("Expected tracked services: %v, Actual: %v", 1, len(c.serviceStates))
	}
	// Removing every service from the task should prune the tracked state
	// on the next sync.
	task.Services = []*structs.Service{}
	c.performSync()
	if len(c.serviceStates) != 0 {
		t.Fatalf("Expected tracked services: %v, Actual: %v", 0, len(c.serviceStates))
	}
}
// TestConsul_Service_Should_Be_Re_Reregistered_On_Change checks that mutating
// a registered service updates the hash tracked for it on the next sync.
func TestConsul_Service_Should_Be_Re_Reregistered_On_Change(t *testing.T) {
	t.Parallel()
	c := newConsulService()
	task := newTask()
	s1 := structs.Service{
		Name:      "example-cache-redis",
		Tags:      []string{"global"},
		PortLabel: "db",
	}
	task.Services = append(task.Services, &s1)
	alloc := mock.Alloc()
	// NOTE(review): the service ID is read from alloc.Services before
	// Register is called — presumably mock.Alloc() pre-populates the
	// service ID map; confirm against the mock package.
	serviceID := alloc.Services[s1.Name]
	c.Register(task, alloc)
	// Mutate the service after registration; the tracked hash should
	// follow the change after a sync.
	s1.Tags = []string{"frontcache"}
	c.performSync()
	if len(c.serviceStates) != 1 {
		t.Fatal("We should be tracking one service")
	}
	if c.serviceStates[serviceID] != s1.Hash() {
		t.Fatalf("Hash is %v, expected %v", c.serviceStates[serviceID], s1.Hash())
	}
}
// TestConsul_AddCheck_To_Service checks that adding a check to an already
// registered service triggers exactly one check registration on sync.
func TestConsul_AddCheck_To_Service(t *testing.T) {
	t.Parallel()
	apiClient := &mockConsulApiClient{}
	c := newConsulService()
	c.client = apiClient
	task := newTask()
	var checks []*structs.ServiceCheck
	s1 := structs.Service{
		Name:      "example-cache-redis",
		Tags:      []string{"global"},
		PortLabel: "db",
		Checks:    checks,
	}
	task.Services = append(task.Services, &s1)
	// Initial registration: the service carries no checks yet.
	c.Register(task, mock.Alloc())
	// Attach a new check, then sync; the mock client counts the resulting
	// CheckRegister calls.
	check1 := structs.ServiceCheck{
		Name:     "alive",
		Type:     "tcp",
		Interval: 10 * time.Second,
		Timeout:  5 * time.Second,
	}
	s1.Checks = append(s1.Checks, &check1)
	c.performSync()
	if apiClient.checkRegisterCallCount != 1 {
		t.Fatalf("Expected number of check registrations: %v, Actual: %v", 1, apiClient.checkRegisterCallCount)
	}
}
// TestConsul_ModifyCheck checks that changing an existing check's definition
// causes the check to be registered again on the next sync.
func TestConsul_ModifyCheck(t *testing.T) {
	t.Parallel()
	apiClient := &mockConsulApiClient{}
	c := newConsulService()
	c.client = apiClient
	task := newTask()
	var checks []*structs.ServiceCheck
	s1 := structs.Service{
		Name:      "example-cache-redis",
		Tags:      []string{"global"},
		PortLabel: "db",
		Checks:    checks,
	}
	task.Services = append(task.Services, &s1)
	c.Register(task, mock.Alloc())
	// First sync registers the brand new check exactly once.
	check1 := structs.ServiceCheck{
		Name:     "alive",
		Type:     "tcp",
		Interval: 10 * time.Second,
		Timeout:  5 * time.Second,
	}
	s1.Checks = append(s1.Checks, &check1)
	c.performSync()
	if apiClient.checkRegisterCallCount != 1 {
		t.Fatalf("Expected number of check registrations: %v, Actual: %v", 1, apiClient.checkRegisterCallCount)
	}
	// Mutating the check should trigger a second registration on re-sync.
	check1.Timeout = 2 * time.Second
	c.performSync()
	if apiClient.checkRegisterCallCount != 2 {
		t.Fatalf("Expected number of check registrations: %v, Actual: %v", 2, apiClient.checkRegisterCallCount)
	}
}
// TestConsul_FilterNomadServicesAndChecks verifies that only services and
// checks whose service IDs carry the nomad prefix survive filtering, and that
// nil inputs produce empty results.
func TestConsul_FilterNomadServicesAndChecks(t *testing.T) {
	t.Parallel()
	c := newConsulService()

	inputServices := map[string]*consul.AgentService{
		"foo-bar": {
			ID:      "foo-bar",
			Service: "http-frontend",
			Tags:    []string{"global"},
			Port:    8080,
			Address: "10.10.1.11",
		},
		"nomad-registered-service-2121212": {
			ID:      "nomad-registered-service-2121212",
			Service: "identity-service",
			Tags:    []string{"global"},
			Port:    8080,
			Address: "10.10.1.11",
		},
	}
	wantServices := map[string]*consul.AgentService{
		"nomad-registered-service-2121212": {
			ID:      "nomad-registered-service-2121212",
			Service: "identity-service",
			Tags:    []string{"global"},
			Port:    8080,
			Address: "10.10.1.11",
		},
	}
	gotServices := c.filterConsulServices(inputServices)
	if !reflect.DeepEqual(wantServices, gotServices) {
		t.Fatalf("Expected: %v, Actual: %v", wantServices, gotServices)
	}
	// A nil input must yield an empty result, not a panic.
	gotServices = c.filterConsulServices(nil)
	if len(gotServices) != 0 {
		t.Fatalf("Expected number of services: %v, Actual: %v", 0, len(gotServices))
	}

	inputChecks := map[string]*consul.AgentCheck{
		"foo-bar-chk": {
			CheckID:   "foo-bar-chk",
			ServiceID: "foo-bar",
			Name:      "alive",
		},
		"212121212": {
			CheckID:   "212121212",
			ServiceID: "nomad-registered-service-2121212",
			Name:      "ping",
		},
	}
	wantChecks := map[string]*consul.AgentCheck{
		"212121212": {
			CheckID:   "212121212",
			ServiceID: "nomad-registered-service-2121212",
			Name:      "ping",
		},
	}
	gotChecks := c.filterConsulChecks(inputChecks)
	if !reflect.DeepEqual(wantChecks, gotChecks) {
		t.Fatalf("Expected: %v, Actual: %v", wantChecks, gotChecks)
	}
	if len(gotChecks) != 1 {
		t.Fatalf("Expected number of checks: %v, Actual: %v", 1, len(gotChecks))
	}
	gotChecks = c.filterConsulChecks(nil)
	if len(gotChecks) != 0 {
		t.Fatalf("Expected number of checks: %v, Actual: %v", 0, len(gotChecks))
	}
}

View File

@ -546,8 +546,10 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
TaskEnv: d.taskEnv,
Task: task,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
PortLowerBound: d.config.ClientMinPort,
PortUpperBound: d.config.ClientMaxPort,
ConsulConfig: consulConfig(d.config),
}
ss, err := exec.LaunchSyslogServer(executorCtx)
if err != nil {
@ -642,6 +644,9 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
if err := exec.RegisterServices(); err != nil {
d.logger.Printf("[ERR] driver.docker: error registering services with consul for task: %v", task)
}
go h.run()
return h, nil
}
@ -827,6 +832,11 @@ func (h *DockerHandle) run() {
h.waitCh <- cstructs.NewWaitResult(exitCode, 0, err)
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.docker: error deregistering services: %v", err)
}
// Shutdown the syslog collector
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)

View File

@ -106,9 +106,11 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
Task: task,
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
ConsulConfig: consulConfig(d.config),
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{
Cmd: command,
@ -138,6 +140,9 @@ func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
if err := exec.RegisterServices(); err != nil {
d.logger.Printf("[ERR] driver.exec: error registering services with consul for task: %v", task)
}
go h.run()
return h, nil
}
@ -274,6 +279,11 @@ func (h *execHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.exec: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err)
}

View File

@ -18,6 +18,7 @@ import (
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/driver/env"
"github.com/hashicorp/nomad/client/driver/logging"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
@ -34,6 +35,8 @@ type Executor interface {
Exit() error
UpdateLogConfig(logConfig *structs.LogConfig) error
UpdateTask(task *structs.Task) error
RegisterServices() error
DeregisterServices() error
}
// ExecutorContext holds context to configure the command user
@ -49,6 +52,9 @@ type ExecutorContext struct {
// Task is the task whose executor is being launched
Task *structs.Task
// AllocID is the allocation id to which the task belongs
AllocID string
// PortUpperBound is the upper bound of the ports that we can use to start
// the syslog server
PortUpperBound uint
@ -56,6 +62,9 @@ type ExecutorContext struct {
// PortLowerBound is the lower bound of the ports that we can use to start
// the syslog server
PortLowerBound uint
// ConsulConfig is the configuration used to create a consul client
ConsulConfig *consul.ConsulConfig
}
// ExecCommand holds the user command, args, and other isolation related
@ -116,7 +125,8 @@ type UniversalExecutor struct {
groups *cgroupConfig.Cgroup
cgLock sync.Mutex
logger *log.Logger
consulService *consul.ConsulService
logger *log.Logger
}
// NewExecutor returns an Executor
@ -255,6 +265,13 @@ func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
e.lro.FileSize = fileSize
e.lre.MaxFiles = task.LogConfig.MaxFiles
e.lre.FileSize = fileSize
// Re-syncing task with consul service
if e.consulService != nil {
if err := e.consulService.SyncTask(task); err != nil {
return err
}
}
return nil
}
@ -334,6 +351,28 @@ func (e *UniversalExecutor) ShutDown() error {
return nil
}
// RegisterServices lazily creates the consul service client, syncs the task's
// services with consul and starts the periodic syncer.
func (e *UniversalExecutor) RegisterServices() error {
	e.logger.Printf("[INFO] executor: registering services")
	if e.consulService == nil {
		consulService, err := consul.NewConsulService(e.ctx.ConsulConfig, e.logger, e.ctx.AllocID)
		if err != nil {
			return err
		}
		e.consulService = consulService
	}
	// The periodic syncer is started even when the initial sync errors
	// (matches prior behavior); the error is still reported to the caller.
	err := e.consulService.SyncTask(e.ctx.Task)
	go e.consulService.PeriodicSync()
	return err
}
// DeregisterServices shuts down the consul syncer, removing the task's
// services; it is a no-op when no consul service was ever created.
func (e *UniversalExecutor) DeregisterServices() error {
	e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service")
	if e.consulService == nil {
		return nil
	}
	return e.consulService.Shutdown()
}
// configureTaskDir sets the task dir in the executor
func (e *UniversalExecutor) configureTaskDir() error {
taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name]

View File

@ -68,6 +68,14 @@ func (e *ExecutorRPC) UpdateTask(task *structs.Task) error {
return e.client.Call("Plugin.UpdateTask", task, new(interface{}))
}
// RegisterServices forwards the RegisterServices call over RPC to the
// executor plugin.
func (e *ExecutorRPC) RegisterServices() error {
	return e.client.Call("Plugin.RegisterServices", new(interface{}), new(interface{}))
}
// DeregisterServices forwards the DeregisterServices call over RPC to the
// executor plugin.
func (e *ExecutorRPC) DeregisterServices() error {
	return e.client.Call("Plugin.DeregisterServices", new(interface{}), new(interface{}))
}
type ExecutorRPCServer struct {
Impl executor.Executor
}
@ -112,6 +120,14 @@ func (e *ExecutorRPCServer) UpdateTask(args *structs.Task, resp *interface{}) er
return e.Impl.UpdateTask(args)
}
// RegisterServices handles the plugin-side RPC by delegating to the real
// executor implementation; args and resp are unused placeholders.
func (e *ExecutorRPCServer) RegisterServices(args interface{}, resp *interface{}) error {
	return e.Impl.RegisterServices()
}
// DeregisterServices handles the plugin-side RPC by delegating to the real
// executor implementation; args and resp are unused placeholders.
func (e *ExecutorRPCServer) DeregisterServices(args interface{}, resp *interface{}) error {
	return e.Impl.DeregisterServices()
}
type ExecutorPlugin struct {
logger *log.Logger
Impl *ExecutorRPCServer

View File

@ -159,9 +159,11 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
Task: task,
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
ConsulConfig: consulConfig(d.config),
}
absPath, err := GetAbsolutePath("java")
@ -198,7 +200,9 @@ func (d *JavaDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
if err := h.executor.RegisterServices(); err != nil {
d.logger.Printf("[ERR] driver.java: error registering services with consul for task: %v", task)
}
go h.run()
return h, nil
}
@ -344,6 +348,12 @@ func (h *javaHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.java: failed to kill the deregister services: %v", err)
}
h.executor.Exit()
h.pluginClient.Kill()
}

View File

@ -191,9 +191,11 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
Task: task,
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
ConsulConfig: consulConfig(d.config),
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: args[0], Args: args[1:]}, executorCtx)
if err != nil {
@ -217,6 +219,9 @@ func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle,
waitCh: make(chan *cstructs.WaitResult, 1),
}
if err := h.executor.RegisterServices(); err != nil {
h.logger.Printf("[ERR] driver.qemu: error registering services: %v", err)
}
go h.run()
return h, nil
}
@ -334,6 +339,11 @@ func (h *qemuHandle) run() {
close(h.doneCh)
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.qemu: failed to deregister services: %v", err)
}
h.executor.Exit()
h.pluginClient.Kill()
}

View File

@ -101,9 +101,11 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
Task: task,
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
ConsulConfig: consulConfig(d.config),
}
ps, err := exec.LaunchCmd(&executor.ExecCommand{Cmd: command, Args: driverConfig.Args}, executorCtx)
if err != nil {
@ -126,6 +128,9 @@ func (d *RawExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandl
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
if err := h.executor.RegisterServices(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error registering services with consul: %v", err)
}
go h.run()
return h, nil
}
@ -240,6 +245,11 @@ func (h *rawExecHandle) run() {
}
h.waitCh <- &cstructs.WaitResult{ExitCode: ps.ExitCode, Signal: 0, Err: err}
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.raw_exec: error killing executor: %v", err)
}

View File

@ -233,9 +233,11 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
return nil, err
}
executorCtx := &executor.ExecutorContext{
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
Task: task,
TaskEnv: d.taskEnv,
AllocDir: ctx.AllocDir,
AllocID: ctx.AllocID,
Task: task,
ConsulConfig: consulConfig(d.config),
}
absPath, err := GetAbsolutePath("rkt")
@ -262,6 +264,9 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, e
doneCh: make(chan struct{}),
waitCh: make(chan *cstructs.WaitResult, 1),
}
if h.executor.RegisterServices(); err != nil {
h.logger.Printf("[ERR] driver.rkt: error registering services: %v", err)
}
go h.run()
return h, nil
}
@ -357,6 +362,11 @@ func (h *rktHandle) run() {
}
h.waitCh <- cstructs.NewWaitResult(ps.ExitCode, 0, err)
close(h.waitCh)
// Remove services
if err := h.executor.DeregisterServices(); err != nil {
h.logger.Printf("[ERR] driver.rkt: failed to deregister services: %v", err)
}
if err := h.executor.Exit(); err != nil {
h.logger.Printf("[ERR] driver.rkt: error killing executor: %v", err)
}

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/driver/executor"
"github.com/hashicorp/nomad/client/driver/logging"
)
@ -69,6 +70,17 @@ func createLogCollector(config *plugin.ClientConfig, w io.Writer,
return logCollector, syslogClient, nil
}
// consulConfig derives the consul client configuration from the nomad
// client's agent configuration, applying defaults for address and SSL flags.
func consulConfig(clientConfig *config.Config) *consul.ConsulConfig {
	return &consul.ConsulConfig{
		Addr:      clientConfig.ReadDefault("consul.address", "127.0.0.1:8500"),
		Token:     clientConfig.Read("consul.token"),
		Auth:      clientConfig.Read("consul.auth"),
		EnableSSL: clientConfig.ReadBoolDefault("consul.ssl", false),
		VerifySSL: clientConfig.ReadBoolDefault("consul.verifyssl", true),
	}
}
// killProcess kills a process with the given pid
func killProcess(pid int) error {
proc, err := os.FindProcess(pid)

View File

@ -15,7 +15,6 @@ import (
"github.com/hashicorp/nomad/client/driver"
"github.com/hashicorp/nomad/client/getter"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/hashstructure"
cstructs "github.com/hashicorp/nomad/client/driver/structs"
)
@ -42,7 +41,6 @@ type TaskRunner struct {
ctx *driver.ExecContext
alloc *structs.Allocation
restartTracker *RestartTracker
consulService *ConsulService
task *structs.Task
updateCh chan *structs.Allocation
@ -72,8 +70,7 @@ type TaskStateUpdater func(taskName, state string, event *structs.TaskEvent)
// NewTaskRunner is used to create a new task context
func NewTaskRunner(logger *log.Logger, config *config.Config,
updater TaskStateUpdater, ctx *driver.ExecContext,
alloc *structs.Allocation, task *structs.Task,
consulService *ConsulService) *TaskRunner {
alloc *structs.Allocation, task *structs.Task) *TaskRunner {
// Merge in the task resources
task.Resources = alloc.TaskResources[task.Name]
@ -91,7 +88,6 @@ func NewTaskRunner(logger *log.Logger, config *config.Config,
updater: updater,
logger: logger,
restartTracker: restartTracker,
consulService: consulService,
ctx: ctx,
alloc: alloc,
task: task,
@ -278,18 +274,14 @@ func (r *TaskRunner) run() {
}
}
// Mark the task as started and register it with Consul.
// Mark the task as started
r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
r.consulService.Register(r.task, r.alloc)
// Wait for updates
WAIT:
for {
select {
case waitRes := <-r.handle.WaitCh():
// De-Register the services belonging to the task from consul
r.consulService.Deregister(r.task, r.alloc)
if waitRes == nil {
panic("nil wait")
}
@ -318,7 +310,6 @@ func (r *TaskRunner) run() {
// Store that the task has been destroyed and any associated error.
r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskKilled).SetKillError(err))
r.consulService.Deregister(r.task, r.alloc)
return
}
}
@ -425,22 +416,6 @@ func (r *TaskRunner) handleUpdate(update *structs.Allocation) error {
r.restartTracker.SetPolicy(tg.RestartPolicy)
}
// Hash services returns the hash of the task's services
hashServices := func(task *structs.Task) uint64 {
h, err := hashstructure.Hash(task.Services, nil)
if err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("hashing services failed %#v: %v", task.Services, err))
}
return h
}
// Re-register the task to consul if any of the services have changed.
if hashServices(updatedTask) != hashServices(r.task) {
if err := r.consulService.Register(updatedTask, update); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf("updating services with consul failed: %v", err))
}
}
// Store the updated alloc.
r.alloc = update
r.task = updatedTask

View File

@ -46,7 +46,6 @@ func testTaskRunnerFromAlloc(restarts bool, alloc *structs.Allocation) (*MockTas
conf.AllocDir = os.TempDir()
upd := &MockTaskStateUpdater{}
task := alloc.Job.TaskGroups[0].Tasks[0]
consulClient, _ := NewConsulService(&consulServiceConfig{logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
// Initialize the port listing. This should be done by the offer process but
// we have a mock so that doesn't happen.
task.Resources.Networks[0].ReservedPorts = []structs.Port{{"", 80}}
@ -55,7 +54,7 @@ func testTaskRunnerFromAlloc(restarts bool, alloc *structs.Allocation) (*MockTas
allocDir.Build([]*structs.Task{task})
ctx := driver.NewExecContext(allocDir, alloc.ID)
tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc, task, consulClient)
tr := NewTaskRunner(logger, conf, upd.Update, ctx, alloc, task)
if !restarts {
tr.restartTracker = noRestartsTracker()
}
@ -218,9 +217,8 @@ func TestTaskRunner_SaveRestoreState(t *testing.T) {
}
// Create a new task runner
consulClient, _ := NewConsulService(&consulServiceConfig{tr.logger, "127.0.0.1:8500", "", "", false, false, &structs.Node{}})
tr2 := NewTaskRunner(tr.logger, tr.config, upd.Update,
tr.ctx, tr.alloc, &structs.Task{Name: tr.task.Name}, consulClient)
tr.ctx, tr.alloc, &structs.Task{Name: tr.task.Name})
if err := tr2.RestoreState(); err != nil {
t.Fatalf("err: %v", err)
}

View File

@ -1518,6 +1518,10 @@ func (s *Service) InitFields(job string, taskGroup string, task string) {
}
}
// ID returns the consul service ID for this service, scoped to the given
// allocation and task. The NomadConsulPrefix lets nomad later recognize the
// services it registered when filtering the agent's service list.
func (s *Service) ID(allocID string, taskName string) string {
	return fmt.Sprintf("%s-%s-%s-%s", NomadConsulPrefix, allocID, taskName, s.Hash())
}
// Validate checks if the Check definition is valid
func (s *Service) Validate() error {
var mErr multierror.Error

View File

@ -353,6 +353,7 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
// Set fields based on if we found an allocation option
if option != nil {
// Generate service IDs tasks in this allocation
// COMPAT - This is no longer required and would be removed in v0.4
alloc.PopulateServiceIDs(missing.TaskGroup)
alloc.NodeID = option.Node.ID

View File

@ -254,6 +254,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
// Set fields based on if we found an allocation option
if option != nil {
// Generate service IDs tasks in this allocation
// COMPAT - This is no longer required and would be removed in v0.4
alloc.PopulateServiceIDs(missing.TaskGroup)
alloc.NodeID = option.Node.ID