open-nomad/drivers/docker/reconcile_dangling.go

package docker

import (
	"context"
	"fmt"
	"regexp"
	"sync"
	"time"

	docker "github.com/fsouza/go-dockerclient"
	hclog "github.com/hashicorp/go-hclog"
)

// containerReconciler detects and kills unexpectedly running containers.
//
// Due to Docker architecture and network based communication, it is
// possible for Docker to start a container successfully, but have the
// creation API call fail with a network error.  containerReconciler
// scans for these untracked containers and kills them.
type containerReconciler struct {
	// ctx bounds the lifetime of the background loop: the loop returns once
	// ctx is done.
	ctx    context.Context
	// config holds the dangling-container GC settings consulted by the
	// reconciler (enabled flag, period, creation grace and dry-run mode).
	config *ContainerGCConfig
	// client is the Docker API client captured when the reconciler is built.
	// NOTE(review): the methods visible in this file call the package-level
	// `client` directly rather than this field - confirm whether the field is
	// read elsewhere.
	client *docker.Client
	// logger receives the reconciler's diagnostics.
	logger hclog.Logger

	// isDriverHealthy gates every pass: no cleanup is attempted while it
	// returns false.
	isDriverHealthy   func() bool
	// trackedContainers returns the set of container IDs that must never be
	// removed, keyed by container ID.
	trackedContainers func() map[string]bool
	// isNomadContainer reports whether a listed container looks like it was
	// launched by Nomad.
	isNomadContainer  func(c docker.APIContainers) bool

	// once ensures Start launches the background goroutine at most one time.
	once sync.Once
}

// newReconciler builds a containerReconciler wired to the given driver: it
// shares the driver's context, logger and dangling-container GC settings, and
// uses the driver's task store as the source of tracked containers.
//
// The driver is considered healthy only when Docker was previously detected
// and the latest fingerprint succeeded.
func newReconciler(d *Driver) *containerReconciler {
	healthy := func() bool {
		if !d.previouslyDetected() {
			return false
		}
		return d.fingerprintSuccessful()
	}

	r := &containerReconciler{
		isDriverHealthy:   healthy,
		trackedContainers: d.trackedContainers,
		isNomadContainer:  isNomadContainer,
	}
	r.ctx = d.ctx
	r.config = &d.config.GC.DanglingContainers
	r.client = client
	r.logger = d.logger

	return r
}

// Start launches the background reconciliation loop. The loop is started at
// most once no matter how often Start is called; when dangling-container
// handling is disabled, Start only emits a debug message.
func (r *containerReconciler) Start() {
	if r.config.Enabled {
		launch := func() {
			go r.removeDanglingContainersGoroutine()
		}
		r.once.Do(launch)
		return
	}

	r.logger.Debug("skipping dangling containers handling; is disabled")
}

// removeDanglingContainersGoroutine is the reconciler's background loop. It
// waits for an initial delay, then runs one cleanup pass per configured
// period until the reconciler's context is done.
//
// A pass is skipped while the driver reports itself unhealthy. Failures are
// logged only on the transition from success to failure so that a persistent
// error does not flood the log on every period.
func (r *containerReconciler) removeDanglingContainersGoroutine() {
	period := r.config.period

	lastIterSucceeded := true

	// ensure that we wait for at least a period or creation timeout
	// for first container GC iteration
	// The initial period is a grace period for restore allocation
	// before a driver may kill containers launched by an earlier nomad
	// process.
	initialDelay := period
	if r.config.CreationGrace > initialDelay {
		initialDelay = r.config.CreationGrace
	}

	timer := time.NewTimer(initialDelay)
	// Release the timer as soon as the loop exits instead of leaving it armed
	// until its next expiry.
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			if r.isDriverHealthy() {
				err := r.removeDanglingContainersIteration()
				if err != nil && lastIterSucceeded {
					r.logger.Warn("failed to remove dangling containers", "error", err)
				}
				lastIterSucceeded = (err == nil)
			}

			// The timer has fired and its channel was drained by this case,
			// so Reset can be called without stopping it first.
			timer.Reset(period)
		case <-r.ctx.Done():
			return
		}
	}
}

// removeDanglingContainersIteration performs a single cleanup pass: it looks
// up untracked containers created before the creation-grace cutoff and
// force-removes each of them.
//
// In dry-run mode the candidates are only logged. An error is returned only
// when the candidates cannot be determined; a failure to remove an individual
// container is logged and does not abort the pass.
func (r *containerReconciler) removeDanglingContainersIteration() error {
	graceCutoff := time.Now().Add(-r.config.CreationGrace)

	ids, err := r.untrackedContainers(r.trackedContainers(), graceCutoff)
	switch {
	case err != nil:
		return fmt.Errorf("failed to find untracked containers: %v", err)
	case len(ids) == 0:
		return nil
	case r.config.DryRun:
		r.logger.Info("detected untracked containers", "container_ids", ids)
		return nil
	}

	for _, containerID := range ids {
		// Each removal gets its own timeout-bound context, released as soon
		// as the API call returns.
		removeErr := func() error {
			ctx, cancel := r.dockerAPIQueryContext()
			defer cancel()

			opts := docker.RemoveContainerOptions{
				Context: ctx,
				ID:      containerID,
				Force:   true,
			}
			return client.RemoveContainer(opts)
		}()

		if removeErr == nil {
			r.logger.Info("removed untracked container", "container_id", containerID)
			continue
		}
		r.logger.Warn("failed to remove untracked container", "container_id", containerID, "error", removeErr)
	}

	return nil
}

// untrackedContainers returns the IDs of running containers that are
// suspected to have been started by Nomad but are not tracked by this driver.
//
// A container is reported only when its ID is absent from tracked, it was
// created at or before cutoffTime (second granularity), and the reconciler's
// isNomadContainer predicate accepts it. On success the returned slice is
// never nil; when listing containers fails, a nil slice and an error are
// returned.
func (r *containerReconciler) untrackedContainers(tracked map[string]bool, cutoffTime time.Time) ([]string, error) {
	ctx, cancel := r.dockerAPIQueryContext()
	defer cancel()

	listOpts := docker.ListContainersOptions{
		Context: ctx,
		All:     false, // only reconcile running containers
	}
	containers, err := client.ListContainers(listOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to list containers: %v", err)
	}

	ids := make([]string, 0)
	cutoffUnix := cutoffTime.Unix()

	for _, container := range containers {
		// Cheap checks run first; the predicate is consulted last.
		dangling := !tracked[container.ID] &&
			container.Created <= cutoffUnix &&
			r.isNomadContainer(container)
		if dangling {
			ids = append(ids, container.ID)
		}
	}

	return ids, nil
}

// dockerAPIQueryContext returns a context, with its cancel function, for a
// single Docker API call. The context carries a timeout to protect against a
// wedged, locked-up API call; the timeout is the configured period, but never
// less than 30 seconds.
//
// A call that times out is simply attempted again on a subsequent iteration.
func (r *containerReconciler) dockerAPIQueryContext() (context.Context, context.CancelFunc) {
	// reasonable floor so that a very small period does not yield a tiny limit
	const minTimeout = 30 * time.Second

	limit := r.config.period
	if limit < minTimeout {
		limit = minTimeout
	}

	return context.WithTimeout(context.Background(), limit)
}

// isNomadContainer reports whether the container appears to have been
// launched by Nomad.
//
// A container carrying the allocation-ID label is accepted outright.
// Otherwise the container must expose the /alloc, /local and /secrets mounts
// and have a Nomad-style name.
func isNomadContainer(c docker.APIContainers) bool {
	if _, labeled := c.Labels[dockerLabelAllocID]; labeled {
		return true
	}

	// pre-0.10 containers aren't tagged or labeled in any way,
	// so fall back to a cheap heuristic based on mount paths and name
	// before inspecting container details
	return hasMount(c, "/alloc") &&
		hasMount(c, "/local") &&
		hasMount(c, "/secrets") &&
		hasNomadName(c)
}

// hasMount reports whether any of the container's mounts has p as its
// destination path. The comparison is an exact string match.
func hasMount(c docker.APIContainers, p string) bool {
	for i := range c.Mounts {
		if c.Mounts[i].Destination == p {
			return true
		}
	}
	return false
}

// nomadContainerNamePattern matches a container name that contains a slash,
// any text, and then a dash followed by a lowercase hexadecimal UUID
// (8-4-4-4-12 groups). The pattern is not anchored, so it matches anywhere
// within the name.
var nomadContainerNamePattern = regexp.MustCompile(
	`\/.*-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`,
)

// hasNomadName reports whether at least one of the container's names matches
// nomadContainerNamePattern.
func hasNomadName(c docker.APIContainers) bool {
	for i := range c.Names {
		if nomadContainerNamePattern.MatchString(c.Names[i]) {
			return true
		}
	}
	return false
}

// trackedContainers returns a snapshot of the container IDs of every task
// handle currently held in the driver's task store, as a set keyed by
// container ID. The store's read lock is held while the snapshot is taken.
func (d *Driver) trackedContainers() map[string]bool {
	d.tasks.lock.RLock()
	defer d.tasks.lock.RUnlock()

	ids := make(map[string]bool, len(d.tasks.store))
	for _, handle := range d.tasks.store {
		ids[handle.containerID] = true
	}
	return ids
}