open-nomad/client/allocrunner/networking_cni.go
Seth Hoenig a65fbeb3b3
client: manually cleanup leaked iptables rules (#15407)
This PR adds a secondary path for cleaning up iptables rules created for an allocation
when the normal CNI library fails to do so. This typically happens when the state
of the pause container is unexpected - e.g. deleted out of band from Nomad. Before,
the iptables rules would be leaked which could lead to unexpected nat routing
behavior later on (in addition to leaked resources). With this change, we scan
for the rules created on behalf of the allocation being GC'd and delete them.

Fixes #6385
2022-11-28 11:32:16 -06:00

376 lines
11 KiB
Go

// For now CNI is supported only on Linux.
//
//go:build linux
// +build linux
package allocrunner
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
cni "github.com/containerd/go-cni"
cnilibrary "github.com/containernetworking/cni/libcni"
"github.com/coreos/go-iptables/iptables"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
)
const (
	// envCNIPath is the name of the environment variable consulted to derive
	// the CNI plugin path when the caller passes an empty path.
	envCNIPath = "CNI_PATH"
	// defaultCNIPath is the CNI plugin path used when neither the caller nor
	// the CNI_PATH environment variable supplies one.
	defaultCNIPath = "/opt/cni/bin"
	// defaultCNIInterfacePrefix is the network interface name prefix handed to
	// the CNI library when the caller passes an empty prefix.
	defaultCNIInterfacePrefix = "eth"
)
// cniNetworkConfigurator sets up and tears down allocation networking by
// driving CNI plugins through the go-cni library.
type cniNetworkConfigurator struct {
	// cni is the go-cni handle used for Status, Load, Setup and Remove calls.
	cni cni.CNI
	// cniConf holds the raw CNI network configuration bytes; it is loaded into
	// the cni handle when the handle reports it is not yet initialized.
	cniConf []byte
	// ignorePortMappingHostIP, when true, leaves HostIP unset on the port
	// mappings built from the allocation's shared ports.
	ignorePortMappingHostIP bool
	// rand supplies the jitter added to the delay between Setup retries.
	rand *rand.Rand
	// logger receives warnings and debug output from network operations.
	logger log.Logger
}
// newCNINetworkConfigurator loads the CNI configuration named networkName from
// cniConfDir and builds a configurator from it. The remaining arguments are
// passed through to newCNINetworkConfiguratorWithConf unchanged.
//
// An error is returned if the configuration cannot be loaded or the CNI
// library cannot be initialized; the load error is wrapped so callers can
// inspect the underlying cause.
func newCNINetworkConfigurator(logger log.Logger, cniPath, cniInterfacePrefix, cniConfDir, networkName string, ignorePortMappingHostIP bool) (*cniNetworkConfigurator, error) {
	cniConf, err := loadCNIConf(cniConfDir, networkName)
	if err != nil {
		return nil, fmt.Errorf("failed to load CNI config: %w", err)
	}

	return newCNINetworkConfiguratorWithConf(logger, cniPath, cniInterfacePrefix, ignorePortMappingHostIP, cniConf)
}
// newCNINetworkConfiguratorWithConf builds a configurator around an already
// loaded CNI configuration.
//
// An empty cniPath falls back to the CNI_PATH environment variable and then to
// defaultCNIPath; the resulting value is split as an OS path list of plugin
// directories. An empty cniInterfacePrefix falls back to
// defaultCNIInterfacePrefix. Any error from creating the CNI handle is
// returned as-is.
func newCNINetworkConfiguratorWithConf(logger log.Logger, cniPath, cniInterfacePrefix string, ignorePortMappingHostIP bool, cniConf []byte) (*cniNetworkConfigurator, error) {
	// Seed the jitter source used between Setup retries.
	jitter := rand.New(rand.NewSource(time.Now().Unix()))

	// Resolve the plugin search path: explicit value, then env, then default.
	if cniPath == "" {
		cniPath = os.Getenv(envCNIPath)
	}
	if cniPath == "" {
		cniPath = defaultCNIPath
	}

	if cniInterfacePrefix == "" {
		cniInterfacePrefix = defaultCNIInterfacePrefix
	}

	pluginDirs := filepath.SplitList(cniPath)
	handle, err := cni.New(
		cni.WithPluginDir(pluginDirs),
		cni.WithInterfacePrefix(cniInterfacePrefix),
	)
	if err != nil {
		return nil, err
	}

	return &cniNetworkConfigurator{
		cni:                     handle,
		cniConf:                 cniConf,
		ignorePortMappingHostIP: ignorePortMappingHostIP,
		rand:                    jitter,
		logger:                  logger,
	}, nil
}
// Setup calls the CNI plugins with the add action for the given allocation and
// network namespace, and converts the plugin result into an
// AllocNetworkStatus.
//
// The add action is attempted up to three times, waiting one second plus up to
// a second of jitter between attempts. If every attempt fails, the error from
// the first attempt is returned. If ctx is done while waiting between
// attempts, Setup returns immediately instead of sleeping out the delay.
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
	if err := c.ensureCNIInitialized(); err != nil {
		return nil, err
	}

	// Depending on the version of the bridge CNI plugin used, a known race
	// could occur where two allocs attempt to create the nomad bridge at the
	// same time, resulting in one of them failing. This retry attempts to
	// overcome those erroneous failures.
	const retry = 3
	var firstError error
	var res *cni.CNIResult
	for attempt := 1; ; attempt++ {
		var err error
		res, err = c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP)))
		if err == nil {
			break
		}

		c.logger.Warn("failed to configure network", "error", err, "attempt", attempt)
		switch attempt {
		case 1:
			firstError = err
		case retry:
			return nil, fmt.Errorf("failed to configure network: %w", firstError)
		}

		// Wait for 1 second + jitter, but stop waiting as soon as the caller
		// cancels so a cancelled setup is not held up by the back-off.
		backoff := time.NewTimer(time.Second + (time.Duration(c.rand.Int63n(1000)) * time.Millisecond))
		select {
		case <-ctx.Done():
			backoff.Stop()
			return nil, fmt.Errorf("failed to configure network: %w (retry aborted: %v)", firstError, ctx.Err())
		case <-backoff.C:
		}
	}

	if c.logger.IsDebug() {
		// Best-effort rendering for debug output only; a marshal failure just
		// yields an empty result string.
		resultJSON, _ := json.Marshal(res)
		c.logger.Debug("received result from CNI", "result", string(resultJSON))
	}

	return c.cniToAllocNet(res)
}
// cniToAllocNet converts a CNIResult to an AllocNetworkStatus or returns an
// error. The first interface and IP with a sandbox and address set are
// preferred. Failing that the first interface with an IP is selected. If no
// interface carries an address, an error is returned.
//
// Nil interface entries are removed from res.Interfaces as they are
// encountered. DNS settings, when present, are taken from the first DNS
// result only.
//
// Unfortunately the go-cni library returns interfaces in an unordered map so
// the results may be nondeterministic depending on CNI plugin output.
func (c *cniNetworkConfigurator) cniToAllocNet(res *cni.CNIResult) (*structs.AllocNetworkStatus, error) {
	netStatus := new(structs.AllocNetworkStatus)

	// Use the first sandbox interface with an IP address
	for name, iface := range res.Interfaces {
		if iface == nil {
			// this should never happen but this value is coming from external
			// plugins so we should guard against it; skip the entry so it is
			// never dereferenced below
			delete(res.Interfaces, name)
			continue
		}

		if iface.Sandbox != "" && len(iface.IPConfigs) > 0 {
			netStatus.Address = iface.IPConfigs[0].IP.String()
			netStatus.InterfaceName = name
			break
		}
	}

	// If no IP address was found, use the first interface with an address
	// found as a fallback
	if netStatus.Address == "" {
		var found bool
		for name, iface := range res.Interfaces {
			if iface != nil && len(iface.IPConfigs) > 0 {
				ip := iface.IPConfigs[0].IP.String()
				c.logger.Debug("no sandbox interface with an address found CNI result, using first available", "interface", name, "ip", ip)
				netStatus.Address = ip
				netStatus.InterfaceName = name
				found = true
				break
			}
		}
		if !found {
			c.logger.Warn("no address could be found from CNI result")
		}
	}

	// If no IP address could be found, return an error
	if netStatus.Address == "" {
		return nil, fmt.Errorf("failed to configure network: no interface with an address")
	}

	// Use the first DNS results.
	if len(res.DNS) > 0 {
		netStatus.DNS = &structs.DNSConfig{
			Servers:  res.DNS[0].Nameservers,
			Searches: res.DNS[0].Search,
			Options:  res.DNS[0].Options,
		}
	}

	return netStatus, nil
}
// loadCNIConf searches confDir for CNI configuration files (.conf, .conflist
// and .json) and returns the raw bytes of the first one, in lexicographical
// file order, whose network name equals name.
//
// An error is returned if the directory cannot be scanned, contains no
// configuration files, a file visited before a match fails to parse, or no
// file declares the requested name. Underlying errors are wrapped.
func loadCNIConf(confDir, name string) ([]byte, error) {
	files, err := cnilibrary.ConfFiles(confDir, []string{".conf", ".conflist", ".json"})
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to detect CNI config file: %w", err)
	case len(files) == 0:
		return nil, fmt.Errorf("no CNI network config found in %s", confDir)
	}

	// files contains the network config files associated with cni network.
	// Use lexicographical way as a defined order for network config files.
	sort.Strings(files)
	for _, confFile := range files {
		if strings.HasSuffix(confFile, ".conflist") {
			confList, err := cnilibrary.ConfListFromFile(confFile)
			if err != nil {
				return nil, fmt.Errorf("failed to load CNI config list file %s: %w", confFile, err)
			}
			if confList.Name == name {
				return confList.Bytes, nil
			}
			continue
		}

		// Anything that is not a .conflist is parsed as a single-plugin config.
		conf, err := cnilibrary.ConfFromFile(confFile)
		if err != nil {
			return nil, fmt.Errorf("failed to load CNI config file %s: %w", confFile, err)
		}
		if conf.Network.Name == name {
			return conf.Bytes, nil
		}
	}

	return nil, fmt.Errorf("CNI network config not found for name %q", name)
}
// Teardown calls the CNI plugins with the delete action for the given
// allocation and network namespace.
//
// If the CNI removal fails, the failure is logged and a manual cleanup of the
// allocation's iptables rules is attempted instead; the result of that cleanup
// (or the failure to obtain an iptables handle) is what gets returned.
func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {
	if err := c.ensureCNIInitialized(); err != nil {
		return err
	}

	err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP)))
	if err == nil {
		return nil
	}

	// Record why the normal path failed; without this the CNI error would be
	// lost once we switch to the manual cleanup below.
	c.logger.Warn("failed to remove network via CNI, falling back to manual iptables cleanup", "alloc_id", alloc.ID, "error", err)

	// create a real handle to iptables
	ipt, iptErr := iptables.New()
	if iptErr != nil {
		return fmt.Errorf("failed to detect iptables: %w", iptErr)
	}

	// most likely the pause container was removed from underneath nomad
	return c.forceCleanup(ipt, alloc.ID)
}
// IPTables is a subset of iptables.IPTables, limited to the operations that
// forceCleanup needs. Teardown passes in the handle returned by
// iptables.New(); presumably the interface also lets a fake be substituted in
// tests (not visible in this file).
type IPTables interface {
	// List returns the rules of the given chain in the given table.
	List(table, chain string) ([]string, error)
	// Delete removes the rule described by the given tokens from the chain.
	Delete(table, chain string, rule ...string) error
	// ClearAndDeleteChain is used to remove an allocation's chain from the table.
	ClearAndDeleteChain(table, chain string) error
}
var (
	// ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g.
	// -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799
	//
	// The three capture groups are, in order: the source address following -s,
	// the id embedded in the rule comment, and the CNI-prefixed chain the rule
	// jumps to.
	ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`)
)
// forceCleanup is the fallback for removing the iptables jump rule and chain
// that belong to an allocation using bridge networking. The cni library will
// not operate on a dirty state - e.g. when the pause container was removed out
// of band - so the rules are removed here by hand to avoid leaking them.
//
// It looks up the allocation's rule on the nat table's POSTROUTING chain,
// deletes that rule, and then clears and deletes the chain the rule jumps to.
// Both removals are always attempted; if either one fails a warning is logged
// and a generic cleanup error is returned.
func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error {
	const (
		natTable         = "nat"
		postRoutingChain = "POSTROUTING"
		commentFmt       = `--comment "name: \"nomad\" id: \"%s\""`
	)

	// fetch every rule currently on nat/POSTROUTING
	listed, err := ipt.List(natTable, postRoutingChain)
	if err != nil {
		return fmt.Errorf("failed to list iptables rules: %w", err)
	}

	// locate the first rule whose comment carries this allocation's ID
	needle := fmt.Sprintf(commentFmt, allocID)
	match := -1
	for i := range listed {
		if strings.Contains(listed[i], needle) {
			match = i
			break
		}
	}

	// nothing on the chain belongs to this allocation
	if match < 0 {
		return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID)
	}

	// break the rule apart so it can be rebuilt as a token list for deletion
	parts := ipRuleRe.FindStringSubmatch(listed[match])
	if len(parts) != 4 {
		return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID)
	}
	source, ruleID, chainID := parts[1], parts[2], parts[3]

	ruleSpec := []string{
		"-s", source,
		"-m", "comment",
		"--comment", fmt.Sprintf(`name: "nomad" id: "%s"`, ruleID),
		"-j", chainID,
	}

	failed := false

	// drop the jump rule first
	if delErr := ipt.Delete(natTable, postRoutingChain, ruleSpec...); delErr != nil {
		c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", delErr)
		failed = true
	}

	// then remove the chain it pointed at, even if the rule removal failed
	if chainErr := ipt.ClearAndDeleteChain(natTable, chainID); chainErr != nil {
		c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", chainErr)
		failed = true
	}

	if failed {
		return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID)
	}
	return nil
}
// ensureCNIInitialized checks the status of the CNI handle and, when the
// status reports that CNI has not been initialized, loads the stored
// configuration into it. Any other status (including success) is returned
// unchanged.
func (c *cniNetworkConfigurator) ensureCNIInitialized() error {
	status := c.cni.Status()
	if !cni.IsCNINotInitialized(status) {
		return status
	}
	return c.cni.Load(cni.WithConfListBytes(c.cniConf))
}
// getPortMapping builds a list of portMapping structs that are used as the
// portmapping capability arguments for the portmap CNI plugin.
//
// When the allocation has no shared ports but does have shared networks, the
// mappings are built from every network's dynamic and reserved ports and no
// host IP is set. Otherwise they are built from the shared ports, and the host
// IP is set unless ignoreHostIP is true. In both cases a port whose To value
// is below 1 maps to its own Value, and each port produces one tcp and one udp
// mapping.
func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping {
	var ports []cni.PortMapping

	if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 {
		for _, network := range alloc.AllocatedResources.Shared.Networks {
			// Cap the dynamic ports slice at its length so append must copy
			// into a fresh backing array rather than writing the reserved
			// ports into spare capacity owned by the allocation.
			dynamic := network.DynamicPorts[:len(network.DynamicPorts):len(network.DynamicPorts)]
			for _, port := range append(dynamic, network.ReservedPorts...) {
				if port.To < 1 {
					port.To = port.Value
				}
				for _, proto := range []string{"tcp", "udp"} {
					ports = append(ports, cni.PortMapping{
						HostPort:      int32(port.Value),
						ContainerPort: int32(port.To),
						Protocol:      proto,
					})
				}
			}
		}
	} else {
		for _, port := range alloc.AllocatedResources.Shared.Ports {
			if port.To < 1 {
				port.To = port.Value
			}
			for _, proto := range []string{"tcp", "udp"} {
				portMapping := cni.PortMapping{
					HostPort:      int32(port.Value),
					ContainerPort: int32(port.To),
					Protocol:      proto,
				}
				if !ignoreHostIP {
					portMapping.HostIP = port.HostIP
				}
				ports = append(ports, portMapping)
			}
		}
	}
	return ports
}