qemu: reduce monitor socket path (#13971)

The QEMU driver can take an optional `graceful_shutdown` configuration
which will create a Unix socket to send ACPI shutdown signal to the VM.

Unix sockets have a hard length limit and the driver implementation
assumed that QEMU versions 2.10.1 were able to handle longer paths. This
is not correct, the linked QEMU fix only changed the behaviour from
silently truncating longer socket paths to throwing an error.

By validating the socket path before starting the QEMU machine we can
provide users a more actionable and meaningful error message, and by
using a shorter socket file name we leave a bit more room for
user-defined values in the path, such as the task name.

The maximum length allowed is also platform-dependant, so validation
needs to be different for each OS.
This commit is contained in:
Luiz Aoqui 2022-08-04 12:10:35 -04:00 committed by GitHub
parent 7a8ec90fbe
commit 9affe31a0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 81 additions and 182 deletions

3
.changelog/13971.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
qemu: use shorter socket file names to reduce the chance of hitting the max path length
```

View File

@ -13,7 +13,6 @@ import (
"strings" "strings"
"time" "time"
"github.com/coreos/go-semver/semver"
hclog "github.com/hashicorp/go-hclog" hclog "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/drivers/shared/eventer" "github.com/hashicorp/nomad/drivers/shared/eventer"
@ -39,14 +38,13 @@ const (
// Represents an ACPI shutdown request to the VM (emulates pressing a physical power button) // Represents an ACPI shutdown request to the VM (emulates pressing a physical power button)
// Reference: https://en.wikibooks.org/wiki/QEMU/Monitor // Reference: https://en.wikibooks.org/wiki/QEMU/Monitor
// Use a short file name since socket paths have a maximum length.
qemuGracefulShutdownMsg = "system_powerdown\n" qemuGracefulShutdownMsg = "system_powerdown\n"
qemuMonitorSocketName = "qemu-monitor.sock" qemuMonitorSocketName = "qm.sock"
// Socket file enabling communication with the Qemu Guest Agent (if enabled and running) // Socket file enabling communication with the Qemu Guest Agent (if enabled and running)
qemuGuestAgentSocketName = "qemu-guest-agent.sock" // Use a short file name since socket paths have a maximum length.
qemuGuestAgentSocketName = "qa.sock"
// Maximum socket path length prior to qemu 2.10.1
qemuLegacyMaxMonitorPathLen = 108
// taskHandleVersion is the version of task handle which this driver sets // taskHandleVersion is the version of task handle which this driver sets
// and understands how to decode driver state // and understands how to decode driver state
@ -70,14 +68,6 @@ var (
versionRegex = regexp.MustCompile(`version (\d[\.\d+]+)`) versionRegex = regexp.MustCompile(`version (\d[\.\d+]+)`)
// Prior to qemu 2.10.1, monitor socket paths are truncated to 108 bytes.
// We should consider this if driver.qemu.version is < 2.10.1 and the
// generated monitor path is too long.
//
// Relevant fix is here:
// https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6
qemuVersionLongSocketPathFix = semver.New("2.10.1")
// pluginInfo is the response returned for the PluginInfo RPC // pluginInfo is the response returned for the PluginInfo RPC
pluginInfo = &base.PluginInfoResponse{ pluginInfo = &base.PluginInfoResponse{
Type: base.PluginTypeDriver, Type: base.PluginTypeDriver,
@ -307,11 +297,19 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
// Try to restore monitor socket path. // Try to restore monitor socket path.
taskDir := filepath.Join(handle.Config.AllocDir, handle.Config.Name) taskDir := filepath.Join(handle.Config.AllocDir, handle.Config.Name)
monitorPath := filepath.Join(taskDir, qemuMonitorSocketName) possiblePaths := []string{
if _, err := os.Stat(monitorPath); err == nil { filepath.Join(taskDir, qemuMonitorSocketName),
d.logger.Debug("found existing monitor socket", "monitor", monitorPath) // Support restoring tasks that used the old socket name.
} else { filepath.Join(taskDir, "qemu-monitor.sock"),
monitorPath = "" }
var monitorPath string
for _, path := range possiblePaths {
if _, err := os.Stat(path); err == nil {
monitorPath = path
d.logger.Debug("found existing monitor socket", "monitor", monitorPath)
break
}
} }
h := &taskHandle{ h := &taskHandle{
@ -468,6 +466,8 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
} }
} }
taskDir := filepath.Join(cfg.AllocDir, cfg.Name)
var monitorPath string var monitorPath string
if driverConfig.GracefulShutdown { if driverConfig.GracefulShutdown {
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
@ -475,14 +475,8 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
} }
// This socket will be used to manage the virtual machine (for example, // This socket will be used to manage the virtual machine (for example,
// to perform graceful shutdowns) // to perform graceful shutdowns)
taskDir := filepath.Join(cfg.AllocDir, cfg.Name) monitorPath = filepath.Join(taskDir, qemuMonitorSocketName)
fingerPrint := d.buildFingerprint() if err := validateSocketPath(monitorPath); err != nil {
if fingerPrint.Attributes == nil {
return nil, nil, fmt.Errorf("unable to get qemu driver version from fingerprinted attributes")
}
monitorPath, err = d.getMonitorPath(taskDir, fingerPrint)
if err != nil {
d.logger.Debug("could not get qemu monitor path", "error", err)
return nil, nil, err return nil, nil, err
} }
d.logger.Debug("got monitor path", "monitorPath", monitorPath) d.logger.Debug("got monitor path", "monitorPath", monitorPath)
@ -494,8 +488,12 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
return nil, nil, errors.New("QEMU Guest Agent socket is unsupported on the Windows platform") return nil, nil, errors.New("QEMU Guest Agent socket is unsupported on the Windows platform")
} }
// This socket will be used to communicate with the Guest Agent (if it's running) // This socket will be used to communicate with the Guest Agent (if it's running)
taskDir := filepath.Join(cfg.AllocDir, cfg.Name) agentSocketPath := filepath.Join(taskDir, qemuGuestAgentSocketName)
args = append(args, "-chardev", fmt.Sprintf("socket,path=%s/%s,server,nowait,id=qga0", taskDir, qemuGuestAgentSocketName)) if err := validateSocketPath(agentSocketPath); err != nil {
return nil, nil, err
}
args = append(args, "-chardev", fmt.Sprintf("socket,path=%s,server,nowait,id=qga0", agentSocketPath))
args = append(args, "-device", "virtio-serial") args = append(args, "-device", "virtio-serial")
args = append(args, "-device", "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0") args = append(args, "-device", "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0")
} }
@ -647,6 +645,8 @@ func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) e
if err := sendQemuShutdown(d.logger, handle.monitorPath, handle.pid); err != nil { if err := sendQemuShutdown(d.logger, handle.monitorPath, handle.pid); err != nil {
d.logger.Debug("error sending graceful shutdown ", "pid", handle.pid, "error", err) d.logger.Debug("error sending graceful shutdown ", "pid", handle.pid, "error", err)
} }
} else {
d.logger.Debug("monitor socket is empty, forcing shutdown")
} }
// TODO(preetha) we are calling shutdown on the executor here // TODO(preetha) we are calling shutdown on the executor here
@ -748,27 +748,16 @@ func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *dr
} }
} }
// getMonitorPath is used to determine whether a qemu monitor socket can be // validateSocketPath provides best effort validation of socket paths since
// safely created and accessed in the task directory by the version of qemu // some rules may be platform-dependant.
// present on the host. If it is safe to use, the socket's full path is func validateSocketPath(path string) error {
// returned along with a nil error. Otherwise, an empty string is returned if maxSocketPathLen > 0 && len(path) > maxSocketPathLen {
// along with a descriptive error. return fmt.Errorf(
func (d *Driver) getMonitorPath(dir string, fingerPrint *drivers.Fingerprint) (string, error) { "socket path %s is longer than the maximum length allowed (%d), try to reduce the task name or Nomad's data_dir if possible.",
var longPathSupport bool path, maxSocketPathLen)
currentQemuVer := fingerPrint.Attributes[driverVersionAttr]
currentQemuSemver := semver.New(currentQemuVer.GoString())
if currentQemuSemver.LessThan(*qemuVersionLongSocketPathFix) {
longPathSupport = false
d.logger.Debug("long socket paths are not available in this version of QEMU", "version", currentQemuVer)
} else {
longPathSupport = true
d.logger.Debug("long socket paths available in this version of QEMU", "version", currentQemuVer)
} }
fullSocketPath := fmt.Sprintf("%s/%s", dir, qemuMonitorSocketName)
if len(fullSocketPath) > qemuLegacyMaxMonitorPathLen && !longPathSupport { return nil
return "", fmt.Errorf("monitor path is too long for this version of qemu")
}
return fullSocketPath, nil
} }
// sendQemuShutdown attempts to issue an ACPI power-off command via the qemu // sendQemuShutdown attempts to issue an ACPI power-off command via the qemu

View File

@ -0,0 +1,12 @@
//go:build darwin || freebsd || netbsd || openbsd
// +build darwin freebsd netbsd openbsd
package qemu
const (
// https://man.openbsd.org/unix.4#ADDRESSING
// https://www.freebsd.org/cgi/man.cgi?query=unix
// https://github.com/apple/darwin-xnu/blob/main/bsd/man/man4/unix.4#L72
// https://man.netbsd.org/unix.4#ADDRESSING
maxSocketPathLen = 104
)

View File

@ -0,0 +1,9 @@
//go:build !linux && !darwin && !freebsd && !netbsd && !openbsd
// +build !linux,!darwin,!freebsd,!netbsd,!openbsd
package qemu
const (
// Don't enforce any path limit.
maxSocketPathLen = 0
)

View File

@ -0,0 +1,9 @@
//go:build linux
// +build linux
package qemu
const (
// https://man7.org/linux/man-pages/man7/unix.7.html
maxSocketPathLen = 108
)

View File

@ -5,7 +5,6 @@ import (
"io" "io"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
"time" "time"
@ -17,7 +16,6 @@ import (
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/plugins/drivers"
dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils" dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils"
pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/hashicorp/nomad/testutil" "github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -88,119 +86,6 @@ func TestQemuDriver_Start_Wait_Stop(t *testing.T) {
} }
// Verifies monitor socket path for old qemu
func TestQemuDriver_GetMonitorPathOldQemu(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
require := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
d := NewQemuDriver(ctx, testlog.HCLogger(t))
harness := dtestutil.NewDriverHarness(t, d)
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "linux",
Resources: &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 100,
},
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
},
}
cleanup := harness.MkAllocDir(task, true)
defer cleanup()
fingerPrint := &drivers.Fingerprint{
Attributes: map[string]*pstructs.Attribute{
driverVersionAttr: pstructs.NewStringAttribute("2.0.0"),
},
}
shortPath := strings.Repeat("x", 10)
qemuDriver := d.(*Driver)
_, err := qemuDriver.getMonitorPath(shortPath, fingerPrint)
require.Nil(err)
longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = qemuDriver.getMonitorPath(longPath, fingerPrint)
require.NotNil(err)
// Max length includes the '/' separator and socket name
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = qemuDriver.getMonitorPath(maxLengthLegacyPath, fingerPrint)
require.Nil(err)
}
// Verifies monitor socket path for new qemu version
func TestQemuDriver_GetMonitorPathNewQemu(t *testing.T) {
ci.Parallel(t)
ctestutil.QemuCompatible(t)
require := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
d := NewQemuDriver(ctx, testlog.HCLogger(t))
harness := dtestutil.NewDriverHarness(t, d)
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "linux",
Resources: &drivers.Resources{
NomadResources: &structs.AllocatedTaskResources{
Memory: structs.AllocatedMemoryResources{
MemoryMB: 512,
},
Cpu: structs.AllocatedCpuResources{
CpuShares: 100,
},
Networks: []*structs.NetworkResource{
{
ReservedPorts: []structs.Port{{Label: "main", Value: 22000}, {Label: "web", Value: 80}},
},
},
},
},
}
cleanup := harness.MkAllocDir(task, true)
defer cleanup()
fingerPrint := &drivers.Fingerprint{
Attributes: map[string]*pstructs.Attribute{
driverVersionAttr: pstructs.NewStringAttribute("2.99.99"),
},
}
shortPath := strings.Repeat("x", 10)
qemuDriver := d.(*Driver)
_, err := qemuDriver.getMonitorPath(shortPath, fingerPrint)
require.Nil(err)
// Should not return an error in this qemu version
longPath := strings.Repeat("x", qemuLegacyMaxMonitorPathLen+100)
_, err = qemuDriver.getMonitorPath(longPath, fingerPrint)
require.Nil(err)
// Max length includes the '/' separator and socket name
maxLengthCount := qemuLegacyMaxMonitorPathLen - len(qemuMonitorSocketName) - 1
maxLengthLegacyPath := strings.Repeat("x", maxLengthCount)
_, err = qemuDriver.getMonitorPath(maxLengthLegacyPath, fingerPrint)
require.Nil(err)
}
// copyFile moves an existing file to the destination // copyFile moves an existing file to the destination
func copyFile(src, dst string, t *testing.T) { func copyFile(src, dst string, t *testing.T) {
in, err := os.Open(src) in, err := os.Open(src)

1
go.mod
View File

@ -23,7 +23,6 @@ require (
github.com/containernetworking/cni v1.0.1 github.com/containernetworking/cni v1.0.1
github.com/containernetworking/plugins v1.0.1 github.com/containernetworking/plugins v1.0.1
github.com/coreos/go-iptables v0.6.0 github.com/coreos/go-iptables v0.6.0
github.com/coreos/go-semver v0.3.0
github.com/creack/pty v1.1.18 github.com/creack/pty v1.1.18
github.com/docker/cli v20.10.3-0.20220113150236-6e2838e18645+incompatible github.com/docker/cli v20.10.3-0.20220113150236-6e2838e18645+incompatible
github.com/docker/distribution v2.8.1+incompatible github.com/docker/distribution v2.8.1+incompatible

1
go.sum
View File

@ -352,7 +352,6 @@ github.com/coreos/go-iptables v0.6.0 h1:is9qnZMPYjLd8LYqmm/qlE+wwEgJIkTYdhV3rfZo
github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q=
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=

View File

@ -55,22 +55,16 @@ The `qemu` driver supports the following configuration in the job spec:
signal to virtual machines rather than simply terminating them. This emulates signal to virtual machines rather than simply terminating them. This emulates
a physical power button press, and gives instances a chance to shut down a physical power button press, and gives instances a chance to shut down
cleanly. If the VM is still running after `kill_timeout`, it will be cleanly. If the VM is still running after `kill_timeout`, it will be
forcefully terminated. (Note that forcefully terminated. This feature uses a Unix socket that is placed within
[prior to qemu 2.10.1](https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6), the task directory and operating systems may impose a limit on how long these
the monitor socket path is limited to 108 characters. Graceful shutdown will paths can be. This feature is currently not supported on Windows.
be disabled if QEMU is < 2.10.1 and the generated monitor path exceeds this
length. You may encounter this issue if you set long
[data_dir](/docs/configuration#data_dir)
or
[alloc_dir](/docs/configuration/client#alloc_dir)
paths.) This feature is currently not supported on Windows.
- `guest_agent` `(bool: false)` - Enable support for the [QEMU Guest - `guest_agent` `(bool: false)` - Enable support for the [QEMU Guest
Agent](https://wiki.qemu.org/Features/GuestAgent) for this virtual machine. This Agent](https://wiki.qemu.org/Features/GuestAgent) for this virtual machine.
will add the necessary virtual hardware and create a `qemu-guest-agent.sock` This will add the necessary virtual hardware and create a `qa.sock` file in
file in the task's working directory for interacting with the agent. The QEMU Guest the task's working directory for interacting with the agent. The QEMU Guest
Agent must be running in the guest VM. This feature is currently not supported Agent must be running in the guest VM. This feature is currently not
on Windows. supported on Windows.
- `port_map` - (Optional) A key-value map of port labels. - `port_map` - (Optional) A key-value map of port labels.