package driver

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"strings"
	"time"

	"github.com/coreos/go-semver/semver"
	plugin "github.com/hashicorp/go-plugin"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/driver/executor"
	dstructs "github.com/hashicorp/nomad/client/driver/structs"
	"github.com/hashicorp/nomad/client/fingerprint"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper/fields"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/mitchellh/mapstructure"
)

var (
	reQemuVersion = regexp.MustCompile(`version (\d[\.\d+]+)`)

	// Prior to qemu 2.10.1, monitor socket paths are truncated to 108 bytes.
	// We enforce this limit when driver.qemu.version is < 2.10.1 and the
	// generated monitor path would be too long.
	//
	// Relevant fix is here:
	// https://github.com/qemu/qemu/commit/ad9579aaa16d5b385922d49edac2c96c79bcfb6
	qemuVersionLongSocketPathFix = semver.New("2.10.1")
)

const (
	// The key populated in Node Attributes to indicate presence of the Qemu driver
	qemuDriverAttr        = "driver.qemu"
	qemuDriverVersionAttr = "driver.qemu.version"

	// Represents an ACPI shutdown request to the VM (emulates pressing a
	// physical power button)
	// Reference: https://en.wikibooks.org/wiki/QEMU/Monitor
	qemuGracefulShutdownMsg = "system_powerdown\n"
	qemuMonitorSocketName   = "qemu-monitor.sock"

	// Maximum socket path length prior to qemu 2.10.1
	qemuLegacyMaxMonitorPathLen = 108
)

// QemuDriver is a driver for running images via Qemu. We attempt to choose
// sane defaults for now, with more configuration options planned for the
// future.
type QemuDriver struct {
	DriverContext
	fingerprint.StaticFingerprinter

	driverConfig *QemuDriverConfig
}

type QemuDriverConfig struct {
	ImagePath        string           `mapstructure:"image_path"`
	Accelerator      string           `mapstructure:"accelerator"`
	GracefulShutdown bool             `mapstructure:"graceful_shutdown"`
	PortMap          []map[string]int `mapstructure:"port_map"` // A map of host port labels to guest ports.
	Args             []string         `mapstructure:"args"`     // extra arguments to the qemu executable
}
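
// An illustrative task configuration for this driver. All paths, labels, and
// values below are hypothetical; the field names follow the mapstructure tags
// above:
//
//	task "virtual-machine" {
//	  driver = "qemu"
//
//	  config {
//	    image_path        = "local/linux.img"
//	    accelerator       = "kvm"
//	    graceful_shutdown = true
//	    args              = ["-nodefaults", "-nodefconfig"]
//
//	    port_map {
//	      ssh = 22
//	    }
//	  }
//
//	  resources {
//	    memory = 512
//	    network {
//	      port "ssh" {}
//	    }
//	  }
//	}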

// qemuHandle is returned from Start/Open as a handle to the PID
type qemuHandle struct {
	pluginClient   *plugin.Client
	userPid        int
	executor       executor.Executor
	monitorPath    string
	killTimeout    time.Duration
	maxKillTimeout time.Duration
	logger         *log.Logger
	version        string
	waitCh         chan *dstructs.WaitResult
	doneCh         chan struct{}
}

// getMonitorPath is used to determine whether a qemu monitor socket can be
// safely created and accessed in the task directory by the version of qemu
// present on the host. If it is safe to use, the socket's full path is
// returned along with a nil error. Otherwise, an empty string is returned
// along with a descriptive error.
func (d *QemuDriver) getMonitorPath(dir string) (string, error) {
	var longPathSupport bool
	currentQemuVer := d.DriverContext.node.Attributes[qemuDriverVersionAttr]
	currentQemuSemver := semver.New(currentQemuVer)
	if currentQemuSemver.LessThan(*qemuVersionLongSocketPathFix) {
		longPathSupport = false
		d.logger.Printf("[DEBUG] driver.qemu: long socket paths are not available in this version of QEMU (%s)", currentQemuVer)
	} else {
		longPathSupport = true
		d.logger.Printf("[DEBUG] driver.qemu: long socket paths available in this version of QEMU (%s)", currentQemuVer)
	}
	fullSocketPath := fmt.Sprintf("%s/%s", dir, qemuMonitorSocketName)
	if len(fullSocketPath) > qemuLegacyMaxMonitorPathLen && !longPathSupport {
		return "", fmt.Errorf("monitor path is too long for this version of qemu")
	}
	return fullSocketPath, nil
}

// NewQemuDriver is used to create a new qemu driver
func NewQemuDriver(ctx *DriverContext) Driver {
	return &QemuDriver{DriverContext: *ctx}
}

// Validate is used to validate the driver configuration
func (d *QemuDriver) Validate(config map[string]interface{}) error {
	fd := &fields.FieldData{
		Raw: config,
		Schema: map[string]*fields.FieldSchema{
			"image_path": {
				Type:     fields.TypeString,
				Required: true,
			},
			"accelerator": {
				Type: fields.TypeString,
			},
			"graceful_shutdown": {
				Type:     fields.TypeBool,
				Required: false,
			},
			"port_map": {
				Type: fields.TypeArray,
			},
			"args": {
				Type: fields.TypeArray,
			},
		},
	}

	return fd.Validate()
}

func (d *QemuDriver) Abilities() DriverAbilities {
	return DriverAbilities{
		SendSignals: false,
		Exec:        false,
	}
}

func (d *QemuDriver) FSIsolation() cstructs.FSIsolation {
	return cstructs.FSIsolationImage
}

func (d *QemuDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
	bin := "qemu-system-x86_64"
	if runtime.GOOS == "windows" {
		// On windows, the "qemu-system-x86_64" command does not respond to the
		// version flag.
		bin = "qemu-img"
	}
	outBytes, err := exec.Command(bin, "--version").Output()
	if err != nil {
		delete(node.Attributes, qemuDriverAttr)
		return false, nil
	}
	out := strings.TrimSpace(string(outBytes))

	matches := reQemuVersion.FindStringSubmatch(out)
	if len(matches) != 2 {
		delete(node.Attributes, qemuDriverAttr)
		return false, fmt.Errorf("Unable to parse Qemu version string: %#v", matches)
	}
	currentQemuVersion := matches[1]

	node.Attributes[qemuDriverAttr] = "1"
	node.Attributes[qemuDriverVersionAttr] = currentQemuVersion

	return true, nil
}

func (d *QemuDriver) Prestart(_ *ExecContext, task *structs.Task) (*PrestartResponse, error) {
	var driverConfig QemuDriverConfig
	if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
		return nil, err
	}

	if len(driverConfig.PortMap) > 1 {
		return nil, fmt.Errorf("Only one port_map block is allowed in the qemu driver config")
	}

	d.driverConfig = &driverConfig

	r := NewPrestartResponse()
	if len(driverConfig.PortMap) == 1 {
		r.Network = &cstructs.DriverNetwork{
			PortMap: driverConfig.PortMap[0],
		}
	}
	return r, nil
}
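
// For illustration only (labels and ports are hypothetical): a task config
// containing
//
//	port_map {
//	  ssh = 22
//	}
//
// decodes via WeakDecode into PortMap = []map[string]int{{"ssh": 22}}. HCL
// blocks decode to a list, which is why PortMap is a slice and why at most one
// block is accepted; Prestart then advertises the single entry back to the
// client as DriverNetwork{PortMap: map[string]int{"ssh": 22}}.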

// Run an existing Qemu image. Start() will pull down an existing, valid Qemu
// image and save it to the Driver's Allocation Dir
func (d *QemuDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) {
	// Get the image source
	vmPath := d.driverConfig.ImagePath
	if vmPath == "" {
		return nil, fmt.Errorf("image_path must be set")
	}
	vmID := filepath.Base(vmPath)

	// Parse configuration arguments
	// Create the base arguments
	accelerator := "tcg"
	if d.driverConfig.Accelerator != "" {
		accelerator = d.driverConfig.Accelerator
	}

	if task.Resources.MemoryMB < 128 || task.Resources.MemoryMB > 4000000 {
		return nil, fmt.Errorf("Qemu memory assignment out of bounds")
	}
	mem := fmt.Sprintf("%dM", task.Resources.MemoryMB)

	absPath, err := GetAbsolutePath("qemu-system-x86_64")
	if err != nil {
		return nil, err
	}

	args := []string{
		absPath,
		"-machine", "type=pc,accel=" + accelerator,
		"-name", vmID,
		"-m", mem,
		"-drive", "file=" + vmPath,
		"-nographic",
	}

	var monitorPath string
	if d.driverConfig.GracefulShutdown {
		if runtime.GOOS == "windows" {
			return nil, errors.New("QEMU graceful shutdown is unsupported on the Windows platform")
		}

		// This socket will be used to manage the virtual machine (for example,
		// to perform graceful shutdowns)
		monitorPath, err = d.getMonitorPath(ctx.TaskDir.Dir)
		if err != nil {
			d.logger.Printf("[ERR] driver.qemu: could not get qemu monitor path: %s", err)
			return nil, err
		}
		d.logger.Printf("[DEBUG] driver.qemu: got monitor path OK: %s", monitorPath)
		args = append(args, "-monitor", fmt.Sprintf("unix:%s,server,nowait", monitorPath))
	}

	// Add pass-through arguments to the qemu executable. A user can specify
	// these arguments in the driver task configuration, and they are passed
	// directly to qemu as command line options.
	// For example, args = [ "-nodefconfig", "-nodefaults" ]
	// This will allow a VM with embedded configuration to boot successfully.
	args = append(args, d.driverConfig.Args...)

	// Check the task's required Networks for port mappings. If no network
	// resources are required, we assume the VM is a purely compute job and
	// does not need to be reachable from the outside world. VMs run without
	// port mappings can still reach out to the world, but without port
	// mappings they are effectively firewalled.
	protocols := []string{"udp", "tcp"}
	if len(task.Resources.Networks) > 0 && len(d.driverConfig.PortMap) == 1 {
		// Loop through the port map and construct the hostfwd string, to map
		// reserved ports to the ports listening in the VM
		// Ex: hostfwd=tcp::22000-:22,hostfwd=tcp::80-:8080
		var forwarding []string
		taskPorts := task.Resources.Networks[0].PortLabels()
		for label, guest := range d.driverConfig.PortMap[0] {
			host, ok := taskPorts[label]
			if !ok {
				return nil, fmt.Errorf("Unknown port label %q", label)
			}

			for _, p := range protocols {
				forwarding = append(forwarding, fmt.Sprintf("hostfwd=%s::%d-:%d", p, host, guest))
			}
		}

		if len(forwarding) != 0 {
			args = append(args,
				"-netdev",
				fmt.Sprintf("user,id=user.0,%s", strings.Join(forwarding, ",")),
				"-device", "virtio-net,netdev=user.0",
			)
		}
	}
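
	// Illustration with hypothetical values: given port_map { ssh = 22 } and a
	// task network that reserved host port 22000 for the "ssh" label, the loop
	// above appends:
	//
	//	-netdev user,id=user.0,hostfwd=udp::22000-:22,hostfwd=tcp::22000-:22
	//	-device virtio-net,netdev=user.0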

	// If using KVM, add optimization args
	if accelerator == "kvm" {
		if runtime.GOOS == "windows" {
			return nil, errors.New("KVM accelerator is unsupported on the Windows platform")
		}
		args = append(args,
			"-enable-kvm",
			"-cpu", "host",
			// Do we have cores information available to the Driver?
			// "-smp", fmt.Sprintf("%d", cores),
		)
	}

	d.logger.Printf("[DEBUG] driver.qemu: starting QemuVM command: %q", strings.Join(args, " "))
	pluginLogFile := filepath.Join(ctx.TaskDir.Dir, "executor.out")
	executorConfig := &dstructs.ExecutorConfig{
		LogFile:  pluginLogFile,
		LogLevel: d.config.LogLevel,
	}

	exec, pluginClient, err := createExecutor(d.config.LogOutput, d.config, executorConfig)
	if err != nil {
		return nil, err
	}
	executorCtx := &executor.ExecutorContext{
		TaskEnv: ctx.TaskEnv,
		Driver:  "qemu",
		Task:    task,
		TaskDir: ctx.TaskDir.Dir,
		LogDir:  ctx.TaskDir.LogDir,
	}
	if err := exec.SetContext(executorCtx); err != nil {
		pluginClient.Kill()
		return nil, fmt.Errorf("failed to set executor context: %v", err)
	}

	execCmd := &executor.ExecCommand{
		Cmd:  args[0],
		Args: args[1:],
		User: task.User,
	}
	ps, err := exec.LaunchCmd(execCmd)
	if err != nil {
		pluginClient.Kill()
		return nil, err
	}
	d.logger.Printf("[INFO] driver.qemu: started new QemuVM: %s", vmID)

	// Create and return the handle
	maxKill := d.DriverContext.config.MaxKillTimeout
	h := &qemuHandle{
		pluginClient:   pluginClient,
		executor:       exec,
		userPid:        ps.Pid,
		killTimeout:    GetKillTimeout(task.KillTimeout, maxKill),
		maxKillTimeout: maxKill,
		monitorPath:    monitorPath,
		version:        d.config.Version.VersionNumber(),
		logger:         d.logger,
		doneCh:         make(chan struct{}),
		waitCh:         make(chan *dstructs.WaitResult, 1),
	}
	go h.run()

	resp := &StartResponse{Handle: h}
	if len(d.driverConfig.PortMap) == 1 {
		resp.Network = &cstructs.DriverNetwork{
			PortMap: d.driverConfig.PortMap[0],
		}
	}
	return resp, nil
}

type qemuId struct {
	Version        string
	KillTimeout    time.Duration
	MaxKillTimeout time.Duration
	UserPid        int
	PluginConfig   *PluginReattachConfig
}

func (d *QemuDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
	id := &qemuId{}
	if err := json.Unmarshal([]byte(handleID), id); err != nil {
		return nil, fmt.Errorf("Failed to parse handle %q: %v", handleID, err)
	}

	pluginConfig := &plugin.ClientConfig{
		Reattach: id.PluginConfig.PluginConfig(),
	}

	exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput)
	if err != nil {
		d.logger.Printf("[ERR] driver.qemu: error connecting to plugin, destroying plugin pid %d and user pid %d", id.PluginConfig.Pid, id.UserPid)
		if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil {
			d.logger.Printf("[ERR] driver.qemu: error destroying plugin pid %d and user pid %d: %v", id.PluginConfig.Pid, id.UserPid, e)
		}
		return nil, fmt.Errorf("error connecting to plugin: %v", err)
	}

	ver, _ := exec.Version()
	d.logger.Printf("[DEBUG] driver.qemu: version of executor: %v", ver.Version)

	// Return a driver handle
	h := &qemuHandle{
		pluginClient:   pluginClient,
		executor:       exec,
		userPid:        id.UserPid,
		logger:         d.logger,
		killTimeout:    id.KillTimeout,
		maxKillTimeout: id.MaxKillTimeout,
		version:        id.Version,
		doneCh:         make(chan struct{}),
		waitCh:         make(chan *dstructs.WaitResult, 1),
	}
	go h.run()
	return h, nil
}

func (d *QemuDriver) Cleanup(*ExecContext, *CreatedResources) error { return nil }

func (h *qemuHandle) ID() string {
	id := qemuId{
		Version:        h.version,
		KillTimeout:    h.killTimeout,
		MaxKillTimeout: h.maxKillTimeout,
		PluginConfig:   NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
		UserPid:        h.userPid,
	}

	data, err := json.Marshal(id)
	if err != nil {
		h.logger.Printf("[ERR] driver.qemu: failed to marshal ID to JSON: %s", err)
	}
	return string(data)
}
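
// For illustration only (all values hypothetical): the handle ID marshaled by
// ID() above, and parsed back by Open(), is a JSON blob along the lines of
//
//	{"Version":"0.7.0","KillTimeout":5000000000,"MaxKillTimeout":30000000000,
//	 "UserPid":12345,"PluginConfig":{...}}
//
// with the durations encoded as nanoseconds.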

func (h *qemuHandle) WaitCh() chan *dstructs.WaitResult {
	return h.waitCh
}

func (h *qemuHandle) Update(task *structs.Task) error {
	// Store the updated kill timeout.
	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
	h.executor.UpdateTask(task)

	// Update is not possible
	return nil
}

func (h *qemuHandle) Exec(ctx context.Context, cmd string, args []string) ([]byte, int, error) {
	return nil, 0, fmt.Errorf("Qemu driver can't execute commands")
}

func (h *qemuHandle) Signal(s os.Signal) error {
	return fmt.Errorf("Qemu driver can't send signals")
}

func (h *qemuHandle) Kill() error {
	gracefulShutdownSent := false
	// Attempt a graceful shutdown only if it was configured in the job
	if h.monitorPath != "" {
		if err := sendQemuShutdown(h.logger, h.monitorPath, h.userPid); err == nil {
			gracefulShutdownSent = true
		} else {
			h.logger.Printf("[DEBUG] driver.qemu: error sending graceful shutdown for user process pid %d: %s", h.userPid, err)
		}
	}

	// If the graceful shutdown command was not sent, interrupt the qemu
	// process as a last resort
	if !gracefulShutdownSent {
		if err := h.executor.ShutDown(); err != nil {
			if h.pluginClient.Exited() {
				return nil
			}
			return fmt.Errorf("executor Shutdown failed: %v", err)
		}
	}

	// If the qemu process exits before the kill timeout is reached, doneCh
	// will close and we'll exit without an error. If it takes too long, the
	// timer will fire and we'll attempt to kill the process.
	select {
	case <-h.doneCh:
		return nil
	case <-time.After(h.killTimeout):
		h.logger.Printf("[DEBUG] driver.qemu: kill timeout of %s exceeded for user process pid %d", h.killTimeout.String(), h.userPid)

		if h.pluginClient.Exited() {
			return nil
		}
		if err := h.executor.Exit(); err != nil {
			return fmt.Errorf("executor Exit failed: %v", err)
		}
		return nil
	}
}

func (h *qemuHandle) Stats() (*cstructs.TaskResourceUsage, error) {
	return h.executor.Stats()
}

func (h *qemuHandle) run() {
	ps, werr := h.executor.Wait()
	if ps.ExitCode == 0 && werr != nil {
		if e := killProcess(h.userPid); e != nil {
			h.logger.Printf("[ERR] driver.qemu: error killing user process pid %d: %v", h.userPid, e)
		}
	}
	close(h.doneCh)

	// Exit the executor
	h.executor.Exit()
	h.pluginClient.Kill()

	// Send the results
	h.waitCh <- &dstructs.WaitResult{ExitCode: ps.ExitCode, Signal: ps.Signal, Err: werr}
	close(h.waitCh)
}

// sendQemuShutdown attempts to issue an ACPI power-off command via the qemu
// monitor
func sendQemuShutdown(logger *log.Logger, monitorPath string, userPid int) error {
	if monitorPath == "" {
		return errors.New("monitorPath not set")
	}
	monitorSocket, err := net.Dial("unix", monitorPath)
	if err != nil {
		logger.Printf("[WARN] driver.qemu: could not connect to qemu monitor %q for user process pid %d: %s", monitorPath, userPid, err)
		return err
	}
	defer monitorSocket.Close()

	logger.Printf("[DEBUG] driver.qemu: sending graceful shutdown command to qemu monitor socket %q for user process pid %d", monitorPath, userPid)
	_, err = monitorSocket.Write([]byte(qemuGracefulShutdownMsg))
	if err != nil {
		logger.Printf("[WARN] driver.qemu: failed to send shutdown message %q to monitor socket %q for user process pid %d: %s", qemuGracefulShutdownMsg, monitorPath, userPid, err)
	}
	return err
}
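
// qemuShutdownSketch is an illustrative sketch only and is not called by the
// driver: it points sendQemuShutdown at a throwaway unix listener so the
// monitor write path can be exercised without a running VM. The socket path
// and the pid value of 0 are hypothetical stand-ins.
func qemuShutdownSketch(logger *log.Logger) error {
	sockPath := "/tmp/qemu-monitor-sketch.sock" // hypothetical; well under the legacy 108-byte limit
	os.Remove(sockPath)                         // best effort: clear any stale socket from a previous run

	l, err := net.Listen("unix", sockPath)
	if err != nil {
		return err
	}
	defer l.Close()

	done := make(chan struct{})
	go func() {
		defer close(done)
		conn, err := l.Accept()
		if err != nil {
			return
		}
		defer conn.Close()

		buf := make([]byte, len(qemuGracefulShutdownMsg))
		if n, err := conn.Read(buf); err == nil {
			// Expect the ACPI power-down request, "system_powerdown\n".
			logger.Printf("[DEBUG] driver.qemu: sketch monitor received %q", string(buf[:n]))
		}
	}()

	// The pid argument is only used in log messages; 0 stands in for a real
	// qemu user process pid.
	if err := sendQemuShutdown(logger, sockPath, 0); err != nil {
		return err
	}
	<-done
	return nil
}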