agent/proxy: check if process is alive in addition to Wait

This commit is contained in:
Mitchell Hashimoto 2018-05-03 18:25:11 -07:00
parent 4301f7f1f5
commit 1d24df3827
No known key found for this signature in database
GPG Key ID: 744E147AA52F5B0A
4 changed files with 40 additions and 20 deletions

View File

@ -165,7 +165,25 @@ func (p *Daemon) keepAlive(stopCh <-chan struct{}, exitedCh chan<- struct{}) {
}
// Wait for the process to exit. Note that if we restored this proxy
// then Wait will always fail because we likely aren't the parent
// process. Therefore, we do an extra sanity check after to use other
// syscalls to verify the process is truly dead.
ps, err := process.Wait()
if _, err := findProcess(process.Pid); err == nil {
select {
case <-time.After(1 * time.Second):
// We want a busy loop, but not too busy. 1 second between
// detecting a process death seems reasonable.
case <-stopCh:
// If we receive a stop request we want to exit immediately.
return
}
continue
}
process = nil
if err != nil {
p.Logger.Printf("[INFO] agent/proxy: daemon exited with error: %s", err)
@ -336,15 +354,10 @@ func (p *Daemon) UnmarshalSnapshot(m map[string]interface{}) error {
Env: s.CommandEnv,
}
// For the pid, we want to find the process.
proc, err := os.FindProcess(s.Pid)
if err != nil {
return err
}
// FindProcess on many systems returns no error even if the process
// is now dead. We perform an extra check that the process is alive.
if err := processAlive(proc); err != nil {
proc, err := findProcess(s.Pid)
if err != nil {
return err
}

View File

@ -3,21 +3,28 @@
package proxy
import (
"fmt"
"os"
"syscall"
)
// processAlive for non-Windows. Note that this very likely doesn't
// findProcess for non-Windows. Note that this very likely doesn't
// work for all non-Windows platforms Go supports and we should expand
// support as we experience it.
func processAlive(p *os.Process) error {
func findProcess(pid int) (*os.Process, error) {
// FindProcess never fails on unix-like systems.
p, err := os.FindProcess(pid)
if err != nil {
return nil, err
}
// On Unix-like systems, we can verify a process is alive by sending
// a 0 signal. This will do nothing to the process but will still
// return errors if the process is gone.
err := p.Signal(syscall.Signal(0))
err = p.Signal(syscall.Signal(0))
if err == nil || err == syscall.EPERM {
return nil
return p, nil
}
return err
return nil, fmt.Errorf("process is dead")
}

View File

@ -3,17 +3,12 @@
package proxy
import (
"fmt"
"os"
)
func processAlive(p *os.Process) error {
func findProcess(pid int) (*os.Process, error) {
// On Windows, os.FindProcess will error if the process is not alive,
// so we don't have to do any further checking. The nature of it being
// non-nil means it seems to be healthy.
if p == nil {
return fmt.Errof("process no longer alive")
}
return nil
return os.FindProcess(pid)
}

View File

@ -151,8 +151,13 @@ func (m *Manager) Restore(path string) error {
return err
}
// Unmarshal the proxy. If there is an error we just continue on and
// ignore it. Errors restoring proxies should be exceptionally rare
// and only under scenarios where the proxy isn't running anymore or
// we won't have permission to access it. We log and continue.
if err := p.UnmarshalSnapshot(sp.Config); err != nil {
return err
m.Logger.Printf("[WARN] agent/proxy: error restoring proxy %q: %s", id, err)
continue
}
proxies[id] = p