2018-09-24 18:37:45 +00:00
|
|
|
package logmon
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2019-06-18 12:56:24 +00:00
|
|
|
"os"
|
2018-09-24 18:37:45 +00:00
|
|
|
"strings"
|
2019-01-05 00:08:47 +00:00
|
|
|
"sync"
|
2018-09-24 18:37:45 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
hclog "github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/nomad/client/lib/fifo"
|
2018-11-28 23:25:33 +00:00
|
|
|
"github.com/hashicorp/nomad/client/logmon/logging"
|
2018-09-24 18:37:45 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// processOutputCloseTolerance bounds how long we wait for the launched
// process to close its stdout/stderr before the pipe is force closed. Data
// written after this tolerance has elapsed is not captured.
const processOutputCloseTolerance = 2 * time.Second
|
|
|
|
|
|
|
|
// LogConfig describes where a task's stdout/stderr are read from and where
// and how the resulting log files are written.
type LogConfig struct {
	// LogDir is the host path that log files are written to.
	LogDir string

	// StdoutLogFile is the stdout log file path, relative to LogDir.
	StdoutLogFile string

	// StderrLogFile is the stderr log file path, relative to LogDir.
	StderrLogFile string

	// StdoutFifo is the host path of the pipe carrying stdout.
	StdoutFifo string

	// StderrFifo is the host path of the pipe carrying stderr.
	StderrFifo string

	// MaxFiles is the maximum number of rotated files allowed.
	MaxFiles int

	// MaxFileSizeMB is the maximum size in MB a log file may reach before
	// rotation occurs.
	MaxFileSizeMB int
}
|
|
|
|
|
|
|
|
type LogMon interface {
|
|
|
|
Start(*LogConfig) error
|
|
|
|
Stop() error
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewLogMon(logger hclog.Logger) LogMon {
|
|
|
|
return &logmonImpl{
|
|
|
|
logger: logger,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type logmonImpl struct {
|
|
|
|
logger hclog.Logger
|
|
|
|
tl *TaskLogger
|
2019-03-19 18:02:36 +00:00
|
|
|
lock sync.Mutex
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (l *logmonImpl) Start(cfg *LogConfig) error {
|
2019-03-19 18:02:36 +00:00
|
|
|
l.lock.Lock()
|
|
|
|
defer l.lock.Unlock()
|
|
|
|
|
|
|
|
// first time Start has been called
|
|
|
|
if l.tl == nil {
|
|
|
|
return l.start(cfg)
|
|
|
|
}
|
|
|
|
|
|
|
|
// stdout and stderr have been closed, this happens during task restarts
|
|
|
|
// restart the TaskLogger
|
|
|
|
if !l.tl.IsRunning() {
|
2019-03-16 03:59:18 +00:00
|
|
|
l.tl.Close()
|
2019-03-19 18:02:36 +00:00
|
|
|
return l.start(cfg)
|
2019-03-16 03:59:18 +00:00
|
|
|
}
|
|
|
|
|
2019-03-19 18:02:36 +00:00
|
|
|
// if the TaskLogger has been created and is currently running, noop
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *logmonImpl) start(cfg *LogConfig) error {
|
2018-09-24 18:37:45 +00:00
|
|
|
tl, err := NewTaskLogger(cfg, l.logger)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
l.tl = tl
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *logmonImpl) Stop() error {
|
2019-03-19 18:02:36 +00:00
|
|
|
l.lock.Lock()
|
|
|
|
defer l.lock.Unlock()
|
2018-09-26 19:32:26 +00:00
|
|
|
if l.tl != nil {
|
|
|
|
l.tl.Close()
|
|
|
|
}
|
2018-09-24 18:37:45 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type TaskLogger struct {
|
|
|
|
config *LogConfig
|
|
|
|
|
|
|
|
// rotator for stdout
|
|
|
|
lro *logRotatorWrapper
|
|
|
|
|
|
|
|
// rotator for stderr
|
|
|
|
lre *logRotatorWrapper
|
|
|
|
}
|
|
|
|
|
2019-03-19 18:02:36 +00:00
|
|
|
// IsRunning will return true as long as one rotator wrapper is still running
|
|
|
|
func (tl *TaskLogger) IsRunning() bool {
|
2020-06-12 13:17:35 +00:00
|
|
|
lroRunning := tl.lro != nil && tl.lro.isRunning()
|
|
|
|
lreRunning := tl.lre != nil && tl.lre.isRunning()
|
2019-03-19 18:02:36 +00:00
|
|
|
|
2020-06-12 13:17:35 +00:00
|
|
|
return lroRunning && lreRunning
|
2019-03-19 18:02:36 +00:00
|
|
|
}
|
|
|
|
|
2018-09-24 18:37:45 +00:00
|
|
|
func (tl *TaskLogger) Close() {
|
2019-01-05 00:08:47 +00:00
|
|
|
var wg sync.WaitGroup
|
2018-09-24 18:37:45 +00:00
|
|
|
if tl.lro != nil {
|
2019-01-05 00:08:47 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
tl.lro.Close()
|
|
|
|
wg.Done()
|
|
|
|
}()
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
if tl.lre != nil {
|
2019-01-05 00:08:47 +00:00
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
tl.lre.Close()
|
|
|
|
wg.Done()
|
|
|
|
}()
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
2019-01-05 00:08:47 +00:00
|
|
|
wg.Wait()
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewTaskLogger(cfg *LogConfig, logger hclog.Logger) (*TaskLogger, error) {
|
|
|
|
tl := &TaskLogger{config: cfg}
|
|
|
|
|
|
|
|
logFileSize := int64(cfg.MaxFileSizeMB * 1024 * 1024)
|
|
|
|
lro, err := logging.NewFileRotator(cfg.LogDir, cfg.StdoutLogFile,
|
|
|
|
cfg.MaxFiles, logFileSize, logger)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create stdout logfile for %q: %v", cfg.StdoutLogFile, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
wrapperOut, err := newLogRotatorWrapper(cfg.StdoutFifo, logger, lro)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
tl.lro = wrapperOut
|
|
|
|
|
|
|
|
lre, err := logging.NewFileRotator(cfg.LogDir, cfg.StderrLogFile,
|
|
|
|
cfg.MaxFiles, logFileSize, logger)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create stderr logfile for %q: %v", cfg.StderrLogFile, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
wrapperErr, err := newLogRotatorWrapper(cfg.StderrFifo, logger, lre)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
tl.lre = wrapperErr
|
|
|
|
|
|
|
|
return tl, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// logRotatorWrapper wraps our log rotator and exposes a pipe that can feed the
|
|
|
|
// log rotator data. The processOutWriter should be attached to the process and
|
|
|
|
// data will be copied from the reader to the rotator.
|
|
|
|
type logRotatorWrapper struct {
|
|
|
|
fifoPath string
|
2019-07-22 19:49:48 +00:00
|
|
|
rotatorWriter io.WriteCloser
|
2018-09-24 18:37:45 +00:00
|
|
|
hasFinishedCopied chan struct{}
|
|
|
|
logger hclog.Logger
|
2019-03-31 16:50:06 +00:00
|
|
|
|
|
|
|
processOutReader io.ReadCloser
|
2019-04-01 19:52:02 +00:00
|
|
|
openCompleted chan struct{}
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
|
2019-03-19 18:02:36 +00:00
|
|
|
// isRunning will return true until the reader is closed
|
|
|
|
func (l *logRotatorWrapper) isRunning() bool {
|
|
|
|
select {
|
|
|
|
case <-l.hasFinishedCopied:
|
|
|
|
return false
|
|
|
|
default:
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-24 18:37:45 +00:00
|
|
|
// newLogRotatorWrapper takes a rotator and returns a wrapper that has the
|
|
|
|
// processOutWriter to attach to the stdout or stderr of a process.
|
2019-07-22 19:49:48 +00:00
|
|
|
func newLogRotatorWrapper(path string, logger hclog.Logger, rotator io.WriteCloser) (*logRotatorWrapper, error) {
|
2018-09-24 18:37:45 +00:00
|
|
|
logger.Info("opening fifo", "path", path)
|
2019-06-18 12:56:24 +00:00
|
|
|
|
|
|
|
var openFn func() (io.ReadCloser, error)
|
|
|
|
var err error
|
|
|
|
|
2019-08-26 14:10:20 +00:00
|
|
|
_, serr := os.Stat(path)
|
|
|
|
if os.IsNotExist(serr) {
|
2019-06-18 12:56:24 +00:00
|
|
|
openFn, err = fifo.CreateAndRead(path)
|
|
|
|
} else {
|
|
|
|
openFn = func() (io.ReadCloser, error) {
|
|
|
|
return fifo.OpenReader(path)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-24 18:37:45 +00:00
|
|
|
if err != nil {
|
2019-08-26 14:10:20 +00:00
|
|
|
logger.Error("failed to create FIFO", "stat_error", serr, "create_err", err)
|
2018-09-24 18:37:45 +00:00
|
|
|
return nil, fmt.Errorf("failed to create fifo for extracting logs: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
wrap := &logRotatorWrapper{
|
|
|
|
fifoPath: path,
|
|
|
|
rotatorWriter: rotator,
|
|
|
|
hasFinishedCopied: make(chan struct{}),
|
2019-04-01 19:52:02 +00:00
|
|
|
openCompleted: make(chan struct{}),
|
2018-09-24 18:37:45 +00:00
|
|
|
logger: logger,
|
|
|
|
}
|
2019-06-18 12:56:24 +00:00
|
|
|
|
|
|
|
wrap.start(openFn)
|
2018-09-24 18:37:45 +00:00
|
|
|
return wrap, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// start starts a goroutine that copies from the pipe into the rotator. This is
|
|
|
|
// called by the constructor and not the user of the wrapper.
|
2019-06-18 12:56:24 +00:00
|
|
|
func (l *logRotatorWrapper) start(openFn func() (io.ReadCloser, error)) {
|
2018-09-24 18:37:45 +00:00
|
|
|
go func() {
|
|
|
|
defer close(l.hasFinishedCopied)
|
2019-03-31 16:50:06 +00:00
|
|
|
|
2019-06-18 12:56:24 +00:00
|
|
|
reader, err := openFn()
|
2019-03-31 16:50:06 +00:00
|
|
|
if err != nil {
|
2019-06-18 12:56:24 +00:00
|
|
|
l.logger.Warn("failed to open fifo", "error", err)
|
2019-03-31 16:50:06 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
l.processOutReader = reader
|
2019-04-01 19:52:02 +00:00
|
|
|
close(l.openCompleted)
|
2019-03-31 16:50:06 +00:00
|
|
|
|
|
|
|
_, err = io.Copy(l.rotatorWriter, reader)
|
2018-09-24 18:37:45 +00:00
|
|
|
if err != nil {
|
2019-04-01 15:29:02 +00:00
|
|
|
l.logger.Warn("failed to read from log fifo", "error", err)
|
2018-09-24 18:37:45 +00:00
|
|
|
// Close reader to propagate io error across pipe.
|
|
|
|
// Note that this may block until the process exits on
|
|
|
|
// Windows due to
|
|
|
|
// https://github.com/PowerShell/PowerShell/issues/4254
|
|
|
|
// or similar issues. Since this is already running in
|
|
|
|
// a goroutine its safe to block until the process is
|
|
|
|
// force-killed.
|
2019-03-31 16:50:06 +00:00
|
|
|
reader.Close()
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close closes the rotator and the process writer to ensure that the Wait
|
|
|
|
// command exits.
|
|
|
|
func (l *logRotatorWrapper) Close() {
|
|
|
|
// Wait up to the close tolerance before we force close
|
|
|
|
select {
|
|
|
|
case <-l.hasFinishedCopied:
|
|
|
|
case <-time.After(processOutputCloseTolerance):
|
|
|
|
}
|
|
|
|
|
|
|
|
// Closing the read side of a pipe may block on Windows if the process
|
|
|
|
// is being debugged as in:
|
|
|
|
// https://github.com/PowerShell/PowerShell/issues/4254
|
|
|
|
// The pipe will be closed and cleaned up when the process exits.
|
|
|
|
closeDone := make(chan struct{})
|
|
|
|
go func() {
|
|
|
|
defer close(closeDone)
|
2019-04-01 19:52:02 +00:00
|
|
|
|
|
|
|
// we must wait until reader is opened before we can close it, and cannot inteerrupt an in-flight open request
|
|
|
|
// The Close function uses processOutputCloseTolerance to protect against long running open called
|
|
|
|
// and then request will be interrupted and file will be closed on process shutdown
|
|
|
|
<-l.openCompleted
|
|
|
|
|
|
|
|
if l.processOutReader != nil {
|
|
|
|
err := l.processOutReader.Close()
|
|
|
|
if err != nil && !strings.Contains(err.Error(), "file already closed") {
|
|
|
|
l.logger.Warn("error closing read-side of process output pipe", "err", err)
|
|
|
|
}
|
2018-09-24 18:37:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-closeDone:
|
|
|
|
case <-time.After(processOutputCloseTolerance):
|
|
|
|
l.logger.Warn("timed out waiting for read-side of process output pipe to close")
|
|
|
|
}
|
|
|
|
|
|
|
|
l.rotatorWriter.Close()
|
|
|
|
}
|