83089feff5
Add an RPC timeout for logmon. In https://github.com/hashicorp/nomad/issues/6461#issuecomment-559747758, `logmonClient.Stop` locked up and indefinitely blocked the task runner destroy operation. This is an incremental improvement. We still need to follow up to understand how we got to that state, and the full impact of a locked-up Stop and its link to pending allocations on restart.
45 lines
1.1 KiB
Go
45 lines
1.1 KiB
Go
package logmon
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/hashicorp/nomad/client/logmon/proto"
|
|
"github.com/hashicorp/nomad/helper/pluginutils/grpcutils"
|
|
)
|
|
|
|
type logmonClient struct {
|
|
client proto.LogMonClient
|
|
|
|
// doneCtx is closed when the plugin exits
|
|
doneCtx context.Context
|
|
}
|
|
|
|
// logmonRPCTimeout is the deadline applied to each logmon RPC (Start and
// Stop) so that a call cannot block its caller indefinitely.
const logmonRPCTimeout = time.Minute
|
|
|
|
func (c *logmonClient) Start(cfg *LogConfig) error {
|
|
req := &proto.StartRequest{
|
|
LogDir: cfg.LogDir,
|
|
StdoutFileName: cfg.StdoutLogFile,
|
|
StderrFileName: cfg.StderrLogFile,
|
|
MaxFiles: uint32(cfg.MaxFiles),
|
|
MaxFileSizeMb: uint32(cfg.MaxFileSizeMB),
|
|
StdoutFifo: cfg.StdoutFifo,
|
|
StderrFifo: cfg.StderrFifo,
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), logmonRPCTimeout)
|
|
defer cancel()
|
|
|
|
_, err := c.client.Start(ctx, req)
|
|
return grpcutils.HandleGrpcErr(err, c.doneCtx)
|
|
}
|
|
|
|
func (c *logmonClient) Stop() error {
|
|
req := &proto.StopRequest{}
|
|
ctx, cancel := context.WithTimeout(context.Background(), logmonRPCTimeout)
|
|
defer cancel()
|
|
|
|
_, err := c.client.Stop(ctx, req)
|
|
return grpcutils.HandleGrpcErr(err, c.doneCtx)
|
|
}
|