Add graceful handling of malformed persisted service/check files.

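An earlier change made writing these files atomic, but that was not
enough to cover cases such as an OS crash, which can still leave an
empty or malformed file behind. Loading now handles such files more
gracefully: empty files are removed with a warning, and files that fail
to decode are logged and skipped instead of aborting the load.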

Fixes #1221.
This commit is contained in:
Kyle Havlovitz 2018-01-19 14:07:36 -08:00
parent dac4367fb2
commit 17ec4a9394
No known key found for this signature in database
GPG Key ID: 8A5E6B173056AD6C
1 changed files with 24 additions and 2 deletions

View File

@ -2150,12 +2150,23 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig) error {
return fmt.Errorf("failed reading service file %q: %s", file, err)
}
// If the file ended up empty as a result of something like an OS crash, remove
// it for convenience and log about it.
if len(buf) == 0 {
a.logger.Printf("[WARN] Removing leftover empty service file %q", file)
if err := os.Remove(file); err != nil {
a.logger.Printf("[WARN] Error removing leftover empty service file %q: %v", file, err)
}
continue
}
// Try decoding the service definition
var p persistedService
if err := json.Unmarshal(buf, &p); err != nil {
// Backwards-compatibility for pre-0.5.1 persisted services
if err := json.Unmarshal(buf, &p.Service); err != nil {
return fmt.Errorf("failed decoding service file %q: %s", file, err)
a.logger.Printf("[WARN] Failed decoding service file %q: %s", file, err)
continue
}
}
serviceID := p.Service.ID
@ -2231,10 +2242,21 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig) error {
return fmt.Errorf("failed reading check file %q: %s", file, err)
}
// If the file ended up empty as a result of something like an OS crash, remove
// it for convenience and log about it.
if len(buf) == 0 {
a.logger.Printf("[WARN] Removing leftover empty check file %q", file)
if err := os.Remove(file); err != nil {
a.logger.Printf("[WARN] Error removing leftover empty check file %q: %v", file, err)
}
continue
}
// Decode the check
var p persistedCheck
if err := json.Unmarshal(buf, &p); err != nil {
return fmt.Errorf("Failed decoding check file %q: %s", file, err)
a.logger.Printf("[WARN] Failed decoding check file %q: %s", file, err)
continue
}
checkID := p.Check.CheckID