agent: purge service/check registration files for incorrect partitions on reload (#11607)
This commit is contained in:
parent
bd3fb0d0e9
commit
fa7a66cd30
|
@ -0,0 +1,3 @@
|
||||||
|
```release-note:improvement
|
||||||
|
agent: **(Enterprise only)** purge service/check registration files for incorrect partitions on reload
|
||||||
|
```
|
|
@ -1920,7 +1920,7 @@ func (a *Agent) readPersistedServiceConfigs() (map[structs.ServiceID]*structs.Se
|
||||||
file := filepath.Join(configDir, fi.Name())
|
file := filepath.Join(configDir, fi.Name())
|
||||||
buf, err := ioutil.ReadFile(file)
|
buf, err := ioutil.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed reading service config file %q: %s", file, err)
|
return nil, fmt.Errorf("failed reading service config file %q: %w", file, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try decoding the service config definition
|
// Try decoding the service config definition
|
||||||
|
@ -1939,10 +1939,28 @@ func (a *Agent) readPersistedServiceConfigs() (map[structs.ServiceID]*structs.Se
|
||||||
newPath := a.makeServiceConfigFilePath(serviceID)
|
newPath := a.makeServiceConfigFilePath(serviceID)
|
||||||
if file != newPath {
|
if file != newPath {
|
||||||
if err := os.Rename(file, newPath); err != nil {
|
if err := os.Rename(file, newPath); err != nil {
|
||||||
a.logger.Error("Failed renaming service config file from %s to %s", file, newPath, err)
|
a.logger.Error("Failed renaming service config file",
|
||||||
|
"file", file,
|
||||||
|
"targetFile", newPath,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !structs.EqualPartitions(a.AgentEnterpriseMeta().PartitionOrDefault(), p.PartitionOrDefault()) {
|
||||||
|
a.logger.Info("Purging service config file in wrong partition",
|
||||||
|
"file", file,
|
||||||
|
"partition", p.PartitionOrDefault(),
|
||||||
|
)
|
||||||
|
if err := os.Remove(file); err != nil {
|
||||||
|
a.logger.Error("Failed purging service config file",
|
||||||
|
"file", file,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
out[serviceID] = p.Defaults
|
out[serviceID] = p.Defaults
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3043,14 +3061,18 @@ func (a *Agent) loadCheckState(check *structs.HealthCheck) error {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("failed reading file %q: %s", file, err)
|
return fmt.Errorf("failed reading check state %q: %w", file, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := os.Rename(oldFile, file); err != nil {
|
if err := os.Rename(oldFile, file); err != nil {
|
||||||
a.logger.Error("Failed renaming service file from %s to %s", oldFile, file, err)
|
a.logger.Error("Failed renaming check state",
|
||||||
|
"file", oldFile,
|
||||||
|
"targetFile", file,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("failed reading file %q: %s", file, err)
|
return fmt.Errorf("failed reading file %q: %w", file, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3240,7 +3262,7 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return fmt.Errorf("Failed reading services dir %q: %s", svcDir, err)
|
return fmt.Errorf("Failed reading services dir %q: %w", svcDir, err)
|
||||||
}
|
}
|
||||||
for _, fi := range files {
|
for _, fi := range files {
|
||||||
// Skip all dirs
|
// Skip all dirs
|
||||||
|
@ -3258,7 +3280,7 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
file := filepath.Join(svcDir, fi.Name())
|
file := filepath.Join(svcDir, fi.Name())
|
||||||
buf, err := ioutil.ReadFile(file)
|
buf, err := ioutil.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading service file %q: %s", file, err)
|
return fmt.Errorf("failed reading service file %q: %w", file, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try decoding the service definition
|
// Try decoding the service definition
|
||||||
|
@ -3278,10 +3300,28 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
newPath := a.makeServiceFilePath(p.Service.CompoundServiceID())
|
newPath := a.makeServiceFilePath(p.Service.CompoundServiceID())
|
||||||
if file != newPath {
|
if file != newPath {
|
||||||
if err := os.Rename(file, newPath); err != nil {
|
if err := os.Rename(file, newPath); err != nil {
|
||||||
a.logger.Error("Failed renaming service file from %s to %s", file, newPath, err)
|
a.logger.Error("Failed renaming service file",
|
||||||
|
"file", file,
|
||||||
|
"targetFile", newPath,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !structs.EqualPartitions(a.AgentEnterpriseMeta().PartitionOrDefault(), p.Service.PartitionOrDefault()) {
|
||||||
|
a.logger.Info("Purging service file in wrong partition",
|
||||||
|
"file", file,
|
||||||
|
"partition", p.Service.EnterpriseMeta.PartitionOrDefault(),
|
||||||
|
)
|
||||||
|
if err := os.Remove(file); err != nil {
|
||||||
|
a.logger.Error("Failed purging service file",
|
||||||
|
"file", file,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Restore LocallyRegisteredAsSidecar, see persistedService.LocallyRegisteredAsSidecar
|
// Restore LocallyRegisteredAsSidecar, see persistedService.LocallyRegisteredAsSidecar
|
||||||
p.Service.LocallyRegisteredAsSidecar = p.LocallyRegisteredAsSidecar
|
p.Service.LocallyRegisteredAsSidecar = p.LocallyRegisteredAsSidecar
|
||||||
|
|
||||||
|
@ -3294,10 +3334,10 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
"source", p.Source,
|
"source", p.Source,
|
||||||
)
|
)
|
||||||
if err := a.purgeService(serviceID); err != nil {
|
if err := a.purgeService(serviceID); err != nil {
|
||||||
return fmt.Errorf("failed purging service %q: %s", serviceID, err)
|
return fmt.Errorf("failed purging service %q: %w", serviceID, err)
|
||||||
}
|
}
|
||||||
if err := a.purgeServiceConfig(serviceID); err != nil {
|
if err := a.purgeServiceConfig(serviceID); err != nil {
|
||||||
return fmt.Errorf("failed purging service config %q: %s", serviceID, err)
|
return fmt.Errorf("failed purging service config %q: %w", serviceID, err)
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -3310,10 +3350,10 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
"file", file,
|
"file", file,
|
||||||
)
|
)
|
||||||
if err := a.purgeService(serviceID); err != nil {
|
if err := a.purgeService(serviceID); err != nil {
|
||||||
return fmt.Errorf("failed purging service %q: %s", serviceID.String(), err)
|
return fmt.Errorf("failed purging service %q: %w", serviceID.String(), err)
|
||||||
}
|
}
|
||||||
if err := a.purgeServiceConfig(serviceID); err != nil {
|
if err := a.purgeServiceConfig(serviceID); err != nil {
|
||||||
return fmt.Errorf("failed purging service config %q: %s", serviceID.String(), err)
|
return fmt.Errorf("failed purging service config %q: %w", serviceID.String(), err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
a.logger.Debug("restored service definition from file",
|
a.logger.Debug("restored service definition from file",
|
||||||
|
@ -3334,7 +3374,7 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
checkStateSnapshot: snap,
|
checkStateSnapshot: snap,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed adding service %q: %s", serviceID, err)
|
return fmt.Errorf("failed adding service %q: %w", serviceID, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3343,7 +3383,7 @@ func (a *Agent) loadServices(conf *config.RuntimeConfig, snap map[structs.CheckI
|
||||||
if a.State.Service(serviceID) == nil {
|
if a.State.Service(serviceID) == nil {
|
||||||
// This can be cleaned up now.
|
// This can be cleaned up now.
|
||||||
if err := a.purgeServiceConfig(serviceID); err != nil {
|
if err := a.purgeServiceConfig(serviceID); err != nil {
|
||||||
return fmt.Errorf("failed purging service config %q: %s", serviceID, err)
|
return fmt.Errorf("failed purging service config %q: %w", serviceID, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3386,7 +3426,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return fmt.Errorf("Failed reading checks dir %q: %s", checkDir, err)
|
return fmt.Errorf("Failed reading checks dir %q: %w", checkDir, err)
|
||||||
}
|
}
|
||||||
for _, fi := range files {
|
for _, fi := range files {
|
||||||
// Ignore dirs - we only care about the check definition files
|
// Ignore dirs - we only care about the check definition files
|
||||||
|
@ -3398,7 +3438,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
file := filepath.Join(checkDir, fi.Name())
|
file := filepath.Join(checkDir, fi.Name())
|
||||||
buf, err := ioutil.ReadFile(file)
|
buf, err := ioutil.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading check file %q: %s", file, err)
|
return fmt.Errorf("failed reading check file %q: %w", file, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode the check
|
// Decode the check
|
||||||
|
@ -3416,10 +3456,25 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
newPath := filepath.Join(a.config.DataDir, checksDir, checkID.StringHashSHA256())
|
newPath := filepath.Join(a.config.DataDir, checksDir, checkID.StringHashSHA256())
|
||||||
if file != newPath {
|
if file != newPath {
|
||||||
if err := os.Rename(file, newPath); err != nil {
|
if err := os.Rename(file, newPath); err != nil {
|
||||||
a.logger.Error("Failed renaming service file from %s to %s", file, newPath, err)
|
a.logger.Error("Failed renaming check file",
|
||||||
|
"file", file,
|
||||||
|
"targetFile", newPath,
|
||||||
|
"error", err,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !structs.EqualPartitions(a.AgentEnterpriseMeta().PartitionOrDefault(), p.Check.PartitionOrDefault()) {
|
||||||
|
a.logger.Info("Purging check file in wrong partition",
|
||||||
|
"file", file,
|
||||||
|
"partition", p.Check.PartitionOrDefault(),
|
||||||
|
)
|
||||||
|
if err := os.Remove(file); err != nil {
|
||||||
|
return fmt.Errorf("failed purging check %q: %w", checkID, err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
source, ok := ConfigSourceFromName(p.Source)
|
source, ok := ConfigSourceFromName(p.Source)
|
||||||
if !ok {
|
if !ok {
|
||||||
a.logger.Warn("check exists with invalid source, purging",
|
a.logger.Warn("check exists with invalid source, purging",
|
||||||
|
@ -3427,7 +3482,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
"source", p.Source,
|
"source", p.Source,
|
||||||
)
|
)
|
||||||
if err := a.purgeCheck(checkID); err != nil {
|
if err := a.purgeCheck(checkID); err != nil {
|
||||||
return fmt.Errorf("failed purging check %q: %s", checkID, err)
|
return fmt.Errorf("failed purging check %q: %w", checkID, err)
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -3440,7 +3495,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
"file", file,
|
"file", file,
|
||||||
)
|
)
|
||||||
if err := a.purgeCheck(checkID); err != nil {
|
if err := a.purgeCheck(checkID); err != nil {
|
||||||
return fmt.Errorf("Failed purging check %q: %s", checkID, err)
|
return fmt.Errorf("Failed purging check %q: %w", checkID, err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Default check to critical to avoid placing potentially unhealthy
|
// Default check to critical to avoid placing potentially unhealthy
|
||||||
|
@ -3460,7 +3515,7 @@ func (a *Agent) loadChecks(conf *config.RuntimeConfig, snap map[structs.CheckID]
|
||||||
"error", err,
|
"error", err,
|
||||||
)
|
)
|
||||||
if err := a.purgeCheck(checkID); err != nil {
|
if err := a.purgeCheck(checkID); err != nil {
|
||||||
return fmt.Errorf("Failed purging check %q: %s", checkID, err)
|
return fmt.Errorf("Failed purging check %q: %w", checkID, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
a.logger.Debug("restored health check from file",
|
a.logger.Debug("restored health check from file",
|
||||||
|
|
Loading…
Reference in New Issue