From 11f68bfca245cdf61c1be9890764f338c7b9f270 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 3 Jul 2017 11:53:54 -0700 Subject: [PATCH 1/2] Add more logging to restore state errors --- client/client.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/client/client.go b/client/client.go index 0316ce30f..578e665d9 100644 --- a/client/client.go +++ b/client/client.go @@ -299,7 +299,15 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic // Restore the state if err := c.restoreState(); err != nil { - return nil, fmt.Errorf("failed to restore state: %v", err) + logger.Printf("[ERR] client: failed to restore state: %v", err) + logger.Printf("[ERR] client: Nomad is unable to start due to corrupt state. "+ + "The safest way to proceed is to manually stop running task processes "+ + "and remove Nomad's state dir (%q) before restarting. Lost allocations "+ + "will be rescheduled.", c.config.StateDir) + logger.Printf("[ERR] client: Corrupt state is often caused by a bug. Please " + + "report as much information as possible to " + + "https://github.com/hashicorp/nomad/issues") + return nil, fmt.Errorf("failed to restore state") } // Register and then start heartbeating to the servers. From 596727230b220205d094b7ab8a2014e34fca1571 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 3 Jul 2017 12:29:21 -0700 Subject: [PATCH 2/2] Suggest wiping out alloc dir too --- client/client.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index 578e665d9..c75a71241 100644 --- a/client/client.go +++ b/client/client.go @@ -302,8 +302,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic logger.Printf("[ERR] client: failed to restore state: %v", err) logger.Printf("[ERR] client: Nomad is unable to start due to corrupt state. 
"+ "The safest way to proceed is to manually stop running task processes "+ - "and remove Nomad's state dir (%q) before restarting. Lost allocations "+ - "will be rescheduled.", c.config.StateDir) + "and remove Nomad's state (%q) and alloc (%q) directories before "+ + "restarting. Lost allocations will be rescheduled.", + c.config.StateDir, c.config.AllocDir) logger.Printf("[ERR] client: Corrupt state is often caused by a bug. Please " + "report as much information as possible to " + "https://github.com/hashicorp/nomad/issues")