From 1c3247180532d2aa09775c48a5f282c97bb6b189 Mon Sep 17 00:00:00 2001 From: Anthony Davis Date: Wed, 11 Jan 2023 15:39:24 -0600 Subject: [PATCH] Fix rejoin_after_leave behavior (#15552) --- .changelog/15552.txt | 3 +++ command/agent/agent.go | 1 + command/agent/config.go | 2 +- nomad/server.go | 1 - website/content/docs/upgrade/upgrade-specific.mdx | 13 +++++++++++++ 5 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 .changelog/15552.txt diff --git a/.changelog/15552.txt b/.changelog/15552.txt new file mode 100644 index 000000000..61f2ad411 --- /dev/null +++ b/.changelog/15552.txt @@ -0,0 +1,3 @@ +```release-note:bug +server: Fixed a bug where rejoin_after_leave config was not being respected +``` diff --git a/command/agent/agent.go b/command/agent/agent.go index 9b61112cf..900b16f3f 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -309,6 +309,7 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { conf.RPCAddr.IP = rpcAddr.IP conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() + conf.SerfConfig.RejoinAfterLeave = agentConfig.Server.RejoinAfterLeave // Set up the advertise addresses rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) diff --git a/command/agent/config.go b/command/agent/config.go index 1266ad26d..60de2d2ea 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -547,7 +547,7 @@ type ServerConfig struct { RetryIntervalHCL string `hcl:"retry_interval" json:"-"` // RejoinAfterLeave controls our interaction with the cluster after leave. - // When set to false (default), a leave causes Consul to not rejoin + // When set to false (default), a leave causes Nomad to not rejoin // the cluster until an explicit join is received. If this is set to // true, we ignore the leave, and rejoin the cluster on start. RejoinAfterLeave bool `hcl:"rejoin_after_leave"` diff --git a/nomad/server.go b/nomad/server.go index a0b0bf16f..a796a3931 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1531,7 +1531,6 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) ( return nil, err } } - conf.RejoinAfterLeave = true // LeavePropagateDelay is used to make sure broadcasted leave intents propagate // This value was tuned using https://www.serf.io/docs/internals/simulator.html to // allow for convergence in 99.9% of nodes in a 10 node cluster diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index 4d18594d8..f79271920 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -62,6 +62,18 @@ from the Nomad client by setting [`set_environment_variables`][artifact_env]. The use of filesystem isolation can be disabled in Client configuration by setting [`disable_filesystem_isolation`][artifact_fs_isolation]. +#### Server `rejoin_after_leave` (default: `false`) now enforced + +All Nomad versions prior to v1.5.0 have incorrectly ignored the Server [`rejoin_after_leave`] +configuration option. This bug has been fixed in Nomad version v1.5.0. + +Previous to v1.5.0 the behavior of Nomad `rejoin_after_leave` was always `true`, regardless of +Nomad server configuration, while the documentation incorrectly indicated a default of `false`. + +Cluster operators should be aware that explicit `leave` events (such as `nomad server force-leave`) +will now result in behavior which matches this configuration, and should review whether they +were inadvertently relying on the buggy behavior. + ## Nomad 1.4.0 #### Possible Panic During Upgrades @@ -1545,6 +1557,7 @@ deleted and then Nomad 0.3.0 can be launched. [`sidecar_task.config`]: /docs/job-specification/sidecar_task#config [`raft_protocol`]: /docs/configuration/server#raft_protocol [`raft protocol`]: /docs/configuration/server#raft_protocol +[`rejoin_after_leave`]: /docs/configuration/server#rejoin_after_leave [reserved]: /docs/configuration/client#reserved-parameters [task-config]: /docs/job-specification/task#config [tls-guide]: https://learn.hashicorp.com/tutorials/nomad/security-enable-tls