Fix rejoin_after_leave behavior (#15552)

This commit is contained in:
Anthony Davis 2023-01-11 15:39:24 -06:00 committed by GitHub
parent 7d1059b5ae
commit 1c32471805
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 18 additions and 2 deletions

3
.changelog/15552.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:bug
server: Fixed a bug where rejoin_after_leave config was not being respected
```

View File

@ -309,6 +309,7 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
conf.RPCAddr.IP = rpcAddr.IP
conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
conf.SerfConfig.RejoinAfterLeave = agentConfig.Server.RejoinAfterLeave
// Set up the advertise addresses
rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)

View File

@ -547,7 +547,7 @@ type ServerConfig struct {
RetryIntervalHCL string `hcl:"retry_interval" json:"-"`
// RejoinAfterLeave controls our interaction with the cluster after leave.
// When set to false (default), a leave causes Consul to not rejoin
// When set to false (default), a leave causes Nomad to not rejoin
// the cluster until an explicit join is received. If this is set to
// true, we ignore the leave, and rejoin the cluster on start.
RejoinAfterLeave bool `hcl:"rejoin_after_leave"`

View File

@ -1531,7 +1531,6 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (
return nil, err
}
}
conf.RejoinAfterLeave = true
// LeavePropagateDelay is used to make sure broadcasted leave intents propagate.
// This value was tuned using https://www.serf.io/docs/internals/simulator.html to
// allow for convergence in 99.9% of nodes in a 10 node cluster

View File

@ -62,6 +62,18 @@ from the Nomad client by setting [`set_environment_variables`][artifact_env].
The use of filesystem isolation can be disabled in Client configuration by
setting [`disable_filesystem_isolation`][artifact_fs_isolation].
#### Server `rejoin_after_leave` (default: `false`) now enforced
All Nomad versions prior to v1.5.0 incorrectly ignored the Server [`rejoin_after_leave`]
configuration option. This bug has been fixed in Nomad v1.5.0.
Prior to v1.5.0, Nomad always behaved as if `rejoin_after_leave` were set to `true`, regardless of
the Nomad server configuration, even though the documentation indicated a default of `false`.
Cluster operators should be aware that explicit `leave` events (such as `nomad server force-leave`)
are now handled according to this configuration: with the default of `false`, a server that has left
does not rejoin the cluster until an explicit join is received. Operators should review whether they
were inadvertently relying on the previous buggy behavior.
## Nomad 1.4.0
#### Possible Panic During Upgrades
@ -1545,6 +1557,7 @@ deleted and then Nomad 0.3.0 can be launched.
[`sidecar_task.config`]: /docs/job-specification/sidecar_task#config
[`raft_protocol`]: /docs/configuration/server#raft_protocol
[`raft protocol`]: /docs/configuration/server#raft_protocol
[`rejoin_after_leave`]: /docs/configuration/server#rejoin_after_leave
[reserved]: /docs/configuration/client#reserved-parameters
[task-config]: /docs/job-specification/task#config
[tls-guide]: https://learn.hashicorp.com/tutorials/nomad/security-enable-tls