From ac3cf108491f76e624616a922ea11b6a6e9c67db Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Tue, 27 Jul 2021 13:17:55 -0400 Subject: [PATCH] nomad: only activate one-time auth tokens with 1.1.0 (#10952) Fix a panic in handling one-time auth tokens, used to support `nomad ui -authenticate`. If the Nomad leader is running 1.1.x while some servers are still running 1.0.x, the pre-1.1.0 servers risk crashing and the cluster may lose quorum. That can happen when the `nomad ui -authenticate` command is issued, or when the leader scans for expired tokens every 10 minutes. Fixed #10943. --- .changelog/10952.txt | 3 +++ nomad/acl_endpoint.go | 12 ++++++++++++ nomad/leader.go | 6 ++++++ 3 files changed, 21 insertions(+) create mode 100644 .changelog/10952.txt diff --git a/.changelog/10952.txt b/.changelog/10952.txt new file mode 100644 index 000000000..87e827df5 --- /dev/null +++ b/.changelog/10952.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a panic that may arise when upgrading pre-1.1.0 cluster to 1.1.x and may cause cluster outage +``` diff --git a/nomad/acl_endpoint.go b/nomad/acl_endpoint.go index f38b3064f..b9a55cfa0 100644 --- a/nomad/acl_endpoint.go +++ b/nomad/acl_endpoint.go @@ -851,6 +851,10 @@ func (a *ACL) UpsertOneTimeToken(args *structs.OneTimeTokenUpsertRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "upsert_one_time_token"}, time.Now()) + if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + } + // Snapshot the state state, err := a.srv.State().Snapshot() if err != nil { @@ -899,6 +903,10 @@ func (a *ACL) ExchangeOneTimeToken(args *structs.OneTimeTokenExchangeRequest, re defer metrics.MeasureSince( []string{"nomad", "acl", "exchange_one_time_token"}, time.Now()) + if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + return 
fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + } + // Snapshot the state state, err := a.srv.State().Snapshot() if err != nil { @@ -952,6 +960,10 @@ func (a *ACL) ExpireOneTimeTokens(args *structs.OneTimeTokenExpireRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "expire_one_time_tokens"}, time.Now()) + if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) + } + // Check management level permissions if a.srv.config.ACLEnabled { if acl, err := a.srv.ResolveToken(args.AuthToken); err != nil { diff --git a/nomad/leader.go b/nomad/leader.go index 4f829474a..64f06b848 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -48,6 +48,8 @@ var minClusterIDVersion = version.Must(version.NewVersion("0.10.4")) var minJobRegisterAtomicEvalVersion = version.Must(version.NewVersion("0.12.1")) +var minOneTimeAuthenticationTokenVersion = version.Must(version.NewVersion("1.1.0")) + // monitorLeadership is used to monitor if we acquire or lose our role // as the leader in the Raft cluster. There is some work the leader is // expected to do, so we must react to changes @@ -739,6 +741,10 @@ func (s *Server) schedulePeriodic(stopCh chan struct{}) { s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobCSIVolumeClaimGC, index)) } case <-oneTimeTokenGC.C: + if !ServersMeetMinimumVersion(s.Members(), minOneTimeAuthenticationTokenVersion, false) { + continue + } + if index, ok := getLatest(); ok { s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobOneTimeTokenGC, index)) }