package consul

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/agent/structs"
	tokenStore "github.com/hashicorp/consul/agent/token"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/sdk/testutil"
	"github.com/hashicorp/consul/sdk/testutil/retry"
	"github.com/hashicorp/consul/testrpc"
	"github.com/hashicorp/go-hclog"
	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hashicorp/serf/serf"
	"github.com/stretchr/testify/require"
)
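// TestLeader_RegisterMember verifies that when a client joins the LAN the
// leader registers the node with a passing serfHealth check, and that the
// server registers itself and the "consul" service.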
func TestLeader_RegisterMember(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	// Try to join
	joinLAN(t, c1, s1)

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Client should be registered
	state := s1.fsm.State()
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})

	// Should have a check
	_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(checks) != 1 {
		t.Fatalf("client missing check")
	}
	if checks[0].CheckID != structs.SerfCheckID {
		t.Fatalf("bad check: %v", checks[0])
	}
	if checks[0].Name != structs.SerfCheckName {
		t.Fatalf("bad check: %v", checks[0])
	}
	if checks[0].Status != api.HealthPassing {
		t.Fatalf("bad check: %v", checks[0])
	}

	// Server should be registered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(s1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatalf("server not registered")
		}
	})

	// Service should be registered
	_, services, err := state.NodeServices(nil, s1.config.NodeName, nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if _, ok := services.Services["consul"]; !ok {
		t.Fatalf("consul service not registered: %v", services)
	}
}
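// TestLeader_FailedMember verifies that a client that dies after joining
// stays in the catalog and has its serfHealth check flipped to critical.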
func TestLeader_FailedMember(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Try to join
	joinLAN(t, c1, s1)

	// Fail the member
	c1.Shutdown()

	// Should be registered
	state := s1.fsm.State()
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})

	// Should have a check
	_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if len(checks) != 1 {
		t.Fatalf("client missing check")
	}
	if checks[0].CheckID != structs.SerfCheckID {
		t.Fatalf("bad check: %v", checks[0])
	}
	if checks[0].Name != structs.SerfCheckName {
		t.Fatalf("bad check: %v", checks[0])
	}

	retry.Run(t, func(r *retry.R) {
		_, checks, err = state.NodeChecks(nil, c1.config.NodeName, nil)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if got, want := checks[0].Status, api.HealthCritical; got != want {
			r.Fatalf("got status %q want %q", got, want)
		}
	})
}
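// TestLeader_LeftMember verifies that a client that gracefully leaves is
// deregistered from the catalog.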
func TestLeader_LeftMember(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	// Try to join
	joinLAN(t, c1, s1)

	state := s1.fsm.State()

	// Should be registered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})

	// Node should leave
	c1.Leave()
	c1.Shutdown()

	// Should be deregistered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node != nil {
			r.Fatal("client still registered")
		}
	})
}
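// TestLeader_ReapMember verifies that a member reaped by serf is
// deregistered from the catalog.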
func TestLeader_ReapMember(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	// Try to join
	joinLAN(t, c1, s1)

	state := s1.fsm.State()

	// Should be registered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})

	// Simulate a node reaping
	mems := s1.LANMembers()
	var c1mem serf.Member
	for _, m := range mems {
		if m.Name == c1.config.NodeName {
			c1mem = m
			c1mem.Status = StatusReap
			break
		}
	}
	s1.reconcileCh <- c1mem

	// Should be deregistered; we have to poll quickly here because
	// anti-entropy will put it back.
	reaped := false
	for start := time.Now(); time.Since(start) < 5*time.Second; {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
		if node == nil {
			reaped = true
			break
		}
	}
	if !reaped {
		t.Fatalf("client should not be registered")
	}
}
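// TestLeader_CheckServersMeta verifies that server metadata (read_replica,
// the deprecated non_voter flag, and the build version) is reflected in the
// "consul" service meta when the member's serf tags change.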
func TestLeader_CheckServersMeta(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = true
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	// Try to join
	joinLAN(t, s1, s2)
	joinLAN(t, s1, s3)

	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	testrpc.WaitForLeader(t, s2.RPC, "dc1")
	testrpc.WaitForLeader(t, s3.RPC, "dc1")
	state := s1.fsm.State()

	consulService := &structs.NodeService{
		ID:      "consul",
		Service: "consul",
	}
	// s3 should be registered
	retry.Run(t, func(r *retry.R) {
		_, service, err := state.NodeService(s3.config.NodeName, "consul", &consulService.EnterpriseMeta)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if service == nil {
			r.Fatal("client not registered")
		}
		if service.Meta["non_voter"] != "false" {
			r.Fatalf("Expected to be non_voter == false, was: %s", service.Meta["non_voter"])
		}
	})

	member := serf.Member{}
	for _, m := range s1.serfLAN.Members() {
		if m.Name == s3.config.NodeName {
			member = m
			member.Tags = make(map[string]string)
			for key, value := range m.Tags {
				member.Tags[key] = value
			}
		}
	}
	if member.Name != s3.config.NodeName {
		t.Fatal("could not find node in serf members")
	}
	versionToExpect := "19.7.9"

	retry.Run(t, func(r *retry.R) {
		// DEPRECATED - remove nonvoter tag in favor of read_replica in a future version of consul
		member.Tags["nonvoter"] = "1"
		member.Tags["read_replica"] = "1"
		member.Tags["build"] = versionToExpect
		err := s1.handleAliveMember(member)
		if err != nil {
			r.Fatalf("Unexpected error :%v", err)
		}
		_, service, err := state.NodeService(s3.config.NodeName, "consul", &consulService.EnterpriseMeta)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if service == nil {
			r.Fatal("client not registered")
		}
		// DEPRECATED - remove non_voter in favor of read_replica in a future version of consul
		if service.Meta["non_voter"] != "true" {
			r.Fatalf("Expected to be non_voter == true, was: %s", service.Meta["non_voter"])
		}
		if service.Meta["read_replica"] != "true" {
			r.Fatalf("Expected to be read_replica == true, was: %s", service.Meta["read_replica"])
		}
		newVersion := service.Meta["version"]
		if newVersion != versionToExpect {
			r.Fatalf("Expected version to be updated to %s, was %s", versionToExpect, newVersion)
		}
	})
}
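// TestLeader_ReapServer verifies that reconcileReaped deregisters a server
// that is no longer present in the known-members set.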
func TestLeader_ReapServer(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = true
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "allow"
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	// Try to join
	joinLAN(t, s1, s2)
	joinLAN(t, s1, s3)

	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	testrpc.WaitForLeader(t, s2.RPC, "dc1")
	testrpc.WaitForLeader(t, s3.RPC, "dc1")
	state := s1.fsm.State()

	// s3 should be registered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(s3.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})

	// call reconcileReaped with a map that does not contain s3
	knownMembers := make(map[string]struct{})
	knownMembers[s1.config.NodeName] = struct{}{}
	knownMembers[s2.config.NodeName] = struct{}{}

	err := s1.reconcileReaped(knownMembers)
	if err != nil {
		t.Fatalf("Unexpected error :%v", err)
	}

	// s3 should be deregistered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(s3.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node != nil {
			r.Fatalf("server with id %v should not be registered", s3.config.NodeID)
		}
	})
}
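// TestLeader_Reconcile_ReapMember verifies that reconcile removes a catalog
// node that is not present in the serf member list.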
func TestLeader_Reconcile_ReapMember(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Register a non-existing member
	dead := structs.RegisterRequest{
		Datacenter: s1.config.Datacenter,
		Node:       "no-longer-around",
		Address:    "127.1.1.1",
		Check: &structs.HealthCheck{
			Node:    "no-longer-around",
			CheckID: structs.SerfCheckID,
			Name:    structs.SerfCheckName,
			Status:  api.HealthCritical,
		},
		WriteRequest: structs.WriteRequest{
			Token: "root",
		},
	}
	var out struct{}
	if err := s1.RPC("Catalog.Register", &dead, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Force a reconciliation
	if err := s1.reconcile(); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Node should be gone
	state := s1.fsm.State()
	_, node, err := state.GetNode("no-longer-around")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if node != nil {
		t.Fatalf("client registered")
	}
}
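// TestLeader_Reconcile verifies that a client that joins before a leader
// exists is eventually registered once reconciliation runs.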
func TestLeader_Reconcile(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	// Join before we have a leader, this should cause a reconcile!
	joinLAN(t, c1, s1)

	// Should not be registered
	state := s1.fsm.State()
	_, node, err := state.GetNode(c1.config.NodeName)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if node != nil {
		t.Fatalf("client registered")
	}

	// Should be registered
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
	})
}
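// TestLeader_Reconcile_Races verifies that reconciliation does not clobber
// node metadata written via the catalog while flipping serfHealth state.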
func TestLeader_Reconcile_Races(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	dir2, c1 := testClient(t)
	defer os.RemoveAll(dir2)
	defer c1.Shutdown()

	joinLAN(t, c1, s1)

	// Wait for the server to reconcile the client and register it.
	state := s1.fsm.State()
	var nodeAddr string
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(c1.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node == nil {
			r.Fatal("client not registered")
		}
		nodeAddr = node.Address
	})

	// Add in some metadata via the catalog (as if the agent synced it
	// there). We also set the serfHealth check to failing so the reconcile
	// will attempt to flip it back
	req := structs.RegisterRequest{
		Datacenter: s1.config.Datacenter,
		Node:       c1.config.NodeName,
		ID:         c1.config.NodeID,
		Address:    nodeAddr,
		NodeMeta:   map[string]string{"hello": "world"},
		Check: &structs.HealthCheck{
			Node:    c1.config.NodeName,
			CheckID: structs.SerfCheckID,
			Name:    structs.SerfCheckName,
			Status:  api.HealthCritical,
			Output:  "",
		},
	}
	var out struct{}
	if err := s1.RPC("Catalog.Register", &req, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Force a reconcile and make sure the metadata stuck around.
	if err := s1.reconcile(); err != nil {
		t.Fatalf("err: %v", err)
	}
	_, node, err := state.GetNode(c1.config.NodeName)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if node == nil {
		t.Fatalf("bad")
	}
	if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
		t.Fatalf("bad")
	}

	// Fail the member and wait for the health to go critical.
	c1.Shutdown()
	retry.Run(t, func(r *retry.R) {
		_, checks, err := state.NodeChecks(nil, c1.config.NodeName, nil)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if got, want := checks[0].Status, api.HealthCritical; got != want {
			r.Fatalf("got state %q want %q", got, want)
		}
	})

	// Make sure the metadata didn't get clobbered.
	_, node, err = state.GetNode(c1.config.NodeName)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if node == nil {
		t.Fatalf("bad")
	}
	if hello, ok := node.Meta["hello"]; !ok || hello != "world" {
		t.Fatalf("bad")
	}
}
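// TestLeader_LeftServer verifies that force-removing a failed server shrinks
// the peer set seen by the remaining servers.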
func TestLeader_LeftServer(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()

	// Put s1 last so we don't trigger a leader election.
	servers := []*Server{s2, s3, s1}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)
	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Kill any server
	servers[0].Shutdown()

	// Force remove the non-leader (transition to left state)
	if err := servers[1].RemoveFailedNode(servers[0].config.NodeName, false); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Wait until the remaining servers show only 2 peers.
	for _, s := range servers[1:] {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
	}
	s1.Shutdown()
}
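// TestLeader_LeftLeader verifies that a leader which gracefully leaves is
// deregistered and the remaining servers drop to two peers.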
func TestLeader_LeftLeader(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Kill the leader!
	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}
	if !leader.isReadyForConsistentReads() {
		t.Fatalf("Expected leader to be ready for consistent reads")
	}
	leader.Leave()
	if leader.isReadyForConsistentReads() {
		t.Fatalf("Expected consistent read state to be false")
	}
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	var remain *Server
	for _, s := range servers {
		if s == leader {
			continue
		}
		remain = s
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
	}

	// Verify the old leader is deregistered
	state := remain.fsm.State()
	retry.Run(t, func(r *retry.R) {
		_, node, err := state.GetNode(leader.config.NodeName)
		if err != nil {
			r.Fatalf("err: %v", err)
		}
		if node != nil {
			r.Fatal("leader should be deregistered")
		}
	})
}
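// TestLeader_MultiBootstrap verifies that two bootstrap-mode servers that
// join over serf do not merge into a single raft quorum.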
func TestLeader_MultiBootstrap(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServer(t)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	servers := []*Server{s1, s2}

	// Try to join
	joinLAN(t, s2, s1)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) {
			if got, want := len(s.serfLAN.Members()), 2; got != want {
				r.Fatalf("got %d peers want %d", got, want)
			}
		})
	}

	// Ensure we don't have multiple raft peers
	for _, s := range servers {
		peers, _ := s.autopilot.NumVoters()
		if peers != 1 {
			t.Fatalf("should only have 1 raft peer!")
		}
	}
}
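// TestLeader_TombstoneGC_Reset verifies that the tombstone GC timer is
// re-armed on the new leader after a leadership change.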
func TestLeader_TombstoneGC_Reset(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Check that the leader has a pending GC expiration
	if !leader.tombstoneGC.PendingExpiration() {
		t.Fatalf("should have pending expiration")
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	retry.Run(t, func(r *retry.R) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return
			}
		}
		r.Fatal("no leader")
	})

	retry.Run(t, func(r *retry.R) {
		if !leader.tombstoneGC.PendingExpiration() {
			r.Fatal("leader has no pending GC expiration")
		}
	})
}
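// TestLeader_ReapTombstones verifies that KV delete tombstones are reaped
// once the tombstone TTL expires.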
func TestLeader_ReapTombstones(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
		c.TombstoneTTL = 50 * time.Millisecond
		c.TombstoneTTLGranularity = 10 * time.Millisecond
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Create a KV entry
	arg := structs.KVSRequest{
		Datacenter: "dc1",
		Op:         api.KVSet,
		DirEnt: structs.DirEntry{
			Key:   "test",
			Value: []byte("test"),
		},
		WriteRequest: structs.WriteRequest{
			Token: "root",
		},
	}
	var out bool
	if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Delete the KV entry (tombstoned).
	arg.Op = api.KVDelete
	if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &arg, &out); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Make sure there's a tombstone.
	state := s1.fsm.State()
	retry.Run(t, func(r *retry.R) {
		snap := state.Snapshot()
		defer snap.Close()
		stones, err := snap.Tombstones()
		if err != nil {
			r.Fatalf("err: %s", err)
		}
		if stones.Next() == nil {
			r.Fatalf("missing tombstones")
		}
		if stones.Next() != nil {
			r.Fatalf("unexpected extra tombstones")
		}
	})

	// Check that the new leader has a pending GC expiration by
	// watching for the tombstone to get removed.
	retry.Run(t, func(r *retry.R) {
		snap := state.Snapshot()
		defer snap.Close()
		stones, err := snap.Tombstones()
		if err != nil {
			r.Fatal(err)
		}
		if stones.Next() != nil {
			r.Fatal("should have no tombstones")
		}
	})
}
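// TestLeader_RollRaftServer verifies that a dead server is removed by
// autopilot and a replacement server brings the cluster back to 3 peers.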
func TestLeader_RollRaftServer(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = true
		c.Datacenter = "dc1"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
		c.Datacenter = "dc1"
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
		c.Datacenter = "dc1"
	})
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}

	// Try to join
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Kill the second server
	s2.Shutdown()

	for _, s := range []*Server{s1, s3} {
		retry.Run(t, func(r *retry.R) {
			// autopilot should force removal of the shutdown node
			r.Check(wantPeers(s, 2))
		})
	}

	// Replace the dead server with a new one
	dir4, s4 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
		c.Datacenter = "dc1"
	})
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()
	joinLAN(t, s4, s1)
	servers[1] = s4

	// Make sure the dead server is removed and we're back to 3 total peers
	for _, s := range servers {
		retry.Run(t, func(r *retry.R) {
			r.Check(wantPeers(s, 3))
		})
	}
}
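// TestLeader_ChangeServerID verifies that a new server reusing a dead
// server's address (but with a different ID) can take its place in raft.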
func TestLeader_ChangeServerID(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	conf := func(c *Config) {
		c.Bootstrap = false
		c.BootstrapExpect = 3
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 3
	}
	dir1, s1 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerWithConfig(t, conf)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}

	// Try to join and wait for all servers to get promoted
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)
	for _, s := range servers {
		testrpc.WaitForTestAgent(t, s.RPC, "dc1")
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Shut down a server, freeing up its address/port
	s3.Shutdown()

	retry.Run(t, func(r *retry.R) {
		alive := 0
		for _, m := range s1.LANMembers() {
			if m.Status == serf.StatusAlive {
				alive++
			}
		}
		if got, want := alive, 2; got != want {
			r.Fatalf("got %d alive members want %d", got, want)
		}
	})

	// Bring up a new server with s3's address that will get a different ID
	dir4, s4 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
		c.BootstrapExpect = 3
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 3
		c.SerfLANConfig.MemberlistConfig = s3.config.SerfLANConfig.MemberlistConfig
		c.RPCAddr = s3.config.RPCAddr
		c.RPCAdvertise = s3.config.RPCAdvertise
	})
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()

	joinLAN(t, s4, s1)
	testrpc.WaitForLeader(t, s4.RPC, "dc1")
	servers[2] = s4

	// While integrating #3327 it uncovered that this test was flaky. The
	// connection pool would use the same TCP connection to the old server
	// which would give EOF errors to the autopilot health check RPC call.
	// To make this more reliable we changed the connection pool to throw
	// away the connection if it sees an EOF error, since there's no way
	// that connection is going to work again. This made this test reliable
	// since it will make a new connection to s4.
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			// Make sure the dead server is removed and we're back below 4
			r.Check(wantPeers(s, 3))
		}
	})
}
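// TestLeader_ChangeNodeID verifies that a new server reusing a dead server's
// node name (but with a different node ID) can rejoin cleanly.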
func TestLeader_ChangeNodeID(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	dir3, s3 := testServerDCBootstrap(t, "dc1", false)
	defer os.RemoveAll(dir3)
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}

	// Try to join and wait for all servers to get promoted
	joinLAN(t, s2, s1)
	joinLAN(t, s3, s1)
	for _, s := range servers {
		testrpc.WaitForTestAgent(t, s.RPC, "dc1")
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Shut down a server, freeing up its address/port
	s3.Shutdown()

	// Wait for s1.LANMembers() to show s3 as StatusFailed or StatusLeft
	retry.Run(t, func(r *retry.R) {
		var gone bool
		for _, m := range s1.LANMembers() {
			if m.Name == s3.config.NodeName && (m.Status == serf.StatusFailed || m.Status == serf.StatusLeft) {
				gone = true
			}
		}
		require.True(r, gone, "s3 has not been detected as failed or left after shutdown")
	})

	// Bring up a new server with s3's name that will get a different ID
	dir4, s4 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
		c.Datacenter = "dc1"
		c.NodeName = s3.config.NodeName
	})
	defer os.RemoveAll(dir4)
	defer s4.Shutdown()
	joinLAN(t, s4, s1)
	servers[2] = s4

	// Make sure the dead server is gone from both Raft and Serf and we're back to 3 total peers
	retry.Run(t, func(r *retry.R) {
		r.Check(wantRaft(servers))
		for _, s := range servers {
			r.Check(wantPeers(s, 3))
		}
	})

	retry.Run(t, func(r *retry.R) {
		for _, m := range s1.LANMembers() {
			require.Equal(r, serf.StatusAlive, m.Status)
		}
	})
}
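// TestLeader_ACL_Initialization verifies that the master token, anonymous
// token, global-management policy, and ACL bootstrap state are set up
// correctly across old and new versions, with and without a master token.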
func TestLeader_ACL_Initialization(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	tests := []struct {
		name      string
		build     string
		master    string
		bootstrap bool
	}{
		{"old version, no master", "0.8.0", "", true},
		{"old version, master", "0.8.0", "root", false},
		{"new version, no master", "0.9.1", "", true},
		{"new version, master", "0.9.1", "root", false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			conf := func(c *Config) {
				c.Build = tt.build
				c.Bootstrap = true
				c.Datacenter = "dc1"
				c.ACLDatacenter = "dc1"
				c.ACLsEnabled = true
				c.ACLMasterToken = tt.master
			}
			dir1, s1 := testServerWithConfig(t, conf)
			defer os.RemoveAll(dir1)
			defer s1.Shutdown()
			testrpc.WaitForTestAgent(t, s1.RPC, "dc1")

			if tt.master != "" {
				_, master, err := s1.fsm.State().ACLTokenGetBySecret(nil, tt.master, nil)
				require.NoError(t, err)
				require.NotNil(t, master)
			}

			_, anon, err := s1.fsm.State().ACLTokenGetBySecret(nil, anonymousToken, nil)
			require.NoError(t, err)
			require.NotNil(t, anon)

			canBootstrap, _, err := s1.fsm.State().CanBootstrapACLToken()
			require.NoError(t, err)
			require.Equal(t, tt.bootstrap, canBootstrap)

			_, policy, err := s1.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, nil)
			require.NoError(t, err)
			require.NotNil(t, policy)
		})
	}
}
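// TestLeader_ACLUpgrade verifies that legacy management and client ACLs are
// upgraded to new-style tokens by the leader.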
func TestLeader_ACLUpgrade(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
	codec := rpcClient(t, s1)
	defer codec.Close()

	// create a legacy management ACL
	mgmt := structs.ACLRequest{
		Datacenter: "dc1",
		Op:         structs.ACLSet,
		ACL: structs.ACL{
			Name: "Management token",
			Type: structs.ACLTokenTypeManagement,
		},
		WriteRequest: structs.WriteRequest{Token: "root"},
	}
	var mgmt_id string
	require.NoError(t, msgpackrpc.CallWithCodec(codec, "ACL.Apply", &mgmt, &mgmt_id))

	// wait for it to be upgraded
	retry.Run(t, func(t *retry.R) {
		_, token, err := s1.fsm.State().ACLTokenGetBySecret(nil, mgmt_id, nil)
		require.NoError(t, err)
		require.NotNil(t, token)
		require.NotEqual(t, "", token.AccessorID)
		require.Equal(t, structs.ACLTokenTypeManagement, token.Type)
		require.Len(t, token.Policies, 1)
		require.Equal(t, structs.ACLPolicyGlobalManagementID, token.Policies[0].ID)
	})

	// create a legacy client ACL with a node rule
	client := structs.ACLRequest{
		Datacenter: "dc1",
		Op:         structs.ACLSet,
		ACL: structs.ACL{
			Name:  "Management token",
			Type:  structs.ACLTokenTypeClient,
			Rules: `node "" { policy = "read"}`,
		},
		WriteRequest: structs.WriteRequest{Token: "root"},
	}
	var client_id string
	require.NoError(t, msgpackrpc.CallWithCodec(codec, "ACL.Apply", &client, &client_id))

	// wait for it to be upgraded
	retry.Run(t, func(t *retry.R) {
		_, token, err := s1.fsm.State().ACLTokenGetBySecret(nil, client_id, nil)
		require.NoError(t, err)
		require.NotNil(t, token)
		require.NotEqual(t, "", token.AccessorID)
		require.Len(t, token.Policies, 0)
		require.Equal(t, structs.ACLTokenTypeClient, token.Type)
		require.Equal(t, client.ACL.Rules, token.Rules)
	})
}
func TestLeader_ACLUpgrade_IsStickyEvenIfSerfTagsRegress(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()

	// We test this by having two datacenters with one server each. They
	// initially come up and complete the migration, then we power them both
	// off. We leave the primary off permanently, and then we stand up the
	// secondary. Hopefully it should transition to ENABLED instead of being
	// stuck in LEGACY.

	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	waitForLeaderEstablishment(t, s1)

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc2"
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLTokenReplication = false
		c.ACLReplicationRate = 100
		c.ACLReplicationBurst = 100
		c.ACLReplicationApplyLimit = 1000000
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()
	codec2 := rpcClient(t, s2)
	defer codec2.Close()

	s2.tokens.UpdateReplicationToken("root", tokenStore.TokenSourceConfig)

	testrpc.WaitForLeader(t, s2.RPC, "dc2")
	waitForLeaderEstablishment(t, s2)

	// Create the WAN link
	joinWAN(t, s2, s1)
	waitForLeaderEstablishment(t, s1)
	waitForLeaderEstablishment(t, s2)

	waitForNewACLs(t, s1)
	waitForNewACLs(t, s2)
	waitForNewACLReplication(t, s2, structs.ACLReplicatePolicies, 1, 0, 0)

	// Everybody has the management policy.
	retry.Run(t, func(r *retry.R) {
		_, policy1, err := s1.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, structs.DefaultEnterpriseMeta())
		require.NoError(r, err)
		require.NotNil(r, policy1)

		_, policy2, err := s2.fsm.State().ACLPolicyGetByID(nil, structs.ACLPolicyGlobalManagementID, structs.DefaultEnterpriseMeta())
		require.NoError(r, err)
		require.NotNil(r, policy2)
	})

	// Shutdown s1 and s2.
	s1.Shutdown()
	s2.Shutdown()

	// Restart just s2
	dir2new, s2new := testServerWithConfig(t, func(c *Config) {
		c.Datacenter = "dc2"
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLTokenReplication = false
		c.ACLReplicationRate = 100
		c.ACLReplicationBurst = 100
		c.ACLReplicationApplyLimit = 1000000
		c.DataDir = s2.config.DataDir
		c.NodeName = s2.config.NodeName
		c.NodeID = s2.config.NodeID
	})
	defer os.RemoveAll(dir2new)
	defer s2new.Shutdown()

	waitForLeaderEstablishment(t, s2new)

	// It should be able to transition without connectivity to the primary.
	waitForNewACLs(t, s2new)
}
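// TestLeader_ConfigEntryBootstrap verifies that config entries listed in
// ConfigEntryBootstrap are applied once leadership is established.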
func TestLeader_ConfigEntryBootstrap(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()
	global_entry_init := &structs.ProxyConfigEntry{
		Kind: structs.ProxyDefaults,
		Name: structs.ProxyConfigGlobal,
		Config: map[string]interface{}{
			"foo": "bar",
			"bar": int64(1),
		},
	}

	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Build = "1.5.0"
		c.ConfigEntryBootstrap = []structs.ConfigEntry{
			global_entry_init,
		}
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")

	retry.Run(t, func(t *retry.R) {
		_, entry, err := s1.fsm.State().ConfigEntry(nil, structs.ProxyDefaults, structs.ProxyConfigGlobal, structs.DefaultEnterpriseMeta())
		require.NoError(t, err)
		require.NotNil(t, entry)

		global, ok := entry.(*structs.ProxyConfigEntry)
		require.True(t, ok)
		require.Equal(t, global_entry_init.Kind, global.Kind)
		require.Equal(t, global_entry_init.Name, global.Name)
		require.Equal(t, global_entry_init.Config, global.Config)
	})
}
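// TestLeader_ConfigEntryBootstrap_Fail verifies that invalid bootstrap config
// entries cause leadership establishment to fail with the expected error
// message, which is asserted by tailing the server logs.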
func TestLeader_ConfigEntryBootstrap_Fail(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()

	type testcase struct {
		name          string
		entries       []structs.ConfigEntry
		serverCB      func(c *Config)
		expectMessage string
	}

	cases := []testcase{
		{
			name: "service-splitter without L7 protocol",
			entries: []structs.ConfigEntry{
				&structs.ServiceSplitterConfigEntry{
					Kind: structs.ServiceSplitter,
					Name: "web",
					Splits: []structs.ServiceSplit{
						{Weight: 100, Service: "web"},
					},
				},
			},
			expectMessage: `Failed to apply configuration entry "service-splitter" / "web": discovery chain "web" uses a protocol "tcp" that does not permit advanced routing or splitting behavior"`,
		},
		{
			name: "service-intentions without migration",
			entries: []structs.ConfigEntry{
				&structs.ServiceIntentionsConfigEntry{
					Kind: structs.ServiceIntentions,
					Name: "web",
					Sources: []*structs.SourceIntention{
						{
							Name:   "debug",
							Action: structs.IntentionActionAllow,
						},
					},
				},
			},
			serverCB: func(c *Config) {
				c.OverrideInitialSerfTags = func(tags map[string]string) {
					tags["ft_si"] = "0"
				}
			},
			expectMessage: `Refusing to apply configuration entry "service-intentions" / "web" because intentions are still being migrated to config entries`,
		},
		{
			name: "service-intentions without Connect",
			entries: []structs.ConfigEntry{
				&structs.ServiceIntentionsConfigEntry{
					Kind: structs.ServiceIntentions,
					Name: "web",
					Sources: []*structs.SourceIntention{
						{
							Name:   "debug",
							Action: structs.IntentionActionAllow,
						},
					},
				},
			},
			serverCB: func(c *Config) {
				c.ConnectEnabled = false
			},
			expectMessage: `Refusing to apply configuration entry "service-intentions" / "web" because Connect must be enabled to bootstrap intentions"`,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			pr, pw := io.Pipe()
			defer pw.Close()

			var (
				ch             = make(chan string, 1)
				applyErrorLine string
			)
			go func() {
				defer pr.Close()
				scan := bufio.NewScanner(pr)
				for scan.Scan() {
					line := scan.Text()

					if strings.Contains(line, "failed to establish leadership") {
						applyErrorLine = line
						ch <- ""
						return
					}
					if strings.Contains(line, "successfully established leadership") {
						ch <- "leadership should not have gotten here if config entries properly failed"
						return
					}
				}
				if scan.Err() != nil {
					ch <- fmt.Sprintf("ERROR: %v", scan.Err())
				} else {
					ch <- "should not get here"
				}
			}()

			_, config := testServerConfig(t)
			config.Build = "1.6.0"
			config.ConfigEntryBootstrap = tc.entries
			if tc.serverCB != nil {
				tc.serverCB(config)
			}

			logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{
				Name:   config.NodeName,
				Level:  hclog.Debug,
				Output: io.MultiWriter(pw, testutil.NewLogBuffer(t)),
			})

			deps := newDefaultDeps(t, config)
			deps.Logger = logger

			srv, err := NewServer(config, deps)
			require.NoError(t, err)
			defer srv.Shutdown()

			select {
			case result := <-ch:
				require.Empty(t, result)
				if tc.expectMessage != "" {
					require.Contains(t, applyErrorLine, tc.expectMessage)
				}
			case <-time.After(time.Second):
				t.Fatal("timeout waiting for a result from tailing logs")
			}
		})
	}
}
func TestLeader_ACLLegacyReplication(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	t.Parallel()

	// This test relies on configuring a secondary DC with no route to the primary DC
	// Having no route will cause the ACL mode checking of the primary to "fail". In this
	// scenario legacy ACL replication should be enabled without also running new ACL
	// replication routines.
	cb := func(c *Config) {
		c.Datacenter = "dc2"
		c.ACLTokenReplication = true
	}
	_, srv, _ := testACLServerWithConfig(t, cb, true)
	waitForLeaderEstablishment(t, srv)

	require.True(t, srv.leaderRoutineManager.IsRunning(legacyACLReplicationRoutineName))
	require.False(t, srv.leaderRoutineManager.IsRunning(aclPolicyReplicationRoutineName))
	require.False(t, srv.leaderRoutineManager.IsRunning(aclRoleReplicationRoutineName))
	require.False(t, srv.leaderRoutineManager.IsRunning(aclTokenReplicationRoutineName))
}
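// TestDatacenterSupportsFederationStates verifies that federation state
// support only activates once every server in the datacenter advertises it
// via serf tags.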
func TestDatacenterSupportsFederationStates(t *testing.T) {
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}

	addGateway := func(t *testing.T, srv *Server, dc, node string) {
		t.Helper()
		arg := structs.RegisterRequest{
			Datacenter: dc,
			Node:       node,
			Address:    "127.0.0.1",
			Service: &structs.NodeService{
				Kind:    structs.ServiceKindMeshGateway,
				ID:      "mesh-gateway",
				Service: "mesh-gateway",
				Port:    8080,
			},
		}

		var out struct{}
		require.NoError(t, srv.RPC("Catalog.Register", &arg, &out))
	}

	t.Run("one node primary with old version", func(t *testing.T) {
		dir1, s1 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node1"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
		})
		defer os.RemoveAll(dir1)
		defer s1.Shutdown()

		s1.updateSerfTags("ft_fs", "0")

		waitForLeaderEstablishment(t, s1)

		addGateway(t, s1, "dc1", "node1")

		retry.Run(t, func(r *retry.R) {
			if s1.DatacenterSupportsFederationStates() {
				r.Fatal("server 1 shouldn't activate fedstates")
			}
		})
	})

	t.Run("one node primary with new version", func(t *testing.T) {
		dir1, s1 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node1"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
		})
		defer os.RemoveAll(dir1)
		defer s1.Shutdown()

		waitForLeaderEstablishment(t, s1)

		addGateway(t, s1, "dc1", "node1")

		retry.Run(t, func(r *retry.R) {
			if !s1.DatacenterSupportsFederationStates() {
				r.Fatal("server 1 didn't activate fedstates")
			}
		})

		// Wait until after AE runs at least once.
		retry.Run(t, func(r *retry.R) {
			arg := structs.FederationStateQuery{
				Datacenter:       "dc1",
				TargetDatacenter: "dc1",
			}

			var out structs.FederationStateResponse
			require.NoError(r, s1.RPC("FederationState.Get", &arg, &out))
			require.NotNil(r, out.State)
			require.Len(r, out.State.MeshGateways, 1)
		})
	})

	t.Run("two node primary with mixed versions", func(t *testing.T) {
		dir1, s1 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node1"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
		})
		defer os.RemoveAll(dir1)
		defer s1.Shutdown()

		s1.updateSerfTags("ft_fs", "0")

		waitForLeaderEstablishment(t, s1)

		dir2, s2 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node2"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
			c.Bootstrap = false
		})
		defer os.RemoveAll(dir2)
		defer s2.Shutdown()

		// Put s1 last so we don't trigger a leader election.
		servers := []*Server{s2, s1}

		// Try to join
		joinLAN(t, s2, s1)
		for _, s := range servers {
			retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
		}

		waitForLeaderEstablishment(t, s1)

		addGateway(t, s1, "dc1", "node1")

		retry.Run(t, func(r *retry.R) {
			if s1.DatacenterSupportsFederationStates() {
				r.Fatal("server 1 shouldn't activate fedstates")
			}
		})
		retry.Run(t, func(r *retry.R) {
			if s2.DatacenterSupportsFederationStates() {
				r.Fatal("server 2 shouldn't activate fedstates")
			}
		})
	})

	t.Run("two node primary with new version", func(t *testing.T) {
		dir1, s1 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node1"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
		})
		defer os.RemoveAll(dir1)
		defer s1.Shutdown()

		waitForLeaderEstablishment(t, s1)

		dir2, s2 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node2"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
			c.Bootstrap = false
		})
		defer os.RemoveAll(dir2)
		defer s2.Shutdown()

		// Put s1 last so we don't trigger a leader election.
		servers := []*Server{s2, s1}

		// Try to join
		joinLAN(t, s2, s1)
		for _, s := range servers {
			retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 2)) })
		}

		testrpc.WaitForLeader(t, s1.RPC, "dc1")
		testrpc.WaitForLeader(t, s2.RPC, "dc1")

		addGateway(t, s1, "dc1", "node1")

		retry.Run(t, func(r *retry.R) {
			if !s1.DatacenterSupportsFederationStates() {
				r.Fatal("server 1 didn't activate fedstates")
			}
		})
		retry.Run(t, func(r *retry.R) {
			if !s2.DatacenterSupportsFederationStates() {
				r.Fatal("server 2 didn't activate fedstates")
			}
		})

		// Wait until after AE runs at least once.
		retry.Run(t, func(r *retry.R) {
			arg := structs.DCSpecificRequest{
				Datacenter: "dc1",
			}

			var out structs.IndexedFederationStates
			require.NoError(r, s1.RPC("FederationState.List", &arg, &out))
			require.Len(r, out.States, 1)
			require.Len(r, out.States[0].MeshGateways, 1)
		})
	})

	t.Run("primary and secondary with new version", func(t *testing.T) {
		dir1, s1 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node1"
			c.Datacenter = "dc1"
			c.PrimaryDatacenter = "dc1"
		})
		defer os.RemoveAll(dir1)
		defer s1.Shutdown()

		waitForLeaderEstablishment(t, s1)

		dir2, s2 := testServerWithConfig(t, func(c *Config) {
			c.NodeName = "node2"
			c.Datacenter = "dc2"
			c.PrimaryDatacenter = "dc1"
			c.FederationStateReplicationRate = 100
			c.FederationStateReplicationBurst = 100
			c.FederationStateReplicationApplyLimit = 1000000
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
waitForLeaderEstablishment ( t , s2 )
// Try to join
joinWAN ( t , s2 , s1 )
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s1 . RPC , "dc2" )
addGateway ( t , s1 , "dc1" , "node1" )
addGateway ( t , s2 , "dc2" , "node2" )
retry . Run ( t , func ( r * retry . R ) {
if ! s1 . DatacenterSupportsFederationStates ( ) {
r . Fatal ( "server 1 didn't activate fedstates" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if ! s2 . DatacenterSupportsFederationStates ( ) {
r . Fatal ( "server 2 didn't activate fedstates" )
}
} )
// Wait until after AE runs at least once for both.
retry . Run ( t , func ( r * retry . R ) {
arg := structs . DCSpecificRequest {
Datacenter : "dc1" ,
}
var out structs . IndexedFederationStates
require . NoError ( r , s1 . RPC ( "FederationState.List" , & arg , & out ) )
require . Len ( r , out . States , 2 )
require . Len ( r , out . States [ 0 ] . MeshGateways , 1 )
require . Len ( r , out . States [ 1 ] . MeshGateways , 1 )
} )
// Wait until after replication runs for the secondary.
retry . Run ( t , func ( r * retry . R ) {
arg := structs . DCSpecificRequest {
Datacenter : "dc2" ,
}
var out structs . IndexedFederationStates
require . NoError ( r , s1 . RPC ( "FederationState.List" , & arg , & out ) )
require . Len ( r , out . States , 2 )
require . Len ( r , out . States [ 0 ] . MeshGateways , 1 )
require . Len ( r , out . States [ 1 ] . MeshGateways , 1 )
} )
} )
t . Run ( "primary and secondary with mixed versions" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
s1 . updateSerfTags ( "ft_fs" , "0" )
waitForLeaderEstablishment ( t , s1 )
dir2 , s2 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node2"
c . Datacenter = "dc2"
c . PrimaryDatacenter = "dc1"
c . FederationStateReplicationRate = 100
c . FederationStateReplicationBurst = 100
c . FederationStateReplicationApplyLimit = 1000000
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
waitForLeaderEstablishment ( t , s2 )
// Try to join
joinWAN ( t , s2 , s1 )
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s1 . RPC , "dc2" )
addGateway ( t , s1 , "dc1" , "node1" )
addGateway ( t , s2 , "dc2" , "node2" )
retry . Run ( t , func ( r * retry . R ) {
if s1 . DatacenterSupportsFederationStates ( ) {
r . Fatal ( "server 1 shouldn't activate fedstates" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if s2 . DatacenterSupportsFederationStates ( ) {
r . Fatal ( "server 2 shouldn't activate fedstates" )
}
} )
} )
}
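// TestDatacenterSupportsIntentionsAsConfigEntries exercises the feature gate for
// storing intentions as service-intentions config entries: the behavior should only
// activate once every server in the datacenter advertises support, and legacy
// intention writes are rejected with ErrIntentionsNotUpgradedYet until then.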
func TestDatacenterSupportsIntentionsAsConfigEntries ( t * testing . T ) {
if testing . Short ( ) {
t . Skip ( "too slow for testing.Short" )
}
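// addLegacyIntention attempts to create an allow or deny intention through the
// legacy Intention.Apply RPC.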
addLegacyIntention := func ( srv * Server , dc , src , dest string , allow bool ) error {
ixn := & structs . Intention {
SourceNS : structs . IntentionDefaultNamespace ,
SourceName : src ,
DestinationNS : structs . IntentionDefaultNamespace ,
DestinationName : dest ,
SourceType : structs . IntentionSourceConsul ,
Meta : map [ string ] string { } ,
}
if allow {
ixn . Action = structs . IntentionActionAllow
} else {
ixn . Action = structs . IntentionActionDeny
}
//nolint:staticcheck
ixn . UpdatePrecedence ( )
//nolint:staticcheck
ixn . SetHash ( )
arg := structs . IntentionRequest {
Datacenter : dc ,
Op : structs . IntentionOpCreate ,
Intention : ixn ,
}
var id string
return srv . RPC ( "Intention.Apply" , & arg , & id )
}
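// getConfigEntry fetches a single config entry by kind and name via the
// ConfigEntry.Get RPC.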
getConfigEntry := func ( srv * Server , dc , kind , name string ) ( structs . ConfigEntry , error ) {
arg := structs . ConfigEntryQuery {
Datacenter : dc ,
Kind : kind ,
Name : name ,
}
var reply structs . ConfigEntryResponse
if err := srv . RPC ( "ConfigEntry.Get" , & arg , & reply ) ; err != nil {
return nil , err
}
return reply . Entry , nil
}
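// disableServiceIntentions zeroes the ft_si serf tag so the server advertises no
// support for intentions as config entries, mimicking an older server version.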
disableServiceIntentions := func ( tags map [ string ] string ) {
tags [ "ft_si" ] = "0"
}
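// defaultEntMeta is the enterprise metadata expected on intentions written in the
// default namespace.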
defaultEntMeta := structs . DefaultEnterpriseMeta ( )
t . Run ( "one node primary with old version" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
c . OverrideInitialSerfTags = disableServiceIntentions
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
retry . Run ( t , func ( r * retry . R ) {
if s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 shouldn't activate service-intentions" )
}
} )
testutil . RequireErrorContains ( t ,
addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) ,
ErrIntentionsNotUpgradedYet . Error ( ) ,
)
} )
t . Run ( "one node primary with new version" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
retry . Run ( t , func ( r * retry . R ) {
if ! s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 didn't activate service-intentions" )
}
} )
// try to write an intention using the legacy API and it should work
require . NoError ( t , addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) )
// read it back as a config entry and that should work too
raw , err := getConfigEntry ( s1 , "dc1" , structs . ServiceIntentions , "api" )
require . NoError ( t , err )
require . NotNil ( t , raw )
got , ok := raw . ( * structs . ServiceIntentionsConfigEntry )
require . True ( t , ok )
require . Len ( t , got . Sources , 1 )
expect := & structs . ServiceIntentionsConfigEntry {
Kind : structs . ServiceIntentions ,
Name : "api" ,
EnterpriseMeta : * defaultEntMeta ,
Sources : [ ] * structs . SourceIntention {
{
Name : "web" ,
EnterpriseMeta : * defaultEntMeta ,
Action : structs . IntentionActionAllow ,
Type : structs . IntentionSourceConsul ,
Precedence : 9 ,
LegacyMeta : map [ string ] string { } ,
LegacyID : got . Sources [ 0 ] . LegacyID ,
// steal the server-generated create/update times from the result so the comparison passes
LegacyCreateTime : got . Sources [ 0 ] . LegacyCreateTime ,
LegacyUpdateTime : got . Sources [ 0 ] . LegacyUpdateTime ,
} ,
} ,
RaftIndex : got . RaftIndex ,
}
require . Equal ( t , expect , got )
} )
t . Run ( "two node primary with mixed versions" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
c . OverrideInitialSerfTags = disableServiceIntentions
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
dir2 , s2 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node2"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
c . Bootstrap = false
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
// Put s1 last so we don't trigger a leader election.
servers := [ ] * Server { s2 , s1 }
// Try to join
joinLAN ( t , s2 , s1 )
for _ , s := range servers {
retry . Run ( t , func ( r * retry . R ) { r . Check ( wantPeers ( s , 2 ) ) } )
}
waitForLeaderEstablishment ( t , s1 )
retry . Run ( t , func ( r * retry . R ) {
if s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 shouldn't activate service-intentions" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if s2 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 2 shouldn't activate service-intentions" )
}
} )
testutil . RequireErrorContains ( t ,
addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) ,
ErrIntentionsNotUpgradedYet . Error ( ) ,
)
testutil . RequireErrorContains ( t ,
addLegacyIntention ( s2 , "dc1" , "web" , "api" , true ) ,
ErrIntentionsNotUpgradedYet . Error ( ) ,
)
} )
t . Run ( "two node primary with new version" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
dir2 , s2 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node2"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
c . Bootstrap = false
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
// Put s1 last so we don't trigger a leader election.
servers := [ ] * Server { s2 , s1 }
// Try to join
joinLAN ( t , s2 , s1 )
for _ , s := range servers {
retry . Run ( t , func ( r * retry . R ) { r . Check ( wantPeers ( s , 2 ) ) } )
}
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s2 . RPC , "dc1" )
retry . Run ( t , func ( r * retry . R ) {
if ! s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 didn't activate service-intentions" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if ! s2 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 2 didn't activate service-intentions" )
}
} )
// try to write intentions using the legacy API and it should work from both sides
require . NoError ( t , addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) )
require . NoError ( t , addLegacyIntention ( s2 , "dc1" , "web2" , "api" , true ) )
// read it back as a config entry and that should work too
raw , err := getConfigEntry ( s1 , "dc1" , structs . ServiceIntentions , "api" )
require . NoError ( t , err )
require . NotNil ( t , raw )
raw , err = getConfigEntry ( s2 , "dc1" , structs . ServiceIntentions , "api" )
require . NoError ( t , err )
require . NotNil ( t , raw )
} )
t . Run ( "primary and secondary with new version" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
dir2 , s2 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node2"
c . Datacenter = "dc2"
c . PrimaryDatacenter = "dc1"
c . ConfigReplicationRate = 100
c . ConfigReplicationBurst = 100
c . ConfigReplicationApplyLimit = 1000000
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
waitForLeaderEstablishment ( t , s2 )
// Try to join
joinWAN ( t , s2 , s1 )
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s1 . RPC , "dc2" )
retry . Run ( t , func ( r * retry . R ) {
if ! s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 didn't activate service-intentions" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if ! s2 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 2 didn't activate service-intentions" )
}
} )
// try to write an intention using the legacy API
require . NoError ( t , addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) )
// read it back as a config entry and that should work too
raw , err := getConfigEntry ( s1 , "dc1" , structs . ServiceIntentions , "api" )
require . NoError ( t , err )
require . NotNil ( t , raw )
// Wait until after replication runs for the secondary.
retry . Run ( t , func ( r * retry . R ) {
raw , err = getConfigEntry ( s2 , "dc1" , structs . ServiceIntentions , "api" )
require . NoError ( r , err )
require . NotNil ( r , raw )
} )
} )
t . Run ( "primary and secondary with mixed versions" , func ( t * testing . T ) {
dir1 , s1 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node1"
c . Datacenter = "dc1"
c . PrimaryDatacenter = "dc1"
c . OverrideInitialSerfTags = disableServiceIntentions
} )
defer os . RemoveAll ( dir1 )
defer s1 . Shutdown ( )
waitForLeaderEstablishment ( t , s1 )
dir2 , s2 := testServerWithConfig ( t , func ( c * Config ) {
c . NodeName = "node2"
c . Datacenter = "dc2"
c . PrimaryDatacenter = "dc1"
c . ConfigReplicationRate = 100
c . ConfigReplicationBurst = 100
c . ConfigReplicationApplyLimit = 1000000
} )
defer os . RemoveAll ( dir2 )
defer s2 . Shutdown ( )
waitForLeaderEstablishment ( t , s2 )
// Try to join
joinWAN ( t , s2 , s1 )
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s1 . RPC , "dc2" )
retry . Run ( t , func ( r * retry . R ) {
if s1 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 1 shouldn't activate service-intentions" )
}
} )
retry . Run ( t , func ( r * retry . R ) {
if s2 . DatacenterSupportsIntentionsAsConfigEntries ( ) {
r . Fatal ( "server 2 shouldn't activate service-intentions" )
}
} )
testutil . RequireErrorContains ( t ,
addLegacyIntention ( s1 , "dc1" , "web" , "api" , true ) ,
ErrIntentionsNotUpgradedYet . Error ( ) ,
)
testutil . RequireErrorContains ( t ,
addLegacyIntention ( s2 , "dc1" , "web" , "api" , true ) ,
ErrIntentionsNotUpgradedYet . Error ( ) ,
)
} )
}