open-consul/sdk/testutil/server.go

515 lines
16 KiB
Go
Raw Normal View History

package testutil
// TestServer is a test helper. It uses a fork/exec model to create
// a test Consul server instance in the background and initialize it
// with some data and/or services. The test server can then be used
// to run a unit test, and offers an easy API to tear itself down
// when the test has completed. The only prerequisite is to have a consul
// binary available on the $PATH.
//
// This package does not use Consul's official API client. This is
// because we use TestServer to test the API client, which would
// otherwise cause an import cycle.
import (
"context"
"encoding/json"
"fmt"
2015-03-11 01:08:14 +00:00
"io"
"io/ioutil"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
"testing"
"time"
"github.com/hashicorp/consul/sdk/freeport"
"github.com/hashicorp/consul/sdk/testutil/retry"
2015-10-22 18:14:22 +00:00
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/go-uuid"
"github.com/pkg/errors"
)
// TestPerformanceConfig holds the performance tuning options of a test
// server. TestServerConfig serializes it under its "performance" key.
type TestPerformanceConfig struct {
	// RaftMultiplier is emitted as "raft_multiplier"; a zero value is
	// omitted from the JSON output.
	RaftMultiplier uint `json:"raft_multiplier,omitempty"`
}
2015-03-11 04:53:51 +00:00
// TestPortConfig configures the various ports used for services
// provided by the Consul server. Every field is optional: a zero port is
// omitted from the generated JSON configuration.
type TestPortConfig struct {
	DNS          int `json:"dns,omitempty"`
	HTTP         int `json:"http,omitempty"`
	HTTPS        int `json:"https,omitempty"`
	SerfLan      int `json:"serf_lan,omitempty"`
	SerfWan      int `json:"serf_wan,omitempty"`
	Server       int `json:"server,omitempty"`
	ProxyMinPort int `json:"proxy_min_port,omitempty"`
	ProxyMaxPort int `json:"proxy_max_port,omitempty"`
}
2015-03-11 04:53:51 +00:00
// TestAddressConfig carries the bind addresses for individual components
// of the Consul server. TestServerConfig serializes it under "addresses".
type TestAddressConfig struct {
	// HTTP is emitted as "http"; an empty string is omitted.
	HTTP string `json:"http,omitempty"`
}
// TestNetworkSegment describes a single network segment entry of the
// "segments" list in TestServerConfig. None of the fields use omitempty, so
// all four keys are always present in the generated JSON.
type TestNetworkSegment struct {
	Name      string `json:"name"`
	Bind      string `json:"bind"`
	Port      int    `json:"port"`
	Advertise string `json:"advertise"`
}
2015-03-11 04:53:51 +00:00
// TestServerConfig is the main server configuration struct.
type TestServerConfig struct {
NodeName string `json:"node_name"`
NodeID string `json:"node_id"`
NodeMeta map[string]string `json:"node_meta,omitempty"`
Performance *TestPerformanceConfig `json:"performance,omitempty"`
Bootstrap bool `json:"bootstrap,omitempty"`
Server bool `json:"server,omitempty"`
DataDir string `json:"data_dir,omitempty"`
Datacenter string `json:"datacenter,omitempty"`
Segments []TestNetworkSegment `json:"segments"`
DisableCheckpoint bool `json:"disable_update_check"`
LogLevel string `json:"log_level,omitempty"`
Bind string `json:"bind_addr,omitempty"`
Addresses *TestAddressConfig `json:"addresses,omitempty"`
Ports *TestPortConfig `json:"ports,omitempty"`
RaftProtocol int `json:"raft_protocol,omitempty"`
ACLMasterToken string `json:"acl_master_token,omitempty"`
ACLDatacenter string `json:"acl_datacenter,omitempty"`
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. 
The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
PrimaryDatacenter string `json:"primary_datacenter,omitempty"`
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
2018-10-19 16:28:36 +00:00
ACL TestACLs `json:"acl,omitempty"`
Encrypt string `json:"encrypt,omitempty"`
CAFile string `json:"ca_file,omitempty"`
CertFile string `json:"cert_file,omitempty"`
KeyFile string `json:"key_file,omitempty"`
VerifyIncoming bool `json:"verify_incoming,omitempty"`
VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"`
VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"`
VerifyOutgoing bool `json:"verify_outgoing,omitempty"`
EnableScriptChecks bool `json:"enable_script_checks,omitempty"`
Connect map[string]interface{} `json:"connect,omitempty"`
New command: consul debug (#4754) * agent/debug: add package for debugging, host info * api: add v1/agent/host endpoint * agent: add v1/agent/host endpoint * command/debug: implementation of static capture * command/debug: tests and only configured targets * agent/debug: add basic test for host metrics * command/debug: add methods for dynamic data capture * api: add debug/pprof endpoints * command/debug: add pprof * command/debug: timing, wg, logs to disk * vendor: add gopsutil/disk * command/debug: add a usage section * website: add docs for consul debug * agent/host: require operator:read * api/host: improve docs and no retry timing * command/debug: fail on extra arguments * command/debug: fixup file permissions to 0644 * command/debug: remove server flags * command/debug: improve clarity of usage section * api/debug: add Trace for profiling, fix profile * command/debug: capture profile and trace at the same time * command/debug: add index document * command/debug: use "clusters" in place of members * command/debug: remove address in output * command/debug: improve comment on metrics sleep * command/debug: clarify usage * agent: always register pprof handlers and protect This will allow us to avoid a restart of a target agent for profiling by always registering the pprof handlers. Given this is a potentially sensitive path, it is protected with an operator:read ACL and enable debug being set to true on the target agent. enable_debug still requires a restart. If ACLs are disabled, enable_debug is sufficient. * command/debug: use trace.out instead of .prof More in line with golang docs. * agent: fix comment wording * agent: wrap table driven tests in t.run()
2018-10-17 20:20:35 +00:00
EnableDebug bool `json:"enable_debug,omitempty"`
ReadyTimeout time.Duration `json:"-"`
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
Stdout io.Writer `json:"-"`
Stderr io.Writer `json:"-"`
Args []string `json:"-"`
ReturnPorts func() `json:"-"`
}
New ACLs (#4791) This PR is almost a complete rewrite of the ACL system within Consul. It brings the features more in line with other HashiCorp products. Obviously there is quite a bit left to do here but most of it is related docs, testing and finishing the last few commands in the CLI. I will update the PR description and check off the todos as I finish them over the next few days/week. Description At a high level this PR is mainly to split ACL tokens from Policies and to split the concepts of Authorization from Identities. A lot of this PR is mostly just to support CRUD operations on ACLTokens and ACLPolicies. These in and of themselves are not particularly interesting. The bigger conceptual changes are in how tokens get resolved, how backwards compatibility is handled and the separation of policy from identity which could lead the way to allowing for alternative identity providers. On the surface and with a new cluster the ACL system will look very similar to that of Nomads. Both have tokens and policies. Both have local tokens. The ACL management APIs for both are very similar. I even ripped off Nomad's ACL bootstrap resetting procedure. There are a few key differences though. Nomad requires token and policy replication where Consul only requires policy replication with token replication being opt-in. In Consul local tokens only work with token replication being enabled though. All policies in Nomad are globally applicable. In Consul all policies are stored and replicated globally but can be scoped to a subset of the datacenters. This allows for more granular access management. Unlike Nomad, Consul has legacy baggage in the form of the original ACL system. The ramifications of this are: A server running the new system must still support other clients using the legacy system. A client running the new system must be able to use the legacy RPCs when the servers in its datacenter are running the legacy system. 
The primary ACL DC's servers running in legacy mode needs to be a gate that keeps everything else in the entire multi-DC cluster running in legacy mode. So not only does this PR implement the new ACL system but has a legacy mode built in for when the cluster isn't ready for new ACLs. Also detecting that new ACLs can be used is automatic and requires no configuration on the part of administrators. This process is detailed more in the "Transitioning from Legacy to New ACL Mode" section below.
2018-10-19 16:04:07 +00:00
// TestACLs is the ACL section of the test server configuration.
// TestServerConfig serializes it under its "acl" key. Every field carries
// omitempty; Tokens is a struct value, so its key is still emitted when all
// tokens are empty.
type TestACLs struct {
	Enabled             bool       `json:"enabled,omitempty"`
	TokenReplication    bool       `json:"enable_token_replication,omitempty"`
	PolicyTTL           string     `json:"policy_ttl,omitempty"`
	TokenTTL            string     `json:"token_ttl,omitempty"`
	DownPolicy          string     `json:"down_policy,omitempty"`
	DefaultPolicy       string     `json:"default_policy,omitempty"`
	EnableKeyListPolicy bool       `json:"enable_key_list_policy,omitempty"`
	Tokens              TestTokens `json:"tokens,omitempty"`
	DisabledTTL         string     `json:"disabled_ttl,omitempty"`
}
// TestTokens lists the ACL tokens that can be preconfigured on a test
// server. TestACLs serializes it under its "tokens" key; empty tokens are
// omitted from the JSON output.
type TestTokens struct {
	Master      string `json:"master,omitempty"`
	Replication string `json:"replication,omitempty"`
	AgentMaster string `json:"agent_master,omitempty"`
	Default     string `json:"default,omitempty"`
	Agent       string `json:"agent,omitempty"`
}
2015-03-11 04:53:51 +00:00
// ServerConfigCallback is a function type which can be passed to
// NewTestServerConfigT to modify the server config. The callback receives the
// default configuration, with DataDir already pointing inside the server's
// temporary directory, before that configuration is marshaled to JSON; any
// change it makes therefore ends up in the config file given to the agent.
type ServerConfigCallback func(c *TestServerConfig)
2015-03-11 04:53:51 +00:00
// defaultServerConfig returns a new TestServerConfig struct
// with all of the listen ports incremented by one.
func defaultServerConfig(t TestingTB) *TestServerConfig {
nodeID, err := uuid.GenerateUUID()
if err != nil {
panic(err)
}
ports := freeport.MustTake(6)
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
logBuffer := NewLogBuffer(t)
return &TestServerConfig{
New config parser, HCL support, multiple bind addrs (#3480) * new config parser for agent This patch implements a new config parser for the consul agent which makes the following changes to the previous implementation: * add HCL support * all configuration fragments in tests and for default config are expressed as HCL fragments * HCL fragments can be provided on the command line so that they can eventually replace the command line flags. * HCL/JSON fragments are parsed into a temporary Config structure which can be merged using reflection (all values are pointers). The existing merge logic of overwrite for values and append for slices has been preserved. * A single builder process generates a typed runtime configuration for the agent. The new implementation is more strict and fails in the builder process if no valid runtime configuration can be generated. Therefore, additional validations in other parts of the code should be removed. The builder also pre-computes all required network addresses so that no address/port magic should be required where the configuration is used and should therefore be removed. * Upgrade github.com/hashicorp/hcl to support int64 * improve error messages * fix directory permission test * Fix rtt test * Fix ForceLeave test * Skip performance test for now until we know what to do * Update github.com/hashicorp/memberlist to update log prefix * Make memberlist use the default logger * improve config error handling * do not fail on non-existing data-dir * experiment with non-uniform timeouts to get a handle on stalled leader elections * Run tests for packages separately to eliminate the spurious port conflicts * refactor private address detection and unify approach for ipv4 and ipv6. 
Fixes #2825 * do not allow unix sockets for DNS * improve bind and advertise addr error handling * go through builder using test coverage * minimal update to the docs * more coverage tests fixed * more tests * fix makefile * cleanup * fix port conflicts with external port server 'porter' * stop test server on error * do not run api test that change global ENV concurrently with the other tests * Run remaining api tests concurrently * no need for retry with the port number service * monkey patch race condition in go-sockaddr until we understand why that fails * monkey patch hcl decoder race condidtion until we understand why that fails * monkey patch spurious errors in strings.EqualFold from here * add test for hcl decoder race condition. Run with go test -parallel 128 * Increase timeout again * cleanup * don't log port allocations by default * use base command arg parsing to format help output properly * handle -dc deprecation case in Build * switch autopilot.max_trailing_logs to int * remove duplicate test case * remove unused methods * remove comments about flag/config value inconsistencies * switch got and want around since the error message was misleading. * Removes a stray debug log. * Removes a stray newline in imports. * Fixes TestACL_Version8. * Runs go fmt. * Adds a default case for unknown address types. * Reoders and reformats some imports. * Adds some comments and fixes typos. * Reorders imports. 
* add unix socket support for dns later * drop all deprecated flags and arguments * fix wrong field name * remove stray node-id file * drop unnecessary patch section in test * drop duplicate test * add test for LeaveOnTerm and SkipLeaveOnInt in client mode * drop "bla" and add clarifying comment for the test * split up tests to support enterprise/non-enterprise tests * drop raft multiplier and derive values during build phase * sanitize runtime config reflectively and add test * detect invalid config fields * fix tests with invalid config fields * use different values for wan sanitiziation test * drop recursor in favor of recursors * allow dns_config.udp_answer_limit to be zero * make sure tests run on machines with multiple ips * Fix failing tests in a few more places by providing a bind address in the test * Gets rid of skipped TestAgent_CheckPerformanceSettings and adds case for builder. * Add porter to server_test.go to make tests there less flaky * go fmt
2017-09-25 18:40:42 +00:00
NodeName: "node-" + nodeID,
NodeID: nodeID,
DisableCheckpoint: true,
Performance: &TestPerformanceConfig{
RaftMultiplier: 1,
},
Bootstrap: true,
Server: true,
LogLevel: "debug",
Bind: "127.0.0.1",
Addresses: &TestAddressConfig{},
Ports: &TestPortConfig{
New config parser, HCL support, multiple bind addrs (#3480) * new config parser for agent This patch implements a new config parser for the consul agent which makes the following changes to the previous implementation: * add HCL support * all configuration fragments in tests and for default config are expressed as HCL fragments * HCL fragments can be provided on the command line so that they can eventually replace the command line flags. * HCL/JSON fragments are parsed into a temporary Config structure which can be merged using reflection (all values are pointers). The existing merge logic of overwrite for values and append for slices has been preserved. * A single builder process generates a typed runtime configuration for the agent. The new implementation is more strict and fails in the builder process if no valid runtime configuration can be generated. Therefore, additional validations in other parts of the code should be removed. The builder also pre-computes all required network addresses so that no address/port magic should be required where the configuration is used and should therefore be removed. * Upgrade github.com/hashicorp/hcl to support int64 * improve error messages * fix directory permission test * Fix rtt test * Fix ForceLeave test * Skip performance test for now until we know what to do * Update github.com/hashicorp/memberlist to update log prefix * Make memberlist use the default logger * improve config error handling * do not fail on non-existing data-dir * experiment with non-uniform timeouts to get a handle on stalled leader elections * Run tests for packages separately to eliminate the spurious port conflicts * refactor private address detection and unify approach for ipv4 and ipv6. 
Fixes #2825 * do not allow unix sockets for DNS * improve bind and advertise addr error handling * go through builder using test coverage * minimal update to the docs * more coverage tests fixed * more tests * fix makefile * cleanup * fix port conflicts with external port server 'porter' * stop test server on error * do not run api test that change global ENV concurrently with the other tests * Run remaining api tests concurrently * no need for retry with the port number service * monkey patch race condition in go-sockaddr until we understand why that fails * monkey patch hcl decoder race condidtion until we understand why that fails * monkey patch spurious errors in strings.EqualFold from here * add test for hcl decoder race condition. Run with go test -parallel 128 * Increase timeout again * cleanup * don't log port allocations by default * use base command arg parsing to format help output properly * handle -dc deprecation case in Build * switch autopilot.max_trailing_logs to int * remove duplicate test case * remove unused methods * remove comments about flag/config value inconsistencies * switch got and want around since the error message was misleading. * Removes a stray debug log. * Removes a stray newline in imports. * Fixes TestACL_Version8. * Runs go fmt. * Adds a default case for unknown address types. * Reoders and reformats some imports. * Adds some comments and fixes typos. * Reorders imports. 
* add unix socket support for dns later * drop all deprecated flags and arguments * fix wrong field name * remove stray node-id file * drop unnecessary patch section in test * drop duplicate test * add test for LeaveOnTerm and SkipLeaveOnInt in client mode * drop "bla" and add clarifying comment for the test * split up tests to support enterprise/non-enterprise tests * drop raft multiplier and derive values during build phase * sanitize runtime config reflectively and add test * detect invalid config fields * fix tests with invalid config fields * use different values for wan sanitiziation test * drop recursor in favor of recursors * allow dns_config.udp_answer_limit to be zero * make sure tests run on machines with multiple ips * Fix failing tests in a few more places by providing a bind address in the test * Gets rid of skipped TestAgent_CheckPerformanceSettings and adds case for builder. * Add porter to server_test.go to make tests there less flaky * go fmt
2017-09-25 18:40:42 +00:00
DNS: ports[0],
HTTP: ports[1],
HTTPS: ports[2],
SerfLan: ports[3],
SerfWan: ports[4],
Server: ports[5],
},
2017-05-12 19:04:34 +00:00
ReadyTimeout: 10 * time.Second,
Connect: map[string]interface{}{
"enabled": true,
"ca_config": map[string]interface{}{
// const TestClusterID causes import cycle so hard code it here.
"cluster_id": "11111111-2222-3333-4444-555555555555",
},
},
ReturnPorts: func() {
freeport.Return(ports)
},
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
Stdout: logBuffer,
Stderr: logBuffer,
}
}
2015-03-11 04:53:51 +00:00
// TestService is used to serialize a service definition. The JSON keys are
// the Go field names themselves (the tags set only omitempty), and zero-valued
// fields are left out of the output.
type TestService struct {
	ID      string   `json:",omitempty"`
	Name    string   `json:",omitempty"`
	Tags    []string `json:",omitempty"`
	Address string   `json:",omitempty"`
	Port    int      `json:",omitempty"`
}
2015-03-11 04:53:51 +00:00
// TestCheck is used to serialize a check definition. The JSON keys are the
// Go field names themselves (the tags set only omitempty), and empty fields
// are left out of the output.
type TestCheck struct {
	ID        string `json:",omitempty"`
	Name      string `json:",omitempty"`
	ServiceID string `json:",omitempty"`
	TTL       string `json:",omitempty"`
}
2015-03-11 23:10:07 +00:00
// TestKVResponse is the shape KV responses are decoded into. Only the
// "Value" key of a response is captured; every other key is ignored by the
// decoder.
type TestKVResponse struct {
	Value string
}
2015-03-11 04:53:51 +00:00
// TestServer is the main server wrapper struct.
type TestServer struct {
cmd *exec.Cmd
Config *TestServerConfig
2017-04-14 20:37:29 +00:00
HTTPAddr string
HTTPSAddr string
LANAddr string
WANAddr string
2017-04-21 00:02:42 +00:00
HTTPClient *http.Client
tmpdir string
}
// NewTestServerConfigT creates a new TestServer, and makes a call to an optional
// callback function to modify the configuration. If there is an error
// configuring or starting the server, the server will NOT be running when the
// function returns (thus you do not need to stop it).
func NewTestServerConfigT(t TestingTB, cb ServerConfigCallback) (*TestServer, error) {
path, err := exec.LookPath("consul")
if err != nil || path == "" {
return nil, fmt.Errorf("consul not found on $PATH - download and install " +
"consul or skip this test")
}
prefix := "consul"
if t != nil {
// Use test name for tmpdir if available
prefix = strings.Replace(t.Name(), "/", "_", -1)
}
tmpdir, err := ioutil.TempDir("", prefix)
if err != nil {
return nil, errors.Wrap(err, "failed to create tempdir")
}
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
cfg := defaultServerConfig(t)
cfg.DataDir = filepath.Join(tmpdir, "data")
if cb != nil {
cb(cfg)
}
b, err := json.Marshal(cfg)
if err != nil {
cfg.ReturnPorts()
os.RemoveAll(tmpdir)
return nil, errors.Wrap(err, "failed marshaling json")
}
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
t.Logf("CONFIG JSON: %s", string(b))
configFile := filepath.Join(tmpdir, "config.json")
if err := ioutil.WriteFile(configFile, b, 0644); err != nil {
cfg.ReturnPorts()
os.RemoveAll(tmpdir)
return nil, errors.Wrap(err, "failed writing config content")
}
// Start the server
args := []string{"agent", "-config-file", configFile}
args = append(args, cfg.Args...)
cmd := exec.Command("consul", args...)
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
cmd.Stdout = cfg.Stdout
cmd.Stderr = cfg.Stderr
if err := cmd.Start(); err != nil {
cfg.ReturnPorts()
os.RemoveAll(tmpdir)
return nil, errors.Wrap(err, "failed starting command")
}
httpAddr := fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTP)
client := cleanhttp.DefaultClient()
if strings.HasPrefix(cfg.Addresses.HTTP, "unix://") {
httpAddr = cfg.Addresses.HTTP
tr := cleanhttp.DefaultTransport()
tr.DialContext = func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", httpAddr[len("unix://"):])
}
client = &http.Client{Transport: tr}
}
server := &TestServer{
Config: cfg,
cmd: cmd,
2017-04-14 20:37:29 +00:00
HTTPAddr: httpAddr,
HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS),
LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan),
WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan),
2017-04-21 00:02:42 +00:00
HTTPClient: client,
tmpdir: tmpdir,
}
// Wait for the server to be ready
if err := server.waitForAPI(); err != nil {
testutil: NewLogBuffer - buffer logs until a test fails Replaces #7559 Running tests in parallel, with background goroutines, results in test output not being associated with the correct test. `go test` does not make any guarantees about output from goroutines being attributed to the correct test case. Attaching log output from background goroutines also cause data races. If the goroutine outlives the test, it will race with the test being marked done. Previously this was noticed as a panic when logging, but with the race detector enabled it is shown as a data race. The previous solution did not address the problem of correct test attribution because test output could still be hidden when it was associated with a test that did not fail. You would have to look at all of the log output to find the relevant lines. It also made debugging test failures more difficult because each log line was very long. This commit attempts a new approach. Instead of printing all the logs, only print when a test fails. This should work well when there are a small number of failures, but may not work well when there are many test failures at the same time. In those cases the failures are unlikely a result of a specific test, and the log output is likely less useful. All of the logs are printed from the test goroutine, so they should be associated with the correct test. Also removes some test helpers that were not used, or only had a single caller. Packages which expose many functions with similar names can be difficult to use correctly. Related: https://github.com/golang/go/issues/38458 (may be fixed in go1.15) https://github.com/golang/go/issues/38382#issuecomment-612940030
2020-05-06 20:40:16 +00:00
if err := server.Stop(); err != nil {
t.Logf("server stop failed with: %v", err)
}
return nil, err
}
return server, nil
}
2015-03-11 04:53:51 +00:00
// Stop stops the test Consul server, and removes the Consul data
// directory once we are done.
//
// The process is interrupted (killed outright on Windows) and Stop then
// waits up to 10 seconds for it to exit. If it has not exited by then it is
// sent SIGABRT, Stop blocks until the process has been reaped, and a timeout
// error is returned. Otherwise the result of waiting on the process is
// returned. s.Config.ReturnPorts is called and s.tmpdir is removed on every
// return path.
func (s *TestServer) Stop() error {
	defer s.Config.ReturnPorts()
	defer os.RemoveAll(s.tmpdir)

	// There was no process
	if s.cmd == nil {
		return nil
	}

	if s.cmd.Process != nil {
		if runtime.GOOS == "windows" {
			if err := s.cmd.Process.Kill(); err != nil {
				return errors.Wrap(err, "failed to kill consul server")
			}
		} else { // interrupt is not supported in windows
			if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
				return errors.Wrap(err, "failed to kill consul server")
			}
		}
	}

	// Wait must only be called once on an exec.Cmd, so it is called from a
	// single goroutine. The channel is buffered so that goroutine can always
	// deliver its result and exit, whichever select branch is taken.
	waitDone := make(chan error, 1)
	go func() {
		waitDone <- s.cmd.Wait()
	}()

	// wait for the process to exit to be sure that the data dir can be
	// deleted on all platforms.
	select {
	case err := <-waitDone:
		return err
	case <-time.After(10 * time.Second):
		// Escalate. If SIGABRT cannot be delivered, fall back to a
		// best-effort kill; its error is ignored because the process may
		// already be gone.
		if err := s.cmd.Process.Signal(syscall.SIGABRT); err != nil {
			s.cmd.Process.Kill()
		}
		// Receive the result of the in-flight Wait rather than calling Wait
		// a second time, so the process is really gone before the deferred
		// cleanup runs.
		<-waitDone
		return fmt.Errorf("timeout waiting for server to stop gracefully")
	}
}
// waitForAPI waits for the /status/leader HTTP endpoint to start
// responding. This is an indication that the agent has started,
// but will likely return before a leader is elected.
// Note: We do not check for a successful response status because
// we want this function to return without error even when
// there's no leader elected.
//
// It returns nil as soon as one request completes, and an "api unavailable"
// error if no request completed before the retry.TwoSeconds timeout.
func (s *TestServer) waitForAPI() error {
	// This retry replicates the logic of retry.Run to allow for nested retries.
	// By returning an error we can wrap TestServer creation with retry.Run
	// in makeClientWithConfig.
	timer := retry.TwoSeconds()
	deadline := time.Now().Add(timer.Timeout)
	url := s.url("/v1/status/leader")

	var lastErr error
	for !time.Now().After(deadline) {
		time.Sleep(timer.Wait)

		resp, err := s.masterGet(url)
		if err != nil {
			lastErr = err
			continue
		}
		resp.Body.Close()

		// The endpoint answered; there is no reason to keep polling until
		// the deadline.
		return nil
	}

	if lastErr != nil {
		return fmt.Errorf("api unavailable: %v", lastErr)
	}
	return fmt.Errorf("api unavailable")
}
2015-03-11 04:53:51 +00:00
// waitForLeader waits for the Consul server's HTTP API to become
// available, and then waits for a known leader and an index of
// 2 or more to be observed to confirm leader election is done.
func (s *TestServer) WaitForLeader(t *testing.T) {
retry.Run(t, func(r *retry.R) {
// Query the API and check the status code.
url := s.url("/v1/catalog/nodes")
resp, err := s.masterGet(url)
if err != nil {
r.Fatalf("failed http get '%s': %v", url, err)
}
defer resp.Body.Close()
if err := s.requireOK(resp); err != nil {
r.Fatal("failed OK response", err)
}
// Ensure we have a leader and a node registration.
if leader := resp.Header.Get("X-Consul-KnownLeader"); leader != "true" {
r.Fatalf("Consul leader status: %#v", leader)
}
index, err := strconv.ParseInt(resp.Header.Get("X-Consul-Index"), 10, 64)
if err != nil {
r.Fatal("bad consul index", err)
}
if index < 2 {
r.Fatal("consul index should be at least 2")
}
})
2015-03-11 01:08:14 +00:00
}
// WaitForActiveCARoot blocks, via retry.Run, until
// /v1/agent/connect/ca/roots answers OK with a non-empty ActiveRootID and at
// least one root, meaning connect has completed bootstrapping and is ready
// to use.
func (s *TestServer) WaitForActiveCARoot(t *testing.T) {
	// Only the fields needed for the readiness check are decoded; the roots
	// themselves are left opaque.
	type rootsResponse struct {
		ActiveRootID string
		TrustDomain  string
		Roots        []interface{}
	}

	retry.Run(t, func(r *retry.R) {
		endpoint := s.url("/v1/agent/connect/ca/roots")
		resp, err := s.masterGet(endpoint)
		if err != nil {
			r.Fatalf("failed http get '%s': %v", endpoint, err)
		}
		defer resp.Body.Close()

		// The roots endpoint returns an error status until the CA has been
		// bootstrapped. The response is decoded into a local type rather
		// than the real API type because this helper is used by both `api`
		// and the consul tests, and sharing the type would cause an import
		// cycle or duplication.
		if okErr := s.requireOK(resp); okErr != nil {
			r.Fatal("failed OK response", okErr)
		}

		var roots rootsResponse
		if decErr := json.NewDecoder(resp.Body).Decode(&roots); decErr != nil {
			r.Fatal(decErr)
		}

		hasActive := roots.ActiveRootID != ""
		hasRoots := len(roots.Roots) >= 1
		if !(hasActive && hasRoots) {
			r.Fatalf("/v1/agent/connect/ca/roots returned 200 but without roots: %+v", roots)
		}
	})
}
// WaitForSerfCheck ensures we have a node with serfHealth check registered
// Behavior mirrors testrpc.WaitForTestAgent but avoids the dependency cycle in api pkg
//
// Inside retry.Run it lists the catalog nodes, takes the first node's name,
// fetches that node's health checks, and fails the attempt unless one of
// them has CheckID "serfHealth".
func (s *TestServer) WaitForSerfCheck(t *testing.T) {
	retry.Run(t, func(r *retry.R) {
		// Query the API and check the status code.
		url := s.url("/v1/catalog/nodes?index=0")
		resp, err := s.masterGet(url)
		if err != nil {
			r.Fatal("failed http get", err)
		}
		defer resp.Body.Close()
		if err := s.requireOK(resp); err != nil {
			r.Fatal("failed OK response", err)
		}

		// Watch for the anti-entropy sync to finish.
		var nodes []map[string]interface{}
		if err := json.NewDecoder(resp.Body).Decode(&nodes); err != nil {
			r.Fatal(err)
		}
		if len(nodes) < 1 {
			r.Fatal("No nodes")
		}

		// Fail the attempt, instead of building a bogus URL, when the entry
		// carries no string node name.
		nodeName, ok := nodes[0]["Node"].(string)
		if !ok || nodeName == "" {
			r.Fatalf("catalog node entry has no usable Node name: %v", nodes[0])
		}

		// Ensure the serfHealth check is registered
		url = s.url(fmt.Sprintf("/v1/health/node/%s", nodeName))
		checksResp, err := s.masterGet(url)
		if err != nil {
			r.Fatal("failed http get", err)
		}
		defer checksResp.Body.Close()
		if err := s.requireOK(checksResp); err != nil {
			r.Fatal("failed OK response", err)
		}

		// Decode into a fresh slice: decoding into the node slice would
		// merge check fields into the existing node maps.
		var checks []map[string]interface{}
		if err := json.NewDecoder(checksResp.Body).Decode(&checks); err != nil {
			r.Fatal(err)
		}

		var found bool
		for _, check := range checks {
			// Comma-ok assertion: a missing or non-string CheckID must not
			// panic the test binary.
			if id, ok := check["CheckID"].(string); ok && id == "serfHealth" {
				found = true
				break
			}
		}
		if !found {
			r.Fatal("missing serfHealth registration")
		}
	})
}
// masterGet performs a GET request against url using the server's
// HTTPClient. When an ACL master token is configured it is sent in the
// x-consul-token header.
func (s *TestServer) masterGet(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	if token := s.Config.ACL.Tokens.Master; token != "" {
		req.Header.Set("x-consul-token", token)
	}

	return s.HTTPClient.Do(req)
}