From 4279bc8b344e4855ef9e2757bdcbaac8f341025c Mon Sep 17 00:00:00 2001 From: Michael Golowka <72365+pcman312@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:52:35 -0600 Subject: [PATCH] Validate hostnames when using TLS in Cassandra (#11365) --- builtin/logical/cassandra/backend_test.go | 17 +- changelog/11365.txt | 3 + go.mod | 2 +- go.sum | 6 +- .../testhelpers/cassandra/cassandrahelper.go | 98 +- physical/cassandra/cassandra.go | 34 +- physical/cassandra/cassandra_test.go | 4 +- plugins/database/cassandra/cassandra_test.go | 33 +- .../database/cassandra/connection_producer.go | 109 +- .../cassandra/connection_producer_test.go | 95 ++ .../test-fixtures/{ => no_tls}/cassandra.yaml | 0 .../test-fixtures/with_tls/.cassandra/cqlshrc | 3 + .../test-fixtures/with_tls/cassandra.yaml | 1279 +++++++++++++++++ .../test-fixtures/with_tls/gencert.sh | 46 + .../test-fixtures/with_tls/keystore.jks | 3 + vendor/github.com/gocql/gocql/.travis.yml | 6 +- vendor/github.com/gocql/gocql/AUTHORS | 5 + vendor/github.com/gocql/gocql/README.md | 72 +- vendor/github.com/gocql/gocql/conn.go | 61 +- .../github.com/gocql/gocql/connectionpool.go | 45 +- vendor/github.com/gocql/gocql/control.go | 8 +- vendor/github.com/gocql/gocql/doc.go | 317 +++- vendor/github.com/gocql/gocql/events.go | 82 +- vendor/github.com/gocql/gocql/frame.go | 59 +- vendor/github.com/gocql/gocql/helpers.go | 11 +- vendor/github.com/gocql/gocql/host_source.go | 51 +- .../gocql/gocql/install_test_deps.sh | 6 - vendor/github.com/gocql/gocql/marshal.go | 126 +- vendor/github.com/gocql/gocql/metadata.go | 32 +- vendor/github.com/gocql/gocql/policies.go | 88 +- .../github.com/gocql/gocql/prepared_cache.go | 12 - .../github.com/gocql/gocql/query_executor.go | 31 +- vendor/github.com/gocql/gocql/ring.go | 23 - vendor/github.com/gocql/gocql/session.go | 98 +- vendor/github.com/gocql/gocql/token.go | 2 +- vendor/github.com/gocql/gocql/topology.go | 40 +- vendor/github.com/gocql/gocql/uuid.go | 3 +- vendor/modules.txt | 2 +- 38 files changed, 2386 insertions(+), 526 deletions(-) create mode 100644 changelog/11365.txt create mode 100644 plugins/database/cassandra/connection_producer_test.go rename plugins/database/cassandra/test-fixtures/{ => no_tls}/cassandra.yaml (100%) create mode 100644 plugins/database/cassandra/test-fixtures/with_tls/.cassandra/cqlshrc create mode 100644 plugins/database/cassandra/test-fixtures/with_tls/cassandra.yaml create mode 100755 plugins/database/cassandra/test-fixtures/with_tls/gencert.sh create mode 100644 plugins/database/cassandra/test-fixtures/with_tls/keystore.jks diff --git a/builtin/logical/cassandra/backend_test.go b/builtin/logical/cassandra/backend_test.go index fcf8e02b7..1b76dfe6c 100644 --- a/builtin/logical/cassandra/backend_test.go +++ b/builtin/logical/cassandra/backend_test.go @@ -20,13 +20,18 @@ func TestBackend_basic(t *testing.T) { t.Fatal(err) } - cleanup, hostname := cassandra.PrepareTestContainer(t, "latest") + copyFromTo := map[string]string{ + "test-fixtures/cassandra.yaml": "/etc/cassandra/cassandra.yaml", + } + host, cleanup := cassandra.PrepareTestContainer(t, + cassandra.CopyFromTo(copyFromTo), + ) defer cleanup() logicaltest.Test(t, logicaltest.TestCase{ LogicalBackend: b, Steps: []logicaltest.TestStep{ - testAccStepConfig(t, hostname), + testAccStepConfig(t, host.ConnectionURL()), testAccStepRole(t), testAccStepReadCreds(t, "test"), }, @@ -41,13 +46,17 @@ func TestBackend_roleCrud(t *testing.T) { t.Fatal(err) } - cleanup, hostname := cassandra.PrepareTestContainer(t, "latest") + 
copyFromTo := map[string]string{ + "test-fixtures/cassandra.yaml": "/etc/cassandra/cassandra.yaml", + } + host, cleanup := cassandra.PrepareTestContainer(t, + cassandra.CopyFromTo(copyFromTo)) defer cleanup() logicaltest.Test(t, logicaltest.TestCase{ LogicalBackend: b, Steps: []logicaltest.TestStep{ - testAccStepConfig(t, hostname), + testAccStepConfig(t, host.ConnectionURL()), testAccStepRole(t), testAccStepRoleWithOptions(t), testAccStepReadRole(t, "test", testRole), diff --git a/changelog/11365.txt b/changelog/11365.txt new file mode 100644 index 000000000..cf99d5290 --- /dev/null +++ b/changelog/11365.txt @@ -0,0 +1,3 @@ +```release-note:bug +secrets/database/cassandra: Fixed issue where hostnames were not being validated when using TLS +``` diff --git a/go.mod b/go.mod index 7ed935a1b..e92bf2ab5 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/go-ole/go-ole v1.2.4 // indirect github.com/go-sql-driver/mysql v1.5.0 github.com/go-test/deep v1.0.7 - github.com/gocql/gocql v0.0.0-20200624222514-34081eda590e + github.com/gocql/gocql v0.0.0-20210401103645-80ab1e13e309 github.com/golang/protobuf v1.4.2 github.com/google/go-github v17.0.0+incompatible github.com/google/go-metrics-stackdriver v0.2.0 diff --git a/go.sum b/go.sum index 61af54353..08b2f517a 100644 --- a/go.sum +++ b/go.sum @@ -442,8 +442,8 @@ github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWe github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ= github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0= github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw= -github.com/gocql/gocql v0.0.0-20200624222514-34081eda590e h1:SroDcndcOU9BVAduPf/PXihXoR2ZYTQYLXbupbqxAyQ= -github.com/gocql/gocql v0.0.0-20200624222514-34081eda590e/go.mod h1:DL0ekTmBSTdlNF25Orwt/JMzqIq3EJ4MVa/J/uK64OY= +github.com/gocql/gocql v0.0.0-20210401103645-80ab1e13e309 h1:8MHuCGYDXh0skFrLumkCMlt9C29hxhqNx39+Haemeqw= +github.com/gocql/gocql v0.0.0-20210401103645-80ab1e13e309/go.mod h1:DL0ekTmBSTdlNF25Orwt/JMzqIq3EJ4MVa/J/uK64OY= github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= @@ -1084,6 +1084,7 @@ github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6So github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.6.2 h1:aIihoIOHCiLZHxyoNQ+ABL4NKhFTgKLBdMLyEAh98m0= github.com/rogpeppe/go-internal v1.6.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rs/zerolog v1.4.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= @@ -1631,6 +1632,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/cheggaaa/pb.v1 
v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= +gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= diff --git a/helper/testhelpers/cassandra/cassandrahelper.go b/helper/testhelpers/cassandra/cassandrahelper.go index 2e7805d16..8c03b5ec5 100644 --- a/helper/testhelpers/cassandra/cassandrahelper.go +++ b/helper/testhelpers/cassandra/cassandrahelper.go @@ -2,9 +2,10 @@ package cassandra import ( "context" - "errors" "fmt" + "net" "os" + "path/filepath" "testing" "time" @@ -12,33 +13,75 @@ import ( "github.com/hashicorp/vault/helper/testhelpers/docker" ) -func PrepareTestContainer(t *testing.T, version string) (func(), string) { +type containerConfig struct { + version string + copyFromTo map[string]string + sslOpts *gocql.SslOptions +} + +type ContainerOpt func(*containerConfig) + +func Version(version string) ContainerOpt { + return func(cfg *containerConfig) { + cfg.version = version + } +} + +func CopyFromTo(copyFromTo map[string]string) ContainerOpt { + return func(cfg *containerConfig) { + cfg.copyFromTo = copyFromTo + } +} + +func SslOpts(sslOpts *gocql.SslOptions) ContainerOpt { + return func(cfg *containerConfig) { + cfg.sslOpts = sslOpts + } +} + +type Host struct { + Name string + Port string +} + +func (h Host) ConnectionURL() string { + return net.JoinHostPort(h.Name, h.Port) +} + +func PrepareTestContainer(t *testing.T, opts ...ContainerOpt) (Host, func()) { t.Helper() if os.Getenv("CASSANDRA_HOSTS") != "" { - return func() {}, os.Getenv("CASSANDRA_HOSTS") + host, port, err := net.SplitHostPort(os.Getenv("CASSANDRA_HOSTS")) + if err != nil { + t.Fatalf("Failed to split host & port from CASSANDRA_HOSTS (%s): %s", os.Getenv("CASSANDRA_HOSTS"), err) + } + h := Host{ + Name: host, + Port: port, + } + return h, func() {} } - if version == "" { - version = "3.11" + containerCfg := &containerConfig{ + version: "3.11", } - var copyFromTo map[string]string - cwd, _ := os.Getwd() - fixturePath := fmt.Sprintf("%s/test-fixtures/", cwd) - if _, err := os.Stat(fixturePath); err != nil { - if !errors.Is(err, os.ErrNotExist) { - // If it doesn't exist, no biggie - t.Fatal(err) - } - } else { - copyFromTo = map[string]string{ - fixturePath: "/etc/cassandra", + for _, opt := range opts { + opt(containerCfg) + } + + copyFromTo := map[string]string{} + for from, to := range containerCfg.copyFromTo { + absFrom, err := filepath.Abs(from) + if err != nil { + t.Fatalf("Unable to get absolute path for file %s", from) } + copyFromTo[absFrom] = to } runner, err := docker.NewServiceRunner(docker.RunOptions{ ImageRepo: "cassandra", - ImageTag: version, + ImageTag: containerCfg.version, Ports: []string{"9042/tcp"}, CopyFromTo: copyFromTo, Env: []string{"CASSANDRA_BROADCAST_ADDRESS=127.0.0.1"}, @@ -58,6 +101,8 @@ func PrepareTestContainer(t *testing.T, version string) (func(), string) { clusterConfig.ProtoVersion = 4 clusterConfig.Port = port + clusterConfig.SslOpts = containerCfg.sslOpts + session, err := clusterConfig.CreateSession() if err != nil { return nil, fmt.Errorf("error creating session: %s", err) @@ -65,19 +110,19 @@ func PrepareTestContainer(t *testing.T, version string) (func(), string) { defer session.Close() // Create keyspace - q := session.Query(`CREATE KEYSPACE "vault" WITH REPLICATION = { 'class' : 'SimpleStrategy', 
'replication_factor' : 1 };`) - if err := q.Exec(); err != nil { + query := session.Query(`CREATE KEYSPACE "vault" WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };`) + if err := query.Exec(); err != nil { t.Fatalf("could not create cassandra keyspace: %v", err) } // Create table - q = session.Query(`CREATE TABLE "vault"."entries" ( + query = session.Query(`CREATE TABLE "vault"."entries" ( bucket text, key text, value blob, PRIMARY KEY (bucket, key) ) WITH CLUSTERING ORDER BY (key ASC);`) - if err := q.Exec(); err != nil { + if err := query.Exec(); err != nil { t.Fatalf("could not create cassandra table: %v", err) } return cfg, nil @@ -85,5 +130,14 @@ func PrepareTestContainer(t *testing.T, version string) (func(), string) { if err != nil { t.Fatalf("Could not start docker cassandra: %s", err) } - return svc.Cleanup, svc.Config.Address() + + host, port, err := net.SplitHostPort(svc.Config.Address()) + if err != nil { + t.Fatalf("Failed to split host & port from address (%s): %s", svc.Config.Address(), err) + } + h := Host{ + Name: host, + Port: port, + } + return h, svc.Cleanup } diff --git a/physical/cassandra/cassandra.go b/physical/cassandra/cassandra.go index 9a5ea13b7..93c5721eb 100644 --- a/physical/cassandra/cassandra.go +++ b/physical/cassandra/cassandra.go @@ -10,11 +10,10 @@ import ( "strings" "time" - "github.com/hashicorp/errwrap" - log "github.com/hashicorp/go-hclog" - metrics "github.com/armon/go-metrics" "github.com/gocql/gocql" + "github.com/hashicorp/errwrap" + log "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/sdk/helper/certutil" "github.com/hashicorp/vault/sdk/physical" ) @@ -180,20 +179,18 @@ func setupCassandraTLS(conf map[string]string, cluster *gocql.ClusterConfig) err if err != nil { return err } - } else { - if pemJSONPath, ok := conf["pem_json_file"]; ok { - pemJSONData, err := ioutil.ReadFile(pemJSONPath) - if err != nil { - return errwrap.Wrapf(fmt.Sprintf("error reading json bundle from %q: {{err}}", pemJSONPath), err) - } - pemJSON, err := certutil.ParsePKIJSON([]byte(pemJSONData)) - if err != nil { - return err - } - tlsConfig, err = pemJSON.GetTLSConfig(certutil.TLSClient) - if err != nil { - return err - } + } else if pemJSONPath, ok := conf["pem_json_file"]; ok { + pemJSONData, err := ioutil.ReadFile(pemJSONPath) + if err != nil { + return errwrap.Wrapf(fmt.Sprintf("error reading json bundle from %q: {{err}}", pemJSONPath), err) + } + pemJSON, err := certutil.ParsePKIJSON([]byte(pemJSONData)) + if err != nil { + return err + } + tlsConfig, err = pemJSON.GetTLSConfig(certutil.TLSClient) + if err != nil { + return err } } @@ -225,7 +222,8 @@ func setupCassandraTLS(conf map[string]string, cluster *gocql.ClusterConfig) err } cluster.SslOpts = &gocql.SslOptions{ - Config: tlsConfig.Clone(), + Config: tlsConfig, + EnableHostVerification: !tlsConfig.InsecureSkipVerify, } return nil } diff --git a/physical/cassandra/cassandra_test.go b/physical/cassandra/cassandra_test.go index 12469889f..ea1e4e129 100644 --- a/physical/cassandra/cassandra_test.go +++ b/physical/cassandra/cassandra_test.go @@ -19,13 +19,13 @@ func TestCassandraBackend(t *testing.T) { t.Skip("skipping race test in CI pending https://github.com/gocql/gocql/pull/1474") } - cleanup, hosts := cassandra.PrepareTestContainer(t, "") + host, cleanup := cassandra.PrepareTestContainer(t) defer cleanup() // Run vault tests logger := logging.NewVaultLogger(log.Debug) b, err := NewCassandraBackend(map[string]string{ - "hosts": hosts, + "hosts": host.ConnectionURL(), 
"protocol_version": "3", }, logger) if err != nil { diff --git a/plugins/database/cassandra/cassandra_test.go b/plugins/database/cassandra/cassandra_test.go index 1c5282f36..197eeb4bc 100644 --- a/plugins/database/cassandra/cassandra_test.go +++ b/plugins/database/cassandra/cassandra_test.go @@ -3,7 +3,6 @@ package cassandra import ( "context" "reflect" - "strings" "testing" "time" @@ -17,14 +16,16 @@ import ( ) func getCassandra(t *testing.T, protocolVersion interface{}) (*Cassandra, func()) { - cleanup, connURL := cassandra.PrepareTestContainer(t, "latest") - pieces := strings.Split(connURL, ":") + host, cleanup := cassandra.PrepareTestContainer(t, + cassandra.Version("latest"), + cassandra.CopyFromTo(insecureFileMounts), + ) db := new() initReq := dbplugin.InitializeRequest{ Config: map[string]interface{}{ - "hosts": connURL, - "port": pieces[1], + "hosts": host.ConnectionURL(), + "port": host.Port, "username": "cassandra", "password": "cassandra", "protocol_version": protocolVersion, @@ -34,8 +35,8 @@ func getCassandra(t *testing.T, protocolVersion interface{}) (*Cassandra, func() } expectedConfig := map[string]interface{}{ - "hosts": connURL, - "port": pieces[1], + "hosts": host.ConnectionURL(), + "port": host.Port, "username": "cassandra", "password": "cassandra", "protocol_version": protocolVersion, @@ -53,7 +54,7 @@ func getCassandra(t *testing.T, protocolVersion interface{}) (*Cassandra, func() return db, cleanup } -func TestCassandra_Initialize(t *testing.T) { +func TestInitialize(t *testing.T) { db, cleanup := getCassandra(t, 4) defer cleanup() @@ -66,7 +67,7 @@ func TestCassandra_Initialize(t *testing.T) { defer cleanup() } -func TestCassandra_CreateUser(t *testing.T) { +func TestCreateUser(t *testing.T) { type testCase struct { // Config will have the hosts & port added to it during the test config map[string]interface{} @@ -126,15 +127,17 @@ func TestCassandra_CreateUser(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - cleanup, connURL := cassandra.PrepareTestContainer(t, "latest") - pieces := strings.Split(connURL, ":") + host, cleanup := cassandra.PrepareTestContainer(t, + cassandra.Version("latest"), + cassandra.CopyFromTo(insecureFileMounts), + ) defer cleanup() db := new() config := test.config - config["hosts"] = connURL - config["port"] = pieces[1] + config["hosts"] = host.ConnectionURL() + config["port"] = host.Port initReq := dbplugin.InitializeRequest{ Config: config, @@ -162,7 +165,7 @@ func TestCassandra_CreateUser(t *testing.T) { } } -func TestMyCassandra_UpdateUserPassword(t *testing.T) { +func TestUpdateUserPassword(t *testing.T) { db, cleanup := getCassandra(t, 4) defer cleanup() @@ -198,7 +201,7 @@ func TestMyCassandra_UpdateUserPassword(t *testing.T) { assertCreds(t, db.Hosts, db.Port, createResp.Username, newPassword, 5*time.Second) } -func TestCassandra_DeleteUser(t *testing.T) { +func TestDeleteUser(t *testing.T) { db, cleanup := getCassandra(t, 4) defer cleanup() diff --git a/plugins/database/cassandra/connection_producer.go b/plugins/database/cassandra/connection_producer.go index cd79ef24e..4e24ff1d3 100644 --- a/plugins/database/cassandra/connection_producer.go +++ b/plugins/database/cassandra/connection_producer.go @@ -8,14 +8,13 @@ import ( "sync" "time" + "github.com/gocql/gocql" + dbplugin "github.com/hashicorp/vault/sdk/database/dbplugin/v5" "github.com/hashicorp/vault/sdk/database/helper/connutil" "github.com/hashicorp/vault/sdk/database/helper/dbutil" "github.com/hashicorp/vault/sdk/helper/certutil" 
"github.com/hashicorp/vault/sdk/helper/parseutil" "github.com/hashicorp/vault/sdk/helper/tlsutil" - - "github.com/gocql/gocql" - dbplugin "github.com/hashicorp/vault/sdk/database/dbplugin/v5" "github.com/mitchellh/mapstructure" ) @@ -40,9 +39,7 @@ type cassandraConnectionProducer struct { connectTimeout time.Duration socketKeepAlive time.Duration - certificate string - privateKey string - issuingCA string + certBundle *certutil.CertBundle rawConfig map[string]interface{} Initialized bool @@ -99,9 +96,7 @@ func (c *cassandraConnectionProducer) Initialize(ctx context.Context, req dbplug if err != nil { return fmt.Errorf("error marshaling PEM information: %w", err) } - c.certificate = certBundle.Certificate - c.privateKey = certBundle.PrivateKey - c.issuingCA = certBundle.IssuingCA + c.certBundle = certBundle c.TLS = true case len(c.PemBundle) != 0: @@ -113,9 +108,11 @@ func (c *cassandraConnectionProducer) Initialize(ctx context.Context, req dbplug if err != nil { return fmt.Errorf("error marshaling PEM information: %w", err) } - c.certificate = certBundle.Certificate - c.privateKey = certBundle.PrivateKey - c.issuingCA = certBundle.IssuingCA + c.certBundle = certBundle + c.TLS = true + } + + if c.InsecureTLS { c.TLS = true } @@ -185,49 +182,13 @@ func (c *cassandraConnectionProducer) createSession(ctx context.Context) (*gocql clusterConfig.Timeout = c.connectTimeout clusterConfig.SocketKeepalive = c.socketKeepAlive + if c.TLS { - var tlsConfig *tls.Config - if len(c.certificate) > 0 || len(c.issuingCA) > 0 { - if len(c.certificate) > 0 && len(c.privateKey) == 0 { - return nil, fmt.Errorf("found certificate for TLS authentication but no private key") - } - - certBundle := &certutil.CertBundle{} - if len(c.certificate) > 0 { - certBundle.Certificate = c.certificate - certBundle.PrivateKey = c.privateKey - } - if len(c.issuingCA) > 0 { - certBundle.IssuingCA = c.issuingCA - } - - parsedCertBundle, err := certBundle.ToParsedCertBundle() - if err != nil { - return nil, fmt.Errorf("failed to parse certificate bundle: %w", err) - } - - tlsConfig, err = parsedCertBundle.GetTLSConfig(certutil.TLSClient) - if err != nil || tlsConfig == nil { - return nil, fmt.Errorf("failed to get TLS configuration: tlsConfig:%#v err:%w", tlsConfig, err) - } - tlsConfig.InsecureSkipVerify = c.InsecureTLS - - if c.TLSMinVersion != "" { - var ok bool - tlsConfig.MinVersion, ok = tlsutil.TLSLookup[c.TLSMinVersion] - if !ok { - return nil, fmt.Errorf("invalid 'tls_min_version' in config") - } - } else { - // MinVersion was not being set earlier. Reset it to - // zero to gracefully handle upgrades. 
- tlsConfig.MinVersion = 0 - } - } - - clusterConfig.SslOpts = &gocql.SslOptions{ - Config: tlsConfig, + sslOpts, err := getSslOpts(c.certBundle, c.TLSMinVersion, c.InsecureTLS) + if err != nil { + return nil, err } + clusterConfig.SslOpts = sslOpts } if c.LocalDatacenter != "" { @@ -269,6 +230,48 @@ func (c *cassandraConnectionProducer) createSession(ctx context.Context) (*gocql return session, nil } +func getSslOpts(certBundle *certutil.CertBundle, minTLSVersion string, insecureSkipVerify bool) (*gocql.SslOptions, error) { + tlsConfig := &tls.Config{} + if certBundle != nil { + if certBundle.Certificate == "" && certBundle.PrivateKey != "" { + return nil, fmt.Errorf("found private key for TLS authentication but no certificate") + } + if certBundle.Certificate != "" && certBundle.PrivateKey == "" { + return nil, fmt.Errorf("found certificate for TLS authentication but no private key") + } + + parsedCertBundle, err := certBundle.ToParsedCertBundle() + if err != nil { + return nil, fmt.Errorf("failed to parse certificate bundle: %w", err) + } + + tlsConfig, err = parsedCertBundle.GetTLSConfig(certutil.TLSClient) + if err != nil { + return nil, fmt.Errorf("failed to get TLS configuration: tlsConfig:%#v err:%w", tlsConfig, err) + } + } + + tlsConfig.InsecureSkipVerify = insecureSkipVerify + + if minTLSVersion != "" { + var ok bool + tlsConfig.MinVersion, ok = tlsutil.TLSLookup[minTLSVersion] + if !ok { + return nil, fmt.Errorf("invalid 'tls_min_version' in config") + } + } else { + // MinVersion was not being set earlier. Reset it to + // zero to gracefully handle upgrades. + tlsConfig.MinVersion = 0 + } + + opts := &gocql.SslOptions{ + Config: tlsConfig, + EnableHostVerification: !insecureSkipVerify, + } + return opts, nil +} + func (c *cassandraConnectionProducer) secretValues() map[string]string { return map[string]string{ c.Password: "[password]", diff --git a/plugins/database/cassandra/connection_producer_test.go b/plugins/database/cassandra/connection_producer_test.go new file mode 100644 index 000000000..3ddfe4097 --- /dev/null +++ b/plugins/database/cassandra/connection_producer_test.go @@ -0,0 +1,95 @@ +package cassandra + +import ( + "context" + "crypto/tls" + "testing" + "time" + + "github.com/gocql/gocql" + "github.com/hashicorp/vault/helper/testhelpers/cassandra" + "github.com/hashicorp/vault/sdk/database/dbplugin/v5" +) + +var ( + insecureFileMounts = map[string]string{ + "test-fixtures/no_tls/cassandra.yaml": "/etc/cassandra/cassandra.yaml", + } + secureFileMounts = map[string]string{ + "test-fixtures/with_tls/cassandra.yaml": "/etc/cassandra/cassandra.yaml", + "test-fixtures/with_tls/keystore.jks": "/etc/cassandra/keystore.jks", + "test-fixtures/with_tls/.cassandra": "/root/.cassandra/", + } +) + +func TestTLSConnection(t *testing.T) { + type testCase struct { + config map[string]interface{} + expectErr bool + } + + tests := map[string]testCase{ + "tls not specified": { + config: map[string]interface{}{}, + expectErr: true, + }, + "unrecognized certificate": { + config: map[string]interface{}{ + "tls": "true", + }, + expectErr: true, + }, + "insecure TLS": { + config: map[string]interface{}{ + "tls": "true", + "insecure_tls": true, + }, + expectErr: false, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + host, cleanup := cassandra.PrepareTestContainer(t, + cassandra.Version("3.11.9"), + cassandra.CopyFromTo(secureFileMounts), + cassandra.SslOpts(&gocql.SslOptions{ + Config: &tls.Config{InsecureSkipVerify: true}, + EnableHostVerification: false, 
+ }), + ) + defer cleanup() + + // Set values that we don't know until the cassandra container is started + config := map[string]interface{}{ + "hosts": host.ConnectionURL(), + "port": host.Port, + "username": "cassandra", + "password": "cassandra", + "protocol_version": "3", + "connect_timeout": "20s", + } + // Then add any values specified in the test config. Generally for these tests they shouldn't overlap + for k, v := range test.config { + config[k] = v + } + + db := new() + initReq := dbplugin.InitializeRequest{ + Config: config, + VerifyConnection: true, + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + _, err := db.Initialize(ctx, initReq) + if test.expectErr && err == nil { + t.Fatalf("err expected, got nil") + } + if !test.expectErr && err != nil { + t.Fatalf("no error expected, got: %s", err) + } + }) + } +} diff --git a/plugins/database/cassandra/test-fixtures/cassandra.yaml b/plugins/database/cassandra/test-fixtures/no_tls/cassandra.yaml similarity index 100% rename from plugins/database/cassandra/test-fixtures/cassandra.yaml rename to plugins/database/cassandra/test-fixtures/no_tls/cassandra.yaml diff --git a/plugins/database/cassandra/test-fixtures/with_tls/.cassandra/cqlshrc b/plugins/database/cassandra/test-fixtures/with_tls/.cassandra/cqlshrc new file mode 100644 index 000000000..6a226e4b6 --- /dev/null +++ b/plugins/database/cassandra/test-fixtures/with_tls/.cassandra/cqlshrc @@ -0,0 +1,3 @@ +[ssl] +validate = false +version = SSLv23 diff --git a/plugins/database/cassandra/test-fixtures/with_tls/cassandra.yaml b/plugins/database/cassandra/test-fixtures/with_tls/cassandra.yaml new file mode 100644 index 000000000..1ce720f18 --- /dev/null +++ b/plugins/database/cassandra/test-fixtures/with_tls/cassandra.yaml @@ -0,0 +1,1279 @@ +# Cassandra storage config YAML + +# NOTE: +# See http://wiki.apache.org/cassandra/StorageConfiguration for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: 'Test Cluster' + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# If you already have a cluster with 1 token per node, and wish to migrate to +# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations +num_tokens: 256 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replication strategy used by the specified +# keyspace. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. +# allocate_tokens_for_keyspace: KEYSPACE + +# initial_token allows you to specify tokens manually. 
While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# See http://wiki.apache.org/cassandra/HintedHandoff +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: PasswordAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: CassandraAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. 
+# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. You should leave this +# alone for new clusters. The partitioner can NOT be changed without +# reloading all data, so when upgrading you should set this to the +# same partitioner you were already using. 
+# +# Besides Murmur3Partitioner, partitioners included for backwards +# compatibility include RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. Cassandra +# will spread data evenly across them, subject to the granularity of +# the configured compaction strategy. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down gossip and Thrift and kill the JVM, so the node can be replaced. +# +# stop +# shut down gossip and Thrift, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. 
+# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the Thrift prepared statement cache +# +# If you do not use Thrift at all, it is safe to leave this value at "auto". +# +# See description of 'prepared_statements_cache_size_mb' above for more information. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +thrift_prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. 
With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# commitlog_sync may be either "periodic" or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been fsynced to disk. It will wait +# commitlog_sync_batch_window_in_ms milliseconds between fsyncs. +# This window should be kept short because the writer threads will +# be unable to do extra work while waiting. (You may need to increase +# concurrent_writes for the same reason.) +# +# commitlog_sync: batch +# commitlog_sync_batch_window_in_ms: 2 +# +# the other option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. 
+ # Ex: ",," + - seeds: "127.0.0.1" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used as an +# cache that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limits the maximum Merkle tree depth to avoid consuming too much +# memory during repairs. +# +# The default setting of 18 generates trees of maximum size around +# 50 MiB / tree. 
If you are running out of memory during repairs consider +# lowering this to 15 (~6 MiB / tree) or lower, but try not to lower it +# too much past that or you will lose too much resolution and stream +# too much redundant data during repair. Cannot be set lower than 10. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_max_tree_depth: 18 + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. 
In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for encrypted communication. Unused unless enabled in +# encryption_options +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: localhost + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +# broadcast_address: 1.2.3.4 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# Please note that the address on which the native transport is bound is the +# same as the rpc_address. The port however is different and specified below. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. 
+native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests when the native transport is used. +# This is similar to rpc_max_threads though the default differs slightly (and +# there is no native_transport_min_threads, idle threads will always be stopped +# after 30 seconds). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Whether to start the thrift rpc server. +start_rpc: false + +# The address or interface to bind the Thrift RPC service and native transport +# server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: localhost + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# port for Thrift to listen for clients on +rpc_port: 9160 + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +# broadcast_rpc_address: 1.2.3.4 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Cassandra provides two out-of-the-box options for the RPC Server: +# +# sync +# One thread per thrift connection. For a very large number of clients, memory +# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size +# per thread, and that will correspond to your use of virtual memory (but physical memory +# may be limited depending on use of stack space). 
+# +# hsha +# Stands for "half synchronous, half asynchronous." All thrift clients are handled +# asynchronously using a small number of threads that does not vary with the amount +# of thrift clients (and thus scales well to many clients). The rpc requests are still +# synchronous (one thread per active request). If hsha is selected then it is essential +# that rpc_max_threads is changed from the default value of unlimited. +# +# The default is sync because on Windows hsha is about 30% slower. On Linux, +# sync/hsha performance is about the same, with hsha of course using less memory. +# +# Alternatively, can provide your own RPC server by providing the fully-qualified class name +# of an o.a.c.t.TServerFactory that can create an instance of it. +rpc_server_type: sync + +# Uncomment rpc_min|max_thread to set request pool size limits. +# +# Regardless of your choice of RPC server (see above), the number of maximum requests in the +# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync +# RPC server, it also dictates the number of clients that can be connected at all). +# +# The default is unlimited and thus provides no protection against clients overwhelming the server. You are +# encouraged to set a maximum that makes sense for you in production, but do keep in mind that +# rpc_max_threads represents the maximum number of client requests this server may execute concurrently. +# +# rpc_min_threads: 16 +# rpc_max_threads: 2048 + +# uncomment to set socket buffer sizes on rpc connections +# rpc_send_buff_size_in_bytes: +# rpc_recv_buff_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_send_buff_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_recv_buff_size_in_bytes: + +# Frame size for thrift (maximum message length). +thrift_framed_transport_size_in_mb: 15 + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. 
The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows within the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compaction. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction. +compaction_throughput_mb_per_sec: 16 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# How long the coordinator should wait for read operations to complete +read_request_timeout_in_ms: 5000 +# How long the coordinator should wait for seq or index scans to complete +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete +write_request_timeout_in_ms: 2000 +# How long the coordinator should wait for counter writes to complete +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations +request_timeout_in_ms: 10000 + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: before enabling this property make sure ntp is installed +# and the times are synchronized between the nodes. +cross_node_timeout: false + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fails +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS).
From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: SimpleSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero and read_repair_chance is < 1.0, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 0.1 + +# request_scheduler -- Set this to a class that implements +# RequestScheduler, which will schedule incoming client requests +# according to the specific policy. This is useful for multi-tenancy +# with a single Cassandra cluster. +# NOTE: This is specifically for requests from the client and does +# not affect inter node communication. +# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place +# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of +# client requests to a node with a separate queue for each +# request_scheduler_id. 
The scheduler is further customized by +# request_scheduler_options as described below. +request_scheduler: org.apache.cassandra.scheduler.NoScheduler + +# Scheduler Options vary based on the type of scheduler +# +# NoScheduler +# Has no options +# +# RoundRobin +# throttle_limit +# The throttle_limit is the number of in-flight +# requests per client. Requests beyond +# that limit are queued up until +# running requests can complete. +# The value of 80 here is twice the number of +# concurrent_reads + concurrent_writes. +# default_weight +# default_weight is optional and allows for +# overriding the default which is 1. +# weights +# Weights are optional and will default to 1 or the +# overridden default_weight. The weight translates into how +# many requests are handled during each turn of the +# RoundRobin, based on the scheduler id. +# +# request_scheduler_options: +# throttle_limit: 80 +# default_weight: 5 +# weights: +# Keyspace1: 1 +# Keyspace2: 5 + +# request_scheduler_id -- An identifier based on which to perform +# the request scheduling. Currently the only valid option is keyspace. +# request_scheduler_id: keyspace + +# Enable or disable inter-node encryption +# JVM defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# *NOTE* No custom encryption options are enabled at the moment +# The available internode options are : all, none, dc, rack +# +# If set to dc cassandra will encrypt the traffic between the DCs +# If set to rack cassandra will encrypt the traffic between the racks +# +# The passwords used in these options must match the passwords used when generating +# the keystore and truststore. For instructions on generating these files, see: +# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# +server_encryption_options: + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + # require_client_auth: false + # require_endpoint_verification: false + +# enable or disable client/server encryption. +client_encryption_options: + enabled: true + # If enabled and optional is set to true encrypted and unencrypted connections are handled. 
+ optional: false + keystore: /etc/cassandra/keystore.jks + keystore_password: cassandra + # require_client_auth: false + # Set truststore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt operations; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exhaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement +# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level +gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Back-pressure settings # +# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation +# sent to replicas, with the aim of reducing pressure on overloaded replicas. +back_pressure_enabled: false +# The back-pressure strategy applied. +# The default implementation, RateBasedBackPressure, takes three arguments: +# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. +# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor; +# if above high ratio, the rate limiting is increased by the given factor; +# such factor is usually best configured between 1 and 10, use larger values for a faster recovery +# at the expense of potentially more dropped mutations; +# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica, +# if SLOW at the speed of the slowest one. +# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and +# provide a public constructor accepting a Map. +back_pressure_strategy: + - class_name: org.apache.cassandra.net.RateBasedBackPressure + parameters: + - high_ratio: 0.90 + factor: 5 + flow: FAST + +# Coalescing Strategies # +# Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). +# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in +# virtualized environments, the point at which an application can be bound by network packet processing can be +# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal +# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process +# is sufficient for many applications such that no load starvation is experienced even without coalescing. +# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages +# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one +# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching +# and increasing cache friendliness of network message processing. +# See CASSANDRA-8692 for details. + +# Strategy to use for coalescing messages in OutboundTcpConnection. +# Can be fixed, movingaverage, timehorizon, disabled (default). 
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. +# otc_coalescing_strategy: DISABLED + +# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first +# message is received before it will be sent with any accompanying messages. For moving average this is the +# maximum amount of time that will be waited as well as the interval at which messages must arrive on average +# for coalescing to be enabled. +# otc_coalescing_window_us: 200 + +# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128. +# otc_coalescing_enough_coalesced_messages: 8 + +# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection. +# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory +# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value +# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU +# time and queue contention while iterating the backlog of messages. +# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions. +# +# otc_backlog_expiration_interval_ms: 200 + + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: true diff --git a/plugins/database/cassandra/test-fixtures/with_tls/gencert.sh b/plugins/database/cassandra/test-fixtures/with_tls/gencert.sh new file mode 100755 index 000000000..2aad4b049 --- /dev/null +++ b/plugins/database/cassandra/test-fixtures/with_tls/gencert.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +################################################################ +# Usage: ./gencert.sh +# +# Generates a keystore.jks file that can be used with a +# Cassandra server for TLS connections. This does not update +# a cassandra config file. +################################################################ + +set -e + +KEYFILE="key.pem" +CERTFILE="cert.pem" +PKCSFILE="keystore.p12" +JKSFILE="keystore.jks" + +HOST="127.0.0.1" +NAME="cassandra" +ALIAS="cassandra" +PASSWORD="cassandra" + +echo "# Generating certificate keypair..." +go run /usr/local/go/src/crypto/tls/generate_cert.go --host=${HOST} + +echo "# Creating keystore..." 
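+# The next two steps are a standard PKCS#12 -> JKS conversion: openssl bundles
+# cert.pem and key.pem into keystore.p12, and keytool below imports that bundle
+# into the JKS keystore format that Cassandra's encryption options expect.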
+openssl pkcs12 -export -in ${CERTFILE} -inkey ${KEYFILE} -name ${NAME} -password pass:${PASSWORD} > ${PKCSFILE} + +echo "# Creating Java key store" +if [ -e "${JKSFILE}" ]; then + echo "# Removing old key store" + rm ${JKSFILE} +fi + +set +e +keytool -importkeystore \ + -srckeystore ${PKCSFILE} \ + -srcstoretype PKCS12 \ + -srcstorepass ${PASSWORD} \ + -destkeystore ${JKSFILE} \ + -deststorepass ${PASSWORD} \ + -destkeypass ${PASSWORD} \ + -alias ${ALIAS} + +echo "# Removing intermediate files" +rm ${KEYFILE} ${CERTFILE} ${PKCSFILE} diff --git a/plugins/database/cassandra/test-fixtures/with_tls/keystore.jks b/plugins/database/cassandra/test-fixtures/with_tls/keystore.jks new file mode 100644 index 000000000..6a42fe8c7 --- /dev/null +++ b/plugins/database/cassandra/test-fixtures/with_tls/keystore.jks @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28194cee8064e120205dd4ace2aad535f470baeef2e2f0847eb249346ee239ec +size 2469 diff --git a/vendor/github.com/gocql/gocql/.travis.yml b/vendor/github.com/gocql/gocql/.travis.yml index e1e9efd34..5ccd93871 100644 --- a/vendor/github.com/gocql/gocql/.travis.yml +++ b/vendor/github.com/gocql/gocql/.travis.yml @@ -31,8 +31,10 @@ env: AUTH=false go: - - 1.13.x - - 1.14.x + - 1.15.x + - 1.16.x + +go_import_path: github.com/gocql/gocql install: - ./install_test_deps.sh $TRAVIS_REPO_SLUG diff --git a/vendor/github.com/gocql/gocql/AUTHORS b/vendor/github.com/gocql/gocql/AUTHORS index e908b94ad..bf28c1209 100644 --- a/vendor/github.com/gocql/gocql/AUTHORS +++ b/vendor/github.com/gocql/gocql/AUTHORS @@ -115,3 +115,8 @@ Pavel Buchinchik Rintaro Okamura Yura Sokolov ; Jorge Bay +Dmitriy Kozlov +Alexey Romanovsky +Jaume Marhuenda Beltran +Piotr Dulikowski +Árni Dagur \ No newline at end of file diff --git a/vendor/github.com/gocql/gocql/README.md b/vendor/github.com/gocql/gocql/README.md index e5ebd3f4b..2600d55d7 100644 --- a/vendor/github.com/gocql/gocql/README.md +++ b/vendor/github.com/gocql/gocql/README.md @@ -19,8 +19,8 @@ The following matrix shows the versions of Go and Cassandra that are tested with Go/Cassandra | 2.1.x | 2.2.x | 3.x.x -------------| -------| ------| --------- -1.13 | yes | yes | yes -1.14 | yes | yes | yes +1.15 | yes | yes | yes +1.16 | yes | yes | yes Gocql has been tested in production against many different versions of Cassandra. Due to limits in our CI setup we only test against the latest 3 major releases, which coincide with the official support from the Apache project. @@ -114,73 +114,7 @@ statement. Example ------- -```go -/* Before you execute the program, Launch `cqlsh` and execute: -create keyspace example with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }; -create table example.tweet(timeline text, id UUID, text text, PRIMARY KEY(id)); -create index on example.tweet(timeline); -*/ -package main - -import ( - "fmt" - "log" - - "github.com/gocql/gocql" -) - -func main() { - // connect to the cluster - cluster := gocql.NewCluster("192.168.1.1", "192.168.1.2", "192.168.1.3") - cluster.Keyspace = "example" - cluster.Consistency = gocql.Quorum - session, _ := cluster.CreateSession() - defer session.Close() - - // insert a tweet - if err := session.Query(`INSERT INTO tweet (timeline, id, text) VALUES (?, ?, ?)`, - "me", gocql.TimeUUID(), "hello world").Exec(); err != nil { - log.Fatal(err) - } - - var id gocql.UUID - var text string - - /* Search for a specific set of records whose 'timeline' column matches - * the value 'me'. 
The secondary index that we created earlier will be - * used for optimizing the search */ - if err := session.Query(`SELECT id, text FROM tweet WHERE timeline = ? LIMIT 1`, - "me").Consistency(gocql.One).Scan(&id, &text); err != nil { - log.Fatal(err) - } - fmt.Println("Tweet:", id, text) - - // list all tweets - iter := session.Query(`SELECT id, text FROM tweet WHERE timeline = ?`, "me").Iter() - for iter.Scan(&id, &text) { - fmt.Println("Tweet:", id, text) - } - if err := iter.Close(); err != nil { - log.Fatal(err) - } -} -``` - - -Authentication -------- - -```go -cluster := gocql.NewCluster("192.168.1.1", "192.168.1.2", "192.168.1.3") -cluster.Authenticator = gocql.PasswordAuthenticator{ - Username: "user", - Password: "password" -} -cluster.Keyspace = "example" -cluster.Consistency = gocql.Quorum -session, _ := cluster.CreateSession() -defer session.Close() -``` +See [package documentation](https://pkg.go.dev/github.com/gocql/gocql#pkg-examples). Data Binding ------------ diff --git a/vendor/github.com/gocql/gocql/conn.go b/vendor/github.com/gocql/gocql/conn.go index 9e349c281..bc7ed44cb 100644 --- a/vendor/github.com/gocql/gocql/conn.go +++ b/vendor/github.com/gocql/gocql/conn.go @@ -44,8 +44,8 @@ func approve(authenticator string) bool { return false } -//JoinHostPort is a utility to return a address string that can be used -//gocql.Conn to form a connection with a host. +// JoinHostPort is a utility to return an address string that can be used +// by `gocql.Conn` to form a connection with a host. func JoinHostPort(addr string, port int) string { addr = strings.TrimSpace(addr) if _, _, err := net.SplitHostPort(addr); err != nil { @@ -80,6 +80,19 @@ func (p PasswordAuthenticator) Success(data []byte) error { return nil } +// SslOptions configures TLS use. +// +// Warning: Due to historical reasons, the SslOptions is insecure by default, so you need to set EnableHostVerification +// to true if no Config is set. Most users should set SslOptions.Config to a *tls.Config. +// SslOptions and Config.InsecureSkipVerify interact as follows: +// +// Config.InsecureSkipVerify | EnableHostVerification | Result +// Config is nil | false | do not verify host +// Config is nil | true | verify host +// false | false | verify host +// true | false | do not verify host +// false | true | verify host +// true | true | verify host type SslOptions struct { *tls.Config @@ -89,9 +102,12 @@ type SslOptions struct { CertPath string KeyPath string CaPath string //optional depending on server config - // If you want to verify the hostname and server cert (like a wildcard for cass cluster) then you should turn this on - // This option is basically the inverse of InSecureSkipVerify - // See InSecureSkipVerify in http://golang.org/pkg/crypto/tls/ for more info + // If you want to verify the hostname and server cert (like a wildcard for cass cluster) then you should turn this + // on. + // This option is basically the inverse of tls.Config.InsecureSkipVerify. + // See InsecureSkipVerify in http://golang.org/pkg/crypto/tls/ for more info. + // + // See SslOptions documentation to see how EnableHostVerification interacts with the provided tls.Config. EnableHostVerification bool } @@ -125,7 +141,7 @@ func (fn connErrorHandlerFn) HandleError(conn *Conn, err error, closed bool) { // which may be serving more queries just fine. // Default is 0, should not be changed concurrently with queries. // -// depreciated +// Deprecated. var TimeoutLimit int64 = 0 // Conn is a single connection to a Cassandra node. 
It can be used to execute @@ -213,14 +229,26 @@ func (s *Session) dialWithoutObserver(ctx context.Context, host *HostInfo, cfg * dialer = d } - conn, err := dialer.DialContext(ctx, "tcp", host.HostnameAndPort()) + addr := host.HostnameAndPort() + conn, err := dialer.DialContext(ctx, "tcp", addr) if err != nil { return nil, err } if cfg.tlsConfig != nil { // the TLS config is safe to be reused by connections but it must not // be modified after being used. - tconn := tls.Client(conn, cfg.tlsConfig) + tlsConfig := cfg.tlsConfig + if !tlsConfig.InsecureSkipVerify && tlsConfig.ServerName == "" { + colonPos := strings.LastIndex(addr, ":") + if colonPos == -1 { + colonPos = len(addr) + } + hostname := addr[:colonPos] + // clone config to avoid modifying the shared one. + tlsConfig = tlsConfig.Clone() + tlsConfig.ServerName = hostname + } + tconn := tls.Client(conn, tlsConfig) if err := tconn.Handshake(); err != nil { conn.Close() return nil, err @@ -845,6 +873,10 @@ func (w *writeCoalescer) writeFlusher(interval time.Duration) { } func (c *Conn) exec(ctx context.Context, req frameWriter, tracer Tracer) (*framer, error) { + if ctxErr := ctx.Err(); ctxErr != nil { + return nil, ctxErr + } + // TODO: move tracer onto conn stream, ok := c.streams.GetStream() if !ok { @@ -1173,12 +1205,16 @@ func (c *Conn) executeQuery(ctx context.Context, qry *Query) *Iter { } if x.meta.morePages() && !qry.disableAutoPage { + newQry := new(Query) + *newQry = *qry + newQry.pageState = copyBytes(x.meta.pagingState) + newQry.metrics = &queryMetrics{m: make(map[string]*hostMetrics)} + iter.next = &nextIter{ - qry: qry, + qry: newQry, pos: int((1 - qry.prefetch) * float64(x.numRows)), } - iter.next.qry.pageState = copyBytes(x.meta.pagingState) if iter.next.pos < 1 { iter.next.pos = 1 } @@ -1359,10 +1395,11 @@ func (c *Conn) executeBatch(ctx context.Context, batch *Batch) *Iter { } func (c *Conn) query(ctx context.Context, statement string, values ...interface{}) (iter *Iter) { - q := c.session.Query(statement, values...).Consistency(One) - q.trace = nil + q := c.session.Query(statement, values...).Consistency(One).Trace(nil) q.skipPrepare = true q.disableSkipMetadata = true + // we want to keep the query on this connection + q.conn = c return c.executeQuery(ctx, q) } diff --git a/vendor/github.com/gocql/gocql/connectionpool.go b/vendor/github.com/gocql/gocql/connectionpool.go index 1d2419bf4..75c28bc90 100644 --- a/vendor/github.com/gocql/gocql/connectionpool.go +++ b/vendor/github.com/gocql/gocql/connectionpool.go @@ -28,14 +28,31 @@ type SetPartitioner interface { } func setupTLSConfig(sslOpts *SslOptions) (*tls.Config, error) { + // Config.InsecureSkipVerify | EnableHostVerification | Result + // Config is nil | true | verify host + // Config is nil | false | do not verify host + // false | false | verify host + // true | false | do not verify host + // false | true | verify host + // true | true | verify host + var tlsConfig *tls.Config if sslOpts.Config == nil { - sslOpts.Config = &tls.Config{} + tlsConfig = &tls.Config{ + InsecureSkipVerify: !sslOpts.EnableHostVerification, + } + } else { + // use clone to avoid race. 
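+ // Cloning also means the InsecureSkipVerify override below (where
+ // EnableHostVerification wins) does not mutate the caller's shared tls.Config.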
+ tlsConfig = sslOpts.Config.Clone() + } + + if tlsConfig.InsecureSkipVerify && sslOpts.EnableHostVerification { + tlsConfig.InsecureSkipVerify = false } // ca cert is optional if sslOpts.CaPath != "" { - if sslOpts.RootCAs == nil { - sslOpts.RootCAs = x509.NewCertPool() + if tlsConfig.RootCAs == nil { + tlsConfig.RootCAs = x509.NewCertPool() } pem, err := ioutil.ReadFile(sslOpts.CaPath) @@ -43,7 +60,7 @@ func setupTLSConfig(sslOpts *SslOptions) (*tls.Config, error) { return nil, fmt.Errorf("connectionpool: unable to open CA certs: %v", err) } - if !sslOpts.RootCAs.AppendCertsFromPEM(pem) { + if !tlsConfig.RootCAs.AppendCertsFromPEM(pem) { return nil, errors.New("connectionpool: failed parsing or CA certs") } } @@ -53,13 +70,10 @@ func setupTLSConfig(sslOpts *SslOptions) (*tls.Config, error) { if err != nil { return nil, fmt.Errorf("connectionpool: unable to load X509 key pair: %v", err) } - sslOpts.Certificates = append(sslOpts.Certificates, mycert) + tlsConfig.Certificates = append(tlsConfig.Certificates, mycert) } - sslOpts.InsecureSkipVerify = !sslOpts.EnableHostVerification - - // return clone to avoid race - return sslOpts.Config.Clone(), nil + return tlsConfig, nil } type policyConnPool struct { @@ -238,12 +252,6 @@ func (p *policyConnPool) removeHost(ip net.IP) { go pool.Close() } -func (p *policyConnPool) hostUp(host *HostInfo) { - // TODO(zariel): have a set of up hosts and down hosts, we can internally - // detect down hosts, then try to reconnect to them. - p.addHost(host) -} - func (p *policyConnPool) hostDown(ip net.IP) { // TODO(zariel): mark host as down so we can try to connect to it later, for // now just treat it has removed. @@ -429,6 +437,8 @@ func (pool *hostConnPool) fill() { } return } + // notify the session that this node is connected + go pool.session.handleNodeUp(pool.host.ConnectAddress(), pool.port) // filled one fillCount-- @@ -440,6 +450,11 @@ func (pool *hostConnPool) fill() { // mark the end of filling pool.fillingStopped(err != nil) + + if err == nil && startCount > 0 { + // notify the session that this node is connected again + go pool.session.handleNodeUp(pool.host.ConnectAddress(), pool.port) + } }() } diff --git a/vendor/github.com/gocql/gocql/control.go b/vendor/github.com/gocql/gocql/control.go index aa5cf3570..9ab9c1c82 100644 --- a/vendor/github.com/gocql/gocql/control.go +++ b/vendor/github.com/gocql/gocql/control.go @@ -125,7 +125,7 @@ func hostInfo(addr string, defaultPort int) ([]*HostInfo, error) { if err != nil { return nil, err } else if len(ips) == 0 { - return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr) + return nil, fmt.Errorf("no IP's returned from DNS lookup for %q", addr) } // Filter to v4 addresses if any present @@ -177,7 +177,7 @@ func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) { return conn, nil } - Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.ConnectAddress(), err) + Logger.Printf("gocql: unable to dial control conn %v:%v: %v\n", host.ConnectAddress(), host.Port(), err) } return nil, err @@ -285,8 +285,6 @@ func (c *controlConn) setupConn(conn *Conn) error { } c.conn.Store(ch) - c.session.handleNodeUp(host.ConnectAddress(), host.Port(), false) - return nil } @@ -452,6 +450,8 @@ func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter for { iter = c.withConn(func(conn *Conn) *Iter { + // we want to keep the query on the control connection + q.conn = conn return conn.executeQuery(context.TODO(), q) }) diff --git 
a/vendor/github.com/gocql/gocql/doc.go b/vendor/github.com/gocql/gocql/doc.go index 5c4b041a1..8ca13dbd8 100644 --- a/vendor/github.com/gocql/gocql/doc.go +++ b/vendor/github.com/gocql/gocql/doc.go @@ -4,6 +4,319 @@ // Package gocql implements a fast and robust Cassandra driver for the // Go programming language. +// +// Connecting to the cluster +// +// Pass a list of initial node IP addresses to NewCluster to create a new cluster configuration: +// +// cluster := gocql.NewCluster("192.168.1.1", "192.168.1.2", "192.168.1.3") +// +// Port can be specified as part of the address, the above is equivalent to: +// +// cluster := gocql.NewCluster("192.168.1.1:9042", "192.168.1.2:9042", "192.168.1.3:9042") +// +// It is recommended to use the value set in the Cassandra config for broadcast_address or listen_address, +// an IP address not a domain name. This is because events from Cassandra will use the configured IP +// address, which is used to index connected hosts. If the domain name specified resolves to more than 1 IP address +// then the driver may connect multiple times to the same host, and will not mark the node being down or up from events. +// +// Then you can customize more options (see ClusterConfig): +// +// cluster.Keyspace = "example" +// cluster.Consistency = gocql.Quorum +// cluster.ProtoVersion = 4 +// +// The driver tries to automatically detect the protocol version to use if not set, but you might want to set the +// protocol version explicitly, as it's not defined which version will be used in certain situations (for example +// during upgrade of the cluster when some of the nodes support different set of protocol versions than other nodes). +// +// When ready, create a session from the configuration. Don't forget to Close the session once you are done with it: +// +// session, err := cluster.CreateSession() +// if err != nil { +// return err +// } +// defer session.Close() +// +// Authentication +// +// CQL protocol uses a SASL-based authentication mechanism and so consists of an exchange of server challenges and +// client response pairs. The details of the exchanged messages depend on the authenticator used. +// +// To use authentication, set ClusterConfig.Authenticator or ClusterConfig.AuthProvider. +// +// PasswordAuthenticator is provided to use for username/password authentication: +// +// cluster := gocql.NewCluster("192.168.1.1", "192.168.1.2", "192.168.1.3") +// cluster.Authenticator = gocql.PasswordAuthenticator{ +// Username: "user", +// Password: "password" +// } +// session, err := cluster.CreateSession() +// if err != nil { +// return err +// } +// defer session.Close() +// +// Transport layer security +// +// It is possible to secure traffic between the client and server with TLS. +// +// To use TLS, set the ClusterConfig.SslOpts field. SslOptions embeds *tls.Config so you can set that directly. +// There are also helpers to load keys/certificates from files. +// +// Warning: Due to historical reasons, the SslOptions is insecure by default, so you need to set EnableHostVerification +// to true if no Config is set. Most users should set SslOptions.Config to a *tls.Config. 
+// SslOptions and Config.InsecureSkipVerify interact as follows: +// +// Config.InsecureSkipVerify | EnableHostVerification | Result +// Config is nil | false | do not verify host +// Config is nil | true | verify host +// false | false | verify host +// true | false | do not verify host +// false | true | verify host +// true | true | verify host +// +// For example: +// +// cluster := gocql.NewCluster("192.168.1.1", "192.168.1.2", "192.168.1.3") +// cluster.SslOpts = &gocql.SslOptions{ +// EnableHostVerification: true, +// } +// session, err := cluster.CreateSession() +// if err != nil { +// return err +// } +// defer session.Close() +// +// Executing queries +// +// Create queries with Session.Query. Query values must not be reused between different executions and must not be +// modified after starting execution of the query. +// +// To execute a query without reading results, use Query.Exec: +// +// err := session.Query(`INSERT INTO tweet (timeline, id, text) VALUES (?, ?, ?)`, +// "me", gocql.TimeUUID(), "hello world").WithContext(ctx).Exec() +// +// Single row can be read by calling Query.Scan: +// +// err := session.Query(`SELECT id, text FROM tweet WHERE timeline = ? LIMIT 1`, +// "me").WithContext(ctx).Consistency(gocql.One).Scan(&id, &text) +// +// Multiple rows can be read using Iter.Scanner: +// +// scanner := session.Query(`SELECT id, text FROM tweet WHERE timeline = ?`, +// "me").WithContext(ctx).Iter().Scanner() +// for scanner.Next() { +// var ( +// id gocql.UUID +// text string +// ) +// err = scanner.Scan(&id, &text) +// if err != nil { +// log.Fatal(err) +// } +// fmt.Println("Tweet:", id, text) +// } +// // scanner.Err() closes the iterator, so scanner nor iter should be used afterwards. +// if err := scanner.Err(); err != nil { +// log.Fatal(err) +// } +// +// See Example for complete example. +// +// Prepared statements +// +// The driver automatically prepares DML queries (SELECT/INSERT/UPDATE/DELETE/BATCH statements) and maintains a cache +// of prepared statements. +// CQL protocol does not support preparing other query types. +// +// When using CQL protocol >= 4, it is possible to use gocql.UnsetValue as the bound value of a column. +// This will cause the database to ignore writing the column. +// The main advantage is the ability to keep the same prepared statement even when you don't +// want to update some fields, where before you needed to make another prepared statement. +// +// Executing multiple queries concurrently +// +// Session is safe to use from multiple goroutines, so to execute multiple concurrent queries, just execute them +// from several worker goroutines. Gocql provides synchronously-looking API (as recommended for Go APIs) and the queries +// are executed asynchronously at the protocol level. +// +// results := make(chan error, 2) +// go func() { +// results <- session.Query(`INSERT INTO tweet (timeline, id, text) VALUES (?, ?, ?)`, +// "me", gocql.TimeUUID(), "hello world 1").Exec() +// }() +// go func() { +// results <- session.Query(`INSERT INTO tweet (timeline, id, text) VALUES (?, ?, ?)`, +// "me", gocql.TimeUUID(), "hello world 2").Exec() +// }() +// +// Nulls +// +// Null values are are unmarshalled as zero value of the type. If you need to distinguish for example between text +// column being null and empty string, you can unmarshal into *string variable instead of string. 
+// +// var text *string +// err := scanner.Scan(&text) +// if err != nil { +// // handle error +// } +// if text != nil { +// // not null +// } +// else { +// // null +// } +// +// See Example_nulls for full example. +// +// Reusing slices +// +// The driver reuses backing memory of slices when unmarshalling. This is an optimization so that a buffer does not +// need to be allocated for every processed row. However, you need to be careful when storing the slices to other +// memory structures. +// +// scanner := session.Query(`SELECT myints FROM table WHERE pk = ?`, "key").WithContext(ctx).Iter().Scanner() +// var myInts []int +// for scanner.Next() { +// // This scan reuses backing store of myInts for each row. +// err = scanner.Scan(&myInts) +// if err != nil { +// log.Fatal(err) +// } +// } +// +// When you want to save the data for later use, pass a new slice every time. A common pattern is to declare the +// slice variable within the scanner loop: +// +// scanner := session.Query(`SELECT myints FROM table WHERE pk = ?`, "key").WithContext(ctx).Iter().Scanner() +// for scanner.Next() { +// var myInts []int +// // This scan always gets pointer to fresh myInts slice, so does not reuse memory. +// err = scanner.Scan(&myInts) +// if err != nil { +// log.Fatal(err) +// } +// } +// +// Paging +// +// The driver supports paging of results with automatic prefetch, see ClusterConfig.PageSize, Session.SetPrefetch, +// Query.PageSize, and Query.Prefetch. +// +// It is also possible to control the paging manually with Query.PageState (this disables automatic prefetch). +// Manual paging is useful if you want to store the page state externally, for example in a URL to allow users +// browse pages in a result. You might want to sign/encrypt the paging state when exposing it externally since +// it contains data from primary keys. +// +// Paging state is specific to the CQL protocol version and the exact query used. It is meant as opaque state that +// should not be modified. If you send paging state from different query or protocol version, then the behaviour +// is not defined (you might get unexpected results or an error from the server). For example, do not send paging state +// returned by node using protocol version 3 to a node using protocol version 4. Also, when using protocol version 4, +// paging state between Cassandra 2.2 and 3.0 is incompatible (https://issues.apache.org/jira/browse/CASSANDRA-10880). +// +// The driver does not check whether the paging state is from the same protocol version/statement. +// You might want to validate yourself as this could be a problem if you store paging state externally. +// For example, if you store paging state in a URL, the URLs might become broken when you upgrade your cluster. +// +// Call Query.PageState(nil) to fetch just the first page of the query results. Pass the page state returned by +// Iter.PageState to Query.PageState of a subsequent query to get the next page. If the length of slice returned +// by Iter.PageState is zero, there are no more pages available (or an error occurred). +// +// Using too low values of PageSize will negatively affect performance, a value below 100 is probably too low. +// While Cassandra returns exactly PageSize items (except for last page) in a page currently, the protocol authors +// explicitly reserved the right to return smaller or larger amount of items in a page for performance reasons, so don't +// rely on the page having the exact count of items. 
+// +// See Example_paging for an example of manual paging. +// +// Dynamic list of columns +// +// There are certain situations when you don't know the list of columns in advance, mainly when the query is supplied +// by the user. Iter.Columns, Iter.RowData, Iter.MapScan and Iter.SliceMap can be used to handle this case. +// +// See Example_dynamicColumns. +// +// Batches +// +// The CQL protocol supports sending batches of DML statements (INSERT/UPDATE/DELETE) and so does gocql. +// Use Session.NewBatch to create a new batch and then fill-in details of individual queries. +// Then execute the batch with Session.ExecuteBatch. +// +// Logged batches ensure atomicity, either all or none of the operations in the batch will succeed, but they have +// overhead to ensure this property. +// Unlogged batches don't have the overhead of logged batches, but don't guarantee atomicity. +// Updates of counters are handled specially by Cassandra so batches of counter updates have to use CounterBatch type. +// A counter batch can only contain statements to update counters. +// +// For unlogged batches it is recommended to send only single-partition batches (i.e. all statements in the batch should +// involve only a single partition). +// Multi-partition batch needs to be split by the coordinator node and re-sent to +// correct nodes. +// With single-partition batches you can send the batch directly to the node for the partition without incurring the +// additional network hop. +// +// It is also possible to pass entire BEGIN BATCH .. APPLY BATCH statement to Query.Exec. +// There are differences how those are executed. +// BEGIN BATCH statement passed to Query.Exec is prepared as a whole in a single statement. +// Session.ExecuteBatch prepares individual statements in the batch. +// If you have variable-length batches using the same statement, using Session.ExecuteBatch is more efficient. +// +// See Example_batch for an example. +// +// Lightweight transactions +// +// Query.ScanCAS or Query.MapScanCAS can be used to execute a single-statement lightweight transaction (an +// INSERT/UPDATE .. IF statement) and reading its result. See example for Query.MapScanCAS. +// +// Multiple-statement lightweight transactions can be executed as a logged batch that contains at least one conditional +// statement. All the conditions must return true for the batch to be applied. You can use Session.ExecuteBatchCAS and +// Session.MapExecuteBatchCAS when executing the batch to learn about the result of the LWT. See example for +// Session.MapExecuteBatchCAS. +// +// Retries and speculative execution +// +// Queries can be marked as idempotent. Marking the query as idempotent tells the driver that the query can be executed +// multiple times without affecting its result. Non-idempotent queries are not eligible for retrying nor speculative +// execution. +// +// Idempotent queries are retried in case of errors based on the configured RetryPolicy. +// +// Queries can be retried even before they fail by setting a SpeculativeExecutionPolicy. The policy can +// cause the driver to retry on a different node if the query is taking longer than a specified delay even before the +// driver receives an error or timeout from the server. When a query is speculatively executed, the original execution +// is still executing. The two parallel executions of the query race to return a result, the first received result will +// be returned. 
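+//
+// A minimal sketch, assuming the SimpleRetryPolicy and SimpleSpeculativeExecution policies and the
+// Query.Idempotent, Query.RetryPolicy and Query.SetSpeculativeExecutionPolicy chaining methods, of
+// opting a single query into both retries and speculative execution:
+//
+//	sp := &gocql.SimpleSpeculativeExecution{NumAttempts: 1, TimeoutDelay: 100 * time.Millisecond}
+//	err := session.Query(`INSERT INTO tweet (timeline, id, text) VALUES (?, ?, ?)`,
+//		"me", gocql.TimeUUID(), "hello world").
+//		Idempotent(true).
+//		RetryPolicy(&gocql.SimpleRetryPolicy{NumRetries: 3}).
+//		SetSpeculativeExecutionPolicy(sp).
+//		Exec()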
+// +// User-defined types +// +// UDTs can be mapped (un)marshaled from/to map[string]interface{} a Go struct (or a type implementing +// UDTUnmarshaler, UDTMarshaler, Unmarshaler or Marshaler interfaces). +// +// For structs, cql tag can be used to specify the CQL field name to be mapped to a struct field: +// +// type MyUDT struct { +// FieldA int32 `cql:"a"` +// FieldB string `cql:"b"` +// } +// +// See Example_userDefinedTypesMap, Example_userDefinedTypesStruct, ExampleUDTMarshaler, ExampleUDTUnmarshaler. +// +// Metrics and tracing +// +// It is possible to provide observer implementations that could be used to gather metrics: +// +// - QueryObserver for monitoring individual queries. +// - BatchObserver for monitoring batch queries. +// - ConnectObserver for monitoring new connections from the driver to the database. +// - FrameHeaderObserver for monitoring individual protocol frames. +// +// CQL protocol also supports tracing of queries. When enabled, the database will write information about +// internal events that happened during execution of the query. You can use Query.Trace to request tracing and receive +// the session ID that the database used to store the trace information in system_traces.sessions and +// system_traces.events tables. NewTraceWriter returns an implementation of Tracer that writes the events to a writer. +// Gathering trace information might be essential for debugging and optimizing queries, but writing traces has overhead, +// so this feature should not be used on production systems with very high load unless you know what you are doing. package gocql // import "github.com/gocql/gocql" - -// TODO(tux21b): write more docs. diff --git a/vendor/github.com/gocql/gocql/events.go b/vendor/github.com/gocql/gocql/events.go index f7f727bd6..c633825be 100644 --- a/vendor/github.com/gocql/gocql/events.go +++ b/vendor/github.com/gocql/gocql/events.go @@ -164,55 +164,43 @@ func (s *Session) handleNodeEvent(frames []frame) { switch f.change { case "NEW_NODE": - s.handleNewNode(f.host, f.port, true) + s.handleNewNode(f.host, f.port) case "REMOVED_NODE": s.handleRemovedNode(f.host, f.port) case "MOVED_NODE": // java-driver handles this, not mentioned in the spec // TODO(zariel): refresh token map case "UP": - s.handleNodeUp(f.host, f.port, true) + s.handleNodeUp(f.host, f.port) case "DOWN": s.handleNodeDown(f.host, f.port) } } } -func (s *Session) addNewNode(host *HostInfo) { - if s.cfg.filterHost(host) { - return - } - - host.setState(NodeUp) - s.pool.addHost(host) - s.policy.AddHost(host) -} - -func (s *Session) handleNewNode(ip net.IP, port int, waitForBinary bool) { - if gocqlDebug { - Logger.Printf("gocql: Session.handleNewNode: %s:%d\n", ip.String(), port) - } - - ip, port = s.cfg.translateAddressPort(ip, port) - +func (s *Session) addNewNode(ip net.IP, port int) { // Get host info and apply any filters to the host hostInfo, err := s.hostSource.getHostInfo(ip, port) if err != nil { Logger.Printf("gocql: events: unable to fetch host info for (%s:%d): %v\n", ip, port, err) return } else if hostInfo == nil { - // If hostInfo is nil, this host was filtered out by cfg.HostFilter + // ignore if it's null because we couldn't find it return } - if t := hostInfo.Version().nodeUpDelay(); t > 0 && waitForBinary { + if t := hostInfo.Version().nodeUpDelay(); t > 0 { time.Sleep(t) } // should this handle token moving? 
hostInfo = s.ring.addOrUpdate(hostInfo) - s.addNewNode(hostInfo) + if !s.cfg.filterHost(hostInfo) { + // we let the pool call handleNodeUp to change the host state + s.pool.addHost(hostInfo) + s.policy.AddHost(hostInfo) + } if s.control != nil && !s.cfg.IgnorePeerAddr { // TODO(zariel): debounce ring refresh @@ -220,6 +208,22 @@ func (s *Session) handleNewNode(ip net.IP, port int, waitForBinary bool) { } } +func (s *Session) handleNewNode(ip net.IP, port int) { + if gocqlDebug { + Logger.Printf("gocql: Session.handleNewNode: %s:%d\n", ip.String(), port) + } + + ip, port = s.cfg.translateAddressPort(ip, port) + + // if we already have the host and it's already up, then do nothing + host := s.ring.getHost(ip) + if host != nil && host.IsUp() { + return + } + + s.addNewNode(ip, port) +} + func (s *Session) handleRemovedNode(ip net.IP, port int) { if gocqlDebug { Logger.Printf("gocql: Session.handleRemovedNode: %s:%d\n", ip.String(), port) @@ -232,45 +236,37 @@ func (s *Session) handleRemovedNode(ip net.IP, port int) { if host == nil { host = &HostInfo{connectAddress: ip, port: port} } - - if s.cfg.HostFilter != nil && !s.cfg.HostFilter.Accept(host) { - return - } + s.ring.removeHost(ip) host.setState(NodeDown) - s.policy.RemoveHost(host) - s.pool.removeHost(ip) - s.ring.removeHost(ip) + if !s.cfg.filterHost(host) { + s.policy.RemoveHost(host) + s.pool.removeHost(ip) + } if !s.cfg.IgnorePeerAddr { s.hostSource.refreshRing() } } -func (s *Session) handleNodeUp(eventIp net.IP, eventPort int, waitForBinary bool) { +func (s *Session) handleNodeUp(eventIp net.IP, eventPort int) { if gocqlDebug { Logger.Printf("gocql: Session.handleNodeUp: %s:%d\n", eventIp.String(), eventPort) } - ip, _ := s.cfg.translateAddressPort(eventIp, eventPort) + ip, port := s.cfg.translateAddressPort(eventIp, eventPort) host := s.ring.getHost(ip) if host == nil { - // TODO(zariel): avoid the need to translate twice in this - // case - s.handleNewNode(eventIp, eventPort, waitForBinary) + s.addNewNode(ip, port) return } - if s.cfg.HostFilter != nil && !s.cfg.HostFilter.Accept(host) { - return - } + host.setState(NodeUp) - if t := host.Version().nodeUpDelay(); t > 0 && waitForBinary { - time.Sleep(t) + if !s.cfg.filterHost(host) { + s.policy.HostUp(host) } - - s.addNewNode(host) } func (s *Session) handleNodeDown(ip net.IP, port int) { @@ -283,11 +279,11 @@ func (s *Session) handleNodeDown(ip net.IP, port int) { host = &HostInfo{connectAddress: ip, port: port} } - if s.cfg.HostFilter != nil && !s.cfg.HostFilter.Accept(host) { + host.setState(NodeDown) + if s.cfg.filterHost(host) { return } - host.setState(NodeDown) s.policy.HostDown(host) s.pool.hostDown(ip) } diff --git a/vendor/github.com/gocql/gocql/frame.go b/vendor/github.com/gocql/gocql/frame.go index 5fc948895..99aba2b8a 100644 --- a/vendor/github.com/gocql/gocql/frame.go +++ b/vendor/github.com/gocql/gocql/frame.go @@ -311,26 +311,10 @@ var ( const maxFrameHeaderSize = 9 -func writeInt(p []byte, n int32) { - p[0] = byte(n >> 24) - p[1] = byte(n >> 16) - p[2] = byte(n >> 8) - p[3] = byte(n) -} - func readInt(p []byte) int32 { return int32(p[0])<<24 | int32(p[1])<<16 | int32(p[2])<<8 | int32(p[3]) } -func writeShort(p []byte, n uint16) { - p[0] = byte(n >> 8) - p[1] = byte(n) -} - -func readShort(p []byte) uint16 { - return uint16(p[0])<<8 | uint16(p[1]) -} - type frameHeader struct { version protoVersion flags byte @@ -854,7 +838,7 @@ func (w *writePrepareFrame) writeFrame(f *framer, streamID int) error { if f.proto > protoVersion4 { flags |= flagWithPreparedKeyspace } 
else { - panic(fmt.Errorf("The keyspace can only be set with protocol 5 or higher")) + panic(fmt.Errorf("the keyspace can only be set with protocol 5 or higher")) } } if f.proto > protoVersion4 { @@ -1502,7 +1486,7 @@ func (f *framer) writeQueryParams(opts *queryParams) { if f.proto > protoVersion4 { flags |= flagWithKeyspace } else { - panic(fmt.Errorf("The keyspace can only be set with protocol 5 or higher")) + panic(fmt.Errorf("the keyspace can only be set with protocol 5 or higher")) } } @@ -1792,16 +1776,6 @@ func (f *framer) readShort() (n uint16) { return } -func (f *framer) readLong() (n int64) { - if len(f.rbuf) < 8 { - panic(fmt.Errorf("not enough bytes in buffer to read long require 8 got: %d", len(f.rbuf))) - } - n = int64(f.rbuf[0])<<56 | int64(f.rbuf[1])<<48 | int64(f.rbuf[2])<<40 | int64(f.rbuf[3])<<32 | - int64(f.rbuf[4])<<24 | int64(f.rbuf[5])<<16 | int64(f.rbuf[6])<<8 | int64(f.rbuf[7]) - f.rbuf = f.rbuf[8:] - return -} - func (f *framer) readString() (s string) { size := f.readShort() @@ -1915,19 +1889,6 @@ func (f *framer) readConsistency() Consistency { return Consistency(f.readShort()) } -func (f *framer) readStringMap() map[string]string { - size := f.readShort() - m := make(map[string]string, size) - - for i := 0; i < int(size); i++ { - k := f.readString() - v := f.readString() - m[k] = v - } - - return m -} - func (f *framer) readBytesMap() map[string][]byte { size := f.readShort() m := make(map[string][]byte, size) @@ -2037,10 +1998,6 @@ func (f *framer) writeLongString(s string) { f.wbuf = append(f.wbuf, s...) } -func (f *framer) writeUUID(u *UUID) { - f.wbuf = append(f.wbuf, u[:]...) -} - func (f *framer) writeStringList(l []string) { f.writeShort(uint16(len(l))) for _, s := range l { @@ -2073,18 +2030,6 @@ func (f *framer) writeShortBytes(p []byte) { f.wbuf = append(f.wbuf, p...) 
} -func (f *framer) writeInet(ip net.IP, port int) { - f.wbuf = append(f.wbuf, - byte(len(ip)), - ) - - f.wbuf = append(f.wbuf, - []byte(ip)..., - ) - - f.writeInt(int32(port)) -} - func (f *framer) writeConsistency(cons Consistency) { f.writeShort(uint16(cons)) } diff --git a/vendor/github.com/gocql/gocql/helpers.go b/vendor/github.com/gocql/gocql/helpers.go index eb07f4f69..cafb508c4 100644 --- a/vendor/github.com/gocql/gocql/helpers.go +++ b/vendor/github.com/gocql/gocql/helpers.go @@ -270,15 +270,6 @@ func getApacheCassandraType(class string) Type { } } -func typeCanBeNull(typ TypeInfo) bool { - switch typ.(type) { - case CollectionType, UDTTypeInfo, TupleTypeInfo: - return false - } - - return true -} - func (r *RowData) rowMap(m map[string]interface{}) { for i, column := range r.Columns { val := dereference(r.Values[i]) @@ -372,7 +363,7 @@ func (iter *Iter) SliceMap() ([]map[string]interface{}, error) { // iter := session.Query(`SELECT * FROM mytable`).Iter() // for { // // New map each iteration -// row = make(map[string]interface{}) +// row := make(map[string]interface{}) // if !iter.MapScan(row) { // break // } diff --git a/vendor/github.com/gocql/gocql/host_source.go b/vendor/github.com/gocql/gocql/host_source.go index f8ab3c109..f6716ca27 100644 --- a/vendor/github.com/gocql/gocql/host_source.go +++ b/vendor/github.com/gocql/gocql/host_source.go @@ -147,13 +147,6 @@ func (h *HostInfo) Peer() net.IP { return h.peer } -func (h *HostInfo) setPeer(peer net.IP) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.peer = peer - return h -} - func (h *HostInfo) invalidConnectAddr() bool { h.mu.RLock() defer h.mu.RUnlock() @@ -233,13 +226,6 @@ func (h *HostInfo) DataCenter() string { return dc } -func (h *HostInfo) setDataCenter(dataCenter string) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.dataCenter = dataCenter - return h -} - func (h *HostInfo) Rack() string { h.mu.RLock() rack := h.rack @@ -247,26 +233,12 @@ func (h *HostInfo) Rack() string { return rack } -func (h *HostInfo) setRack(rack string) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.rack = rack - return h -} - func (h *HostInfo) HostID() string { h.mu.RLock() defer h.mu.RUnlock() return h.hostId } -func (h *HostInfo) setHostID(hostID string) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.hostId = hostID - return h -} - func (h *HostInfo) WorkLoad() string { h.mu.RLock() defer h.mu.RUnlock() @@ -303,13 +275,6 @@ func (h *HostInfo) Version() cassVersion { return h.version } -func (h *HostInfo) setVersion(major, minor, patch int) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.version = cassVersion{major, minor, patch} - return h -} - func (h *HostInfo) State() nodeState { h.mu.RLock() defer h.mu.RUnlock() @@ -329,26 +294,12 @@ func (h *HostInfo) Tokens() []string { return h.tokens } -func (h *HostInfo) setTokens(tokens []string) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.tokens = tokens - return h -} - func (h *HostInfo) Port() int { h.mu.RLock() defer h.mu.RUnlock() return h.port } -func (h *HostInfo) setPort(port int) *HostInfo { - h.mu.Lock() - defer h.mu.Unlock() - h.port = port - return h -} - func (h *HostInfo) update(from *HostInfo) { if h == from { return @@ -689,7 +640,7 @@ func (r *ringDescriber) refreshRing() error { // TODO: move this to session for _, h := range hosts { - if filter := r.session.cfg.HostFilter; filter != nil && !filter.Accept(h) { + if r.session.cfg.filterHost(h) { continue } diff --git a/vendor/github.com/gocql/gocql/install_test_deps.sh 
b/vendor/github.com/gocql/gocql/install_test_deps.sh index 77fac8d47..1484ac4e5 100644 --- a/vendor/github.com/gocql/gocql/install_test_deps.sh +++ b/vendor/github.com/gocql/gocql/install_test_deps.sh @@ -8,9 +8,3 @@ git clone https://github.com/pcmanus/ccm.git pushd ccm ./setup.py install --user popd - -if [ "$1" != "gocql/gocql" ]; then - USER=$(echo $1 | cut -f1 -d'/') - cd ../.. - mv ${USER} gocql -fi diff --git a/vendor/github.com/gocql/gocql/marshal.go b/vendor/github.com/gocql/gocql/marshal.go index e95c1c8f9..02532772c 100644 --- a/vendor/github.com/gocql/gocql/marshal.go +++ b/vendor/github.com/gocql/gocql/marshal.go @@ -44,6 +44,52 @@ type Unmarshaler interface { // Marshal returns the CQL encoding of the value for the Cassandra // internal type described by the info parameter. +// +// nil is serialized as CQL null. +// If value implements Marshaler, its MarshalCQL method is called to marshal the data. +// If value is a pointer, the pointed-to value is marshaled. +// +// Supported conversions are as follows, other type combinations may be added in the future: +// +// CQL type | Go type (value) | Note +// varchar, ascii, blob, text | string, []byte | +// boolean | bool | +// tinyint, smallint, int | integer types | +// tinyint, smallint, int | string | formatted as base 10 number +// bigint, counter | integer types | +// bigint, counter | big.Int | +// bigint, counter | string | formatted as base 10 number +// float | float32 | +// double | float64 | +// decimal | inf.Dec | +// time | int64 | nanoseconds since start of day +// time | time.Duration | duration since start of day +// timestamp | int64 | milliseconds since Unix epoch +// timestamp | time.Time | +// list, set | slice, array | +// list, set | map[X]struct{} | +// map | map[X]Y | +// uuid, timeuuid | gocql.UUID | +// uuid, timeuuid | [16]byte | raw UUID bytes +// uuid, timeuuid | []byte | raw UUID bytes, length must be 16 bytes +// uuid, timeuuid | string | hex representation, see ParseUUID +// varint | integer types | +// varint | big.Int | +// varint | string | value of number in decimal notation +// inet | net.IP | +// inet | string | IPv4 or IPv6 address string +// tuple | slice, array | +// tuple | struct | fields are marshaled in order of declaration +// user-defined type | gocql.UDTMarshaler | MarshalUDT is called +// user-defined type | map[string]interface{} | +// user-defined type | struct | struct fields' cql tags are used for column names +// date | int64 | milliseconds since Unix epoch to start of day (in UTC) +// date | time.Time | start of day (in UTC) +// date | string | parsed using "2006-01-02" format +// duration | int64 | duration in nanoseconds +// duration | time.Duration | +// duration | gocql.Duration | +// duration | string | parsed with time.ParseDuration func Marshal(info TypeInfo, value interface{}) ([]byte, error) { if info.Version() < protoVersion1 { panic("protocol version not set") @@ -118,6 +164,44 @@ func Marshal(info TypeInfo, value interface{}) ([]byte, error) { // Unmarshal parses the CQL encoded data based on the info parameter that // describes the Cassandra internal data type and stores the result in the // value pointed by value. +// +// If value implements Unmarshaler, it's UnmarshalCQL method is called to +// unmarshal the data. +// If value is a pointer to pointer, it is set to nil if the CQL value is +// null. Otherwise, nulls are unmarshalled as zero value. 
+// +// Supported conversions are as follows, other type combinations may be added in the future: +// +// CQL type | Go type (value) | Note +// varchar, ascii, blob, text | *string | +// varchar, ascii, blob, text | *[]byte | non-nil buffer is reused +// bool | *bool | +// tinyint, smallint, int, bigint, counter | *integer types | +// tinyint, smallint, int, bigint, counter | *big.Int | +// tinyint, smallint, int, bigint, counter | *string | formatted as base 10 number +// float | *float32 | +// double | *float64 | +// decimal | *inf.Dec | +// time | *int64 | nanoseconds since start of day +// time | *time.Duration | +// timestamp | *int64 | milliseconds since Unix epoch +// timestamp | *time.Time | +// list, set | *slice, *array | +// map | *map[X]Y | +// uuid, timeuuid | *string | see UUID.String +// uuid, timeuuid | *[]byte | raw UUID bytes +// uuid, timeuuid | *gocql.UUID | +// timeuuid | *time.Time | timestamp of the UUID +// inet | *net.IP | +// inet | *string | IPv4 or IPv6 address string +// tuple | *slice, *array | +// tuple | *struct | struct fields are set in order of declaration +// user-defined types | gocql.UDTUnmarshaler | UnmarshalUDT is called +// user-defined types | *map[string]interface{} | +// user-defined types | *struct | cql tag is used to determine field name +// date | *time.Time | time of beginning of the day (in UTC) +// date | *string | formatted with 2006-01-02 format +// duration | *gocql.Duration | func Unmarshal(info TypeInfo, data []byte, value interface{}) error { if v, ok := value.(Unmarshaler); ok { return v.UnmarshalCQL(info, data) @@ -1690,6 +1774,8 @@ func marshalUUID(info TypeInfo, value interface{}) ([]byte, error) { return nil, nil case UUID: return val.Bytes(), nil + case [16]byte: + return val[:], nil case []byte: if len(val) != 16 { return nil, marshalErrorf("can not marshal []byte %d bytes long into %s, must be exactly 16 bytes long", len(val), info) @@ -1711,7 +1797,7 @@ func marshalUUID(info TypeInfo, value interface{}) ([]byte, error) { } func unmarshalUUID(info TypeInfo, data []byte, value interface{}) error { - if data == nil || len(data) == 0 { + if len(data) == 0 { switch v := value.(type) { case *string: *v = "" @@ -1726,9 +1812,22 @@ func unmarshalUUID(info TypeInfo, data []byte, value interface{}) error { return nil } + if len(data) != 16 { + return unmarshalErrorf("unable to parse UUID: UUIDs must be exactly 16 bytes long") + } + + switch v := value.(type) { + case *[16]byte: + copy((*v)[:], data) + return nil + case *UUID: + copy((*v)[:], data) + return nil + } + u, err := UUIDFromBytes(data) if err != nil { - return unmarshalErrorf("Unable to parse UUID: %s", err) + return unmarshalErrorf("unable to parse UUID: %s", err) } switch v := value.(type) { @@ -1738,9 +1837,6 @@ func unmarshalUUID(info TypeInfo, data []byte, value interface{}) error { case *[]byte: *v = u[:] return nil - case *UUID: - *v = u - return nil } return unmarshalErrorf("can not unmarshal X %s into %T", info, value) } @@ -1942,7 +2038,7 @@ func unmarshalTuple(info TypeInfo, data []byte, value interface{}) error { for i, elem := range tuple.Elems { // each element inside data is a [bytes] var p []byte - if len(data) > 4 { + if len(data) >= 4 { p, data = readBytes(data) } err := Unmarshal(elem, p, v[i]) @@ -1971,7 +2067,7 @@ func unmarshalTuple(info TypeInfo, data []byte, value interface{}) error { for i, elem := range tuple.Elems { var p []byte - if len(data) > 4 { + if len(data) >= 4 { p, data = readBytes(data) } @@ -1982,7 +2078,11 @@ func unmarshalTuple(info 
TypeInfo, data []byte, value interface{}) error { switch rv.Field(i).Kind() { case reflect.Ptr: - rv.Field(i).Set(reflect.ValueOf(v)) + if p != nil { + rv.Field(i).Set(reflect.ValueOf(v)) + } else { + rv.Field(i).Set(reflect.Zero(reflect.TypeOf(v))) + } default: rv.Field(i).Set(reflect.ValueOf(v).Elem()) } @@ -2001,7 +2101,7 @@ func unmarshalTuple(info TypeInfo, data []byte, value interface{}) error { for i, elem := range tuple.Elems { var p []byte - if len(data) > 4 { + if len(data) >= 4 { p, data = readBytes(data) } @@ -2012,7 +2112,11 @@ func unmarshalTuple(info TypeInfo, data []byte, value interface{}) error { switch rv.Index(i).Kind() { case reflect.Ptr: - rv.Index(i).Set(reflect.ValueOf(v)) + if p != nil { + rv.Index(i).Set(reflect.ValueOf(v)) + } else { + rv.Index(i).Set(reflect.Zero(reflect.TypeOf(v))) + } default: rv.Index(i).Set(reflect.ValueOf(v).Elem()) } @@ -2050,7 +2154,7 @@ func marshalUDT(info TypeInfo, value interface{}) ([]byte, error) { case Marshaler: return v.MarshalCQL(info) case unsetColumn: - return nil, unmarshalErrorf("Invalid request: UnsetValue is unsupported for user defined types") + return nil, unmarshalErrorf("invalid request: UnsetValue is unsupported for user defined types") case UDTMarshaler: var buf []byte for _, e := range udt.Elements { diff --git a/vendor/github.com/gocql/gocql/metadata.go b/vendor/github.com/gocql/gocql/metadata.go index e586dd48f..6cd2b4bc0 100644 --- a/vendor/github.com/gocql/gocql/metadata.go +++ b/vendor/github.com/gocql/gocql/metadata.go @@ -324,10 +324,10 @@ func compileMetadata( keyspace.Functions[functions[i].Name] = &functions[i] } keyspace.Aggregates = make(map[string]*AggregateMetadata, len(aggregates)) - for _, aggregate := range aggregates { - aggregate.FinalFunc = *keyspace.Functions[aggregate.finalFunc] - aggregate.StateFunc = *keyspace.Functions[aggregate.stateFunc] - keyspace.Aggregates[aggregate.Name] = &aggregate + for i, _ := range aggregates { + aggregates[i].FinalFunc = *keyspace.Functions[aggregates[i].finalFunc] + aggregates[i].StateFunc = *keyspace.Functions[aggregates[i].stateFunc] + keyspace.Aggregates[aggregates[i].Name] = &aggregates[i] } keyspace.Views = make(map[string]*ViewMetadata, len(views)) for i := range views { @@ -347,9 +347,9 @@ func compileMetadata( keyspace.UserTypes[types[i].Name] = &types[i] } keyspace.MaterializedViews = make(map[string]*MaterializedViewMetadata, len(materializedViews)) - for _, materializedView := range materializedViews { - materializedView.BaseTable = keyspace.Tables[materializedView.baseTableName] - keyspace.MaterializedViews[materializedView.Name] = &materializedView + for i, _ := range materializedViews { + materializedViews[i].BaseTable = keyspace.Tables[materializedViews[i].baseTableName] + keyspace.MaterializedViews[materializedViews[i].Name] = &materializedViews[i] } // add columns from the schema data @@ -559,7 +559,7 @@ func getKeyspaceMetadata(session *Session, keyspaceName string) (*KeyspaceMetada iter.Scan(&keyspace.DurableWrites, &replication) err := iter.Close() if err != nil { - return nil, fmt.Errorf("Error querying keyspace schema: %v", err) + return nil, fmt.Errorf("error querying keyspace schema: %v", err) } keyspace.StrategyClass = replication["class"] @@ -585,13 +585,13 @@ func getKeyspaceMetadata(session *Session, keyspaceName string) (*KeyspaceMetada iter.Scan(&keyspace.DurableWrites, &keyspace.StrategyClass, &strategyOptionsJSON) err := iter.Close() if err != nil { - return nil, fmt.Errorf("Error querying keyspace schema: %v", err) + return nil, 
fmt.Errorf("error querying keyspace schema: %v", err) } err = json.Unmarshal(strategyOptionsJSON, &keyspace.StrategyOptions) if err != nil { return nil, fmt.Errorf( - "Invalid JSON value '%s' as strategy_options for in keyspace '%s': %v", + "invalid JSON value '%s' as strategy_options for in keyspace '%s': %v", strategyOptionsJSON, keyspace.Name, err, ) } @@ -703,7 +703,7 @@ func getTableMetadata(session *Session, keyspaceName string) ([]TableMetadata, e if err != nil { iter.Close() return nil, fmt.Errorf( - "Invalid JSON value '%s' as key_aliases for in table '%s': %v", + "invalid JSON value '%s' as key_aliases for in table '%s': %v", keyAliasesJSON, table.Name, err, ) } @@ -716,7 +716,7 @@ func getTableMetadata(session *Session, keyspaceName string) ([]TableMetadata, e if err != nil { iter.Close() return nil, fmt.Errorf( - "Invalid JSON value '%s' as column_aliases for in table '%s': %v", + "invalid JSON value '%s' as column_aliases for in table '%s': %v", columnAliasesJSON, table.Name, err, ) } @@ -728,7 +728,7 @@ func getTableMetadata(session *Session, keyspaceName string) ([]TableMetadata, e err := iter.Close() if err != nil && err != ErrNotFound { - return nil, fmt.Errorf("Error querying table schema: %v", err) + return nil, fmt.Errorf("error querying table schema: %v", err) } return tables, nil @@ -777,7 +777,7 @@ func (s *Session) scanColumnMetadataV1(keyspace string) ([]ColumnMetadata, error err := json.Unmarshal(indexOptionsJSON, &column.Index.Options) if err != nil { return nil, fmt.Errorf( - "Invalid JSON value '%s' as index_options for column '%s' in table '%s': %v", + "invalid JSON value '%s' as index_options for column '%s' in table '%s': %v", indexOptionsJSON, column.Name, column.Table, @@ -837,7 +837,7 @@ func (s *Session) scanColumnMetadataV2(keyspace string) ([]ColumnMetadata, error err := json.Unmarshal(indexOptionsJSON, &column.Index.Options) if err != nil { return nil, fmt.Errorf( - "Invalid JSON value '%s' as index_options for column '%s' in table '%s': %v", + "invalid JSON value '%s' as index_options for column '%s' in table '%s': %v", indexOptionsJSON, column.Name, column.Table, @@ -915,7 +915,7 @@ func getColumnMetadata(session *Session, keyspaceName string) ([]ColumnMetadata, } if err != nil && err != ErrNotFound { - return nil, fmt.Errorf("Error querying column schema: %v", err) + return nil, fmt.Errorf("error querying column schema: %v", err) } return columns, nil diff --git a/vendor/github.com/gocql/gocql/policies.go b/vendor/github.com/gocql/gocql/policies.go index 62d809c86..06e1c13be 100644 --- a/vendor/github.com/gocql/gocql/policies.go +++ b/vendor/github.com/gocql/gocql/policies.go @@ -1,9 +1,12 @@ // Copyright (c) 2012 The gocql Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-//This file will be the future home for more policies + package gocql +//This file will be the future home for more policies + + import ( "context" "errors" @@ -37,12 +40,6 @@ func (c *cowHostList) get() []*HostInfo { return *l } -func (c *cowHostList) set(list []*HostInfo) { - c.mu.Lock() - c.list.Store(&list) - c.mu.Unlock() -} - // add will add a host if it not already in the list func (c *cowHostList) add(host *HostInfo) bool { c.mu.Lock() @@ -68,33 +65,6 @@ func (c *cowHostList) add(host *HostInfo) bool { return true } -func (c *cowHostList) update(host *HostInfo) { - c.mu.Lock() - l := c.get() - - if len(l) == 0 { - c.mu.Unlock() - return - } - - found := false - newL := make([]*HostInfo, len(l)) - for i := range l { - if host.Equal(l[i]) { - newL[i] = host - found = true - } else { - newL[i] = l[i] - } - } - - if found { - c.list.Store(&newL) - } - - c.mu.Unlock() -} - func (c *cowHostList) remove(ip net.IP) bool { c.mu.Lock() l := c.get() @@ -304,7 +274,10 @@ type HostSelectionPolicy interface { KeyspaceChanged(KeyspaceUpdateEvent) Init(*Session) IsLocal(host *HostInfo) bool - //Pick returns an iteration function over selected hosts + // Pick returns an iteration function over selected hosts. + // Multiple attempts of a single query execution won't call the returned NextHost function concurrently, + // so it's safe to have internal state without additional synchronization as long as every call to Pick returns + // a different instance of NextHost. Pick(ExecutableQuery) NextHost } @@ -880,6 +853,51 @@ func (d *dcAwareRR) Pick(q ExecutableQuery) NextHost { return roundRobbin(int(nextStartOffset), d.localHosts.get(), d.remoteHosts.get()) } +// ReadyPolicy defines a policy for when a HostSelectionPolicy can be used. After +// each host connects during session initialization, the Ready method will be +// called. If you only need a single Host to be up you can wrap a +// HostSelectionPolicy policy with SingleHostReadyPolicy. +type ReadyPolicy interface { + Ready() bool +} + +// SingleHostReadyPolicy wraps a HostSelectionPolicy and returns Ready after a +// single host has been added via HostUp +func SingleHostReadyPolicy(p HostSelectionPolicy) *singleHostReadyPolicy { + return &singleHostReadyPolicy{ + HostSelectionPolicy: p, + } +} + +type singleHostReadyPolicy struct { + HostSelectionPolicy + ready bool + readyMux sync.Mutex +} + +func (s *singleHostReadyPolicy) HostUp(host *HostInfo) { + s.HostSelectionPolicy.HostUp(host) + + s.readyMux.Lock() + s.ready = true + s.readyMux.Unlock() +} + +func (s *singleHostReadyPolicy) Ready() bool { + s.readyMux.Lock() + ready := s.ready + s.readyMux.Unlock() + if !ready { + return false + } + + // in case the wrapped policy is also a ReadyPolicy, defer to that + if rdy, ok := s.HostSelectionPolicy.(ReadyPolicy); ok { + return rdy.Ready() + } + return true +} + // ConvictionPolicy interface is used by gocql to determine if a host should be // marked as DOWN based on the error and host info type ConvictionPolicy interface { diff --git a/vendor/github.com/gocql/gocql/prepared_cache.go b/vendor/github.com/gocql/gocql/prepared_cache.go index 3abeada21..346f36f4d 100644 --- a/vendor/github.com/gocql/gocql/prepared_cache.go +++ b/vendor/github.com/gocql/gocql/prepared_cache.go @@ -14,18 +14,6 @@ type preparedLRU struct { lru *lru.Cache } -// Max adjusts the maximum size of the cache and cleans up the oldest records if -// the new max is lower than the previous value. Not concurrency safe. 
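SingleHostReadyPolicy and the ReadyPolicy interface added here let session setup stop waiting once a single host is up; Session.init consults them via ClusterConfig.PoolConfig.HostSelectionPolicy. A minimal sketch of opting in (contact points are placeholders):

```go
package main

import (
	"log"

	"github.com/gocql/gocql"
)

func main() {
	cluster := gocql.NewCluster("10.0.0.1", "10.0.0.2", "10.0.0.3") // placeholder hosts

	// Wrap the usual policy so CreateSession can return once a single host
	// has connected instead of waiting on the whole ring.
	cluster.PoolConfig.HostSelectionPolicy = gocql.SingleHostReadyPolicy(
		gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()),
	)

	session, err := cluster.CreateSession()
	if err != nil {
		log.Fatal(err)
	}
	defer session.Close()
}
```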
-func (p *preparedLRU) max(max int) { - p.mu.Lock() - defer p.mu.Unlock() - - for p.lru.Len() > max { - p.lru.RemoveOldest() - } - p.lru.MaxEntries = max -} - func (p *preparedLRU) clear() { p.mu.Lock() defer p.mu.Unlock() diff --git a/vendor/github.com/gocql/gocql/query_executor.go b/vendor/github.com/gocql/gocql/query_executor.go index 6dd912db7..755deae5c 100644 --- a/vendor/github.com/gocql/gocql/query_executor.go +++ b/vendor/github.com/gocql/gocql/query_executor.go @@ -2,6 +2,7 @@ package gocql import ( "context" + "sync" "time" ) @@ -34,14 +35,15 @@ func (q *queryExecutor) attemptQuery(ctx context.Context, qry ExecutableQuery, c return iter } -func (q *queryExecutor) speculate(ctx context.Context, qry ExecutableQuery, sp SpeculativeExecutionPolicy, results chan *Iter) *Iter { +func (q *queryExecutor) speculate(ctx context.Context, qry ExecutableQuery, sp SpeculativeExecutionPolicy, + hostIter NextHost, results chan *Iter) *Iter { ticker := time.NewTicker(sp.Delay()) defer ticker.Stop() for i := 0; i < sp.Attempts(); i++ { select { case <-ticker.C: - go q.run(ctx, qry, results) + go q.run(ctx, qry, hostIter, results) case <-ctx.Done(): return &Iter{err: ctx.Err()} case iter := <-results: @@ -53,11 +55,23 @@ func (q *queryExecutor) speculate(ctx context.Context, qry ExecutableQuery, sp S } func (q *queryExecutor) executeQuery(qry ExecutableQuery) (*Iter, error) { + hostIter := q.policy.Pick(qry) + // check if the query is not marked as idempotent, if // it is, we force the policy to NonSpeculative sp := qry.speculativeExecutionPolicy() if !qry.IsIdempotent() || sp.Attempts() == 0 { - return q.do(qry.Context(), qry), nil + return q.do(qry.Context(), qry, hostIter), nil + } + + // When speculative execution is enabled, we could be accessing the host iterator from multiple goroutines below. + // To ensure we don't call it concurrently, we wrap the returned NextHost function here to synchronize access to it. + var mu sync.Mutex + origHostIter := hostIter + hostIter = func() SelectedHost { + mu.Lock() + defer mu.Unlock() + return origHostIter() } ctx, cancel := context.WithCancel(qry.Context()) @@ -66,12 +80,12 @@ func (q *queryExecutor) executeQuery(qry ExecutableQuery) (*Iter, error) { results := make(chan *Iter, 1) // Launch the main execution - go q.run(ctx, qry, results) + go q.run(ctx, qry, hostIter, results) // The speculative executions are launched _in addition_ to the main // execution, on a timer. So Speculation{2} would make 3 executions running // in total. 
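The query_executor change above shares one NextHost iterator between the main and speculative executions and serializes calls to it with a mutex. A standalone sketch of that closure-wrapping pattern (plain Go, not gocql API):

```go
package main

import (
	"fmt"
	"sync"
)

// makeIter returns a stateful iterator similar in spirit to gocql's NextHost:
// every call advances internal state, so concurrent callers must not race on it.
func makeIter(items []string) func() string {
	i := 0
	return func() string {
		if i >= len(items) {
			return ""
		}
		v := items[i]
		i++
		return v
	}
}

func main() {
	iter := makeIter([]string{"10.0.0.1", "10.0.0.2", "10.0.0.3"})

	// Wrap the closure so parallel (speculative) executions serialize access,
	// mirroring what executeQuery does with hostIter.
	var mu sync.Mutex
	safeIter := func() string {
		mu.Lock()
		defer mu.Unlock()
		return iter()
	}

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(safeIter())
		}()
	}
	wg.Wait()
}
```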
- if iter := q.speculate(ctx, qry, sp, results); iter != nil { + if iter := q.speculate(ctx, qry, sp, hostIter, results); iter != nil { return iter, nil } @@ -83,8 +97,7 @@ func (q *queryExecutor) executeQuery(qry ExecutableQuery) (*Iter, error) { } } -func (q *queryExecutor) do(ctx context.Context, qry ExecutableQuery) *Iter { - hostIter := q.policy.Pick(qry) +func (q *queryExecutor) do(ctx context.Context, qry ExecutableQuery, hostIter NextHost) *Iter { selectedHost := hostIter() rt := qry.retryPolicy() @@ -153,9 +166,9 @@ func (q *queryExecutor) do(ctx context.Context, qry ExecutableQuery) *Iter { return &Iter{err: ErrNoConnections} } -func (q *queryExecutor) run(ctx context.Context, qry ExecutableQuery, results chan<- *Iter) { +func (q *queryExecutor) run(ctx context.Context, qry ExecutableQuery, hostIter NextHost, results chan<- *Iter) { select { - case results <- q.do(ctx, qry): + case results <- q.do(ctx, qry, hostIter): case <-ctx.Done(): } } diff --git a/vendor/github.com/gocql/gocql/ring.go b/vendor/github.com/gocql/gocql/ring.go index 856afae37..2798949f3 100644 --- a/vendor/github.com/gocql/gocql/ring.go +++ b/vendor/github.com/gocql/gocql/ring.go @@ -63,29 +63,6 @@ func (r *ring) currentHosts() map[string]*HostInfo { return hosts } -func (r *ring) addHost(host *HostInfo) bool { - // TODO(zariel): key all host info by HostID instead of - // ip addresses - if host.invalidConnectAddr() { - panic(fmt.Sprintf("invalid host: %v", host)) - } - ip := host.ConnectAddress().String() - - r.mu.Lock() - if r.hosts == nil { - r.hosts = make(map[string]*HostInfo) - } - - _, ok := r.hosts[ip] - if !ok { - r.hostList = append(r.hostList, host) - } - - r.hosts[ip] = host - r.mu.Unlock() - return ok -} - func (r *ring) addOrUpdate(host *HostInfo) *HostInfo { if existingHost, ok := r.addHostIfMissing(host); ok { existingHost.update(host) diff --git a/vendor/github.com/gocql/gocql/session.go b/vendor/github.com/gocql/gocql/session.go index c627e4bbd..46b15ae76 100644 --- a/vendor/github.com/gocql/gocql/session.go +++ b/vendor/github.com/gocql/gocql/session.go @@ -27,7 +27,7 @@ import ( // scenario is to have one global session object to interact with the // whole Cassandra cluster. // -// This type extends the Node interface by adding a convinient query builder +// This type extends the Node interface by adding a convenient query builder // and automatically sets a default consistency level on all operations // that do not have a consistency level set. 
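The Session comment above mentions the default consistency level applied to operations that do not set one explicitly. A small sketch of setting the session-wide default and overriding it per query (contact point, keyspace, and table are placeholders):

```go
package main

import (
	"log"

	"github.com/gocql/gocql"
)

func main() {
	cluster := gocql.NewCluster("127.0.0.1") // placeholder contact point
	cluster.Keyspace = "example"             // placeholder keyspace
	cluster.Consistency = gocql.Quorum       // default for queries that set nothing

	session, err := cluster.CreateSession()
	if err != nil {
		log.Fatal(err)
	}
	defer session.Close()

	// Per-query override of the session default.
	var count int
	if err := session.Query(`SELECT count(*) FROM users`).
		Consistency(gocql.One).
		Scan(&count); err != nil {
		log.Printf("count failed: %v", err)
	}
}
```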
type Session struct { @@ -62,7 +62,6 @@ type Session struct { schemaEvents *eventDebouncer // ring metadata - hosts []HostInfo useSystemSchema bool hasAggregatesAndFunctions bool @@ -227,18 +226,44 @@ func (s *Session) init() error { } hosts = hosts[:0] + // each host will increment left and decrement it after connecting and once + // there's none left, we'll close hostCh + var left int64 + // we will receive up to len(hostMap) of messages so create a buffer so we + // don't end up stuck in a goroutine if we stopped listening + connectedCh := make(chan struct{}, len(hostMap)) + // we add one here because we don't want to end up closing hostCh until we're + // done looping and the decerement code might be reached before we've looped + // again + atomic.AddInt64(&left, 1) for _, host := range hostMap { - host = s.ring.addOrUpdate(host) + host := s.ring.addOrUpdate(host) if s.cfg.filterHost(host) { continue } - host.setState(NodeUp) - s.pool.addHost(host) + atomic.AddInt64(&left, 1) + go func() { + s.pool.addHost(host) + connectedCh <- struct{}{} + + // if there are no hosts left, then close the hostCh to unblock the loop + // below if its still waiting + if atomic.AddInt64(&left, -1) == 0 { + close(connectedCh) + } + }() hosts = append(hosts, host) } + // once we're done looping we subtract the one we initially added and check + // to see if we should close + if atomic.AddInt64(&left, -1) == 0 { + close(connectedCh) + } + // before waiting for them to connect, add them all to the policy so we can + // utilize efficiencies by calling AddHosts if the policy supports it type bulkAddHosts interface { AddHosts([]*HostInfo) } @@ -250,6 +275,15 @@ func (s *Session) init() error { } } + readyPolicy, _ := s.policy.(ReadyPolicy) + // now loop over connectedCh until it's closed (meaning we've connected to all) + // or until the policy says we're ready + for range connectedCh { + if readyPolicy != nil && readyPolicy.Ready() { + break + } + } + // TODO(zariel): we probably dont need this any more as we verify that we // can connect to one of the endpoints supplied by using the control conn. // See if there are any connections in the pool @@ -320,7 +354,8 @@ func (s *Session) reconnectDownedHosts(intv time.Duration) { if h.IsUp() { continue } - s.handleNodeUp(h.ConnectAddress(), h.Port(), true) + // we let the pool call handleNodeUp to change the host state + s.pool.addHost(h) } case <-s.ctx.Done(): return @@ -806,6 +841,7 @@ type Query struct { trace Tracer observer QueryObserver session *Session + conn *Conn rt RetryPolicy spec SpeculativeExecutionPolicy binding func(q *QueryInfo) ([]interface{}, error) @@ -1094,12 +1130,17 @@ func (q *Query) speculativeExecutionPolicy() SpeculativeExecutionPolicy { return q.spec } +// IsIdempotent returns whether the query is marked as idempotent. +// Non-idempotent query won't be retried. +// See "Retries and speculative execution" in package docs for more details. func (q *Query) IsIdempotent() bool { return q.idempotent } // Idempotent marks the query as being idempotent or not depending on // the value. +// Non-idempotent query won't be retried. +// See "Retries and speculative execution" in package docs for more details. 
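Session.init above now dials hosts in parallel and stops waiting as soon as a ReadyPolicy reports ready, using an atomic counter plus a buffered channel instead of a WaitGroup so the waiter can bail out early without stranding senders. A standalone sketch of that pattern (plain Go, not the actual gocql internals):

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// connectAll launches one goroutine per host and returns a channel that gets
// one value per attempt and is closed when every attempt has finished. The
// buffer means senders never block, even if the reader stops listening early.
func connectAll(hosts []string) <-chan string {
	done := make(chan string, len(hosts))
	var left int64 = 1 // hold the count open until the launch loop finishes
	for _, h := range hosts {
		atomic.AddInt64(&left, 1)
		go func(h string) {
			time.Sleep(10 * time.Millisecond) // stand-in for dialing the host
			done <- h
			if atomic.AddInt64(&left, -1) == 0 {
				close(done)
			}
		}(h)
	}
	if atomic.AddInt64(&left, -1) == 0 {
		close(done)
	}
	return done
}

func main() {
	// Stop waiting after the first host reports in, the way a ReadyPolicy such
	// as SingleHostReadyPolicy short-circuits session initialization.
	for h := range connectAll([]string{"10.0.0.1", "10.0.0.2", "10.0.0.3"}) {
		fmt.Println("connected:", h)
		break
	}
}
```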
func (q *Query) Idempotent(value bool) *Query { q.idempotent = value return q @@ -1164,6 +1205,11 @@ func (q *Query) Iter() *Iter { if isUseStatement(q.stmt) { return &Iter{err: ErrUseStmt} } + // if the query was specifically run on a connection then re-use that + // connection when fetching the next results + if q.conn != nil { + return q.conn.executeQuery(q.Context(), q) + } return q.session.executeQuery(q) } @@ -1195,6 +1241,10 @@ func (q *Query) Scan(dest ...interface{}) error { // statement containing an IF clause). If the transaction fails because // the existing values did not match, the previous values will be stored // in dest. +// +// As for INSERT .. IF NOT EXISTS, previous values will be returned as if +// SELECT * FROM. So using ScanCAS with INSERT is inherently prone to +// column mismatching. Use MapScanCAS to capture them safely. func (q *Query) ScanCAS(dest ...interface{}) (applied bool, err error) { q.disableSkipMetadata = true iter := q.Iter() @@ -1423,7 +1473,7 @@ func (iter *Iter) Scan(dest ...interface{}) bool { } if iter.next != nil && iter.pos >= iter.next.pos { - go iter.next.fetch() + iter.next.fetchAsync() } // currently only support scanning into an expand tuple, such that its the same @@ -1517,16 +1567,31 @@ func (iter *Iter) NumRows() int { return iter.numRows } +// nextIter holds state for fetching a single page in an iterator. +// single page might be attempted multiple times due to retries. type nextIter struct { - qry *Query - pos int - once sync.Once - next *Iter + qry *Query + pos int + oncea sync.Once + once sync.Once + next *Iter +} + +func (n *nextIter) fetchAsync() { + n.oncea.Do(func() { + go n.fetch() + }) } func (n *nextIter) fetch() *Iter { n.once.Do(func() { - n.next = n.qry.session.executeQuery(n.qry) + // if the query was specifically run on a connection then re-use that + // connection when fetching the next results + if n.qry.conn != nil { + n.next = n.qry.conn.executeQuery(n.qry.Context(), n.qry) + } else { + n.next = n.qry.session.executeQuery(n.qry) + } }) return n.next } @@ -1536,7 +1601,6 @@ type Batch struct { Entries []BatchEntry Cons Consistency routingKey []byte - routingKeyBuffer []byte CustomPayload map[string][]byte rt RetryPolicy spec SpeculativeExecutionPolicy @@ -1733,7 +1797,7 @@ func (b *Batch) WithTimestamp(timestamp int64) *Batch { func (b *Batch) attempt(keyspace string, end, start time.Time, iter *Iter, host *HostInfo) { latency := end.Sub(start) - _, metricsForHost := b.metrics.attempt(1, latency, host, b.observer != nil) + attempt, metricsForHost := b.metrics.attempt(1, latency, host, b.observer != nil) if b.observer == nil { return @@ -1753,6 +1817,7 @@ func (b *Batch) attempt(keyspace string, end, start time.Time, iter *Iter, host Host: host, Metrics: metricsForHost, Err: iter.err, + Attempt: attempt, }) } @@ -1968,7 +2033,6 @@ type ObservedQuery struct { Err error // Attempt is the index of attempt at executing this query. - // An attempt might be either retry or fetching next page of a query. // The first attempt is number zero and any retries have non-zero attempt number. Attempt int } @@ -1999,6 +2063,10 @@ type ObservedBatch struct { // The metrics per this host Metrics *hostMetrics + + // Attempt is the index of attempt at executing this query. + // The first attempt is number zero and any retries have non-zero attempt number. + Attempt int } // BatchObserver is the interface implemented by batch observers / stat collectors. 
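The ScanCAS note added above recommends MapScanCAS for INSERT .. IF NOT EXISTS, since the rejected row comes back keyed by column name. A short sketch, with a hypothetical users table:

```go
package example

import (
	"log"

	"github.com/gocql/gocql"
)

// insertIfNotExists demonstrates MapScanCAS with a conditional insert.
func insertIfNotExists(session *gocql.Session, id gocql.UUID, name string) {
	previous := map[string]interface{}{}
	applied, err := session.Query(
		`INSERT INTO users (id, name) VALUES (?, ?) IF NOT EXISTS`,
		id, name,
	).MapScanCAS(previous)
	if err != nil {
		log.Printf("lwt failed: %v", err)
		return
	}
	if !applied {
		// The existing row's columns are in previous, keyed by name, which
		// avoids the positional pitfalls ScanCAS has with this statement.
		log.Printf("row already exists: %v", previous)
	}
}
```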
diff --git a/vendor/github.com/gocql/gocql/token.go b/vendor/github.com/gocql/gocql/token.go index 9ae69b67a..7471299a9 100644 --- a/vendor/github.com/gocql/gocql/token.go +++ b/vendor/github.com/gocql/gocql/token.go @@ -153,7 +153,7 @@ func newTokenRing(partitioner string, hosts []*HostInfo) (*tokenRing, error) { } else if strings.HasSuffix(partitioner, "RandomPartitioner") { tokenRing.partitioner = randomPartitioner{} } else { - return nil, fmt.Errorf("Unsupported partitioner '%s'", partitioner) + return nil, fmt.Errorf("unsupported partitioner '%s'", partitioner) } for _, host := range hosts { diff --git a/vendor/github.com/gocql/gocql/topology.go b/vendor/github.com/gocql/gocql/topology.go index 3f8a75e8c..885e35df2 100644 --- a/vendor/github.com/gocql/gocql/topology.go +++ b/vendor/github.com/gocql/gocql/topology.go @@ -46,32 +46,35 @@ type placementStrategy interface { replicationFactor(dc string) int } -func getReplicationFactorFromOpts(keyspace string, val interface{}) int { - // TODO: dont really want to panic here, but is better - // than spamming +func getReplicationFactorFromOpts(val interface{}) (int, error) { switch v := val.(type) { case int: - if v <= 0 { - panic(fmt.Sprintf("invalid replication_factor %d. Is the %q keyspace configured correctly?", v, keyspace)) + if v < 0 { + return 0, fmt.Errorf("invalid replication_factor %d", v) } - return v + return v, nil case string: n, err := strconv.Atoi(v) if err != nil { - panic(fmt.Sprintf("invalid replication_factor. Is the %q keyspace configured correctly? %v", keyspace, err)) - } else if n <= 0 { - panic(fmt.Sprintf("invalid replication_factor %d. Is the %q keyspace configured correctly?", n, keyspace)) + return 0, fmt.Errorf("invalid replication_factor %q: %v", v, err) + } else if n < 0 { + return 0, fmt.Errorf("invalid replication_factor %d", n) } - return n + return n, nil default: - panic(fmt.Sprintf("unkown replication_factor type %T", v)) + return 0, fmt.Errorf("unknown replication_factor type %T", v) } } func getStrategy(ks *KeyspaceMetadata) placementStrategy { switch { case strings.Contains(ks.StrategyClass, "SimpleStrategy"): - return &simpleStrategy{rf: getReplicationFactorFromOpts(ks.Name, ks.StrategyOptions["replication_factor"])} + rf, err := getReplicationFactorFromOpts(ks.StrategyOptions["replication_factor"]) + if err != nil { + Logger.Printf("parse rf for keyspace %q: %v", ks.Name, err) + return nil + } + return &simpleStrategy{rf: rf} case strings.Contains(ks.StrategyClass, "NetworkTopologyStrategy"): dcs := make(map[string]int) for dc, rf := range ks.StrategyOptions { @@ -79,14 +82,21 @@ func getStrategy(ks *KeyspaceMetadata) placementStrategy { continue } - dcs[dc] = getReplicationFactorFromOpts(ks.Name+":dc="+dc, rf) + rf, err := getReplicationFactorFromOpts(rf) + if err != nil { + Logger.Println("parse rf for keyspace %q, dc %q: %v", err) + // skip DC if the rf is invalid/unsupported, so that we can at least work with other working DCs. 
+ continue + } + + dcs[dc] = rf } return &networkTopology{dcs: dcs} case strings.Contains(ks.StrategyClass, "LocalStrategy"): return nil default: - // TODO: handle unknown replicas and just return the primary host for a token - panic(fmt.Sprintf("unsupported strategy class: %v", ks.StrategyClass)) + Logger.Printf("parse rf for keyspace %q: unsupported strategy class: %v", ks.StrategyClass) + return nil } } diff --git a/vendor/github.com/gocql/gocql/uuid.go b/vendor/github.com/gocql/gocql/uuid.go index 13ad38379..09727a017 100644 --- a/vendor/github.com/gocql/gocql/uuid.go +++ b/vendor/github.com/gocql/gocql/uuid.go @@ -2,11 +2,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +package gocql + // The uuid package can be used to generate and parse universally unique // identifiers, a standardized format in the form of a 128 bit number. // // http://tools.ietf.org/html/rfc4122 -package gocql import ( "crypto/rand" diff --git a/vendor/modules.txt b/vendor/modules.txt index e6f82c075..ccf3c397a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -402,7 +402,7 @@ github.com/go-stack/stack github.com/go-test/deep # github.com/go-yaml/yaml v2.1.0+incompatible github.com/go-yaml/yaml -# github.com/gocql/gocql v0.0.0-20200624222514-34081eda590e +# github.com/gocql/gocql v0.0.0-20210401103645-80ab1e13e309 ## explicit github.com/gocql/gocql github.com/gocql/gocql/internal/lru
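topology.go now reports bad replication_factor values as errors that are logged and skipped instead of panicking. A standalone sketch of the same parse-and-return-error approach and how such errors surface to a caller; the helper is illustrative, not part of gocql's exported API:

```go
package main

import (
	"fmt"
	"strconv"
)

// parseReplicationFactor accepts the loosely typed values Cassandra stores for
// replication_factor (int or string) and returns an error on bad input rather
// than panicking, mirroring the behavior change above.
func parseReplicationFactor(val interface{}) (int, error) {
	switch v := val.(type) {
	case int:
		if v < 0 {
			return 0, fmt.Errorf("invalid replication_factor %d", v)
		}
		return v, nil
	case string:
		n, err := strconv.Atoi(v)
		if err != nil {
			return 0, fmt.Errorf("invalid replication_factor %q: %v", v, err)
		}
		if n < 0 {
			return 0, fmt.Errorf("invalid replication_factor %d", n)
		}
		return n, nil
	default:
		return 0, fmt.Errorf("unknown replication_factor type %T", v)
	}
}

func main() {
	for _, raw := range []interface{}{3, "2", "oops", -1} {
		if rf, err := parseReplicationFactor(raw); err != nil {
			fmt.Println("skipping:", err) // a caller can log and move on
		} else {
			fmt.Println("rf =", rf)
		}
	}
}
```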