2017-01-13 19:47:16 +00:00
|
|
|
package state
|
|
|
|
|
|
|
|
import (
|
2020-05-20 16:43:33 +00:00
|
|
|
"errors"
|
2017-01-13 19:47:16 +00:00
|
|
|
"fmt"
|
2020-04-08 18:37:24 +00:00
|
|
|
"reflect"
|
2017-01-13 19:47:16 +00:00
|
|
|
"strings"
|
|
|
|
|
2017-07-06 10:34:00 +00:00
|
|
|
"github.com/hashicorp/consul/agent/structs"
|
2017-04-19 23:00:11 +00:00
|
|
|
"github.com/hashicorp/consul/api"
|
2020-04-23 23:16:04 +00:00
|
|
|
"github.com/hashicorp/consul/lib"
|
2017-01-13 19:47:16 +00:00
|
|
|
"github.com/hashicorp/consul/types"
|
2020-03-19 13:11:20 +00:00
|
|
|
memdb "github.com/hashicorp/go-memdb"
|
2019-11-25 17:57:35 +00:00
|
|
|
"github.com/hashicorp/go-uuid"
|
2017-01-13 19:47:16 +00:00
|
|
|
)
|
|
|
|
|
2018-03-09 16:11:39 +00:00
|
|
|
const (
	// servicesTableName is the name of the memdb table holding service
	// registrations.
	servicesTableName = "services"

	// gatewayServicesTableName is the name of the memdb table holding the
	// associations between gateways and services.
	gatewayServicesTableName = "gateway-services"

	// serviceLastExtinctionIndexName is the index entry recording the raft
	// index at which the last instance of any service was most recently
	// unregistered. Blocking queries on missing services consult it.
	serviceLastExtinctionIndexName = "service_last_extinction"
)
|
|
|
|
|
2017-11-29 01:03:34 +00:00
|
|
|
// nodesTableSchema returns a new table schema used for storing node
|
|
|
|
// information.
|
|
|
|
func nodesTableSchema() *memdb.TableSchema {
|
|
|
|
return &memdb.TableSchema{
|
|
|
|
Name: "nodes",
|
|
|
|
Indexes: map[string]*memdb.IndexSchema{
|
2020-06-16 17:19:31 +00:00
|
|
|
"id": {
|
2017-11-29 01:03:34 +00:00
|
|
|
Name: "id",
|
|
|
|
AllowMissing: false,
|
|
|
|
Unique: true,
|
|
|
|
Indexer: &memdb.StringFieldIndex{
|
|
|
|
Field: "Node",
|
|
|
|
Lowercase: true,
|
|
|
|
},
|
|
|
|
},
|
2020-06-16 17:19:31 +00:00
|
|
|
"uuid": {
|
2017-11-29 01:03:34 +00:00
|
|
|
Name: "uuid",
|
|
|
|
AllowMissing: true,
|
|
|
|
Unique: true,
|
|
|
|
Indexer: &memdb.UUIDFieldIndex{
|
|
|
|
Field: "ID",
|
|
|
|
},
|
|
|
|
},
|
2020-06-16 17:19:31 +00:00
|
|
|
"meta": {
|
2017-11-29 01:03:34 +00:00
|
|
|
Name: "meta",
|
|
|
|
AllowMissing: true,
|
|
|
|
Unique: false,
|
|
|
|
Indexer: &memdb.StringMapFieldIndex{
|
|
|
|
Field: "Meta",
|
|
|
|
Lowercase: false,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// gatewayServicesTableNameSchema returns a new table schema used to store information
|
2020-04-08 18:37:24 +00:00
|
|
|
// about services associated with terminating gateways.
|
2020-04-16 21:00:48 +00:00
|
|
|
func gatewayServicesTableNameSchema() *memdb.TableSchema {
|
2020-04-08 18:37:24 +00:00
|
|
|
return &memdb.TableSchema{
|
2020-04-16 21:00:48 +00:00
|
|
|
Name: gatewayServicesTableName,
|
2020-04-08 18:37:24 +00:00
|
|
|
Indexes: map[string]*memdb.IndexSchema{
|
|
|
|
"id": {
|
|
|
|
Name: "id",
|
|
|
|
AllowMissing: false,
|
|
|
|
Unique: true,
|
|
|
|
Indexer: &memdb.CompoundIndex{
|
|
|
|
Indexes: []memdb.Indexer{
|
2020-06-12 14:57:41 +00:00
|
|
|
&ServiceNameIndex{
|
2020-04-08 18:37:24 +00:00
|
|
|
Field: "Gateway",
|
|
|
|
},
|
2020-06-12 14:57:41 +00:00
|
|
|
&ServiceNameIndex{
|
2020-04-08 18:37:24 +00:00
|
|
|
Field: "Service",
|
|
|
|
},
|
2020-04-21 21:06:23 +00:00
|
|
|
&memdb.IntFieldIndex{
|
|
|
|
Field: "Port",
|
|
|
|
},
|
2020-04-08 18:37:24 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"gateway": {
|
|
|
|
Name: "gateway",
|
|
|
|
AllowMissing: false,
|
|
|
|
Unique: false,
|
2020-06-12 14:57:41 +00:00
|
|
|
Indexer: &ServiceNameIndex{
|
2020-04-08 18:37:24 +00:00
|
|
|
Field: "Gateway",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"service": {
|
|
|
|
Name: "service",
|
|
|
|
AllowMissing: true,
|
|
|
|
Unique: false,
|
2020-06-12 14:57:41 +00:00
|
|
|
Indexer: &ServiceNameIndex{
|
2020-04-08 18:37:24 +00:00
|
|
|
Field: "Service",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
// ServiceNameIndex is a memdb indexer keyed on a structs.ServiceName value
// held in a named struct field.
type ServiceNameIndex struct {
	// Field is the name of the struct field to read the service name from.
	Field string
}
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
func (index *ServiceNameIndex) FromObject(obj interface{}) (bool, []byte, error) {
|
2020-04-08 18:37:24 +00:00
|
|
|
v := reflect.ValueOf(obj)
|
|
|
|
v = reflect.Indirect(v) // Dereference the pointer if any
|
|
|
|
|
|
|
|
fv := v.FieldByName(index.Field)
|
|
|
|
isPtr := fv.Kind() == reflect.Ptr
|
|
|
|
fv = reflect.Indirect(fv)
|
|
|
|
if !isPtr && !fv.IsValid() || !fv.CanInterface() {
|
|
|
|
return false, nil,
|
|
|
|
fmt.Errorf("field '%s' for %#v is invalid %v ", index.Field, obj, isPtr)
|
|
|
|
}
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
name, ok := fv.Interface().(structs.ServiceName)
|
2020-04-08 18:37:24 +00:00
|
|
|
if !ok {
|
2020-06-12 14:57:41 +00:00
|
|
|
return false, nil, fmt.Errorf("Field 'ServiceName' is not of type structs.ServiceName")
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Enforce lowercase and add null character as terminator
|
2020-06-12 14:57:41 +00:00
|
|
|
id := strings.ToLower(name.String()) + "\x00"
|
2020-04-08 18:37:24 +00:00
|
|
|
|
|
|
|
return true, []byte(id), nil
|
|
|
|
}
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
func (index *ServiceNameIndex) FromArgs(args ...interface{}) ([]byte, error) {
|
2020-04-08 18:37:24 +00:00
|
|
|
if len(args) != 1 {
|
|
|
|
return nil, fmt.Errorf("must provide only a single argument")
|
|
|
|
}
|
2020-06-12 14:57:41 +00:00
|
|
|
name, ok := args[0].(structs.ServiceName)
|
2020-04-08 18:37:24 +00:00
|
|
|
if !ok {
|
2020-06-12 14:57:41 +00:00
|
|
|
return nil, fmt.Errorf("argument must be of type structs.ServiceName: %#v", args[0])
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Enforce lowercase and add null character as terminator
|
2020-06-12 14:57:41 +00:00
|
|
|
id := strings.ToLower(name.String()) + "\x00"
|
2020-04-08 18:37:24 +00:00
|
|
|
|
|
|
|
return []byte(strings.ToLower(id)), nil
|
|
|
|
}
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
func (index *ServiceNameIndex) PrefixFromArgs(args ...interface{}) ([]byte, error) {
|
2020-04-08 18:37:24 +00:00
|
|
|
val, err := index.FromArgs(args...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Strip the null terminator, the rest is a prefix
|
|
|
|
n := len(val)
|
|
|
|
if n > 0 {
|
|
|
|
return val[:n-1], nil
|
|
|
|
}
|
|
|
|
return val, nil
|
|
|
|
}
|
|
|
|
|
2017-11-29 01:03:34 +00:00
|
|
|
func init() {
|
|
|
|
registerSchema(nodesTableSchema)
|
|
|
|
registerSchema(servicesTableSchema)
|
|
|
|
registerSchema(checksTableSchema)
|
2020-04-16 21:00:48 +00:00
|
|
|
registerSchema(gatewayServicesTableNameSchema)
|
2017-11-29 01:03:34 +00:00
|
|
|
}
|
|
|
|
|
2017-02-01 22:20:25 +00:00
|
|
|
const (
	// minUUIDLookupLen is the shortest node name for which we will also
	// check whether the name is really a UUID and attempt an ID-based node
	// lookup.
	minUUIDLookupLen = 2
)
|
|
|
|
|
2017-02-02 20:13:58 +00:00
|
|
|
// resizeNodeLookupKey returns s unchanged when its byte length is even, and
// s with its last byte removed when the length is odd.
func resizeNodeLookupKey(s string) string {
	if len(s)%2 == 0 {
		return s
	}
	return s[:len(s)-1]
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// Nodes is used to pull the full list of nodes for use during snapshots.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Snapshot) Nodes() (memdb.ResultIterator, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
iter, err := s.tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return iter, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Services is used to pull the full list of services for a given node for use
|
|
|
|
// during snapshots.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Snapshot) Services(node string) (memdb.ResultIterator, error) {
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.store.catalogServiceListByNode(s.tx, node, structs.WildcardEnterpriseMeta(), true)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return iter, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Checks is used to pull the full list of checks for a given node for use
|
|
|
|
// during snapshots.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Snapshot) Checks(node string) (memdb.ResultIterator, error) {
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.store.catalogListChecksByNode(s.tx, node, structs.WildcardEnterpriseMeta())
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return iter, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Registration is used to make sure a node, service, and check registration is
|
|
|
|
// performed within a single transaction to avoid race conditions on state
|
|
|
|
// updates.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Restore) Registration(idx uint64, req *structs.RegisterRequest) error {
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.store.ensureRegistrationTxn(s.tx, idx, req); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// EnsureRegistration is used to make sure a node, service, and check
|
|
|
|
// registration is performed within a single transaction to avoid race
|
|
|
|
// conditions on state updates.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) EnsureRegistration(idx uint64, req *structs.RegisterRequest) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureRegistrationTxn(tx, idx, req); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureCheckIfNodeMatches(tx *txn, idx uint64, node string, check *structs.HealthCheck) error {
|
2018-08-14 16:45:33 +00:00
|
|
|
if check.Node != node {
|
|
|
|
return fmt.Errorf("check node %q does not match node %q",
|
|
|
|
check.Node, node)
|
|
|
|
}
|
|
|
|
if err := s.ensureCheckTxn(tx, idx, check); err != nil {
|
|
|
|
return fmt.Errorf("failed inserting check: %s on node %q", err, check.Node)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// ensureRegistrationTxn is used to make sure a node, service, and check
|
|
|
|
// registration is performed within a single transaction to avoid race
|
|
|
|
// conditions on state updates.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureRegistrationTxn(tx *txn, idx uint64, req *structs.RegisterRequest) error {
|
2019-12-10 02:26:41 +00:00
|
|
|
if _, err := s.validateRegisterRequestTxn(tx, req); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-01-20 05:55:57 +00:00
|
|
|
// Create a node structure.
|
2017-01-13 19:47:16 +00:00
|
|
|
node := &structs.Node{
|
2017-01-18 22:26:42 +00:00
|
|
|
ID: req.ID,
|
2017-01-13 19:47:16 +00:00
|
|
|
Node: req.Node,
|
|
|
|
Address: req.Address,
|
2017-04-18 12:02:24 +00:00
|
|
|
Datacenter: req.Datacenter,
|
2017-01-13 19:47:16 +00:00
|
|
|
TaggedAddresses: req.TaggedAddresses,
|
|
|
|
Meta: req.NodeMeta,
|
|
|
|
}
|
2017-01-20 05:55:57 +00:00
|
|
|
|
|
|
|
// Since this gets called for all node operations (service and check
|
|
|
|
// updates) and churn on the node itself is basically none after the
|
|
|
|
// node updates itself the first time, it's worth seeing if we need to
|
|
|
|
// modify the node at all so we prevent watch churn and useless writes
|
|
|
|
// and modify index bumps on the node.
|
|
|
|
{
|
|
|
|
existing, err := tx.First("nodes", "id", node.Node)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("node lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if existing == nil || req.ChangesNode(existing.(*structs.Node)) {
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureNodeTxn(tx, idx, node); err != nil {
|
2017-01-20 05:55:57 +00:00
|
|
|
return fmt.Errorf("failed inserting node: %s", err)
|
|
|
|
}
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-01-20 05:55:57 +00:00
|
|
|
// Add the service, if any. We perform a similar check as we do for the
|
|
|
|
// node info above to make sure we actually need to update the service
|
|
|
|
// definition in order to prevent useless churn if nothing has changed.
|
2017-01-13 19:47:16 +00:00
|
|
|
if req.Service != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
_, existing, err := firstWatchCompoundWithTxn(tx, "services", "id", &req.Service.EnterpriseMeta, req.Node, req.Service.ID)
|
2017-01-20 05:55:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
if existing == nil || !(existing.(*structs.ServiceNode).ToNodeService()).IsSame(req.Service) {
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureServiceTxn(tx, idx, req.Node, req.Service); err != nil {
|
2017-01-20 05:55:57 +00:00
|
|
|
return fmt.Errorf("failed inserting service: %s", err)
|
|
|
|
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add the checks, if any.
|
|
|
|
if req.Check != nil {
|
2018-08-14 16:45:33 +00:00
|
|
|
if err := s.ensureCheckIfNodeMatches(tx, idx, req.Node, req.Check); err != nil {
|
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, check := range req.Checks {
|
2018-08-14 16:45:33 +00:00
|
|
|
if err := s.ensureCheckIfNodeMatches(tx, idx, req.Node, check); err != nil {
|
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// EnsureNode is used to upsert node registration or modification.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) EnsureNode(idx uint64, node *structs.Node) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the node upsert
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureNodeTxn(tx, idx, node); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test case that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
// ensureNoNodeWithSimilarNameTxn checks that no other node has conflict in its name
|
|
|
|
// If allowClashWithoutID then, getting a conflict on another node without ID will be allowed
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *txn, node *structs.Node, allowClashWithoutID bool) error {
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
// Retrieve all of the nodes
|
|
|
|
enodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("Cannot lookup all nodes: %s", err)
|
|
|
|
}
|
|
|
|
for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() {
|
|
|
|
enode := nodeIt.(*structs.Node)
|
|
|
|
if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID {
|
2019-03-08 06:42:54 +00:00
|
|
|
// Look up the existing node's Serf health check to see if it's failed.
|
|
|
|
// If it is, the node can be renamed.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, enodeCheck, err := firstWatchCompoundWithTxn(tx, "checks", "id", structs.DefaultEnterpriseMeta(), enode.Node, string(structs.SerfCheckID))
|
2019-03-08 06:42:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("Cannot get status of node %s: %s", enode.Node, err)
|
|
|
|
}
|
|
|
|
|
2019-03-18 23:15:36 +00:00
|
|
|
// Get the node health. If there's no Serf health check, we consider it safe to rename
|
|
|
|
// the node as it's likely an external node registration not managed by Consul.
|
2019-03-13 23:23:05 +00:00
|
|
|
var nodeHealthy bool
|
|
|
|
if enodeCheck != nil {
|
|
|
|
enodeSerfCheck, ok := enodeCheck.(*structs.HealthCheck)
|
|
|
|
if ok {
|
|
|
|
nodeHealthy = enodeSerfCheck.Status != api.HealthCritical
|
|
|
|
}
|
2019-03-08 06:42:54 +00:00
|
|
|
}
|
|
|
|
|
2019-03-13 23:23:05 +00:00
|
|
|
if !(enode.ID == "" && allowClashWithoutID) && nodeHealthy {
|
2019-08-28 19:57:05 +00:00
|
|
|
return fmt.Errorf("Node name %s is reserved by node %s with name %s (%s)", node.Node, enode.ID, enode.Node, enode.Address)
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-12-03 07:51:18 +00:00
|
|
|
// ensureNodeCASTxn updates a node only if the existing index matches the given index.
|
|
|
|
// Returns a bool indicating if a write happened and any error.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureNodeCASTxn(tx *txn, idx uint64, node *structs.Node) (bool, error) {
|
2018-12-03 07:51:18 +00:00
|
|
|
// Retrieve the existing entry.
|
2019-01-09 19:59:23 +00:00
|
|
|
existing, err := getNodeTxn(tx, node.Node)
|
2018-12-03 07:51:18 +00:00
|
|
|
if err != nil {
|
2019-01-09 19:59:23 +00:00
|
|
|
return false, err
|
2018-12-03 07:51:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the we should do the set. A ModifyIndex of 0 means that
|
|
|
|
// we are doing a set-if-not-exists.
|
|
|
|
if node.ModifyIndex == 0 && existing != nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
if node.ModifyIndex != 0 && existing == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
2019-01-09 19:59:23 +00:00
|
|
|
if existing != nil && node.ModifyIndex != 0 && node.ModifyIndex != existing.ModifyIndex {
|
2018-12-03 07:51:18 +00:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Perform the update.
|
|
|
|
if err := s.ensureNodeTxn(tx, idx, node); err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// ensureNodeTxn is the inner function called to actually create a node
|
|
|
|
// registration or modify an existing one in the state store. It allows
|
|
|
|
// passing in a memdb transaction so it may be part of a larger txn.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureNodeTxn(tx *txn, idx uint64, node *structs.Node) error {
|
2017-03-27 07:15:21 +00:00
|
|
|
// See if there's an existing node with this UUID, and make sure the
|
|
|
|
// name is the same.
|
|
|
|
var n *structs.Node
|
|
|
|
if node.ID != "" {
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
existing, err := getNodeIDTxn(tx, node.ID)
|
2017-03-27 07:15:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("node lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if existing != nil {
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
n = existing
|
2017-03-27 07:15:21 +00:00
|
|
|
if n.Node != node.Node {
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
// Lets first get all nodes and check whether name do match, we do not allow clash on nodes without ID
|
|
|
|
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, false)
|
|
|
|
if dupNameError != nil {
|
2019-08-28 19:57:05 +00:00
|
|
|
return fmt.Errorf("Error while renaming Node ID: %q (%s): %s", node.ID, node.Address, dupNameError)
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another node by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
}
|
|
|
|
// We are actually renaming a node, remove its reference first
|
|
|
|
err := s.deleteNodeTxn(tx, idx, n.Node)
|
|
|
|
if err != nil {
|
2019-08-28 19:57:05 +00:00
|
|
|
return fmt.Errorf("Error while renaming Node ID: %q (%s) from %s to %s",
|
|
|
|
node.ID, node.Address, n.Node, node.Node)
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another node by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// We allow to "steal" another node name that would have no ID
|
|
|
|
// It basically means that we allow upgrading a node without ID and add the ID
|
|
|
|
dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, true)
|
|
|
|
if dupNameError != nil {
|
|
|
|
return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError)
|
2017-03-27 07:15:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another node by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
// TODO: else Node.ID == "" should be forbidden in future Consul releases
|
|
|
|
// See https://github.com/hashicorp/consul/pull/3983 for context
|
2017-03-27 07:15:21 +00:00
|
|
|
|
|
|
|
// Check for an existing node by name to support nodes with no IDs.
|
|
|
|
if n == nil {
|
|
|
|
existing, err := tx.First("nodes", "id", node.Node)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("node name lookup failed: %s", err)
|
|
|
|
}
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another node by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
|
2017-03-27 07:15:21 +00:00
|
|
|
if existing != nil {
|
|
|
|
n = existing.(*structs.Node)
|
|
|
|
}
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to backward compatibility, it is still possible, however, to
"take" the name of another node by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
// WARNING, for compatibility reasons with tests, we do not check
|
|
|
|
// for case insensitive matches, which may lead to DB corruption
|
|
|
|
// See https://github.com/hashicorp/consul/pull/3983 for context
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-03-27 07:15:21 +00:00
|
|
|
// Get the indexes.
|
|
|
|
if n != nil {
|
|
|
|
node.CreateIndex = n.CreateIndex
|
2018-10-11 11:42:39 +00:00
|
|
|
node.ModifyIndex = n.ModifyIndex
|
|
|
|
// We do not need to update anything
|
|
|
|
if node.IsSame(n) {
|
|
|
|
return nil
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
node.ModifyIndex = idx
|
|
|
|
} else {
|
|
|
|
node.CreateIndex = idx
|
|
|
|
node.ModifyIndex = idx
|
|
|
|
}
|
|
|
|
|
2017-03-27 07:15:21 +00:00
|
|
|
// Insert the node and update the index.
|
2017-01-13 19:47:16 +00:00
|
|
|
if err := tx.Insert("nodes", node); err != nil {
|
|
|
|
return fmt.Errorf("failed inserting node: %s", err)
|
|
|
|
}
|
|
|
|
if err := tx.Insert("index", &IndexEntry{"nodes", idx}); err != nil {
|
|
|
|
return fmt.Errorf("failed updating index: %s", err)
|
|
|
|
}
|
2019-03-11 14:48:19 +00:00
|
|
|
// Update the node's service indexes as the node information is included
|
|
|
|
// in health queries and we would otherwise miss node updates in some cases
|
|
|
|
// for those queries.
|
|
|
|
if err := s.updateAllServiceIndexesOfNode(tx, idx, node.Node); err != nil {
|
|
|
|
return fmt.Errorf("failed updating index: %s", err)
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-02-01 22:20:25 +00:00
|
|
|
// GetNode is used to retrieve a node registration by node name ID.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) GetNode(id string) (uint64, *structs.Node, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2017-01-24 19:53:02 +00:00
|
|
|
idx := maxIndexTxn(tx, "nodes")
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Retrieve the node from the state store
|
2018-12-12 17:14:02 +00:00
|
|
|
node, err := getNodeTxn(tx, id)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("node lookup failed: %s", err)
|
|
|
|
}
|
2018-12-12 17:14:02 +00:00
|
|
|
return idx, node, nil
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func getNodeTxn(tx *txn, nodeName string) (*structs.Node, error) {
|
2018-12-12 17:14:02 +00:00
|
|
|
node, err := tx.First("nodes", "id", nodeName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("node lookup failed: %s", err)
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
if node != nil {
|
2018-12-12 17:14:02 +00:00
|
|
|
return node.(*structs.Node), nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2018-12-12 17:14:02 +00:00
|
|
|
return nil, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func getNodeIDTxn(tx *txn, id types.NodeID) (*structs.Node, error) {
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
strnode := string(id)
|
|
|
|
uuidValue, err := uuid.ParseUUID(strnode)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("node lookup by ID failed, wrong UUID: %v for '%s'", err, strnode)
|
|
|
|
}
|
|
|
|
|
|
|
|
node, err := tx.First("nodes", "uuid", uuidValue)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("node lookup by ID failed: %s", err)
|
|
|
|
}
|
|
|
|
if node != nil {
|
|
|
|
return node.(*structs.Node), nil
|
|
|
|
}
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2017-02-01 22:20:25 +00:00
|
|
|
// GetNodeID is used to retrieve a node registration by node ID.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) {
|
2017-02-01 22:20:25 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
|
|
|
idx := maxIndexTxn(tx, "nodes")
|
|
|
|
|
|
|
|
// Retrieve the node from the state store
|
Allow to rename nodes with IDs, will fix #3974 and #4413 (#4415)
* Allow to rename nodes with IDs, will fix #3974 and #4413
This change allow to rename any well behaving recent agent with an
ID to be renamed safely, ie: without taking the name of another one
with case insensitive comparison.
Deprecated behaviour warning
----------------------------
Due to asceding compatibility, it is still possible however to
"take" the name of another name by not providing any ID.
Note that when not providing any ID, it is possible to have 2 nodes
having similar names with case differences, ie: myNode and mynode
which might lead to DB corruption on Consul server side and
lead to server not properly restarting.
See #3983 and #4399 for Context about this change.
Disabling registration of nodes without IDs as specified in #4414
should probably be the way to go eventually.
* Removed the case-insensitive search when adding a node within the else
block since it breaks the test TestAgentAntiEntropy_Services
While the else case is probably legit, it will be fixed with #4414 in
a later release.
* Added again the test in the else to avoid duplicated names, but
enforce this test only for nodes having IDs.
Thus most tests without any ID will work, and allows us fixing
* Added more tests regarding request with/without IDs.
`TestStateStore_EnsureNode` now test registration and renaming with IDs
`TestStateStore_EnsureNodeDeprecated` tests registration without IDs
and tests removing an ID from a node as well as updated a node
without its ID (deprecated behaviour kept for backwards compatibility)
* Do not allow renaming in case of conflict, including when other node has no ID
* Fixed function GetNodeID that was not working due to wrong type when searching node from its ID
Thus, all tests about renaming were not working properly.
Added the full test cas that allowed me to detect it.
* Better error messages, more tests when nodeID is not a valid UUID in GetNodeID()
* Added separate TestStateStore_GetNodeID to test GetNodeID.
More complete test coverage for GetNodeID
* Added new unit test `TestStateStore_ensureNoNodeWithSimilarNameTxn`
Also fixed comments to be clearer after remarks from @banks
* Fixed error message in unit test to match test case
* Use uuid.ParseUUID to parse Node.ID as requested by @mkeeler
2018-08-10 15:30:45 +00:00
|
|
|
node, err := getNodeIDTxn(tx, id)
|
|
|
|
return idx, node, err
|
2017-02-01 22:20:25 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// Nodes is used to return all of the known nodes.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) Nodes(ws memdb.WatchSet) (uint64, structs.Nodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2017-01-20 07:36:50 +00:00
|
|
|
idx := maxIndexTxn(tx, "nodes")
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Retrieve all of the nodes
|
|
|
|
nodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(nodes.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Create and return the nodes list.
|
|
|
|
var results structs.Nodes
|
|
|
|
for node := nodes.Next(); node != nil; node = nodes.Next() {
|
|
|
|
results = append(results, node.(*structs.Node))
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
2017-01-14 01:45:34 +00:00
|
|
|
// NodesByMeta is used to return all nodes with the given metadata key/value pairs.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) NodesByMeta(ws memdb.WatchSet, filters map[string]string) (uint64, structs.Nodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2017-01-20 07:36:50 +00:00
|
|
|
idx := maxIndexTxn(tx, "nodes")
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Retrieve all of the nodes
|
|
|
|
var args []interface{}
|
|
|
|
for key, value := range filters {
|
|
|
|
args = append(args, key, value)
|
2017-01-14 01:45:34 +00:00
|
|
|
break
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
nodes, err := tx.Get("nodes", "meta", args...)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(nodes.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Create and return the nodes list.
|
|
|
|
var results structs.Nodes
|
|
|
|
for node := nodes.Next(); node != nil; node = nodes.Next() {
|
2017-01-14 01:45:34 +00:00
|
|
|
n := node.(*structs.Node)
|
|
|
|
if len(filters) <= 1 || structs.SatisfiesMetaFilters(n.Meta, filters) {
|
|
|
|
results = append(results, n)
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeleteNode is used to delete a given node by its ID.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) DeleteNode(idx uint64, nodeName string) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the node deletion.
|
2017-01-18 22:26:42 +00:00
|
|
|
if err := s.deleteNodeTxn(tx, idx, nodeName); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 07:51:18 +00:00
|
|
|
// deleteNodeCASTxn is used to try doing a node delete operation with a given
|
|
|
|
// raft index. If the CAS index specified is not equal to the last observed index for
|
|
|
|
// the given check, then the call is a noop, otherwise a normal check delete is invoked.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteNodeCASTxn(tx *txn, idx, cidx uint64, nodeName string) (bool, error) {
|
2018-12-03 07:51:18 +00:00
|
|
|
// Look up the node.
|
2019-01-09 19:59:23 +00:00
|
|
|
node, err := getNodeTxn(tx, nodeName)
|
2018-12-03 07:51:18 +00:00
|
|
|
if err != nil {
|
2019-01-09 19:59:23 +00:00
|
|
|
return false, err
|
2018-12-03 07:51:18 +00:00
|
|
|
}
|
|
|
|
if node == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the existing index does not match the provided CAS
|
|
|
|
// index arg, then we shouldn't update anything and can safely
|
|
|
|
// return early here.
|
2019-01-09 19:59:23 +00:00
|
|
|
if node.ModifyIndex != cidx {
|
|
|
|
return false, nil
|
2018-12-03 07:51:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Call the actual deletion if the above passed.
|
|
|
|
if err := s.deleteNodeTxn(tx, idx, nodeName); err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// deleteNodeTxn is the inner method used for removing a node from
|
|
|
|
// the store within a given transaction.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteNodeTxn(tx *txn, idx uint64, nodeName string) error {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Look up the node.
|
2017-01-18 22:26:42 +00:00
|
|
|
node, err := tx.First("nodes", "id", nodeName)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("node lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if node == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete all services associated with the node and update the service index.
|
2017-01-18 22:26:42 +00:00
|
|
|
services, err := tx.Get("services", "node", nodeName)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
var deleteServices []*structs.ServiceNode
|
2017-01-13 19:47:16 +00:00
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
2018-02-19 17:29:22 +00:00
|
|
|
svc := service.(*structs.ServiceNode)
|
2019-12-10 02:26:41 +00:00
|
|
|
deleteServices = append(deleteServices, svc)
|
|
|
|
|
|
|
|
if err := s.catalogUpdateServiceIndexes(tx, svc.ServiceName, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2019-07-12 21:19:37 +00:00
|
|
|
}
|
2019-12-19 16:15:37 +00:00
|
|
|
if err := s.catalogUpdateServiceKindIndexes(tx, svc.ServiceKind, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do the delete in a separate loop so we don't trash the iterator.
|
2019-12-10 02:26:41 +00:00
|
|
|
for _, svc := range deleteServices {
|
|
|
|
if err := s.deleteServiceTxn(tx, idx, nodeName, svc.ServiceID, &svc.EnterpriseMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete all checks associated with the node. This will invalidate
|
|
|
|
// sessions as necessary.
|
2017-01-18 22:26:42 +00:00
|
|
|
checks, err := tx.Get("checks", "node", nodeName)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed check lookup: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
var deleteChecks []*structs.HealthCheck
|
2017-01-13 19:47:16 +00:00
|
|
|
for check := checks.Next(); check != nil; check = checks.Next() {
|
2019-12-10 02:26:41 +00:00
|
|
|
deleteChecks = append(deleteChecks, check.(*structs.HealthCheck))
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do the delete in a separate loop so we don't trash the iterator.
|
2019-12-10 02:26:41 +00:00
|
|
|
for _, chk := range deleteChecks {
|
|
|
|
if err := s.deleteCheckTxn(tx, idx, nodeName, chk.CheckID, &chk.EnterpriseMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-14 14:36:07 +00:00
|
|
|
// Delete any coordinates associated with this node.
|
|
|
|
coords, err := tx.Get("coordinates", "node", nodeName)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed coordinate lookup: %s", err)
|
|
|
|
}
|
2017-08-14 14:36:07 +00:00
|
|
|
for coord := coords.Next(); coord != nil; coord = coords.Next() {
|
2017-01-13 19:47:16 +00:00
|
|
|
if err := tx.Delete("coordinates", coord); err != nil {
|
|
|
|
return fmt.Errorf("failed deleting coordinate: %s", err)
|
|
|
|
}
|
|
|
|
if err := tx.Insert("index", &IndexEntry{"coordinates", idx}); err != nil {
|
|
|
|
return fmt.Errorf("failed updating index: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the node and update the index.
|
|
|
|
if err := tx.Delete("nodes", node); err != nil {
|
|
|
|
return fmt.Errorf("failed deleting node: %s", err)
|
|
|
|
}
|
|
|
|
if err := tx.Insert("index", &IndexEntry{"nodes", idx}); err != nil {
|
|
|
|
return fmt.Errorf("failed updating index: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Invalidate any sessions for this node.
|
2019-11-25 17:57:35 +00:00
|
|
|
toDelete, err := s.allNodeSessionsTxn(tx, nodeName)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2019-11-25 17:57:35 +00:00
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2019-11-25 17:07:04 +00:00
|
|
|
for _, session := range toDelete {
|
|
|
|
if err := s.deleteSessionTxn(tx, idx, session.ID, &session.EnterpriseMeta); err != nil {
|
|
|
|
return fmt.Errorf("failed to delete session '%s': %v", session.ID, err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// EnsureService is called to upsert creation of a given NodeService.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) EnsureService(idx uint64, node string, svc *structs.NodeService) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the service registration upsert
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureServiceTxn(tx, idx, node, svc); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2020-05-20 16:43:33 +00:00
|
|
|
// errCASCompareFailed is the sentinel error returned by ensureServiceCASTxn
// when the compare-and-set index check does not pass, so nothing is written.
var errCASCompareFailed = errors.New("compare-and-set: comparison failed")
|
|
|
|
|
2018-12-03 08:41:24 +00:00
|
|
|
// ensureServiceCASTxn updates a service only if the existing index matches the given index.
|
2020-05-20 20:34:14 +00:00
|
|
|
// Returns an error if the write didn't happen and nil if write was successful.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureServiceCASTxn(tx *txn, idx uint64, node string, svc *structs.NodeService) error {
|
2018-12-12 09:15:43 +00:00
|
|
|
// Retrieve the existing service.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, existing, err := firstWatchCompoundWithTxn(tx, "services", "id", &svc.EnterpriseMeta, node, svc.ID)
|
2018-12-03 08:41:24 +00:00
|
|
|
if err != nil {
|
2020-05-20 16:43:33 +00:00
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the we should do the set. A ModifyIndex of 0 means that
|
|
|
|
// we are doing a set-if-not-exists.
|
|
|
|
if svc.ModifyIndex == 0 && existing != nil {
|
2020-05-20 16:43:33 +00:00
|
|
|
return errCASCompareFailed
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
if svc.ModifyIndex != 0 && existing == nil {
|
2020-05-20 16:43:33 +00:00
|
|
|
return errCASCompareFailed
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
2019-06-17 14:41:04 +00:00
|
|
|
e, ok := existing.(*structs.ServiceNode)
|
2018-12-03 08:41:24 +00:00
|
|
|
if ok && svc.ModifyIndex != 0 && svc.ModifyIndex != e.ModifyIndex {
|
2020-05-20 16:43:33 +00:00
|
|
|
return errCASCompareFailed
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
|
2020-05-20 16:43:33 +00:00
|
|
|
return s.ensureServiceTxn(tx, idx, node, svc)
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// ensureServiceTxn is used to upsert a service registration within an
|
|
|
|
// existing memdb transaction.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureServiceTxn(tx *txn, idx uint64, node string, svc *structs.NodeService) error {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Check for existing service
|
2019-12-10 02:26:41 +00:00
|
|
|
_, existing, err := firstWatchCompoundWithTxn(tx, "services", "id", &svc.EnterpriseMeta, node, svc.ID)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
2020-03-09 20:59:02 +00:00
|
|
|
if err = structs.ValidateServiceMetadata(svc.Kind, svc.Meta, false); err != nil {
|
2018-03-27 20:22:42 +00:00
|
|
|
return fmt.Errorf("Invalid Service Meta for node %s and serviceID %s: %v", node, svc.ID, err)
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// Check if this service is covered by a gateway's wildcard specifier
|
2020-04-17 16:24:34 +00:00
|
|
|
err = s.checkGatewayWildcardsAndUpdate(tx, idx, svc)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
2020-04-17 16:24:34 +00:00
|
|
|
return fmt.Errorf("failed updating gateway mapping: %s", err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// Create the service node entry and populate the indexes. Note that
|
2017-01-18 22:26:42 +00:00
|
|
|
// conversion doesn't populate any of the node-specific information.
|
|
|
|
// That's always populated when we read from the state store.
|
2017-01-13 19:47:16 +00:00
|
|
|
entry := svc.ToServiceNode(node)
|
|
|
|
// Get the node
|
|
|
|
n, err := tx.First("nodes", "id", node)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
|
|
|
if n == nil {
|
|
|
|
return ErrMissingNode
|
|
|
|
}
|
2018-10-11 11:42:39 +00:00
|
|
|
if existing != nil {
|
|
|
|
serviceNode := existing.(*structs.ServiceNode)
|
|
|
|
entry.CreateIndex = serviceNode.CreateIndex
|
|
|
|
entry.ModifyIndex = serviceNode.ModifyIndex
|
2019-03-06 17:13:28 +00:00
|
|
|
// We cannot return here because: we want to keep existing behavior (ex: failed node lookup -> ErrMissingNode)
|
2018-10-11 11:42:39 +00:00
|
|
|
// It might be modified in future, but it requires changing many unit tests
|
|
|
|
// Enforcing saving the entry also ensures that if we add default values in .ToServiceNode()
|
|
|
|
// those values will be saved even if node is not really modified for a while.
|
|
|
|
if entry.IsSameService(serviceNode) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
entry.CreateIndex = idx
|
|
|
|
}
|
|
|
|
entry.ModifyIndex = idx
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Insert the service and update the index
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.catalogInsertService(tx, entry)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Services returns all services along with a list of associated tags.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) Services(ws memdb.WatchSet, entMeta *structs.EnterpriseMeta) (uint64, structs.Services, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogServicesMaxIndex(tx, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// List all the services.
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceList(tx, entMeta, false)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed querying services: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(services.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Rip through the services and enumerate them and their unique set of
|
|
|
|
// tags.
|
|
|
|
unique := make(map[string]map[string]struct{})
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
tags, ok := unique[svc.ServiceName]
|
|
|
|
if !ok {
|
|
|
|
unique[svc.ServiceName] = make(map[string]struct{})
|
|
|
|
tags = unique[svc.ServiceName]
|
|
|
|
}
|
|
|
|
for _, tag := range svc.ServiceTags {
|
|
|
|
tags[tag] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate the output structure.
|
|
|
|
var results = make(structs.Services)
|
|
|
|
for service, tags := range unique {
|
2020-04-01 08:54:51 +00:00
|
|
|
results[service] = make([]string, 0, len(tags))
|
2017-04-20 18:42:22 +00:00
|
|
|
for tag := range tags {
|
2017-01-13 19:47:16 +00:00
|
|
|
results[service] = append(results[service], tag)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
2020-01-24 15:04:58 +00:00
|
|
|
func (s *Store) ServiceList(ws memdb.WatchSet, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceList, error) {
|
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
return s.serviceListTxn(tx, ws, entMeta)
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) serviceListTxn(tx *txn, ws memdb.WatchSet, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceList, error) {
|
2020-01-24 15:04:58 +00:00
|
|
|
idx := s.catalogServicesMaxIndex(tx, entMeta)
|
|
|
|
|
|
|
|
services, err := s.catalogServiceList(tx, entMeta, true)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed querying services: %s", err)
|
|
|
|
}
|
|
|
|
ws.Add(services.WatchCh())
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
unique := make(map[structs.ServiceName]struct{})
|
2020-01-24 15:04:58 +00:00
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
unique[svc.CompoundServiceName()] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
results := make(structs.ServiceList, 0, len(unique))
|
2020-06-16 17:19:31 +00:00
|
|
|
for sn := range unique {
|
2020-06-12 15:34:02 +00:00
|
|
|
results = append(results, structs.ServiceName{Name: sn.Name, EnterpriseMeta: sn.EnterpriseMeta})
|
2020-01-24 15:04:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
2017-01-14 01:45:34 +00:00
|
|
|
// ServicesByNodeMeta returns all services, filtered by the given node metadata.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServicesByNodeMeta(ws memdb.WatchSet, filters map[string]string, entMeta *structs.EnterpriseMeta) (uint64, structs.Services, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogServicesMaxIndex(tx, entMeta)
|
|
|
|
if nodeIdx := maxIndexTxn(tx, "nodes"); nodeIdx > idx {
|
|
|
|
idx = nodeIdx
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Retrieve all of the nodes with the meta k/v pair
|
|
|
|
var args []interface{}
|
|
|
|
for key, value := range filters {
|
|
|
|
args = append(args, key, value)
|
2017-01-14 01:45:34 +00:00
|
|
|
break
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
nodes, err := tx.Get("nodes", "meta", args...)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(nodes.WatchCh())
|
|
|
|
|
|
|
|
// We don't want to track an unlimited number of services, so we pull a
|
|
|
|
// top-level watch to use as a fallback.
|
2019-12-10 02:26:41 +00:00
|
|
|
allServices, err := s.catalogServiceList(tx, entMeta, false)
|
2017-01-20 07:36:50 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed services lookup: %s", err)
|
|
|
|
}
|
|
|
|
allServicesCh := allServices.WatchCh()
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Populate the services map
|
|
|
|
unique := make(map[string]map[string]struct{})
|
|
|
|
for node := nodes.Next(); node != nil; node = nodes.Next() {
|
|
|
|
n := node.(*structs.Node)
|
2017-01-14 01:45:34 +00:00
|
|
|
if len(filters) > 1 && !structs.SatisfiesMetaFilters(n.Meta, filters) {
|
|
|
|
continue
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// List all the services on the node
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceListByNode(tx, n.Node, entMeta, false)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed querying services: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.AddWithLimit(watchLimit, services.WatchCh(), allServicesCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Rip through the services and enumerate them and their unique set of
|
|
|
|
// tags.
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
tags, ok := unique[svc.ServiceName]
|
|
|
|
if !ok {
|
|
|
|
unique[svc.ServiceName] = make(map[string]struct{})
|
|
|
|
tags = unique[svc.ServiceName]
|
|
|
|
}
|
|
|
|
for _, tag := range svc.ServiceTags {
|
|
|
|
tags[tag] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate the output structure.
|
|
|
|
var results = make(structs.Services)
|
|
|
|
for service, tags := range unique {
|
2020-04-01 08:54:51 +00:00
|
|
|
results[service] = make([]string, 0, len(tags))
|
2017-04-20 18:42:22 +00:00
|
|
|
for tag := range tags {
|
2017-01-13 19:47:16 +00:00
|
|
|
results[service] = append(results[service], tag)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
2018-02-19 18:30:25 +00:00
|
|
|
// maxIndexForService return the maximum Raft Index for a service
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
// If the index is not set for the service, it will return the missing
|
|
|
|
// service index.
|
|
|
|
// The service_last_extinction is set to the last raft index when a service
|
|
|
|
// was unregistered (or 0 if no services were ever unregistered). This
|
|
|
|
// allows blocking queries to
|
|
|
|
// * return when the last instance of a service is removed
|
|
|
|
// * block until an instance for this service is available, or another
|
|
|
|
// service is unregistered.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) maxIndexForService(tx *txn, serviceName string, serviceExists, checks bool, entMeta *structs.EnterpriseMeta) uint64 {
|
2019-12-10 02:26:41 +00:00
|
|
|
idx, _ := s.maxIndexAndWatchChForService(tx, serviceName, serviceExists, checks, entMeta)
|
2019-03-15 20:18:48 +00:00
|
|
|
return idx
|
|
|
|
}
|
|
|
|
|
|
|
|
// maxIndexAndWatchChForService return the maximum Raft Index for a service. If
|
|
|
|
// the index is not set for the service, it will return the missing service
|
|
|
|
// index. The service_last_extinction is set to the last raft index when a
|
|
|
|
// service was unregistered (or 0 if no services were ever unregistered). This
|
|
|
|
// allows blocking queries to
|
|
|
|
// * return when the last instance of a service is removed
|
|
|
|
// * block until an instance for this service is available, or another
|
|
|
|
// service is unregistered.
|
|
|
|
//
|
|
|
|
// It also _may_ return a watch chan to add to a WatchSet. It will only return
|
|
|
|
// one if the service exists, and has a service index. If it doesn't then nil is
|
|
|
|
// returned for the chan. This allows for blocking watchers to _only_ watch this
|
|
|
|
// one chan in the common case, falling back to watching all touched MemDB
|
|
|
|
// indexes in more complicated cases.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) maxIndexAndWatchChForService(tx *txn, serviceName string, serviceExists, checks bool, entMeta *structs.EnterpriseMeta) (uint64, <-chan struct{}) {
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
if !serviceExists {
|
2019-12-10 02:26:41 +00:00
|
|
|
res, err := s.catalogServiceLastExtinctionIndex(tx, entMeta)
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
if missingIdx, ok := res.(*IndexEntry); ok && err == nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
// Note safe to only watch the extinction index as it's not updated when new instances come along so return nil watchCh
|
2019-03-15 20:18:48 +00:00
|
|
|
return missingIdx.Value, nil
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
ch, res, err := s.catalogServiceMaxIndex(tx, serviceName, entMeta)
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
if idx, ok := res.(*IndexEntry); ok && err == nil {
|
2019-03-15 20:18:48 +00:00
|
|
|
return idx.Value, ch
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.catalogMaxIndex(tx, entMeta, checks), nil
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
|
2020-04-29 22:52:27 +00:00
|
|
|
// Wrapper for maxIndexAndWatchChForService that operates on a list of ServiceNodes
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) maxIndexAndWatchChsForServiceNodes(tx *txn,
|
2020-05-08 15:44:34 +00:00
|
|
|
nodes structs.ServiceNodes, watchChecks bool) (uint64, []<-chan struct{}) {
|
2020-04-29 22:52:27 +00:00
|
|
|
|
|
|
|
var watchChans []<-chan struct{}
|
|
|
|
var maxIdx uint64
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
seen := make(map[structs.ServiceName]bool)
|
2020-04-29 22:52:27 +00:00
|
|
|
for i := 0; i < len(nodes); i++ {
|
2020-06-12 14:57:41 +00:00
|
|
|
sn := structs.NewServiceName(nodes[i].ServiceName, &nodes[i].EnterpriseMeta)
|
|
|
|
if ok := seen[sn]; !ok {
|
|
|
|
idx, svcCh := s.maxIndexAndWatchChForService(tx, sn.Name, true, watchChecks, &sn.EnterpriseMeta)
|
2020-04-29 22:52:27 +00:00
|
|
|
if idx > maxIdx {
|
|
|
|
maxIdx = idx
|
|
|
|
}
|
|
|
|
if svcCh != nil {
|
|
|
|
watchChans = append(watchChans, svcCh)
|
|
|
|
}
|
2020-06-12 14:57:41 +00:00
|
|
|
seen[sn] = true
|
2020-04-29 22:52:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return maxIdx, watchChans
|
|
|
|
}
|
|
|
|
|
2018-03-09 18:01:42 +00:00
|
|
|
// ConnectServiceNodes returns the nodes associated with a Connect
|
|
|
|
// compatible destination for the given service name. This will include
|
|
|
|
// both proxies and native integrations.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ConnectServiceNodes(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
|
|
|
return s.serviceNodes(ws, serviceName, true, entMeta)
|
2018-03-09 18:01:42 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// ServiceNodes returns the nodes associated with a given service name.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServiceNodes(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
|
|
|
return s.serviceNodes(ws, serviceName, false, entMeta)
|
2018-03-09 18:01:42 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) serviceNodes(ws memdb.WatchSet, serviceName string, connect bool, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2018-03-09 18:01:42 +00:00
|
|
|
// Function for lookup
|
2019-12-10 02:26:41 +00:00
|
|
|
index := "service"
|
|
|
|
if connect {
|
|
|
|
index = "connect"
|
2018-03-09 18:01:42 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceNodeList(tx, serviceName, index, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(services.WatchCh())
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
var results structs.ServiceNodes
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
results = append(results, service.(*structs.ServiceNode))
|
|
|
|
}
|
|
|
|
|
2020-04-08 18:37:24 +00:00
|
|
|
// If we are querying for Connect nodes, the associated proxy might be a gateway.
|
|
|
|
// Gateways are tracked in a separate table, and we append them to the result set.
|
|
|
|
// We append rather than replace since it allows users to migrate a service
|
|
|
|
// to the mesh with a mix of sidecars and gateways until all its instances have a sidecar.
|
2020-04-27 22:25:37 +00:00
|
|
|
var idx uint64
|
2020-04-08 18:37:24 +00:00
|
|
|
if connect {
|
|
|
|
// Look up gateway nodes associated with the service
|
2020-04-29 22:52:27 +00:00
|
|
|
gwIdx, nodes, err := s.serviceGatewayNodes(tx, ws, serviceName, structs.ServiceKindTerminatingGateway, entMeta)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed gateway nodes lookup: %v", err)
|
|
|
|
}
|
2020-04-27 22:25:37 +00:00
|
|
|
if idx < gwIdx {
|
|
|
|
idx = gwIdx
|
|
|
|
}
|
2020-04-16 21:00:48 +00:00
|
|
|
|
2020-04-29 22:52:27 +00:00
|
|
|
// Watch for index changes to the gateway nodes
|
2020-05-08 15:44:34 +00:00
|
|
|
svcIdx, chans := s.maxIndexAndWatchChsForServiceNodes(tx, nodes, false)
|
2020-04-29 22:52:27 +00:00
|
|
|
if svcIdx > idx {
|
|
|
|
idx = svcIdx
|
|
|
|
}
|
|
|
|
for _, ch := range chans {
|
2020-04-16 21:00:48 +00:00
|
|
|
ws.Add(ch)
|
|
|
|
}
|
2020-04-29 22:52:27 +00:00
|
|
|
|
2020-04-08 18:37:24 +00:00
|
|
|
for i := 0; i < len(nodes); i++ {
|
|
|
|
results = append(results, nodes[i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
// Fill in the node details.
|
2017-01-20 07:36:50 +00:00
|
|
|
results, err = s.parseServiceNodes(tx, ws, results)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err)
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
|
|
|
// Get the table index.
|
2020-05-08 15:44:34 +00:00
|
|
|
// TODO (gateways) (freddy) Why do we always consider the main service index here?
|
|
|
|
// This doesn't seem to make sense for Connect when there's more than 1 result
|
2020-04-27 22:25:37 +00:00
|
|
|
svcIdx := s.maxIndexForService(tx, serviceName, len(results) > 0, false, entMeta)
|
|
|
|
if idx < svcIdx {
|
|
|
|
idx = svcIdx
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ServiceTagNodes returns the nodes associated with a given service, filtering
|
2018-10-11 11:50:05 +00:00
|
|
|
// out services that don't contain the given tags.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServiceTagNodes(ws memdb.WatchSet, service string, tags []string, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// List all the services.
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceNodeList(tx, service, "service", entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(services.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Gather all the services and apply the tag filter.
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
serviceExists := false
|
2017-01-13 19:47:16 +00:00
|
|
|
var results structs.ServiceNodes
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode)
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
serviceExists = true
|
2018-10-11 11:50:05 +00:00
|
|
|
if !serviceTagsFilter(svc, tags) {
|
2017-01-13 19:47:16 +00:00
|
|
|
results = append(results, svc)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
// Fill in the node details.
|
2017-01-20 07:36:50 +00:00
|
|
|
results, err = s.parseServiceNodes(tx, ws, results)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err)
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.maxIndexForService(tx, service, serviceExists, false, entMeta)
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// serviceTagFilter returns true (should filter) if the given service node
|
|
|
|
// doesn't contain the given tag.
|
|
|
|
func serviceTagFilter(sn *structs.ServiceNode, tag string) bool {
|
|
|
|
tag = strings.ToLower(tag)
|
|
|
|
|
|
|
|
// Look for the lower cased version of the tag.
|
|
|
|
for _, t := range sn.ServiceTags {
|
|
|
|
if strings.ToLower(t) == tag {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we didn't hit the tag above then we should filter.
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2018-10-11 11:50:05 +00:00
|
|
|
// serviceTagsFilter returns true (should filter) if the given service node
|
|
|
|
// doesn't contain the given set of tags.
|
|
|
|
func serviceTagsFilter(sn *structs.ServiceNode, tags []string) bool {
|
|
|
|
for _, tag := range tags {
|
|
|
|
if serviceTagFilter(sn, tag) {
|
|
|
|
// If any one of the expected tags was not found, filter the service
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If all tags were found, don't filter the service
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2018-05-03 20:54:14 +00:00
|
|
|
// ServiceAddressNodes returns the nodes associated with a given service, filtering
|
|
|
|
// out services that don't match the given serviceAddress
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServiceAddressNodes(ws memdb.WatchSet, address string, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
2018-05-03 20:54:14 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// List all the services.
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceList(tx, entMeta, true)
|
2018-05-03 20:54:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
ws.Add(services.WatchCh())
|
|
|
|
|
|
|
|
// Gather all the services and apply the tag filter.
|
|
|
|
var results structs.ServiceNodes
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
if svc.ServiceAddress == address {
|
|
|
|
results = append(results, svc)
|
2019-06-21 17:16:17 +00:00
|
|
|
} else {
|
|
|
|
for _, addr := range svc.ServiceTaggedAddresses {
|
|
|
|
if addr.Address == address {
|
|
|
|
results = append(results, svc)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2018-05-03 20:54:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fill in the node details.
|
|
|
|
results, err = s.parseServiceNodes(tx, ws, results)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err)
|
|
|
|
}
|
|
|
|
return 0, results, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// parseServiceNodes iterates over a services query and fills in the node details,
|
|
|
|
// returning a ServiceNodes slice.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) parseServiceNodes(tx *txn, ws memdb.WatchSet, services structs.ServiceNodes) (structs.ServiceNodes, error) {
|
2017-01-20 07:36:50 +00:00
|
|
|
// We don't want to track an unlimited number of nodes, so we pull a
|
|
|
|
// top-level watch to use as a fallback.
|
|
|
|
allNodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
|
|
|
allNodesCh := allNodes.WatchCh()
|
|
|
|
|
|
|
|
// Fill in the node data for each service instance.
|
2017-01-13 19:47:16 +00:00
|
|
|
var results structs.ServiceNodes
|
|
|
|
for _, sn := range services {
|
|
|
|
// Note that we have to clone here because we don't want to
|
|
|
|
// modify the node-related fields on the object in the database,
|
|
|
|
// which is what we are referencing.
|
|
|
|
s := sn.PartialClone()
|
|
|
|
|
|
|
|
// Grab the corresponding node record.
|
2017-01-20 07:36:50 +00:00
|
|
|
watchCh, n, err := tx.FirstWatch("nodes", "id", sn.Node)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.AddWithLimit(watchLimit, watchCh, allNodesCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Populate the node-related fields. The tagged addresses may be
|
|
|
|
// used by agents to perform address translation if they are
|
|
|
|
// configured to do that.
|
|
|
|
node := n.(*structs.Node)
|
2017-01-18 22:26:42 +00:00
|
|
|
s.ID = node.ID
|
2017-01-13 19:47:16 +00:00
|
|
|
s.Address = node.Address
|
2017-04-18 12:02:24 +00:00
|
|
|
s.Datacenter = node.Datacenter
|
2017-01-13 19:47:16 +00:00
|
|
|
s.TaggedAddresses = node.TaggedAddresses
|
|
|
|
s.NodeMeta = node.Meta
|
|
|
|
|
|
|
|
results = append(results, s)
|
|
|
|
}
|
|
|
|
return results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NodeService is used to retrieve a specific service associated with the given
|
|
|
|
// node.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) NodeService(nodeName string, serviceID string, entMeta *structs.EnterpriseMeta) (uint64, *structs.NodeService, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogServicesMaxIndex(tx, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Query the service
|
2019-12-10 02:26:41 +00:00
|
|
|
service, err := s.getNodeServiceTxn(tx, nodeName, serviceID, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2017-01-18 22:26:42 +00:00
|
|
|
return 0, nil, fmt.Errorf("failed querying service for node %q: %s", nodeName, err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 08:41:24 +00:00
|
|
|
return idx, service, nil
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) getNodeServiceTxn(tx *txn, nodeName, serviceID string, entMeta *structs.EnterpriseMeta) (*structs.NodeService, error) {
|
2018-12-03 08:41:24 +00:00
|
|
|
// Query the service
|
2019-12-10 02:26:41 +00:00
|
|
|
_, service, err := firstWatchCompoundWithTxn(tx, "services", "id", entMeta, nodeName, serviceID)
|
2018-12-03 08:41:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed querying service for node %q: %s", nodeName, err)
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
if service != nil {
|
2018-12-03 08:41:24 +00:00
|
|
|
return service.(*structs.ServiceNode).ToNodeService(), nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2018-12-03 08:41:24 +00:00
|
|
|
|
|
|
|
return nil, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) nodeServices(ws memdb.WatchSet, nodeNameOrID string, entMeta *structs.EnterpriseMeta, allowWildcard bool) (bool, uint64, *structs.Node, memdb.ResultIterator, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogMaxIndex(tx, entMeta, false)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
2017-02-01 22:20:25 +00:00
|
|
|
// Query the node by node name
|
2017-02-01 22:59:24 +00:00
|
|
|
watchCh, n, err := tx.FirstWatch("nodes", "id", nodeNameOrID)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, fmt.Errorf("node lookup failed: %s", err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2017-02-01 22:20:25 +00:00
|
|
|
|
2017-02-01 23:18:00 +00:00
|
|
|
if n != nil {
|
|
|
|
ws.Add(watchCh)
|
|
|
|
} else {
|
|
|
|
if len(nodeNameOrID) < minUUIDLookupLen {
|
2017-02-01 22:20:25 +00:00
|
|
|
ws.Add(watchCh)
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, nil
|
2017-02-01 22:20:25 +00:00
|
|
|
}
|
2017-02-01 23:18:00 +00:00
|
|
|
|
2017-02-01 23:51:25 +00:00
|
|
|
// Attempt to lookup the node by its node ID
|
2017-02-02 20:13:58 +00:00
|
|
|
iter, err := tx.Get("nodes", "uuid_prefix", resizeNodeLookupKey(nodeNameOrID))
|
2017-02-01 23:18:00 +00:00
|
|
|
if err != nil {
|
2017-02-01 23:51:25 +00:00
|
|
|
ws.Add(watchCh)
|
|
|
|
// TODO(sean@): We could/should log an error re: the uuid_prefix lookup
|
|
|
|
// failing once a logger has been introduced to the catalog.
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, nil
|
2017-02-01 23:18:00 +00:00
|
|
|
}
|
2017-02-01 23:51:25 +00:00
|
|
|
|
2017-02-01 23:18:00 +00:00
|
|
|
n = iter.Next()
|
|
|
|
if n == nil {
|
2017-02-01 23:51:25 +00:00
|
|
|
// No nodes matched, even with the Node ID: add a watch on the node name.
|
2017-02-01 23:18:00 +00:00
|
|
|
ws.Add(watchCh)
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, nil
|
2017-02-01 23:18:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
idWatchCh := iter.WatchCh()
|
|
|
|
if iter.Next() != nil {
|
2017-02-01 23:51:25 +00:00
|
|
|
// More than one match present: Watch on the node name channel and return
|
|
|
|
// an empty result (node lookups can not be ambiguous).
|
2017-02-01 23:18:00 +00:00
|
|
|
ws.Add(watchCh)
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, nil
|
2017-02-01 23:18:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ws.Add(idWatchCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2017-02-01 22:20:25 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
node := n.(*structs.Node)
|
2017-02-01 23:41:10 +00:00
|
|
|
nodeName := node.Node
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Read all of the services
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceListByNode(tx, nodeName, entMeta, allowWildcard)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
return true, 0, nil, nil, fmt.Errorf("failed querying services for node %q: %s", nodeName, err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2017-01-20 07:36:50 +00:00
|
|
|
ws.Add(services.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
return false, idx, node, services, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NodeServices is used to query service registrations by node name or UUID.
|
|
|
|
func (s *Store) NodeServices(ws memdb.WatchSet, nodeNameOrID string, entMeta *structs.EnterpriseMeta) (uint64, *structs.NodeServices, error) {
|
|
|
|
done, idx, node, services, err := s.nodeServices(ws, nodeNameOrID, entMeta, false)
|
|
|
|
if done || err != nil {
|
|
|
|
return idx, nil, err
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// Initialize the node services struct
|
|
|
|
ns := &structs.NodeServices{
|
|
|
|
Node: node,
|
|
|
|
Services: make(map[string]*structs.NodeService),
|
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
if services != nil {
|
|
|
|
// Add all of the services to the map.
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode).ToNodeService()
|
|
|
|
ns.Services[svc.ID] = svc
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return idx, ns, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NodeServices is used to query service registrations by node name or UUID.
|
|
|
|
func (s *Store) NodeServiceList(ws memdb.WatchSet, nodeNameOrID string, entMeta *structs.EnterpriseMeta) (uint64, *structs.NodeServiceList, error) {
|
|
|
|
done, idx, node, services, err := s.nodeServices(ws, nodeNameOrID, entMeta, true)
|
|
|
|
if done || err != nil {
|
|
|
|
return idx, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if idx == 0 {
|
|
|
|
return 0, nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize the node services struct
|
|
|
|
ns := &structs.NodeServiceList{
|
|
|
|
Node: node,
|
|
|
|
}
|
|
|
|
|
|
|
|
if services != nil {
|
|
|
|
// Add all of the services to the map.
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
svc := service.(*structs.ServiceNode).ToNodeService()
|
|
|
|
ns.Services = append(ns.Services, svc)
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return idx, ns, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeleteService is used to delete a given service associated with a node.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) DeleteService(idx uint64, nodeName, serviceID string, entMeta *structs.EnterpriseMeta) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the service deletion
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.deleteServiceTxn(tx, idx, nodeName, serviceID, entMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 08:41:24 +00:00
|
|
|
// deleteServiceCASTxn is used to try doing a service delete operation with a given
|
|
|
|
// raft index. If the CAS index specified is not equal to the last observed index for
|
|
|
|
// the given service, then the call is a noop, otherwise a normal delete is invoked.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteServiceCASTxn(tx *txn, idx, cidx uint64, nodeName, serviceID string, entMeta *structs.EnterpriseMeta) (bool, error) {
|
2018-12-03 08:41:24 +00:00
|
|
|
// Look up the service.
|
2019-12-10 02:26:41 +00:00
|
|
|
service, err := s.getNodeServiceTxn(tx, nodeName, serviceID, entMeta)
|
2018-12-03 08:41:24 +00:00
|
|
|
if err != nil {
|
2019-01-09 19:59:23 +00:00
|
|
|
return false, fmt.Errorf("service lookup failed: %s", err)
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
if service == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the existing index does not match the provided CAS
|
|
|
|
// index arg, then we shouldn't update anything and can safely
|
|
|
|
// return early here.
|
2019-01-09 19:59:23 +00:00
|
|
|
if service.ModifyIndex != cidx {
|
|
|
|
return false, nil
|
2018-12-03 08:41:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Call the actual deletion if the above passed.
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.deleteServiceTxn(tx, idx, nodeName, serviceID, entMeta); err != nil {
|
2018-12-03 08:41:24 +00:00
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// deleteServiceTxn is the inner method called to remove a service
|
|
|
|
// registration within an existing transaction.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteServiceTxn(tx *txn, idx uint64, nodeName, serviceID string, entMeta *structs.EnterpriseMeta) error {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Look up the service.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, service, err := firstWatchCompoundWithTxn(tx, "services", "id", entMeta, nodeName, serviceID)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
if service == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete any checks associated with the service. This will invalidate
|
|
|
|
// sessions as necessary.
|
2019-12-10 02:26:41 +00:00
|
|
|
checks, err := s.catalogChecksForNodeService(tx, nodeName, serviceID, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service check lookup: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
var deleteChecks []*structs.HealthCheck
|
2017-01-13 19:47:16 +00:00
|
|
|
for check := checks.Next(); check != nil; check = checks.Next() {
|
2019-12-10 02:26:41 +00:00
|
|
|
deleteChecks = append(deleteChecks, check.(*structs.HealthCheck))
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do the delete in a separate loop so we don't trash the iterator.
|
2019-12-10 02:26:41 +00:00
|
|
|
for _, check := range deleteChecks {
|
|
|
|
if err := s.deleteCheckTxn(tx, idx, nodeName, check.CheckID, &check.EnterpriseMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the index.
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateCheckIndexes(tx, idx, entMeta); err != nil {
|
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the service and update the index
|
|
|
|
if err := tx.Delete("services", service); err != nil {
|
|
|
|
return fmt.Errorf("failed deleting service: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServicesIndexes(tx, idx, entMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return fmt.Errorf("failed updating index: %s", err)
|
|
|
|
}
|
|
|
|
|
2018-02-19 17:29:22 +00:00
|
|
|
svc := service.(*structs.ServiceNode)
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServiceKindIndexes(tx, svc.ServiceKind, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2019-07-12 21:19:37 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
if _, remainingService, err := firstWatchWithTxn(tx, "services", "service", svc.ServiceName, entMeta); err == nil {
|
2018-02-19 21:44:49 +00:00
|
|
|
if remainingService != nil {
|
2018-02-19 17:29:22 +00:00
|
|
|
// We have at least one remaining service, update the index
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServiceIndexes(tx, svc.ServiceName, idx, entMeta); err != nil {
|
|
|
|
return err
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// There are no more service instances, cleanup the service.<serviceName> index
|
2019-12-10 02:26:41 +00:00
|
|
|
_, serviceIndex, err := s.catalogServiceMaxIndex(tx, svc.ServiceName, entMeta)
|
2018-02-19 17:29:22 +00:00
|
|
|
if err == nil && serviceIndex != nil {
|
|
|
|
// we found service.<serviceName> index, garbage collect it
|
|
|
|
if errW := tx.Delete("index", serviceIndex); errW != nil {
|
|
|
|
return fmt.Errorf("[FAILED] deleting serviceIndex %s: %s", svc.ServiceName, err)
|
|
|
|
}
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServiceExtinctionIndex(tx, idx, entMeta); err != nil {
|
|
|
|
return err
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-17 00:51:27 +00:00
|
|
|
// Clean up association between service name and gateways if needed
|
|
|
|
gateways, err := s.serviceGateways(tx, svc.ServiceName, &svc.EnterpriseMeta)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed gateway lookup for %q: %s", svc.ServiceName, err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-17 00:51:27 +00:00
|
|
|
for mapping := gateways.Next(); mapping != nil; mapping = gateways.Next() {
|
|
|
|
if gs, ok := mapping.(*structs.GatewayService); ok && gs != nil {
|
|
|
|
// Only delete if association was created by a wildcard specifier.
|
|
|
|
// Otherwise the service was specified in the config entry, and the association should be maintained
|
|
|
|
// for when the service is re-registered
|
|
|
|
if gs.FromWildcard {
|
|
|
|
if err := tx.Delete(gatewayServicesTableName, gs); err != nil {
|
|
|
|
return fmt.Errorf("failed to truncate gateway services table: %v", err)
|
|
|
|
}
|
|
|
|
if err := indexUpdateMaxTxn(tx, idx, gatewayServicesTableName); err != nil {
|
|
|
|
return fmt.Errorf("failed updating gateway-services index: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return fmt.Errorf("Could not find any service %s: %s", svc.ServiceName, err)
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// EnsureCheck is used to store a check registration in the db.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the check registration
|
2017-01-24 19:53:02 +00:00
|
|
|
if err := s.ensureCheckTxn(tx, idx, hc); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2018-03-22 09:30:05 +00:00
|
|
|
// updateAllServiceIndexesOfNode updates the Raft index of all the services associated with this node
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) updateAllServiceIndexesOfNode(tx *txn, idx uint64, nodeID string) error {
|
2018-03-19 15:12:54 +00:00
|
|
|
services, err := tx.Get("services", "node", nodeID)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed updating services for node %s: %s", nodeID, err)
|
|
|
|
}
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
2019-12-10 02:26:41 +00:00
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
if err := s.catalogUpdateServiceIndexes(tx, svc.ServiceName, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2019-06-20 19:04:39 +00:00
|
|
|
}
|
2019-12-19 16:15:37 +00:00
|
|
|
if err := s.catalogUpdateServiceKindIndexes(tx, svc.ServiceKind, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-03-19 13:14:03 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-10-29 18:41:42 +00:00
|
|
|
// ensureCheckCASTxn updates a check only if the existing index matches the given index.
|
|
|
|
// Returns a bool indicating if a write happened and any error.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureCheckCASTxn(tx *txn, idx uint64, hc *structs.HealthCheck) (bool, error) {
|
2018-10-29 18:41:42 +00:00
|
|
|
// Retrieve the existing entry.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, existing, err := s.getNodeCheckTxn(tx, hc.Node, hc.CheckID, &hc.EnterpriseMeta)
|
2018-10-29 18:41:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, fmt.Errorf("failed health check lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the we should do the set. A ModifyIndex of 0 means that
|
|
|
|
// we are doing a set-if-not-exists.
|
|
|
|
if hc.ModifyIndex == 0 && existing != nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
if hc.ModifyIndex != 0 && existing == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
2019-01-09 19:59:23 +00:00
|
|
|
if existing != nil && hc.ModifyIndex != 0 && hc.ModifyIndex != existing.ModifyIndex {
|
2018-10-29 18:41:42 +00:00
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Perform the update.
|
|
|
|
if err := s.ensureCheckTxn(tx, idx, hc); err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2020-03-19 13:11:20 +00:00
|
|
|
// ensureCheckTxn is used as the inner method to handle inserting
|
2017-01-13 19:47:16 +00:00
|
|
|
// a health check into the state store. It ensures safety against inserting
|
|
|
|
// checks with no matching node or service.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) ensureCheckTxn(tx *txn, idx uint64, hc *structs.HealthCheck) error {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Check if we have an existing health check
|
2019-12-10 02:26:41 +00:00
|
|
|
_, existing, err := firstWatchCompoundWithTxn(tx, "checks", "id", &hc.EnterpriseMeta, hc.Node, string(hc.CheckID))
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed health check lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the indexes
|
|
|
|
if existing != nil {
|
2018-10-11 11:42:39 +00:00
|
|
|
existingCheck := existing.(*structs.HealthCheck)
|
|
|
|
hc.CreateIndex = existingCheck.CreateIndex
|
|
|
|
hc.ModifyIndex = existingCheck.ModifyIndex
|
2017-01-13 19:47:16 +00:00
|
|
|
} else {
|
|
|
|
hc.CreateIndex = idx
|
|
|
|
hc.ModifyIndex = idx
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use the default check status if none was provided
|
|
|
|
if hc.Status == "" {
|
2017-04-19 23:00:11 +00:00
|
|
|
hc.Status = api.HealthCritical
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get the node
|
|
|
|
node, err := tx.First("nodes", "id", hc.Node)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
|
|
|
if node == nil {
|
|
|
|
return ErrMissingNode
|
|
|
|
}
|
|
|
|
|
2018-10-11 11:42:39 +00:00
|
|
|
modified := true
|
2017-01-13 19:47:16 +00:00
|
|
|
// If the check is associated with a service, check that we have
|
|
|
|
// a registration for the service.
|
|
|
|
if hc.ServiceID != "" {
|
2019-12-10 02:26:41 +00:00
|
|
|
_, service, err := firstWatchCompoundWithTxn(tx, "services", "id", &hc.EnterpriseMeta, hc.Node, hc.ServiceID)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
if service == nil {
|
|
|
|
return ErrMissingService
|
|
|
|
}
|
|
|
|
|
2017-04-27 23:03:05 +00:00
|
|
|
// Copy in the service name and tags
|
|
|
|
svc := service.(*structs.ServiceNode)
|
|
|
|
hc.ServiceName = svc.ServiceName
|
|
|
|
hc.ServiceTags = svc.ServiceTags
|
2018-10-11 11:42:39 +00:00
|
|
|
if existing != nil && existing.(*structs.HealthCheck).IsSame(hc) {
|
|
|
|
modified = false
|
|
|
|
} else {
|
2019-12-10 02:26:41 +00:00
|
|
|
if err = s.catalogUpdateServiceIndexes(tx, svc.ServiceName, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2019-06-20 19:04:39 +00:00
|
|
|
}
|
2019-12-19 16:15:37 +00:00
|
|
|
if err := s.catalogUpdateServiceKindIndexes(tx, svc.ServiceKind, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
} else {
|
2018-10-11 11:42:39 +00:00
|
|
|
if existing != nil && existing.(*structs.HealthCheck).IsSame(hc) {
|
|
|
|
modified = false
|
|
|
|
} else {
|
|
|
|
// Since the check has been modified, it impacts all services of node
|
|
|
|
// Update the status for all the services associated with this node
|
|
|
|
err = s.updateAllServiceIndexesOfNode(tx, idx, hc.Node)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete any sessions for this check if the health is critical.
|
2017-04-19 23:00:11 +00:00
|
|
|
if hc.Status == api.HealthCritical {
|
2019-12-10 02:26:41 +00:00
|
|
|
sessions, err := checkSessionsTxn(tx, hc)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the session in a separate loop so we don't trash the
|
|
|
|
// iterator.
|
2019-12-10 02:26:41 +00:00
|
|
|
for _, sess := range sessions {
|
|
|
|
if err := s.deleteSessionTxn(tx, idx, sess.Session, &sess.EnterpriseMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return fmt.Errorf("failed deleting session: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-03-19 13:11:20 +00:00
|
|
|
if !modified {
|
|
|
|
return nil
|
2018-10-11 11:42:39 +00:00
|
|
|
}
|
2020-03-19 13:11:20 +00:00
|
|
|
hc.ModifyIndex = idx
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.catalogInsertCheck(tx, hc, idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NodeCheck is used to retrieve a specific check associated with the given
|
|
|
|
// node.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) NodeCheck(nodeName string, checkID types.CheckID, entMeta *structs.EnterpriseMeta) (uint64, *structs.HealthCheck, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.getNodeCheckTxn(tx, nodeName, checkID, entMeta)
|
2018-10-29 18:41:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// nodeCheckTxn is used as the inner method to handle reading a health check
|
|
|
|
// from the state store.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) getNodeCheckTxn(tx *txn, nodeName string, checkID types.CheckID, entMeta *structs.EnterpriseMeta) (uint64, *structs.HealthCheck, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogChecksMaxIndex(tx, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Return the check.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, check, err := firstWatchCompoundWithTxn(tx, "checks", "id", entMeta, nodeName, string(checkID))
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed check lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
if check != nil {
|
|
|
|
return idx, check.(*structs.HealthCheck), nil
|
|
|
|
}
|
2017-04-21 01:59:42 +00:00
|
|
|
return idx, nil, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NodeChecks is used to retrieve checks associated with the
|
|
|
|
// given node from the state store.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) NodeChecks(ws memdb.WatchSet, nodeName string, entMeta *structs.EnterpriseMeta) (uint64, structs.HealthChecks, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogChecksMaxIndex(tx, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Return the checks.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogListChecksByNode(tx, nodeName, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed check lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.Add(iter.WatchCh())
|
|
|
|
|
|
|
|
var results structs.HealthChecks
|
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
|
|
|
results = append(results, check.(*structs.HealthCheck))
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// ServiceChecks is used to get all checks associated with a
|
|
|
|
// given service ID. The query is performed against a service
|
|
|
|
// _name_ instead of a service ID.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServiceChecks(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.HealthChecks, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogChecksMaxIndex(tx, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Return the checks.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogListChecksByService(tx, serviceName, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed check lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.Add(iter.WatchCh())
|
|
|
|
|
|
|
|
var results structs.HealthChecks
|
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
|
|
|
results = append(results, check.(*structs.HealthCheck))
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-01-14 01:08:43 +00:00
|
|
|
// ServiceChecksByNodeMeta is used to get all checks associated with a
|
2017-01-14 01:45:34 +00:00
|
|
|
// given service ID, filtered by the given node metadata values. The query
|
|
|
|
// is performed against a service _name_ instead of a service ID.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) ServiceChecksByNodeMeta(ws memdb.WatchSet, serviceName string,
|
2019-12-10 02:26:41 +00:00
|
|
|
filters map[string]string, entMeta *structs.EnterpriseMeta) (uint64, structs.HealthChecks, error) {
|
2017-01-24 07:37:21 +00:00
|
|
|
|
2017-01-14 01:08:43 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.maxIndexForService(tx, serviceName, true, true, entMeta)
|
2017-01-14 01:08:43 +00:00
|
|
|
// Return the checks.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogListChecksByService(tx, serviceName, entMeta)
|
2017-01-14 01:08:43 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed check lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.Add(iter.WatchCh())
|
|
|
|
|
|
|
|
return s.parseChecksByNodeMeta(tx, ws, idx, iter, filters)
|
2017-01-14 01:08:43 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// ChecksInState is used to query the state store for all checks
|
|
|
|
// which are in the provided state.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ChecksInState(ws memdb.WatchSet, state string, entMeta *structs.EnterpriseMeta) (uint64, structs.HealthChecks, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
idx, iter, err := s.checksInStateTxn(tx, ws, state, entMeta)
|
2017-04-27 23:03:05 +00:00
|
|
|
if err != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
return 0, nil, err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
var results structs.HealthChecks
|
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
|
|
|
results = append(results, check.(*structs.HealthCheck))
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
return idx, results, nil
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-01-14 01:45:34 +00:00
|
|
|
// ChecksInStateByNodeMeta is used to query the state store for all checks
|
|
|
|
// which are in the provided state, filtered by the given node metadata values.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ChecksInStateByNodeMeta(ws memdb.WatchSet, state string, filters map[string]string, entMeta *structs.EnterpriseMeta) (uint64, structs.HealthChecks, error) {
|
2017-01-14 01:08:43 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
idx, iter, err := s.checksInStateTxn(tx, ws, state, entMeta)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return s.parseChecksByNodeMeta(tx, ws, idx, iter, filters)
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) checksInStateTxn(tx *txn, ws memdb.WatchSet, state string, entMeta *structs.EnterpriseMeta) (uint64, memdb.ResultIterator, error) {
|
2017-01-14 01:08:43 +00:00
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogChecksMaxIndex(tx, entMeta)
|
2017-01-14 01:08:43 +00:00
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
// Query all checks if HealthAny is passed, otherwise use the index.
|
|
|
|
var iter memdb.ResultIterator
|
2017-01-14 01:08:43 +00:00
|
|
|
var err error
|
2017-04-19 23:00:11 +00:00
|
|
|
if state == api.HealthAny {
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err = s.catalogListChecks(tx, entMeta)
|
2017-01-14 01:08:43 +00:00
|
|
|
} else {
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err = s.catalogListChecksInState(tx, state, entMeta)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed check lookup: %s", err)
|
2017-01-14 01:08:43 +00:00
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.Add(iter.WatchCh())
|
2017-01-14 01:08:43 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
return idx, iter, err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2017-01-14 01:08:43 +00:00
|
|
|
// parseChecksByNodeMeta is a helper function used to deduplicate some
|
|
|
|
// repetitive code for returning health checks filtered by node metadata fields.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) parseChecksByNodeMeta(tx *txn, ws memdb.WatchSet,
|
2017-01-24 07:37:21 +00:00
|
|
|
idx uint64, iter memdb.ResultIterator, filters map[string]string) (uint64, structs.HealthChecks, error) {
|
|
|
|
|
|
|
|
// We don't want to track an unlimited number of nodes, so we pull a
|
|
|
|
// top-level watch to use as a fallback.
|
|
|
|
allNodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
|
|
|
allNodesCh := allNodes.WatchCh()
|
|
|
|
|
|
|
|
// Only take results for nodes that satisfy the node metadata filters.
|
2017-01-14 01:08:43 +00:00
|
|
|
var results structs.HealthChecks
|
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
|
|
|
healthCheck := check.(*structs.HealthCheck)
|
2017-01-24 07:37:21 +00:00
|
|
|
watchCh, node, err := tx.FirstWatch("nodes", "id", healthCheck.Node)
|
2017-01-14 01:08:43 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
|
|
|
if node == nil {
|
|
|
|
return 0, nil, ErrMissingNode
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
|
|
|
|
// Add even the filtered nodes so we wake up if the node metadata
|
|
|
|
// changes.
|
|
|
|
ws.AddWithLimit(watchLimit, watchCh, allNodesCh)
|
2017-01-14 01:08:43 +00:00
|
|
|
if structs.SatisfiesMetaFilters(node.(*structs.Node).Meta, filters) {
|
|
|
|
results = append(results, healthCheck)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// DeleteCheck is used to delete a health check registration.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) DeleteCheck(idx uint64, node string, checkID types.CheckID, entMeta *structs.EnterpriseMeta) error {
|
2020-03-19 13:11:20 +00:00
|
|
|
tx := s.db.WriteTxn(idx)
|
2017-01-13 19:47:16 +00:00
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Call the check deletion
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.deleteCheckTxn(tx, idx, node, checkID, entMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-02 20:34:56 +00:00
|
|
|
return tx.Commit()
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2018-10-29 18:41:42 +00:00
|
|
|
// deleteCheckCASTxn is used to try doing a check delete operation with a given
|
2018-12-03 07:11:48 +00:00
|
|
|
// raft index. If the CAS index specified is not equal to the last observed index for
|
2018-10-29 18:41:42 +00:00
|
|
|
// the given check, then the call is a noop, otherwise a normal check delete is invoked.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteCheckCASTxn(tx *txn, idx, cidx uint64, node string, checkID types.CheckID, entMeta *structs.EnterpriseMeta) (bool, error) {
|
2018-10-29 18:41:42 +00:00
|
|
|
// Try to retrieve the existing health check.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, hc, err := s.getNodeCheckTxn(tx, node, checkID, entMeta)
|
2018-10-29 18:41:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, fmt.Errorf("check lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if hc == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the existing index does not match the provided CAS
|
|
|
|
// index arg, then we shouldn't update anything and can safely
|
|
|
|
// return early here.
|
2019-01-09 19:59:23 +00:00
|
|
|
if hc.ModifyIndex != cidx {
|
|
|
|
return false, nil
|
2018-10-29 18:41:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Call the actual deletion if the above passed.
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.deleteCheckTxn(tx, idx, node, checkID, entMeta); err != nil {
|
2018-10-29 18:41:42 +00:00
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// deleteCheckTxn is the inner method used to call a health
|
|
|
|
// check deletion within an existing transaction.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) deleteCheckTxn(tx *txn, idx uint64, node string, checkID types.CheckID, entMeta *structs.EnterpriseMeta) error {
|
2017-01-13 19:47:16 +00:00
|
|
|
// Try to retrieve the existing health check.
|
2019-12-10 02:26:41 +00:00
|
|
|
_, hc, err := firstWatchCompoundWithTxn(tx, "checks", "id", entMeta, node, string(checkID))
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("check lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if hc == nil {
|
|
|
|
return nil
|
|
|
|
}
|
2018-02-19 17:29:22 +00:00
|
|
|
existing := hc.(*structs.HealthCheck)
|
2018-03-19 13:14:03 +00:00
|
|
|
if existing != nil {
|
2018-03-19 15:12:54 +00:00
|
|
|
// When no service is linked to this service, update all services of node
|
|
|
|
if existing.ServiceID != "" {
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServiceIndexes(tx, existing.ServiceName, idx, &existing.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2018-03-19 15:12:54 +00:00
|
|
|
}
|
2019-07-12 21:19:37 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
_, svcRaw, err := firstWatchCompoundWithTxn(tx, "services", "id", &existing.EnterpriseMeta, existing.Node, existing.ServiceID)
|
2019-07-12 21:19:37 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed retrieving service from state store: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
svc := svcRaw.(*structs.ServiceNode)
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServiceKindIndexes(tx, svc.ServiceKind, idx, &svc.EnterpriseMeta); err != nil {
|
|
|
|
return err
|
2019-07-12 21:19:37 +00:00
|
|
|
}
|
2018-03-19 15:12:54 +00:00
|
|
|
} else {
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.updateAllServiceIndexesOfNode(tx, idx, existing.Node); err != nil {
|
2018-03-19 15:12:54 +00:00
|
|
|
return fmt.Errorf("Failed to update services linked to deleted healthcheck: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
if err := s.catalogUpdateServicesIndexes(tx, idx, entMeta); err != nil {
|
|
|
|
return err
|
2018-03-19 15:12:54 +00:00
|
|
|
}
|
2018-02-19 17:29:22 +00:00
|
|
|
}
|
|
|
|
}
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Delete the check from the DB and update the index.
|
|
|
|
if err := tx.Delete("checks", hc); err != nil {
|
|
|
|
return fmt.Errorf("failed removing check: %s", err)
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
|
|
|
|
if err := s.catalogUpdateCheckIndexes(tx, idx, entMeta); err != nil {
|
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete any sessions for this check.
|
2019-12-10 02:26:41 +00:00
|
|
|
sessions, err := checkSessionsTxn(tx, existing)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
2019-12-10 02:26:41 +00:00
|
|
|
return err
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do the delete in a separate loop so we don't trash the iterator.
|
2019-12-10 02:26:41 +00:00
|
|
|
for _, sess := range sessions {
|
|
|
|
if err := s.deleteSessionTxn(tx, idx, sess.Session, &sess.EnterpriseMeta); err != nil {
|
2017-01-13 19:47:16 +00:00
|
|
|
return fmt.Errorf("failed deleting session: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-01-16 18:28:46 +00:00
|
|
|
// CheckServiceNodes is used to query all nodes and checks for a given service.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) CheckServiceNodes(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
|
|
|
return s.checkServiceNodes(ws, serviceName, false, entMeta)
|
2018-03-09 17:32:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CheckConnectServiceNodes is used to query all nodes and checks for Connect
|
|
|
|
// compatible endpoints for a given service.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) CheckConnectServiceNodes(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
|
|
|
return s.checkServiceNodes(ws, serviceName, true, entMeta)
|
2018-03-09 17:32:22 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// CheckIngressServiceNodes is used to query all nodes and checks for ingress
|
|
|
|
// endpoints for a given service.
|
|
|
|
func (s *Store) CheckIngressServiceNodes(ws memdb.WatchSet, serviceName string, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
2020-04-29 22:52:27 +00:00
|
|
|
|
|
|
|
maxIdx, nodes, err := s.serviceGatewayNodes(tx, ws, serviceName, structs.ServiceKindIngressGateway, entMeta)
|
2020-04-16 21:00:48 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed gateway nodes lookup: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(ingress) : Deal with incorporating index from mapping table
|
2020-04-29 22:52:27 +00:00
|
|
|
// Watch for index changes to the gateway nodes
|
2020-05-08 15:44:34 +00:00
|
|
|
idx, chans := s.maxIndexAndWatchChsForServiceNodes(tx, nodes, false)
|
2020-04-29 22:52:27 +00:00
|
|
|
for _, ch := range chans {
|
2020-04-16 21:00:48 +00:00
|
|
|
ws.Add(ch)
|
|
|
|
}
|
2020-05-08 15:44:34 +00:00
|
|
|
maxIdx = lib.MaxUint64(maxIdx, idx)
|
2020-04-16 21:00:48 +00:00
|
|
|
|
|
|
|
// TODO(ingress): Test namespace functionality here
|
|
|
|
// De-dup services to lookup
|
2020-06-12 14:57:41 +00:00
|
|
|
names := make(map[structs.ServiceName]struct{})
|
2020-04-16 21:00:48 +00:00
|
|
|
for _, n := range nodes {
|
2020-06-12 14:57:41 +00:00
|
|
|
names[n.CompoundServiceName()] = struct{}{}
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var results structs.CheckServiceNodes
|
2020-06-12 15:34:02 +00:00
|
|
|
for sn := range names {
|
|
|
|
idx, n, err := s.checkServiceNodesTxn(tx, ws, sn.Name, false, &sn.EnterpriseMeta)
|
2020-04-16 21:00:48 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
2020-04-23 23:16:04 +00:00
|
|
|
maxIdx = lib.MaxUint64(maxIdx, idx)
|
2020-04-16 21:00:48 +00:00
|
|
|
results = append(results, n...)
|
|
|
|
}
|
|
|
|
return maxIdx, results, nil
|
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) checkServiceNodes(ws memdb.WatchSet, serviceName string, connect bool, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
return s.checkServiceNodesTxn(tx, ws, serviceName, connect, entMeta)
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) checkServiceNodesTxn(tx *txn, ws memdb.WatchSet, serviceName string, connect bool, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2018-03-09 17:32:22 +00:00
|
|
|
// Function for lookup
|
2019-12-10 02:26:41 +00:00
|
|
|
index := "service"
|
|
|
|
if connect {
|
|
|
|
index = "connect"
|
2018-03-09 17:32:22 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// Query the state store for the service.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogServiceNodeList(tx, serviceName, index, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
2019-03-15 20:18:48 +00:00
|
|
|
// Note we decide if we want to watch this iterator or not down below. We need
|
|
|
|
// to see if it returned anything first.
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Return the results.
|
|
|
|
var results structs.ServiceNodes
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2019-03-21 16:01:56 +00:00
|
|
|
// For connect queries we need a list of any proxy service names in the result
|
|
|
|
// set. Rather than have different code path for connect and non-connect, we
|
|
|
|
// use the same one in both cases. For non-empty non-connect results,
|
|
|
|
// serviceNames will always have exactly one element which is the same as
|
|
|
|
// serviceName. For Connect there might be multiple different service names -
|
|
|
|
// one for each service name a proxy is registered under, and the target
|
|
|
|
// service name IFF there is at least one Connect-native instance of that
|
|
|
|
// service. Either way there is usually only one distinct name if proxies are
|
|
|
|
// named consistently but could be multiple.
|
2020-06-12 14:57:41 +00:00
|
|
|
serviceNames := make(map[structs.ServiceName]struct{}, 2)
|
2017-01-24 07:37:21 +00:00
|
|
|
for service := iter.Next(); service != nil; service = iter.Next() {
|
2019-03-21 16:01:56 +00:00
|
|
|
sn := service.(*structs.ServiceNode)
|
|
|
|
results = append(results, sn)
|
2020-05-08 15:44:34 +00:00
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
name := structs.NewServiceName(sn.ServiceName, &sn.EnterpriseMeta)
|
|
|
|
serviceNames[name] = struct{}{}
|
2019-03-21 16:01:56 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// If we are querying for Connect nodes, the associated proxy might be a terminating-gateway.
|
2020-04-08 18:37:24 +00:00
|
|
|
// Gateways are tracked in a separate table, and we append them to the result set.
|
|
|
|
// We append rather than replace since it allows users to migrate a service
|
|
|
|
// to the mesh with a mix of sidecars and gateways until all its instances have a sidecar.
|
2020-04-27 22:25:37 +00:00
|
|
|
var idx uint64
|
2020-04-08 18:37:24 +00:00
|
|
|
if connect {
|
|
|
|
// Look up gateway nodes associated with the service
|
2020-04-29 22:52:27 +00:00
|
|
|
gwIdx, nodes, err := s.serviceGatewayNodes(tx, ws, serviceName, structs.ServiceKindTerminatingGateway, entMeta)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed gateway nodes lookup: %v", err)
|
|
|
|
}
|
2020-04-23 23:16:04 +00:00
|
|
|
idx = lib.MaxUint64(idx, gwIdx)
|
2020-04-08 18:37:24 +00:00
|
|
|
for i := 0; i < len(nodes); i++ {
|
|
|
|
results = append(results, nodes[i])
|
2020-05-08 15:44:34 +00:00
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
name := structs.NewServiceName(nodes[i].ServiceName, &nodes[i].EnterpriseMeta)
|
|
|
|
serviceNames[name] = struct{}{}
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-21 16:01:56 +00:00
|
|
|
// watchOptimized tracks if we meet the necessary condition to optimize
|
|
|
|
// WatchSet size. That is that every service name represented in the result
|
|
|
|
// set must have a service-specific index we can watch instead of many radix
|
|
|
|
// nodes for all the actual nodes touched. This saves us watching potentially
|
|
|
|
// thousands of watch chans for large services which may need many goroutines.
|
|
|
|
// It also avoids the performance cliff that is hit when watchLimit is hit
|
|
|
|
// (~682 service instances). See
|
|
|
|
// https://github.com/hashicorp/consul/issues/4984
|
|
|
|
watchOptimized := false
|
|
|
|
if len(serviceNames) > 0 {
|
|
|
|
// Assume optimization will work since it really should at this point. For
|
|
|
|
// safety we'll sanity check this below for each service name.
|
|
|
|
watchOptimized = true
|
|
|
|
|
|
|
|
// Fetch indexes for all names services in result set.
|
2020-06-12 14:57:41 +00:00
|
|
|
for n := range serviceNames {
|
2019-03-21 16:01:56 +00:00
|
|
|
// We know service values should exist since the serviceNames map is only
|
|
|
|
// populated if there is at least one result above. so serviceExists arg
|
|
|
|
// below is always true.
|
2020-06-12 14:57:41 +00:00
|
|
|
svcIdx, svcCh := s.maxIndexAndWatchChForService(tx, n.Name, true, true, &n.EnterpriseMeta)
|
2019-03-21 16:01:56 +00:00
|
|
|
// Take the max index represented
|
2020-04-23 23:16:04 +00:00
|
|
|
idx = lib.MaxUint64(idx, svcIdx)
|
2019-03-21 16:01:56 +00:00
|
|
|
if svcCh != nil {
|
|
|
|
// Watch the service-specific index for changes in liu of all iradix nodes
|
|
|
|
// for checks etc.
|
|
|
|
ws.Add(svcCh)
|
|
|
|
} else {
|
|
|
|
// Nil svcCh shouldn't really happen since all existent services should
|
|
|
|
// have a service-specific index but just in case it does due to a bug,
|
|
|
|
// fall back to the more expensive old way of watching every radix node
|
|
|
|
// we touch.
|
|
|
|
watchOptimized = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// If we have no results, we should use the index of the last service
|
2020-05-08 14:03:45 +00:00
|
|
|
// extinction event so we don't go backwards when services deregister. We
|
2019-03-21 16:01:56 +00:00
|
|
|
// use target serviceName here but it actually doesn't matter. No chan will
|
|
|
|
// be returned as we can't use the optimization in this case (and don't need
|
|
|
|
// to as there is only one chan to watch anyway).
|
2020-04-29 22:52:27 +00:00
|
|
|
svcIdx, _ := s.maxIndexAndWatchChForService(tx, serviceName, false, true, entMeta)
|
2020-04-23 23:16:04 +00:00
|
|
|
idx = lib.MaxUint64(idx, svcIdx)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2019-03-15 20:18:48 +00:00
|
|
|
// Create a nil watchset to pass below, we'll only pass the real one if we
|
|
|
|
// need to. Nil watchers are safe/allowed and saves some allocation too.
|
|
|
|
var fallbackWS memdb.WatchSet
|
2019-03-21 16:01:56 +00:00
|
|
|
if !watchOptimized {
|
|
|
|
// We weren't able to use the optimization of watching only service indexes
|
|
|
|
// for some reason. That means we need to fallback to watching everything we
|
|
|
|
// touch in the DB as normal. We plumb the caller's watchset through (note
|
|
|
|
// it's a map so this is a by-reference assignment.)
|
2019-03-15 20:18:48 +00:00
|
|
|
fallbackWS = ws
|
|
|
|
// We also need to watch the iterator from earlier too.
|
|
|
|
fallbackWS.Add(iter.WatchCh())
|
2019-03-21 16:01:56 +00:00
|
|
|
} else if connect {
|
|
|
|
// If this is a connect query then there is a subtlety to watch out for.
|
|
|
|
// In addition to watching the proxy service indexes for changes above, we
|
|
|
|
// need to still keep an eye on the connect service index in case a new
|
|
|
|
// proxy with a new name registers - we are only watching proxy service
|
|
|
|
// names we know about above so we'd miss that otherwise. Thankfully this
|
|
|
|
// is only ever one extra chan to watch and will catch any changes to
|
|
|
|
// proxy registrations for this target service.
|
|
|
|
ws.Add(iter.WatchCh())
|
2019-03-15 20:18:48 +00:00
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
2020-06-23 17:18:22 +00:00
|
|
|
return s.parseCheckServiceNodes(tx, fallbackWS, idx, results, err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// CheckServiceTagNodes is used to query all nodes and checks for a given
|
2017-01-16 18:28:46 +00:00
|
|
|
// service, filtering out services that don't contain the given tag.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) CheckServiceTagNodes(ws memdb.WatchSet, serviceName string, tags []string, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Query the state store for the service.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogServiceNodeList(tx, serviceName, "service", entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.Add(iter.WatchCh())
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Return the results, filtering by tag.
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
serviceExists := false
|
2017-01-13 19:47:16 +00:00
|
|
|
var results structs.ServiceNodes
|
2017-01-24 07:37:21 +00:00
|
|
|
for service := iter.Next(); service != nil; service = iter.Next() {
|
2017-01-13 19:47:16 +00:00
|
|
|
svc := service.(*structs.ServiceNode)
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
serviceExists = true
|
2018-10-11 11:50:05 +00:00
|
|
|
if !serviceTagsFilter(svc, tags) {
|
2017-01-13 19:47:16 +00:00
|
|
|
results = append(results, svc)
|
|
|
|
}
|
|
|
|
}
|
Improve blocking queries on services that do not exist (#4810)
## Background
When making a blocking query on a missing service (was never registered, or is not registered anymore) the query returns as soon as any service is updated.
On clusters with frequent updates (5~10 updates/s in our DCs) these queries virtually do not block, and clients with no protections againt this waste ressources on the agent and server side. Clients that do protect against this get updates later than they should because of the backoff time they implement between requests.
## Implementation
While reducing the number of unnecessary updates we still want :
* Clients to be notified as soon as when the last instance of a service disapears.
* Clients to be notified whenever there's there is an update for the service.
* Clients to be notified as soon as the first instance of the requested service is added.
To reduce the number of unnecessary updates we need to block when a request to a missing service is made. However in the following case :
1. Client `client1` makes a query for service `foo`, gets back a node and X-Consul-Index 42
2. `foo` is unregistered
3. `client1` makes a query for `foo` with `index=42` -> `foo` does not exist, the query blocks and `client1` is not notified of the change on `foo`
We could store the last raft index when each service was last alive to know wether we should block on the incoming query or not, but that list could grow indefinetly.
We instead store the last raft index when a service was unregistered and use it when a query targets a service that does not exist.
When a service `srv` is unregistered this "missing service index" is always greater than any X-Consul-Index held by the clients while `srv` was up, allowing us to immediatly notify them.
1. Client `client1` makes a query for service `foo`, gets back a node and `X-Consul-Index: 42`
2. `foo` is unregistered, we set the "missing service index" to 43
3. `client1` makes a blocking query for `foo` with `index=42` -> `foo` does not exist, we check against the "missing service index" and return immediatly with `X-Consul-Index: 43`
4. `client1` makes a blocking query for `foo` with `index=43` -> we block
5. Other changes happen in the cluster, but foo still doesn't exist and "missing service index" hasn't changed, the query is still blocked
6. `foo` is registered again on index 62 -> `foo` exists and its index is greater than 43, we unblock the query
2019-01-11 14:26:14 +00:00
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.maxIndexForService(tx, serviceName, serviceExists, true, entMeta)
|
2020-06-23 17:18:22 +00:00
|
|
|
return s.parseCheckServiceNodes(tx, ws, idx, results, err)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// GatewayServices is used to query all services associated with a gateway
|
|
|
|
func (s *Store) GatewayServices(ws memdb.WatchSet, gateway string, entMeta *structs.EnterpriseMeta) (uint64, structs.GatewayServices, error) {
|
2020-04-08 18:37:24 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
2020-04-23 23:16:04 +00:00
|
|
|
var maxIdx uint64
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
iter, err := s.gatewayServices(tx, gateway, entMeta)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed gateway services lookup: %s", err)
|
|
|
|
}
|
|
|
|
ws.Add(iter.WatchCh())
|
|
|
|
|
|
|
|
var results structs.GatewayServices
|
|
|
|
for service := iter.Next(); service != nil; service = iter.Next() {
|
|
|
|
svc := service.(*structs.GatewayService)
|
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
if svc.Service.Name != structs.WildcardSpecifier {
|
2020-04-23 23:16:04 +00:00
|
|
|
idx, matches, err := s.checkProtocolMatch(tx, ws, svc)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed checking protocol: %s", err)
|
|
|
|
}
|
|
|
|
maxIdx = lib.MaxUint64(maxIdx, idx)
|
|
|
|
if matches {
|
|
|
|
results = append(results, svc)
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
idx := maxIndexTxn(tx, gatewayServicesTableName)
|
2020-04-23 23:16:04 +00:00
|
|
|
return lib.MaxUint64(maxIdx, idx), results, nil
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// parseCheckServiceNodes is used to parse through a given set of services,
|
|
|
|
// and query for an associated node and a set of checks. This is the inner
|
|
|
|
// method used to return a rich set of results from a more simple query.
|
2017-04-21 00:46:29 +00:00
|
|
|
func (s *Store) parseCheckServiceNodes(
|
2020-06-03 17:21:00 +00:00
|
|
|
tx *txn, ws memdb.WatchSet, idx uint64,
|
2020-06-23 17:18:22 +00:00
|
|
|
services structs.ServiceNodes,
|
2017-01-13 19:47:16 +00:00
|
|
|
err error) (uint64, structs.CheckServiceNodes, error) {
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Special-case the zero return value to nil, since this ends up in
|
|
|
|
// external APIs.
|
|
|
|
if len(services) == 0 {
|
|
|
|
return idx, nil, nil
|
|
|
|
}
|
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
// We don't want to track an unlimited number of nodes, so we pull a
|
|
|
|
// top-level watch to use as a fallback.
|
|
|
|
allNodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed nodes lookup: %s", err)
|
|
|
|
}
|
|
|
|
allNodesCh := allNodes.WatchCh()
|
|
|
|
|
|
|
|
// We need a similar fallback for checks. Since services need the
|
|
|
|
// status of node + service-specific checks, we pull in a top-level
|
|
|
|
// watch over all checks.
|
|
|
|
allChecks, err := tx.Get("checks", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed checks lookup: %s", err)
|
|
|
|
}
|
|
|
|
allChecksCh := allChecks.WatchCh()
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
results := make(structs.CheckServiceNodes, 0, len(services))
|
|
|
|
for _, sn := range services {
|
|
|
|
// Retrieve the node.
|
2017-01-24 07:37:21 +00:00
|
|
|
watchCh, n, err := tx.FirstWatch("nodes", "id", sn.Node)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.AddWithLimit(watchLimit, watchCh, allNodesCh)
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
if n == nil {
|
|
|
|
return 0, nil, ErrMissingNode
|
|
|
|
}
|
|
|
|
node := n.(*structs.Node)
|
|
|
|
|
2017-01-24 07:37:21 +00:00
|
|
|
// First add the node-level checks. These always apply to any
|
|
|
|
// service on the node.
|
2017-01-13 19:47:16 +00:00
|
|
|
var checks structs.HealthChecks
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err := s.catalogListNodeChecks(tx, sn.Node)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
2017-01-24 07:37:21 +00:00
|
|
|
ws.AddWithLimit(watchLimit, iter.WatchCh(), allChecksCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
2017-01-24 07:37:21 +00:00
|
|
|
checks = append(checks, check.(*structs.HealthCheck))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now add the service-specific checks.
|
2019-12-10 02:26:41 +00:00
|
|
|
iter, err = s.catalogListServiceChecks(tx, sn.Node, sn.ServiceID, &sn.EnterpriseMeta)
|
2017-01-24 07:37:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, err
|
|
|
|
}
|
|
|
|
ws.AddWithLimit(watchLimit, iter.WatchCh(), allChecksCh)
|
|
|
|
for check := iter.Next(); check != nil; check = iter.Next() {
|
|
|
|
checks = append(checks, check.(*structs.HealthCheck))
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Append to the results.
|
|
|
|
results = append(results, structs.CheckServiceNode{
|
|
|
|
Node: node,
|
|
|
|
Service: sn.ToNodeService(),
|
|
|
|
Checks: checks,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NodeInfo is used to generate a dump of a single node. The dump includes
|
|
|
|
// all services and checks which are registered against the node.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) NodeInfo(ws memdb.WatchSet, node string, entMeta *structs.EnterpriseMeta) (uint64, structs.NodeDump, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogMaxIndex(tx, entMeta, true)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Query the node by the passed node
|
|
|
|
nodes, err := tx.Get("nodes", "id", node)
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
2017-01-24 17:06:51 +00:00
|
|
|
ws.Add(nodes.WatchCh())
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.parseNodes(tx, ws, idx, nodes, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NodeDump is used to generate a dump of all nodes. This call is expensive
|
|
|
|
// as it has to query every node, service, and check. The response can also
|
|
|
|
// be quite large since there is currently no filtering applied.
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) NodeDump(ws memdb.WatchSet, entMeta *structs.EnterpriseMeta) (uint64, structs.NodeDump, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
|
|
|
// Get the table index.
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogMaxIndex(tx, entMeta, true)
|
2017-01-13 19:47:16 +00:00
|
|
|
|
|
|
|
// Fetch all of the registered nodes
|
|
|
|
nodes, err := tx.Get("nodes", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
2017-01-24 17:06:51 +00:00
|
|
|
ws.Add(nodes.WatchCh())
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.parseNodes(tx, ws, idx, nodes, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
func (s *Store) ServiceDump(ws memdb.WatchSet, kind structs.ServiceKind, useKind bool, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2019-04-16 16:00:15 +00:00
|
|
|
tx := s.db.Txn(false)
|
|
|
|
defer tx.Abort()
|
|
|
|
|
2019-06-20 19:04:39 +00:00
|
|
|
if useKind {
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.serviceDumpKindTxn(tx, ws, kind, entMeta)
|
2019-06-20 19:04:39 +00:00
|
|
|
} else {
|
2019-12-10 02:26:41 +00:00
|
|
|
return s.serviceDumpAllTxn(tx, ws, entMeta)
|
2019-06-20 19:04:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) serviceDumpAllTxn(tx *txn, ws memdb.WatchSet, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2019-04-16 16:00:15 +00:00
|
|
|
// Get the table index
|
2019-12-19 16:15:37 +00:00
|
|
|
idx := s.catalogMaxIndexWatch(tx, ws, entMeta, true)
|
2019-04-16 16:00:15 +00:00
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceList(tx, entMeta, true)
|
2019-04-16 16:00:15 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var results structs.ServiceNodes
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
sn := service.(*structs.ServiceNode)
|
|
|
|
results = append(results, sn)
|
|
|
|
}
|
|
|
|
|
2020-06-23 17:18:22 +00:00
|
|
|
return s.parseCheckServiceNodes(tx, nil, idx, results, err)
|
2019-06-20 19:04:39 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) serviceDumpKindTxn(tx *txn, ws memdb.WatchSet, kind structs.ServiceKind, entMeta *structs.EnterpriseMeta) (uint64, structs.CheckServiceNodes, error) {
|
2019-06-20 19:04:39 +00:00
|
|
|
// unlike when we are dumping all services here we only need to watch the kind specific index entry for changing (or nodes, checks)
|
|
|
|
// updating any services, nodes or checks will bump the appropriate service kind index so there is no need to watch any of the individual
|
|
|
|
// entries
|
2019-12-10 02:26:41 +00:00
|
|
|
idx := s.catalogServiceKindMaxIndex(tx, ws, kind, entMeta)
|
2019-06-20 19:04:39 +00:00
|
|
|
|
|
|
|
// Query the state store for the service.
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceListByKind(tx, kind, entMeta)
|
2019-06-20 19:04:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var results structs.ServiceNodes
|
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
sn := service.(*structs.ServiceNode)
|
|
|
|
results = append(results, sn)
|
|
|
|
}
|
|
|
|
|
2020-06-23 17:18:22 +00:00
|
|
|
return s.parseCheckServiceNodes(tx, nil, idx, results, err)
|
2019-04-16 16:00:15 +00:00
|
|
|
}
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
// parseNodes takes an iterator over a set of nodes and returns a struct
|
|
|
|
// containing the nodes along with all of their associated services
|
|
|
|
// and/or health checks.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) parseNodes(tx *txn, ws memdb.WatchSet, idx uint64,
|
2019-12-10 02:26:41 +00:00
|
|
|
iter memdb.ResultIterator, entMeta *structs.EnterpriseMeta) (uint64, structs.NodeDump, error) {
|
2017-01-13 19:47:16 +00:00
|
|
|
|
2017-01-24 17:06:51 +00:00
|
|
|
// We don't want to track an unlimited number of services, so we pull a
|
|
|
|
// top-level watch to use as a fallback.
|
|
|
|
allServices, err := tx.Get("services", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed services lookup: %s", err)
|
|
|
|
}
|
|
|
|
allServicesCh := allServices.WatchCh()
|
|
|
|
|
|
|
|
// We need a similar fallback for checks.
|
|
|
|
allChecks, err := tx.Get("checks", "id")
|
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed checks lookup: %s", err)
|
|
|
|
}
|
|
|
|
allChecksCh := allChecks.WatchCh()
|
|
|
|
|
2017-01-13 19:47:16 +00:00
|
|
|
var results structs.NodeDump
|
|
|
|
for n := iter.Next(); n != nil; n = iter.Next() {
|
|
|
|
node := n.(*structs.Node)
|
|
|
|
|
|
|
|
// Create the wrapped node
|
|
|
|
dump := &structs.NodeInfo{
|
2017-01-18 22:26:42 +00:00
|
|
|
ID: node.ID,
|
2017-01-13 19:47:16 +00:00
|
|
|
Node: node.Node,
|
|
|
|
Address: node.Address,
|
|
|
|
TaggedAddresses: node.TaggedAddresses,
|
|
|
|
Meta: node.Meta,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Query the node services
|
2019-12-10 02:26:41 +00:00
|
|
|
services, err := s.catalogServiceListByNode(tx, node.Node, entMeta, true)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed services lookup: %s", err)
|
|
|
|
}
|
2017-01-24 17:06:51 +00:00
|
|
|
ws.AddWithLimit(watchLimit, services.WatchCh(), allServicesCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
for service := services.Next(); service != nil; service = services.Next() {
|
|
|
|
ns := service.(*structs.ServiceNode).ToNodeService()
|
|
|
|
dump.Services = append(dump.Services, ns)
|
|
|
|
}
|
|
|
|
|
2019-12-10 02:26:41 +00:00
|
|
|
// Query the service level checks
|
|
|
|
checks, err := s.catalogListChecksByNode(tx, node.Node, entMeta)
|
2017-01-13 19:47:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, nil, fmt.Errorf("failed node lookup: %s", err)
|
|
|
|
}
|
2017-01-24 17:06:51 +00:00
|
|
|
ws.AddWithLimit(watchLimit, checks.WatchCh(), allChecksCh)
|
2017-01-13 19:47:16 +00:00
|
|
|
for check := checks.Next(); check != nil; check = checks.Next() {
|
|
|
|
hc := check.(*structs.HealthCheck)
|
|
|
|
dump.Checks = append(dump.Checks, hc)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add the result to the slice
|
|
|
|
results = append(results, dump)
|
|
|
|
}
|
|
|
|
return idx, results, nil
|
|
|
|
}
|
2019-12-10 02:26:41 +00:00
|
|
|
|
|
|
|
// checkSessionsTxn returns the IDs of all sessions associated with a health check
|
2020-06-03 17:21:00 +00:00
|
|
|
func checkSessionsTxn(tx *txn, hc *structs.HealthCheck) ([]*sessionCheck, error) {
|
2019-12-10 02:26:41 +00:00
|
|
|
mappings, err := getCompoundWithTxn(tx, "session_checks", "node_check", &hc.EnterpriseMeta, hc.Node, string(hc.CheckID))
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed session checks lookup: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var sessions []*sessionCheck
|
|
|
|
for mapping := mappings.Next(); mapping != nil; mapping = mappings.Next() {
|
|
|
|
sessions = append(sessions, mapping.(*sessionCheck))
|
|
|
|
}
|
|
|
|
return sessions, nil
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// updateGatewayServices associates services with gateways as specified in a gateway config entry
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) updateGatewayServices(tx *txn, idx uint64, conf structs.ConfigEntry, entMeta *structs.EnterpriseMeta) error {
|
2020-05-11 17:38:04 +00:00
|
|
|
var (
|
|
|
|
noChange bool
|
|
|
|
gatewayServices structs.GatewayServices
|
|
|
|
err error
|
|
|
|
)
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
gateway := structs.NewServiceName(conf.GetName(), entMeta)
|
2020-04-16 21:00:48 +00:00
|
|
|
switch conf.GetKind() {
|
|
|
|
case structs.IngressGateway:
|
2020-06-12 14:57:41 +00:00
|
|
|
noChange, gatewayServices, err = s.ingressConfigGatewayServices(tx, gateway, conf, entMeta)
|
2020-04-16 21:00:48 +00:00
|
|
|
case structs.TerminatingGateway:
|
2020-06-12 14:57:41 +00:00
|
|
|
noChange, gatewayServices, err = s.terminatingConfigGatewayServices(tx, gateway, conf, entMeta)
|
2020-04-16 21:00:48 +00:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("config entry kind %q does not need gateway-services", conf.GetKind())
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-16 21:00:48 +00:00
|
|
|
// Return early if there is an error OR we don't have any services to update
|
2020-05-11 17:38:04 +00:00
|
|
|
if err != nil || noChange {
|
2020-04-16 21:00:48 +00:00
|
|
|
return err
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete all associated with gateway first, to avoid keeping mappings that were removed
|
2020-06-12 14:57:41 +00:00
|
|
|
if _, err := tx.DeleteAll(gatewayServicesTableName, "gateway", structs.NewServiceName(conf.GetName(), entMeta)); err != nil {
|
2020-04-08 18:37:24 +00:00
|
|
|
return fmt.Errorf("failed to truncate gateway services table: %v", err)
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
for _, svc := range gatewayServices {
|
2020-04-08 18:37:24 +00:00
|
|
|
// If the service is a wildcard we need to target all services within the namespace
|
2020-06-12 14:57:41 +00:00
|
|
|
if svc.Service.Name == structs.WildcardSpecifier {
|
2020-04-16 21:00:48 +00:00
|
|
|
if err := s.updateGatewayNamespace(tx, idx, svc, entMeta); err != nil {
|
2020-06-12 14:57:41 +00:00
|
|
|
return fmt.Errorf("failed to associate gateway %q with wildcard: %v", gateway.String(), err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
// Skip service-specific update below if there was a wildcard update
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since this service was specified on its own, and not with a wildcard,
|
|
|
|
// if there is an existing entry, we overwrite it. The service entry is the source of truth.
|
|
|
|
//
|
|
|
|
// By extension, if TLS creds are provided with a wildcard but are not provided in
|
|
|
|
// the service entry, the service does not inherit the creds from the wildcard.
|
2020-04-16 21:00:48 +00:00
|
|
|
err = s.updateGatewayService(tx, idx, svc)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := indexUpdateMaxTxn(tx, idx, gatewayServicesTableName); err != nil {
|
|
|
|
return fmt.Errorf("failed updating gateway-services index: %v", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-05-11 19:04:59 +00:00
|
|
|
// ingressConfigGatewayServices constructs a list of GatewayService structs for
|
|
|
|
// insertion into the memdb table, specific to ingress gateways. The boolean
|
|
|
|
// returned indicates that there are no changes necessary to the memdb table.
|
|
|
|
func (s *Store) ingressConfigGatewayServices(
|
2020-06-03 17:21:00 +00:00
|
|
|
tx *txn,
|
2020-06-12 14:57:41 +00:00
|
|
|
gateway structs.ServiceName,
|
2020-05-11 19:04:59 +00:00
|
|
|
conf structs.ConfigEntry,
|
|
|
|
entMeta *structs.EnterpriseMeta,
|
|
|
|
) (bool, structs.GatewayServices, error) {
|
2020-04-16 21:00:48 +00:00
|
|
|
entry, ok := conf.(*structs.IngressGatewayConfigEntry)
|
|
|
|
if !ok {
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, nil, fmt.Errorf("unexpected config entry type: %T", conf)
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if service list matches the last known list for the config entry, if it does, skip the update
|
|
|
|
_, c, err := s.configEntryTxn(tx, nil, conf.GetKind(), conf.GetName(), entMeta)
|
|
|
|
if err != nil {
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, nil, fmt.Errorf("failed to get config entry: %v", err)
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
if cfg, ok := c.(*structs.IngressGatewayConfigEntry); ok && cfg != nil {
|
|
|
|
if reflect.DeepEqual(cfg.Listeners, entry.Listeners) {
|
|
|
|
// Services are the same, nothing to update
|
2020-05-11 17:38:04 +00:00
|
|
|
return true, nil, nil
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var gatewayServices structs.GatewayServices
|
|
|
|
for _, listener := range entry.Listeners {
|
|
|
|
for _, service := range listener.Services {
|
|
|
|
mapping := &structs.GatewayService{
|
|
|
|
Gateway: gateway,
|
2020-06-12 14:57:41 +00:00
|
|
|
Service: service.ToServiceName(),
|
2020-04-16 21:00:48 +00:00
|
|
|
GatewayKind: structs.ServiceKindIngressGateway,
|
2020-04-23 15:06:19 +00:00
|
|
|
Hosts: service.Hosts,
|
2020-04-16 21:00:48 +00:00
|
|
|
Port: listener.Port,
|
2020-04-16 23:24:11 +00:00
|
|
|
Protocol: listener.Protocol,
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
gatewayServices = append(gatewayServices, mapping)
|
|
|
|
}
|
|
|
|
}
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, gatewayServices, nil
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
|
2020-05-11 19:04:59 +00:00
|
|
|
// terminatingConfigGatewayServices constructs a list of GatewayService structs
|
|
|
|
// for insertion into the memdb table, specific to terminating gateways. The
|
|
|
|
// boolean returned indicates that there are no changes necessary to the memdb
|
|
|
|
// table.
|
|
|
|
func (s *Store) terminatingConfigGatewayServices(
|
2020-06-03 17:21:00 +00:00
|
|
|
tx *txn,
|
2020-06-12 14:57:41 +00:00
|
|
|
gateway structs.ServiceName,
|
2020-05-11 19:04:59 +00:00
|
|
|
conf structs.ConfigEntry,
|
|
|
|
entMeta *structs.EnterpriseMeta,
|
|
|
|
) (bool, structs.GatewayServices, error) {
|
2020-04-16 21:00:48 +00:00
|
|
|
entry, ok := conf.(*structs.TerminatingGatewayConfigEntry)
|
|
|
|
if !ok {
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, nil, fmt.Errorf("unexpected config entry type: %T", conf)
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if service list matches the last known list for the config entry, if it does, skip the update
|
|
|
|
_, c, err := s.configEntryTxn(tx, nil, conf.GetKind(), conf.GetName(), entMeta)
|
|
|
|
if err != nil {
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, nil, fmt.Errorf("failed to get config entry: %v", err)
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
if cfg, ok := c.(*structs.TerminatingGatewayConfigEntry); ok && cfg != nil {
|
|
|
|
if reflect.DeepEqual(cfg.Services, entry.Services) {
|
|
|
|
// Services are the same, nothing to update
|
2020-05-11 17:38:04 +00:00
|
|
|
return true, nil, nil
|
2020-04-16 21:00:48 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var gatewayServices structs.GatewayServices
|
|
|
|
for _, svc := range entry.Services {
|
2020-04-08 18:37:24 +00:00
|
|
|
mapping := &structs.GatewayService{
|
2020-04-16 21:00:48 +00:00
|
|
|
Gateway: gateway,
|
2020-06-12 14:57:41 +00:00
|
|
|
Service: structs.NewServiceName(svc.Name, &svc.EnterpriseMeta),
|
2020-04-08 18:37:24 +00:00
|
|
|
GatewayKind: structs.ServiceKindTerminatingGateway,
|
|
|
|
KeyFile: svc.KeyFile,
|
|
|
|
CertFile: svc.CertFile,
|
|
|
|
CAFile: svc.CAFile,
|
2020-04-27 22:25:37 +00:00
|
|
|
SNI: svc.SNI,
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
gatewayServices = append(gatewayServices, mapping)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-05-11 17:38:04 +00:00
|
|
|
return false, gatewayServices, nil
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// updateGatewayNamespace is used to target all services within a namespace
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) updateGatewayNamespace(tx *txn, idx uint64, service *structs.GatewayService, entMeta *structs.EnterpriseMeta) error {
|
2020-04-08 18:37:24 +00:00
|
|
|
services, err := s.catalogServiceListByKind(tx, structs.ServiceKindTypical, entMeta)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed querying services: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iterate over services in namespace and insert mapping for each
|
|
|
|
for svc := services.Next(); svc != nil; svc = services.Next() {
|
|
|
|
sn := svc.(*structs.ServiceNode)
|
|
|
|
|
|
|
|
// Only associate non-consul services with gateways
|
|
|
|
if sn.ServiceName == "consul" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-04-21 21:06:23 +00:00
|
|
|
existing, err := tx.First(gatewayServicesTableName, "id", service.Gateway, sn.CompoundServiceName(), service.Port)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("gateway service lookup failed: %s", err)
|
|
|
|
}
|
2020-04-16 21:00:48 +00:00
|
|
|
if existing != nil {
|
2020-04-08 18:37:24 +00:00
|
|
|
// If there's an existing service associated with this gateway then we skip it.
|
|
|
|
// This means the service was specified on its own, and the service entry overrides the wildcard entry.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
mapping := service.Clone()
|
2020-04-17 00:51:27 +00:00
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
mapping.Service = structs.NewServiceName(sn.ServiceName, &service.Service.EnterpriseMeta)
|
2020-04-17 00:51:27 +00:00
|
|
|
mapping.FromWildcard = true
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
err = s.updateGatewayService(tx, idx, mapping)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Also store a mapping for the wildcard so that the TLS creds can be pulled
|
|
|
|
// for new services registered in its namespace
|
2020-04-16 21:00:48 +00:00
|
|
|
err = s.updateGatewayService(tx, idx, service)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateGatewayService associates services with gateways after an eligible event
|
|
|
|
// ie. Registering a service in a namespace targeted by a gateway
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) updateGatewayService(tx *txn, idx uint64, mapping *structs.GatewayService) error {
|
2020-04-08 18:37:24 +00:00
|
|
|
// Check if mapping already exists in table if it's already in the table
|
|
|
|
// Avoid insert if nothing changed
|
2020-04-21 21:06:23 +00:00
|
|
|
existing, err := tx.First(gatewayServicesTableName, "id", mapping.Gateway, mapping.Service, mapping.Port)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("gateway service lookup failed: %s", err)
|
|
|
|
}
|
|
|
|
if gs, ok := existing.(*structs.GatewayService); ok && gs != nil {
|
2020-04-16 21:00:48 +00:00
|
|
|
mapping.CreateIndex = gs.CreateIndex
|
2020-04-08 18:37:24 +00:00
|
|
|
if gs.IsSame(mapping) {
|
|
|
|
return nil
|
|
|
|
}
|
2020-04-16 21:00:48 +00:00
|
|
|
} else {
|
|
|
|
// We have a new mapping
|
|
|
|
mapping.CreateIndex = idx
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-16 21:00:48 +00:00
|
|
|
mapping.ModifyIndex = idx
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
if err := tx.Insert(gatewayServicesTableName, mapping); err != nil {
|
2020-04-08 18:37:24 +00:00
|
|
|
return fmt.Errorf("failed inserting gateway service mapping: %s", err)
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
if err := indexUpdateMaxTxn(tx, idx, gatewayServicesTableName); err != nil {
|
|
|
|
return fmt.Errorf("failed updating gateway-services index: %v", err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-04-17 16:24:34 +00:00
|
|
|
// checkWildcardForGatewaysAndUpdate checks whether a service matches a
|
|
|
|
// wildcard definition in gateway config entries and if so adds it the the
|
|
|
|
// gateway-services table.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) checkGatewayWildcardsAndUpdate(tx *txn, idx uint64, svc *structs.NodeService) error {
|
2020-04-17 16:24:34 +00:00
|
|
|
// Do not associate non-typical services with gateways or consul services
|
|
|
|
if svc.Kind != structs.ServiceKindTypical || svc.Service == "consul" {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
svcGateways, err := s.serviceGateways(tx, structs.WildcardSpecifier, &svc.EnterpriseMeta)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed gateway lookup for %q: %s", svc.Service, err)
|
|
|
|
}
|
|
|
|
for service := svcGateways.Next(); service != nil; service = svcGateways.Next() {
|
|
|
|
if wildcardSvc, ok := service.(*structs.GatewayService); ok && wildcardSvc != nil {
|
|
|
|
|
|
|
|
// Copy the wildcard mapping and modify it
|
|
|
|
gatewaySvc := wildcardSvc.Clone()
|
2020-04-17 00:51:27 +00:00
|
|
|
|
2020-06-12 14:57:41 +00:00
|
|
|
gatewaySvc.Service = structs.NewServiceName(svc.Service, &svc.EnterpriseMeta)
|
2020-04-17 00:51:27 +00:00
|
|
|
gatewaySvc.FromWildcard = true
|
2020-04-17 16:24:34 +00:00
|
|
|
|
|
|
|
if err = s.updateGatewayService(tx, idx, gatewaySvc); err != nil {
|
|
|
|
return fmt.Errorf("Failed to associate service %q with gateway %q", gatewaySvc.Service.String(), gatewaySvc.Gateway.String())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// serviceGateways returns all GatewayService entries with the given service name. This effectively looks up
|
|
|
|
// all the gateways mapped to this service.
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) serviceGateways(tx *txn, name string, entMeta *structs.EnterpriseMeta) (memdb.ResultIterator, error) {
|
2020-06-12 14:57:41 +00:00
|
|
|
return tx.Get(gatewayServicesTableName, "service", structs.NewServiceName(name, entMeta))
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) gatewayServices(tx *txn, name string, entMeta *structs.EnterpriseMeta) (memdb.ResultIterator, error) {
|
2020-06-12 14:57:41 +00:00
|
|
|
return tx.Get(gatewayServicesTableName, "gateway", structs.NewServiceName(name, entMeta))
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
// TODO(ingress): How to handle index rolling back when a config entry is
|
|
|
|
// deleted that references a service?
|
|
|
|
// We might need something like the service_last_extinction index?
|
2020-06-03 17:21:00 +00:00
|
|
|
func (s *Store) serviceGatewayNodes(tx *txn, ws memdb.WatchSet, service string, kind structs.ServiceKind, entMeta *structs.EnterpriseMeta) (uint64, structs.ServiceNodes, error) {
|
2020-04-08 18:37:24 +00:00
|
|
|
// Look up gateway name associated with the service
|
2020-04-16 21:00:48 +00:00
|
|
|
gws, err := s.serviceGateways(tx, service, entMeta)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
2020-04-29 22:52:27 +00:00
|
|
|
return 0, nil, fmt.Errorf("failed gateway lookup: %s", err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 23:30:31 +00:00
|
|
|
// Adding this channel to the WatchSet means that the watch will fire if a config entry targeting the service is added.
|
|
|
|
// Otherwise, if there's no associated gateway, then no watch channel would be returned
|
|
|
|
ws.Add(gws.WatchCh())
|
|
|
|
|
2020-04-08 18:37:24 +00:00
|
|
|
var ret structs.ServiceNodes
|
2020-04-16 21:00:48 +00:00
|
|
|
var maxIdx uint64
|
2020-04-08 18:37:24 +00:00
|
|
|
|
2020-04-16 21:00:48 +00:00
|
|
|
for gateway := gws.Next(); gateway != nil; gateway = gws.Next() {
|
|
|
|
mapping := gateway.(*structs.GatewayService)
|
|
|
|
// TODO(ingress): Test this conditional
|
|
|
|
if mapping.GatewayKind != kind {
|
|
|
|
continue
|
|
|
|
}
|
2020-04-23 23:16:04 +00:00
|
|
|
maxIdx = lib.MaxUint64(maxIdx, mapping.ModifyIndex)
|
2020-04-08 18:37:24 +00:00
|
|
|
|
|
|
|
// Look up nodes for gateway
|
2020-06-12 14:57:41 +00:00
|
|
|
gwServices, err := s.catalogServiceNodeList(tx, mapping.Gateway.Name, "service", &mapping.Gateway.EnterpriseMeta)
|
2020-04-08 18:37:24 +00:00
|
|
|
if err != nil {
|
2020-04-29 22:52:27 +00:00
|
|
|
return 0, nil, fmt.Errorf("failed service lookup: %s", err)
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-27 22:25:37 +00:00
|
|
|
|
|
|
|
var exists bool
|
2020-04-16 21:00:48 +00:00
|
|
|
for svc := gwServices.Next(); svc != nil; svc = gwServices.Next() {
|
|
|
|
sn := svc.(*structs.ServiceNode)
|
2020-04-08 18:37:24 +00:00
|
|
|
ret = append(ret, sn)
|
2020-04-27 22:25:37 +00:00
|
|
|
|
|
|
|
// Tracking existence to know whether we should check extinction index for service
|
|
|
|
exists = true
|
|
|
|
}
|
|
|
|
|
2020-05-08 15:44:34 +00:00
|
|
|
// This prevents the index from sliding back if case all instances of the gateway service are deregistered
|
2020-06-12 14:57:41 +00:00
|
|
|
svcIdx := s.maxIndexForService(tx, mapping.Gateway.Name, exists, false, &mapping.Gateway.EnterpriseMeta)
|
2020-04-23 23:16:04 +00:00
|
|
|
maxIdx = lib.MaxUint64(maxIdx, svcIdx)
|
2020-04-27 22:25:37 +00:00
|
|
|
|
2020-04-29 22:52:27 +00:00
|
|
|
// Ensure that blocking queries wake up if the gateway-service mapping exists, but the gateway does not exist yet
|
|
|
|
if !exists {
|
|
|
|
ws.Add(gwServices.WatchCh())
|
|
|
|
}
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-29 22:52:27 +00:00
|
|
|
return maxIdx, ret, nil
|
2020-04-08 18:37:24 +00:00
|
|
|
}
|
2020-04-23 23:16:04 +00:00
|
|
|
|
|
|
|
// checkProtocolMatch filters out any GatewayService entries added from a wildcard with a protocol
|
|
|
|
// that doesn't match the one configured in their discovery chain.
|
|
|
|
func (s *Store) checkProtocolMatch(
|
2020-06-03 17:21:00 +00:00
|
|
|
tx *txn,
|
2020-04-23 23:16:04 +00:00
|
|
|
ws memdb.WatchSet,
|
|
|
|
svc *structs.GatewayService,
|
|
|
|
) (uint64, bool, error) {
|
|
|
|
if svc.GatewayKind != structs.ServiceKindIngressGateway || !svc.FromWildcard {
|
|
|
|
return 0, true, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
idx, protocol, err := s.protocolForService(tx, ws, svc.Service)
|
|
|
|
if err != nil {
|
|
|
|
return 0, false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return idx, svc.Protocol == protocol, nil
|
|
|
|
}
|