core: switch to go.etcd.io/bbolt

This PR swaps the underlying BoltDB implementation from boltdb/bolt
to go.etcd.io/bbolt.

In addition, the Server has a new configuration option for enabling
NoFreelistSync (that is, disabling freelist syncing) on the underlying database.

Freelist option: https://github.com/etcd-io/bbolt/blob/master/db.go#L81
Consul equivalent PR: https://github.com/hashicorp/consul/pull/11720
This commit is contained in:
Seth Hoenig 2022-02-16 10:30:03 -06:00
parent 246db87a74
commit de95998faa
10 changed files with 127 additions and 7 deletions

19
.changelog/12107.txt Normal file
View File

@ -0,0 +1,19 @@
```release-note:improvement
deps: Update hashicorp/raft-boltdb to v2.2.0
```
```release-note:improvement
core: Switch from boltdb/bolt to go.etcd.io/bbolt
```
```release-note:improvement
core: Enable configuring raft boltdb freelist sync behavior
```
```release-note:improvement
metrics: Emit metrics regarding raft boltdb operations
```
```release-note:breaking-change
core: The server raft implementation will automatically migrate its underlying raft.db database on startup. Downgrading to a previous version of the server after upgrading it to Nomad 1.3 is not supported.
```

View File

@ -444,6 +444,11 @@ func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
}
}
// Set the raft bolt parameters
if bolt := agentConfig.Server.RaftBoltConfig; bolt != nil {
conf.RaftBoltNoFreelistSync = bolt.NoFreelistSync
}
return conf, nil
}

View File

@ -516,11 +516,26 @@ type ServerConfig struct {
// ExtraKeysHCL is used by hcl to surface unexpected keys
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
// Search configures UI search features.
Search *Search `hcl:"search"`
// DeploymentQueryRateLimit is in queries per second and is used by the
// DeploymentWatcher to throttle the number of simultaneous deployments
DeploymentQueryRateLimit float64 `hcl:"deploy_query_rate_limit"`
// RaftBoltConfig configures boltdb as used by raft.
RaftBoltConfig *RaftBoltConfig `hcl:"raft_boltdb"`
}
// RaftBoltConfig is used in servers to configure parameters of the boltdb
// used for raft consensus. It is decoded from the `raft_boltdb` block of the
// server configuration.
type RaftBoltConfig struct {
// NoFreelistSync, when true, stops the underlying raft storage from syncing
// its freelist to disk within the bolt .db file. Skipping the sync reduces
// the disk IO required for write operations, at the expense of longer
// server startup times because the freelist has to be rebuilt by scanning
// the database.
//
// Default: false (the freelist is synced to disk).
NoFreelistSync bool `hcl:"no_freelist_sync"`
}
// Search is used in servers to configure search API options.
@ -1599,6 +1614,12 @@ func (s *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
}
}
if b.RaftBoltConfig != nil {
result.RaftBoltConfig = &RaftBoltConfig{
NoFreelistSync: b.RaftBoltConfig.NoFreelistSync,
}
}
// Add the schedulers
result.EnabledSchedulers = append(result.EnabledSchedulers, b.EnabledSchedulers...)

4
go.mod
View File

@ -77,7 +77,8 @@ require (
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69
github.com/hashicorp/nomad/api v0.0.0-20200529203653-c4416b26d3eb
github.com/hashicorp/raft v1.3.5
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea
github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea
github.com/hashicorp/raft-boltdb/v2 v2.2.0
github.com/hashicorp/serf v0.9.5
github.com/hashicorp/vault/api v1.0.5-0.20200805123347-1ef507638af6
github.com/hashicorp/vault/sdk v0.2.0
@ -114,6 +115,7 @@ require (
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
github.com/zclconf/go-cty v1.8.0
github.com/zclconf/go-cty-yaml v1.0.2
go.etcd.io/bbolt v1.3.5
go.uber.org/goleak v1.1.12
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519
golang.org/x/net v0.0.0-20211216030914-fe4d6282115f

7
go.sum
View File

@ -764,12 +764,16 @@ github.com/hashicorp/memberlist v0.3.1 h1:MXgUXLqva1QvpVEDQW1IQLG0wivQAtmFlHRQ+1
github.com/hashicorp/memberlist v0.3.1/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69 h1:lc3c72qGlIMDqQpQH82Y4vaglRMMFdJbziYWriR4UcE=
github.com/hashicorp/net-rpc-msgpackrpc v0.0.0-20151116020338-a14192a58a69/go.mod h1:/z+jUGRBlwVpUZfjute9jWaF6/HuhjuFQuL1YXzVD1Q=
github.com/hashicorp/raft v1.1.0/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM=
github.com/hashicorp/raft v1.1.1/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.1.2/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8=
github.com/hashicorp/raft v1.3.5 h1:93YBXmHWW2MuyMZfMxN1PsAnPXAt+hBfG0S0ZrZxRrY=
github.com/hashicorp/raft v1.3.5/go.mod h1:4Ak7FSPnuvmb0GV6vgIAJ4vYT4bek9bb6Q+7HVbyzqM=
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBAS2J0VBzVa5e80b5ZtYuNQtgXjN40qBZlD4=
github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=
github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea h1:RxcPJuutPRM8PUOyiweMmkuNO+RJyfy2jds2gfvgNmU=
github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea/go.mod h1:qRd6nFJYYS6Iqnc/8HcUmko2/2Gw8qTFEmxDLii6W5I=
github.com/hashicorp/raft-boltdb/v2 v2.2.0 h1:/CVN9LSAcH50L3yp2TsPFIpeyHn1m3VF6kiutlDE3Nw=
github.com/hashicorp/raft-boltdb/v2 v2.2.0/go.mod h1:SgPUD5TP20z/bswEr210SnkUFvQP/YjKV95aaiTbeMQ=
github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc=
github.com/hashicorp/serf v0.9.3/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/hashicorp/serf v0.9.4/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
@ -1258,6 +1262,7 @@ github.com/zclconf/go-cty-yaml v1.0.2 h1:dNyg4QLTrv2IfJpm7Wtxi55ed5gLGOlPrZ6kMd5
github.com/zclconf/go-cty-yaml v1.0.2/go.mod h1:IP3Ylp0wQpYm50IHK8OZWKMu6sPJIUgKa8XhiVHura0=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=

View File

@ -339,6 +339,9 @@ type Config struct {
// SearchConfig provides knobs for Search API.
SearchConfig *structs.SearchConfig
// RaftBoltNoFreelistSync, when true, disables syncing the raft boltdb
// freelist to disk.
RaftBoltNoFreelistSync bool
// AgentShutdown is used to call agent.Shutdown from the context of a Server
// It is used primarily for licensing
AgentShutdown func() error

View File

@ -40,8 +40,9 @@ import (
"github.com/hashicorp/nomad/nomad/volumewatcher"
"github.com/hashicorp/nomad/scheduler"
"github.com/hashicorp/raft"
raftboltdb "github.com/hashicorp/raft-boltdb"
raftboltdb "github.com/hashicorp/raft-boltdb/v2"
"github.com/hashicorp/serf/serf"
"go.etcd.io/bbolt"
)
const (
@ -1222,6 +1223,7 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
// setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error {
// If we have an unclean exit then attempt to close the Raft store.
defer func() {
if s.raft == nil && s.raftStore != nil {
@ -1282,13 +1284,23 @@ func (s *Server) setupRaft() error {
return err
}
// Create the BoltDB backend
store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
if err != nil {
return err
// Create the BoltDB backend, with NoFreelistSync option
store, raftErr := raftboltdb.New(raftboltdb.Options{
Path: filepath.Join(path, "raft.db"),
NoSync: false, // fsync each log write
BoltOptions: &bbolt.Options{
NoFreelistSync: s.config.RaftBoltNoFreelistSync,
},
})
if raftErr != nil {
return raftErr
}
s.raftStore = store
stable = store
s.logger.Info("setting up raft bolt store", "no_freelist_sync", s.config.RaftBoltNoFreelistSync)
// Start publishing bboltdb metrics
go store.RunMetrics(s.shutdownCtx, 0)
// Wrap the store in a LogCache to improve performance
cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)

View File

@ -156,6 +156,13 @@ server {
disallow this server from making any scheduling decisions. This defaults to
the number of CPU cores.
- `raft_boltdb` - This is a nested object that allows configuring options for
Raft's BoltDB based log store.
- `no_freelist_sync` `(bool: false)` - Setting this to `true` will disable
syncing the BoltDB freelist to disk within the `raft.db` file. Not syncing
the freelist to disk will reduce disk IO required for write operations at the
expense of longer server startup times.
- `raft_protocol` `(int: 3)` - Specifies the Raft protocol version to use when
communicating with other Nomad servers. This affects available Autopilot
features and is typically not required as the agent internally knows the

View File

@ -449,6 +449,32 @@ those listed in [Key Metrics](#key-metrics) above.
| `nomad.scheduler.allocs.rescheduled.wait_until` | Time that a rescheduled allocation will be delayed | Float | Gauge | alloc_id, job, namespace, task_group, follow_up_eval_id |
| `nomad.state.snapshotIndex` | Current snapshot index | Integer | Gauge | host |
## Raft BoltDB Metrics
Raft database metrics are emitted by the `raft-boltdb` library.
| Metric | Description | Unit | Type |
| ----------------------------------------- | ----------------------------------------- | ----------- | ------- |
| `nomad.raft.boltdb.numFreePages` | Number of free pages | Integer | Gauge |
| `nomad.raft.boltdb.numPendingPages` | Number of pending pages | Integer | Gauge |
| `nomad.raft.boltdb.freePageBytes` | Number of free page bytes | Integer | Gauge |
| `nomad.raft.boltdb.freelistBytes` | Number of freelist bytes | Integer | Gauge |
| `nomad.raft.boltdb.totalReadTxn` | Count of total read transactions | Integer | Counter |
| `nomad.raft.boltdb.openReadTxn` | Number of current open read transactions | Integer | Gauge |
| `nomad.raft.boltdb.txstats.pageCount` | Number of pages in use | Integer | Gauge |
| `nomad.raft.boltdb.txstats.pageAlloc` | Number of page allocations | Integer | Gauge |
| `nomad.raft.boltdb.txstats.cursorCount` | Count of total database cursors | Integer | Counter |
| `nomad.raft.boltdb.txstats.nodeCount` | Count of total database nodes | Integer | Counter |
| `nomad.raft.boltdb.txstats.nodeDeref` | Count of total database node dereferences | Integer | Counter |
| `nomad.raft.boltdb.txstats.rebalance` | Count of total rebalance operations | Integer | Counter |
| `nomad.raft.boltdb.txstats.rebalanceTime` | Sample of rebalance operation times | Nanoseconds | Summary |
| `nomad.raft.boltdb.txstats.split` | Count of total split operations | Integer | Counter |
| `nomad.raft.boltdb.txstats.spill` | Count of total spill operations | Integer | Counter |
| `nomad.raft.boltdb.txstats.spillTime` | Sample of spill operation times | Nanoseconds | Summary |
| `nomad.raft.boltdb.txstats.write` | Count of total write operations | Integer | Counter |
| `nomad.raft.boltdb.txstats.writeTime` | Sample of write operation times | Nanoseconds | Summary |
[tagged-metrics]: /docs/telemetry/metrics#tagged-metrics
[s_port_plan_failure]: /s/port-plan-failure

View File

@ -51,6 +51,26 @@ The volume staging directory for new CSI plugin tasks will now be
mounted to the task's `NOMAD_TASK_DIR` instead of the
`csi_plugin.mount_config`.
#### Server Raft Database
The server raft database in `raft.db` will be automatically migrated to a new
underlying implementation provided by `go.etcd.io/bbolt`. Downgrading to a previous
version of the server after upgrading it to Nomad 1.3 is not supported.
The new database implementation enables a new server configuration option for
controlling the underlying freelist-sync behavior. Clusters experiencing extreme
disk IO on servers may want to consider disabling freelist-sync to reduce load.
The tradeoff is longer server startup times, as the database must be completely
scanned to re-build the freelist from scratch.
```hcl
server {
raft_boltdb {
no_freelist_sync = true
}
}
```
## Nomad 1.2.6, 1.1.12, and 1.0.18
#### ACL requirement for the job parse endpoint