Add support for restarting docker nodes, and for providing args (#20409)

Nick Cabatoff 2023-04-28 14:03:14 -04:00 committed by GitHub
parent 2a954ef072
commit f19f537a4f
3 changed files with 251 additions and 31 deletions

View File

@@ -24,6 +24,7 @@ import (
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/mount"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/api/types/strslice"
"github.com/docker/docker/client"
@@ -41,26 +42,27 @@ type Runner struct {
}
type RunOptions struct {
ImageRepo string
ImageTag string
ContainerName string
Cmd []string
Entrypoint []string
Env []string
NetworkName string
NetworkID string
CopyFromTo map[string]string
Ports []string
DoNotAutoRemove bool
AuthUsername string
AuthPassword string
OmitLogTimestamps bool
LogConsumer func(string)
Capabilities []string
PreDelete bool
PostStart func(string, string) error
LogStderr io.Writer
LogStdout io.Writer
VolumeNameToMountPoint map[string]string
}
func NewDockerAPI() (*client.Client, error) {
@@ -406,6 +408,15 @@ func (d *Runner) Start(ctx context.Context, addSuffix, forceLocalAddr bool) (*St
_, _ = ioutil.ReadAll(resp)
}
for vol, mtpt := range d.RunOptions.VolumeNameToMountPoint {
hostConfig.Mounts = append(hostConfig.Mounts, mount.Mount{
Type: "volume",
Source: vol,
Target: mtpt,
ReadOnly: false,
})
}
c, err := d.DockerAPI.ContainerCreate(ctx, cfg, hostConfig, netConfig, nil, cfg.Hostname)
if err != nil {
return nil, fmt.Errorf("container create failed: %v", err)
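The new VolumeNameToMountPoint field maps named Docker volumes to paths inside the container, and the loop above turns each entry into a volume-type mount.Mount on the host config, so anything written under the mount point survives the container being removed and recreated. A minimal sketch of how a caller might use it, assuming this helper package is imported as docker and that its NewServiceRunner constructor (not part of this diff) builds the Runner; the image, container name, and volume name are placeholders:

	// Attach the named volume "vault-persistent-data" at /vault/file so the
	// node's storage outlives the container.
	runner, err := docker.NewServiceRunner(docker.RunOptions{
		ImageRepo:     "hashicorp/vault",
		ImageTag:      "latest",
		ContainerName: "vault-persistent",
		Cmd:           []string{"server"},
		VolumeNameToMountPoint: map[string]string{
			"vault-persistent-data": "/vault/file",
		},
	})
	if err != nil {
		t.Fatal(err)
	}
	_ = runner // runner.Start / StartNewService (above) then create the container with the mounts attached.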

View File

@@ -31,6 +31,7 @@ import (
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/volume"
docker "github.com/docker/docker/client"
"github.com/hashicorp/go-cleanhttp"
log "github.com/hashicorp/go-hclog"
@@ -474,6 +475,8 @@ type DockerClusterNode struct {
ContainerIPAddress string
ImageRepo string
ImageTag string
DataVolumeName string
cleanupVolume func()
}
func (n *DockerClusterNode) TLSConfig() *tls.Config {
@@ -539,20 +542,36 @@ func (n *DockerClusterNode) newAPIClient() (*api.Client, error) {
return client, nil
}
// Cleanup kills the container of the node and deletes its data volume
func (n *DockerClusterNode) Cleanup() {
n.cleanup()
}
// Stop kills the container of the node
func (n *DockerClusterNode) Stop() {
n.cleanupContainer()
}
func (n *DockerClusterNode) cleanup() error {
if n.Container == nil || n.Container.ID == "" {
return nil
}
n.cleanupContainer()
n.cleanupVolume()
return nil
}
func (n *DockerClusterNode) Start(ctx context.Context, opts *DockerClusterOptions) error {
if n.DataVolumeName == "" {
vol, err := n.dockerAPI.VolumeCreate(ctx, volume.CreateOptions{})
if err != nil {
return err
}
n.DataVolumeName = vol.Name
n.cleanupVolume = func() {
_ = n.dockerAPI.VolumeRemove(ctx, vol.Name, false)
}
}
vaultCfg := map[string]interface{}{}
vaultCfg["listener"] = map[string]interface{}{
"tcp": map[string]interface{}{
@@ -628,6 +647,8 @@ func (n *DockerClusterNode) start(ctx context.Context, caDir string, opts *Docke
return err
}
caDir := filepath.Join(n.Cluster.tmpDir, "ca")
// setup plugin bin copy if needed
copyFromTo := map[string]string{
n.WorkDir: "/vault/config",
@@ -656,7 +677,7 @@ func (n *DockerClusterNode) start(ctx context.Context, caDir string, opts *Docke
// We don't need to run update-ca-certificates in the container, because
// we're providing the CA in the raft join call, and otherwise Vault
// servers don't talk to one another on the API port.
Cmd: append([]string{"server"}, opts.Args...),
Env: []string{
// For now we're using disable_mlock, because this is for testing
// anyway, and because it prevents us using external plugins.
@@ -686,12 +707,22 @@ func (n *DockerClusterNode) start(ctx context.Context, caDir string, opts *Docke
},
Capabilities: []string{"NET_ADMIN"},
OmitLogTimestamps: true,
VolumeNameToMountPoint: map[string]string{
n.DataVolumeName: "/vault/file",
},
})
if err != nil {
return err
}
n.runner = r
probe := opts.StartProbe
if probe == nil {
probe = func(c *api.Client) error {
_, err = c.Sys().SealStatus()
return err
}
}
svc, _, err := r.StartNewService(ctx, false, false, func(ctx context.Context, host string, port int) (dockhelper.ServiceConfig, error) {
config, err := n.apiConfig()
if err != nil {
@@ -702,10 +733,11 @@ func (n *DockerClusterNode) start(ctx context.Context, caDir string, opts *Docke
if err != nil {
return nil, err
}
err = probe(client)
if err != nil {
return nil, err
}
return dockhelper.NewServiceHostPort(host, port), nil
})
if err != nil {
@@ -730,6 +762,12 @@ func (n *DockerClusterNode) start(ctx context.Context, caDir string, opts *Docke
n.RealAPIAddr = "https://" + n.ContainerIPAddress + ":8200"
n.cleanupContainer = svc.Cleanup
client, err := n.newAPIClient()
if err != nil {
return err
}
client.SetToken(n.Cluster.rootToken)
n.client = client
return nil
}
@@ -758,6 +796,8 @@ type DockerClusterOptions struct {
ImageTag string
CloneCA *DockerCluster
VaultBinary string
Args []string
StartProbe func(*api.Client) error
}
func ensureLeaderMatches(ctx context.Context, client *api.Client, ready func(response *api.LeaderResponse) error) error {
@@ -877,15 +917,9 @@ func (dc *DockerCluster) addNode(ctx context.Context, opts *DockerClusterOptions
if err := os.MkdirAll(node.WorkDir, 0o755); err != nil {
return err
}
if err := node.Start(ctx, opts); err != nil {
return err
}
client, err := node.newAPIClient()
if err != nil {
return err
}
client.SetToken(dc.rootToken)
node.client = client
return nil
}
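Together, the exported Stop/Start pair and the new Args and StartProbe options let a test bounce a node, optionally with extra server arguments and a readiness check suited to that mode (the default probe above calls SealStatus, which is not the right check when the node is running in recovery mode). A rough sketch of the intended call pattern, mirroring the test added below; cluster, opts, and ctx are assumed to come from the surrounding test:

	node := cluster.Nodes()[0].(*docker.DockerClusterNode)

	// Plain restart: the same data volume is reused, so the node only needs unsealing.
	node.Stop()
	if err := node.Start(ctx, opts); err != nil {
		t.Fatal(err)
	}

	// Restart in recovery mode: pass an extra server argument and a probe that
	// recovery mode can actually answer.
	node.Stop()
	recoveryOpts := *opts
	recoveryOpts.Args = []string{"-recovery"}
	recoveryOpts.StartProbe = func(client *api.Client) error {
		_, err := client.Sys().GenerateRecoveryOperationTokenStatusWithContext(ctx)
		return err
	}
	if err := node.Start(ctx, &recoveryOpts); err != nil {
		t.Fatal(err)
	}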

View File

@@ -0,0 +1,175 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package misc
import (
"context"
"os"
"path"
"testing"
"github.com/mitchellh/mapstructure"
"github.com/go-test/deep"
"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/sdk/helper/testcluster"
"github.com/hashicorp/vault/sdk/helper/testcluster/docker"
)
// TestRecovery_Docker exercises recovery mode. It starts a single-node raft
// cluster, writes some data, then restarts it and makes sure that we can read
// the data (that's mostly to make sure that our framework is properly handling
// a volume that persists across runs). It then starts the node in recovery mode
// and deletes the data via sys/raw, and finally restarts it in normal mode and
// makes sure the data has been deleted.
func TestRecovery_Docker(t *testing.T) {
ctx := context.TODO()
t.Parallel()
binary := os.Getenv("VAULT_BINARY")
if binary == "" {
t.Skip("only running docker test when $VAULT_BINARY present")
}
opts := &docker.DockerClusterOptions{
ImageRepo: "hashicorp/vault",
// We're replacing the binary anyway, so we're not too particular about
// the docker image version tag.
ImageTag: "latest",
VaultBinary: binary,
ClusterOptions: testcluster.ClusterOptions{
NumCores: 1,
VaultNodeConfig: &testcluster.VaultNodeConfig{
LogLevel: "TRACE",
// If you want the test to run faster locally, you could
// uncomment this performance_multiplier change.
//StorageOptions: map[string]string{
// "performance_multiplier": "1",
//},
},
},
}
cluster := docker.NewTestDockerCluster(t, opts)
defer cluster.Cleanup()
var secretUUID string
{
client := cluster.Nodes()[0].APIClient()
if err := client.Sys().Mount("secret/", &api.MountInput{
Type: "kv-v1",
}); err != nil {
t.Fatal(err)
}
fooVal := map[string]interface{}{"bar": 1.0}
_, err := client.Logical().Write("secret/foo", fooVal)
if err != nil {
t.Fatal(err)
}
secret, err := client.Logical().List("secret/")
if err != nil {
t.Fatal(err)
}
if diff := deep.Equal(secret.Data["keys"], []interface{}{"foo"}); len(diff) > 0 {
t.Fatalf("got=%v, want=%v, diff: %v", secret.Data["keys"], []string{"foo"}, diff)
}
mounts, err := client.Sys().ListMounts()
if err != nil {
t.Fatal(err)
}
secretMount := mounts["secret/"]
if secretMount == nil {
t.Fatalf("secret mount not found, mounts: %v", mounts)
}
secretUUID = secretMount.UUID
}
listSecrets := func() []string {
client := cluster.Nodes()[0].APIClient()
secret, err := client.Logical().List("secret/")
if err != nil {
t.Fatal(err)
}
if secret == nil {
return nil
}
var result []string
if err := mapstructure.Decode(secret.Data["keys"], &result); err != nil {
t.Fatal(err)
}
return result
}
restart := func() {
cluster.Nodes()[0].(*docker.DockerClusterNode).Stop()
err := cluster.Nodes()[0].(*docker.DockerClusterNode).Start(ctx, opts)
if err != nil {
t.Fatalf("node restart failed: %v", err)
}
err = testcluster.UnsealAllNodes(ctx, cluster)
if err != nil {
t.Fatalf("node unseal failed: %v", err)
}
_, err = testcluster.WaitForActiveNode(ctx, cluster)
if err != nil {
t.Fatalf("node didn't become active: %v", err)
}
}
restart()
if len(listSecrets()) == 0 {
t.Fatal("expected secret to still be there")
}
// Now bring it up in recovery mode.
{
cluster.Nodes()[0].(*docker.DockerClusterNode).Stop()
newOpts := *opts
opts := &newOpts
opts.Args = []string{"-recovery"}
opts.StartProbe = func(client *api.Client) error {
// In recovery mode almost no paths are supported, and pretty much
// the only ones that don't require a recovery token are the ones used
// to generate a recovery token.
_, err := client.Sys().GenerateRecoveryOperationTokenStatusWithContext(ctx)
return err
}
err := cluster.Nodes()[0].(*docker.DockerClusterNode).Start(ctx, opts)
if err != nil {
t.Fatalf("node restart with -recovery failed: %v", err)
}
client := cluster.Nodes()[0].APIClient()
recoveryToken, err := testcluster.GenerateRoot(cluster, testcluster.GenerateRecovery)
if err != nil {
t.Fatalf("recovery token generation failed: %v", err)
}
_, err = testcluster.GenerateRoot(cluster, testcluster.GenerateRecovery)
if err == nil {
t.Fatal("expected second generate-root to fail")
}
client.SetToken(recoveryToken)
secret, err := client.Logical().List(path.Join("sys/raw/logical", secretUUID))
if err != nil {
t.Fatal(err)
}
if diff := deep.Equal(secret.Data["keys"], []interface{}{"foo"}); len(diff) > 0 {
t.Fatalf("got=%v, want=%v, diff: %v", secret.Data, []string{"foo"}, diff)
}
_, err = client.Logical().Delete(path.Join("sys/raw/logical", secretUUID, "foo"))
if err != nil {
t.Fatal(err)
}
}
// Now go back to regular mode and verify that the sys/raw deletion took effect
restart()
if len(listSecrets()) != 0 {
t.Fatal("expected secret to still be gone")
}
}
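The test is skipped unless VAULT_BINARY points at a locally built vault binary to copy into the container. Assuming the file lives in the misc external-test package (the directory path below is a guess, not stated in this diff), an invocation would look roughly like:

	VAULT_BINARY=$(pwd)/bin/vault go test -v -run TestRecovery_Docker ./vault/external_tests/misc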