[demo] Kadalu CSI support for Nomad (#11207)

Leela Venkaiah G committed 2021-10-07 00:59:15 +05:30 (via GitHub)
parent 48aa6e26e9, commit dec0b925a9
5 changed files with 602 additions and 0 deletions

---

**`README.md`** (new file, 257 lines):

# Kadalu CSI Plugin
Author: @leelavg and the [Kadalu][kadalu_org] team.

The configuration here uses external [Gluster] volumes as persistent storage
in Nomad via the [Kadalu CSI][kadalu] plugin.

Review the actual job files before proceeding with this demo and change the
configuration as required, then follow along with the commands using your own
values.

Locally tested against Nomad v1.1.4.

## Local Development

Skip this section if you already have a Nomad cluster set up.
```console
# Clone the configuration repository used to create a local Nomad cluster in Docker
$ git clone https://github.com/leelavg/kadalu-nomad && cd kadalu-nomad
# Install Shipyard following the instructions in https://shipyard.run
# Create local cluster
$ shipyard run
[...]
$ eval $(shipyard env)
$ export job_dir="$(pwd)/kadalu"
```

## Demo

### Prerequisites

- Configure the variables mentioned in `cluster.vars` to reflect your external
  Gluster details.
- For convenience, the necessary variables are set from the CLI when running
  each job in this demo.

```console
$ export volname="sample-pool" gluster_hosts="10.x.x.x" gluster_volname="sample-vol" job_dir="${job_dir:-$(pwd)}"
# Make sure the external gluster volume is started and quota is enabled
$ ssh $gluster_hosts "gluster volume info $gluster_volname | grep Status"
Status: Started
$ ssh $gluster_hosts "gluster volume quota $gluster_volname enable"
volume quota : success
```
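
The CSI plugin tasks run as privileged containers (they need access to
`/dev/fuse` to mount Gluster volumes), so the Docker driver on every Nomad
client must allow privileged containers; see the [Docker driver
docs](https://www.nomadproject.io/docs/drivers/docker#privileged). A minimal
sketch of the relevant client agent configuration:

```hcl
# Nomad client agent configuration (sketch): the Kadalu CSI jobs set
# privileged = true, which the Docker driver rejects unless the client
# config explicitly allows it.
plugin "docker" {
  config {
    allow_privileged = true
  }
}
```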

### CSI Deployment

Deploy the CSI plugin controller.

```console
$ nomad run -var="volname=$volname" -var="gluster_hosts=$gluster_hosts" -var="gluster_volname=$gluster_volname" $job_dir/controller.nomad
==> 2021-09-20T18:23:07+05:30: Monitoring evaluation "19317b74"
2021-09-20T18:23:07+05:30: Evaluation triggered by job "kadalu-csi-controller"
==> 2021-09-20T18:23:08+05:30: Monitoring evaluation "19317b74"
2021-09-20T18:23:08+05:30: Evaluation within deployment: "d9ee4dd7"
2021-09-20T18:23:08+05:30: Allocation "d55e314d" created: node "4e105698", group "controller"
2021-09-20T18:23:08+05:30: Evaluation status changed: "pending" -> "complete"
==> 2021-09-20T18:23:08+05:30: Evaluation "19317b74" finished with status "complete"
==> 2021-09-20T18:23:08+05:30: Monitoring deployment "d9ee4dd7"
✓ Deployment "d9ee4dd7" successful
2021-09-20T18:23:28+05:30
ID          = d9ee4dd7
Job ID      = kadalu-csi-controller
Job Version = 0
Status      = successful
Description = Deployment completed successfully
Deployed
Task Group  Desired  Placed  Healthy  Unhealthy  Progress Deadline
controller  1        1       1        0          2021-09-20T13:03:27Z
```

Deploy the CSI node plugin, which runs as a `system` job so that one instance
is placed on every Nomad client.

```console
$ nomad run -var="volname=$volname" -var="gluster_hosts=$gluster_hosts" -var="gluster_volname=$gluster_volname" $job_dir/nodeplugin.nomad
==> 2021-09-20T18:23:53+05:30: Monitoring evaluation "bd4d95d1"
2021-09-20T18:23:53+05:30: Evaluation triggered by job "kadalu-csi-nodeplugin"
==> 2021-09-20T18:23:54+05:30: Monitoring evaluation "bd4d95d1"
2021-09-20T18:23:54+05:30: Allocation "4c05ab5a" created: node "4e105698", group "nodeplugin"
2021-09-20T18:23:54+05:30: Evaluation status changed: "pending" -> "complete"
==> 2021-09-20T18:23:54+05:30: Evaluation "bd4d95d1" finished with status "complete"
```

Verify the CSI plugin status.

```console
$ nomad plugin status kadalu-csi
ID                   = kadalu-csi
Provider             = kadalu
Version              = 0.8.6
Controllers Healthy  = 1
Controllers Expected = 1
Nodes Healthy        = 1
Nodes Expected       = 1
Allocations
ID        Node ID   Task Group  Version  Desired  Status   Created    Modified
d55e314d  4e105698  controller  0        run      running  1m20s ago  1m ago
4c05ab5a  4e105698  nodeplugin  0        run      running  35s ago    20s ago
```

### Volume Management

Next, you will walk through volume creation, attachment, and deletion,
covering a typical volume lifecycle.

#### Creating a Volume

```console
# Create Nomad volume
$ sed -e "s/POOL/$volname/" -e "s/GHOST/$gluster_hosts/" -e "s/GVOL/$gluster_volname/" $job_dir/volume.hcl | nomad volume create -
Created external volume csi-test with ID csi-test
```
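
The `volume.hcl` volume specification itself is not included in this commit
view. Below is a minimal sketch of what such a spec might look like, assuming
Nomad v1.1's volume specification format; only the `POOL`/`GHOST`/`GVOL`
placeholders are taken from the `sed` command above, and the `parameters` key
names are illustrative assumptions rather than confirmed Kadalu options.

```hcl
# Hypothetical sketch of volume.hcl; see the demo repository for the real file.
id        = "csi-test"
name      = "csi-test"
type      = "csi"
plugin_id = "kadalu-csi"

capacity_min = "100MiB"
capacity_max = "500MiB"

# Must match the access/attachment modes requested by app.nomad
capability {
  access_mode     = "multi-node-multi-writer"
  attachment_mode = "file-system"
}

# Plugin-specific options; these key names are assumptions
parameters {
  storage_pool    = "POOL"
  gluster_hosts   = "GHOST"
  gluster_volname = "GVOL"
}
```

The `sed` invocation above substitutes the placeholders with your exported
shell variables before piping the result to `nomad volume create -`.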

#### Attaching and Using a Volume

```console
# Attach the volume to a sample app
$ nomad run $job_dir/app.nomad
==> 2021-09-20T18:28:28+05:30: Monitoring evaluation "e6dd3129"
2021-09-20T18:28:28+05:30: Evaluation triggered by job "sample-pv-check"
==> 2021-09-20T18:28:29+05:30: Monitoring evaluation "e6dd3129"
2021-09-20T18:28:29+05:30: Evaluation within deployment: "814e328c"
2021-09-20T18:28:29+05:30: Allocation "64745b25" created: node "4e105698", group "apps"
2021-09-20T18:28:29+05:30: Evaluation status changed: "pending" -> "complete"
==> 2021-09-20T18:28:29+05:30: Evaluation "e6dd3129" finished with status "complete"
==> 2021-09-20T18:28:29+05:30: Monitoring deployment "814e328c"
✓ Deployment "814e328c" successful
2021-09-20T18:28:58+05:30
ID          = 814e328c
Job ID      = sample-pv-check
Job Version = 0
Status      = successful
Description = Deployment completed successfully
Deployed
Task Group  Desired  Placed  Healthy  Unhealthy  Progress Deadline
apps        1        1       1        0          2021-09-20T13:08:56Z
# Export allocation ID (64745b25) from the previous command output
$ export app=64745b25
# Verify that the CSI Volume is accessible
$ nomad alloc exec $app bash /kadalu/script.sh
This is a sample application
# df -h
Filesystem                         Size    Used  Available  Use%  Mounted on
<gluster_hosts>:<gluster_volname>  181.2M  0     181.2M     0%    /mnt/pv
# mount
Write/Read test on PV mount
Mon Sep 20 12:59:34 UTC 2021
SUCCESS
# Write some data on the volume
$ nomad alloc exec $app bash -c 'cd /mnt/pv; for i in {1..10}; do cat /dev/urandom | tr -dc [:space:][:print:] | head -c 1m > file$i; done;'
# Checksum the written data
$ nomad alloc exec $app bash -c 'ls /mnt/pv; find /mnt/pv -type f -exec md5sum {} + | cut -f1 -d" " | sort | md5sum'
file1 file2 file4 file6 file8
file10 file3 file5 file7 file9
6776dd355c0f2ba5a1781b9831e5c174 -
# Stop the sample app and run it again to check data persistence
$ nomad status
ID                     Type     Priority  Status   Submit Date
kadalu-csi-controller  service  50        running  2021-09-20T18:23:07+05:30
kadalu-csi-nodeplugin  system   50        running  2021-09-20T18:23:53+05:30
sample-pv-check        service  50        running  2021-09-20T18:28:28+05:30
$ nomad stop sample-pv-check
==> 2021-09-20T18:36:47+05:30: Monitoring evaluation "eecc0c00"
2021-09-20T18:36:47+05:30: Evaluation triggered by job "sample-pv-check"
==> 2021-09-20T18:36:48+05:30: Monitoring evaluation "eecc0c00"
2021-09-20T18:36:48+05:30: Evaluation within deployment: "814e328c"
2021-09-20T18:36:48+05:30: Evaluation status changed: "pending" -> "complete"
==> 2021-09-20T18:36:48+05:30: Evaluation "eecc0c00" finished with status "complete"
==> 2021-09-20T18:36:48+05:30: Monitoring deployment "814e328c"
✓ Deployment "814e328c" successful
2021-09-20T18:36:48+05:30
ID          = 814e328c
Job ID      = sample-pv-check
Job Version = 0
Status      = successful
Description = Deployment completed successfully
Deployed
Task Group  Desired  Placed  Healthy  Unhealthy  Progress Deadline
apps        1        1       1        0          2021-09-20T13:08:56Z
$ nomad run $job_dir/app.nomad
==> 2021-09-20T18:37:49+05:30: Monitoring evaluation "e04b4549"
2021-09-20T18:37:49+05:30: Evaluation triggered by job "sample-pv-check"
==> 2021-09-20T18:37:50+05:30: Monitoring evaluation "e04b4549"
2021-09-20T18:37:50+05:30: Evaluation within deployment: "66d246ee"
2021-09-20T18:37:50+05:30: Allocation "526d5543" created: node "4e105698", group "apps"
2021-09-20T18:37:50+05:30: Evaluation status changed: "pending" -> "complete"
==> 2021-09-20T18:37:50+05:30: Evaluation "e04b4549" finished with status "complete"
==> 2021-09-20T18:37:50+05:30: Monitoring deployment "66d246ee"
✓ Deployment "66d246ee" successful
2021-09-20T18:38:10+05:30
ID          = 66d246ee
Job ID      = sample-pv-check
Job Version = 2
Status      = successful
Description = Deployment completed successfully
Deployed
Task Group  Desired  Placed  Healthy  Unhealthy  Progress Deadline
apps        1        1       1        0          2021-09-20T13:18:08Z
# Export the new allocation ID and verify that md5sum matches after stopping and
# running the same job
$ export app=526d5543
$ nomad alloc exec $app bash -c 'ls /mnt/pv; find /mnt/pv -type f -exec md5sum {} + | cut -f1 -d" " | sort | md5sum'
file1 file10 file2 file3 file4 file5 file6 file7 file8 file9
6776dd355c0f2ba5a1781b9831e5c174 -
```

#### Cleanup

```console
# Stop the sample app, delete the volume, and stop the CSI plugin components
$ nomad stop sample-pv-check
$ nomad volume delete csi-test
$ nomad stop kadalu-csi-nodeplugin
$ nomad stop kadalu-csi-controller
# Destroy local Shipyard cluster
$ shipyard destroy
```

## Contact

- For any extra information or feature requests regarding the Kadalu CSI
  plugin, please raise an issue against the [`kadalu` repo][kadalu].
- For any extra information regarding the local Nomad dev setup for CSI,
  please raise an issue against the [`kadalu-nomad` repo][kadalu_nomad].
- Based on demand and feature requests, we may work on supporting an internal
  Gluster cluster deployed and managed by Nomad itself (feature parity with
  current Kubernetes deployments).
- If this folder isn't updated frequently, you can find up-to-date jobs in the
  [`nomad` folder][nomad_folder] of the `kadalu` repository.

[Gluster]: https://www.gluster.org/
[kadalu]: https://github.com/kadalu/kadalu
[kadalu_org]: https://github.com/kadalu
[kadalu_nomad]: https://github.com/leelavg/kadalu-nomad
[nomad_folder]: https://github.com/kadalu/kadalu/tree/devel/nomad

---

**`app.nomad`** (new file, 45 lines):

```hcl
variable "cn_network" {
default = "dc1"
}
variable "vol-id" {
default = "csi-test"
}
job "sample-pv-check" {
datacenters = ["${var.cn_network}"]
group "apps" {
volume "test" {
type = "csi"
source = "${var.vol-id}"
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
}
task "sample" {
# To verify volume is mounted correctly and accessible, please run
# 'nomad alloc exec <alloc_id> bash /kadalu/script.sh'
# after this job is scheduled and running on a nomad client
driver = "docker"
config {
image = "kadalu/sample-pv-check-app:latest"
force_pull = false
entrypoint = [
"tail",
"-f",
"/dev/null",
]
}
volume_mount {
volume = "test"
# Script in this image looks for PV mounted at '/mnt/pv'
destination = "/mnt/pv"
}
}
}
}

```

---

**`cluster.vars`** (new file, 25 lines):

```hcl
# client_nodes is only applicable to the local dev environment
client_nodes = 0

# Below are the variables, with defaults, that the corresponding jobs accept

/* # controller.nomad */
/* cn_network      = "dc1" */
/* volname         = "sample-pool" */
/* gluster_hosts   = "ghost.example.com" */
/* gluster_volname = "dist" */
/* gluster_user    = "root" */
/* kadalu_version  = "0.8.6" */
/* ssh_priv_path   = "/root/.ssh/id_rsa" */

/* # nodeplugin.nomad */
/* cn_network      = "dc1" */
/* volname         = "sample-pool" */
/* gluster_hosts   = "ghost.example.com" */
/* gluster_volname = "dist" */
/* kadalu_version  = "0.8.6" */

/* # app.nomad */
/* cn_network = "dc1" */
/* vol-id     = "csi-test" */

```

---

**`controller.nomad`** (new file, 164 lines):

```hcl
variable "cn_network" {
default = "dc1"
description = "Data Ceneter that the job needs to be run in"
}
variable "volname" {
default = "sample-pool"
description = "Volume name for Kadalu CSI which is used for all PVC creations purposes"
}
variable "gluster_hosts" {
default = "ghost.example.com"
description = <<-EOS
- External gluster host where the gluster volume is created, started and quota is set
- Multiple hosts can be supplied like "host1,host2,host3" (no spaces and trimmed endings)
- Prefer to supply only one or else need to supply the same wherever interpolation is not supported (ex: in volume.hcl files)
EOS
}
variable "gluster_volname" {
default = "dist"
description = "Gluster volume name in external cluster"
}
variable "kadalu_version" {
default = "0.8.6"
description = "Kadalu CSI version which is tested against Nomad (v1.1.4)"
}
variable "gluster_user" {
default = "root"
description = "Remote user in external gluster cluster who has privileges to run gluster cli"
}
variable "ssh_priv_path" {
default = "~/.ssh/id_rsa"
description = <<-EOS
- Path to SSH private key which is used to connect to external gluster
- Needed only if gluster native quota capabilities is needed
- If not needed all corresponding SSH related info should be removed from this Job
- However it is highly recommended to supply SSH Private key for utilizing on the fly PVC expansion capabilities even with external gluster cluster
- SSH Key will only be used to perform two ops: set quota and change quota
- Please refer https://kadalu.io/rfcs/0007-Using-GlusterFS-directory-quota-for-external-gluster-volumes.html for more info
EOS
}
locals {
ssh_priv_key = "${file("${pathexpand("${var.ssh_priv_path}")}")}"
}
job "kadalu-csi-controller" {
datacenters = ["${var.cn_network}"]
type = "service"
group "controller" {
task "kadalu-controller" {
driver = "docker"
template {
# This is basically a JSON file which is used to connect to external gluster
# Make sure it follows JSON convention (No comma ',' for last key pair)
data = <<-EOS
{
"volname": "${var.volname}",
"volume_id": "${uuidv5("dns", "${var.volname}.kadalu.io")}",
"type": "External",
"pvReclaimPolicy": "delete",
"kadalu_format": "native",
"gluster_hosts": "${var.gluster_hosts}",
"gluster_volname": "${var.gluster_volname}",
"gluster_options": "log-level=DEBUG"
}
EOS
destination = "${NOMAD_TASK_DIR}/${var.volname}.info"
change_mode = "noop"
}
template {
data = "${uuidv5("dns", "kadalu.io")}"
destination = "${NOMAD_TASK_DIR}/uid"
change_mode = "noop"
}
template {
data = "${local.ssh_priv_key}"
destination = "${NOMAD_SECRETS_DIR}/ssh-privatekey"
change_mode = "noop"
perms = "600"
}
template {
# No need to supply 'SECRET_XXX' key if not using gluster native quota
data = <<-EOS
NODE_ID = "${node.unique.name}"
CSI_ENDPOINT = "unix://csi/csi.sock"
SECRET_GLUSTERQUOTA_SSH_USERNAME = "${var.gluster_user}"
KADALU_VERSION = "${var.kadalu_version}"
CSI_ROLE = "controller"
VERBOSE = "yes"
EOS
destination = "${NOMAD_TASK_DIR}/file.env"
env = true
}
config {
image = "docker.io/kadalu/kadalu-csi:${var.kadalu_version}"
# Nomad client config for docker plugin should have privileged set to 'true'
# refer https://www.nomadproject.io/docs/drivers/docker#privileged
# Need to access '/dev/fuse' for mounting external gluster volume
privileged = true
mount {
# Analogous to kadalu-info configmap
type = "bind"
# Make sure the source paths starts with current dir (basically: "./")
source = "./${NOMAD_TASK_DIR}/${var.volname}.info"
target = "/var/lib/gluster/${var.volname}.info"
readonly = true
}
mount {
# Extra baggage for now, will be taken care in Kadalu in next release
type = "bind"
source = "./${NOMAD_TASK_DIR}/uid"
target = "/var/lib/gluster/uid"
readonly = true
}
mount {
# If you are not using gluster native quota comment out this stanza
type = "bind"
source = "./${NOMAD_SECRETS_DIR}/ssh-privatekey"
target = "/etc/secret-volume/ssh-privatekey"
readonly = true
}
mount {
# Logging
type = "tmpfs"
target = "/var/log/gluster"
readonly = false
tmpfs_options {
# 1 MB
size = 1000000 # size in bytes
}
}
}
csi_plugin {
id = "kadalu-csi"
type = "controller"
mount_dir = "/csi"
}
}
}
}

```

---

**`nodeplugin.nomad`** (new file, 111 lines):

```hcl
# Please refer to 'controller.nomad' for variable and job descriptions
variable "cn_network" {
  default = "dc1"
}

variable "volname" {
  default = "sample-pool"
}

variable "gluster_hosts" {
  default = "ghost.example.com"
}

variable "gluster_volname" {
  default = "dist"
}

variable "kadalu_version" {
  default = "0.8.6"
}

job "kadalu-csi-nodeplugin" {
  datacenters = ["${var.cn_network}"]

  # Should run on every Nomad client
  type = "system"

  update {
    stagger      = "5s"
    max_parallel = 1
  }

  group "nodeplugin" {
    task "kadalu-nodeplugin" {
      driver = "docker"

      template {
        data        = <<-EOS
        {
            "volname": "${var.volname}",
            "volume_id": "${uuidv5("dns", "${var.volname}.kadalu.io")}",
            "type": "External",
            "pvReclaimPolicy": "delete",
            "kadalu_format": "native",
            "gluster_hosts": "${var.gluster_hosts}",
            "gluster_volname": "${var.gluster_volname}",
            "gluster_options": "log-level=DEBUG"
        }
        EOS
        destination = "${NOMAD_TASK_DIR}/${var.volname}.info"
        change_mode = "noop"
      }

      template {
        data        = "${uuidv5("dns", "kadalu.io")}"
        destination = "${NOMAD_TASK_DIR}/uid"
        change_mode = "noop"
      }

      template {
        data        = <<-EOS
        NODE_ID = "${node.unique.name}"
        CSI_ENDPOINT = "unix://csi/csi.sock"
        KADALU_VERSION = "${var.kadalu_version}"
        CSI_ROLE = "nodeplugin"
        VERBOSE = "yes"
        EOS
        destination = "${NOMAD_TASK_DIR}/file.env"
        env         = true
      }

      config {
        image      = "docker.io/kadalu/kadalu-csi:${var.kadalu_version}"
        privileged = true

        mount {
          type     = "bind"
          source   = "./${NOMAD_TASK_DIR}/${var.volname}.info"
          target   = "/var/lib/gluster/${var.volname}.info"
          readonly = true
        }

        mount {
          type     = "bind"
          source   = "./${NOMAD_TASK_DIR}/uid"
          target   = "/var/lib/gluster/uid"
          readonly = true
        }

        mount {
          type     = "tmpfs"
          target   = "/var/log/gluster"
          readonly = false

          tmpfs_options {
            size = 1000000 # size in bytes (~1 MB)
          }
        }
      }

      csi_plugin {
        id        = "kadalu-csi"
        type      = "node"
        mount_dir = "/csi"
      }
    }
  }
}
```