diff --git a/bench/Makefile b/bench/Makefile
new file mode 100644
index 000000000..fab574396
--- /dev/null
+++ b/bench/Makefile
@@ -0,0 +1,39 @@
+# Load-test targets for the Consul KV HTTP API, driven by the `boom` tool.
+# Any variable below can be overridden per run, e.g. `make put REQ=1000`.
+
+# Total requests per test (passed to boom -n).
+REQ=20480
+# Concurrent clients per test (passed to boom -c).
+CLIENTS=64
+# KV key URL that every test targets.
+ADDR=http://localhost:8500/v1/kv/bench
+# Request body for the PUT test (passed to boom -d); the quotes are consumed by the shell.
+DATA="74a31e96-1d0f-4fa7-aa14-7212a326986e"
+# GOMAXPROCS value set in boom's environment.
+MAXPROCS=4
+
+# These targets name actions, not files; without .PHONY a stray file called
+# `put` (or `all`, ...) in this directory would make the test silently not run.
+.PHONY: all put get-default get-stale get-consistent
+
+# Run every benchmark. A serial make walks the prerequisites left to right,
+# so the PUT test writes the key before the GET tests read it.
+all: put get-default get-stale get-consistent
+
+put:
+	@echo "===== PUT test ====="
+	GOMAXPROCS=${MAXPROCS} boom -m PUT -d ${DATA} -n ${REQ} -c ${CLIENTS} ${ADDR}
+
+get-default:
+	@echo "===== GET default test ====="
+	GOMAXPROCS=${MAXPROCS} boom -n ${REQ} -c ${CLIENTS} ${ADDR}
+
+# The URLs below are quoted because an unquoted `?` is a shell glob character.
+get-stale:
+	@echo "===== GET stale test ====="
+	GOMAXPROCS=${MAXPROCS} boom -n ${REQ} -c ${CLIENTS} "${ADDR}?stale"
+
+get-consistent:
+	@echo "===== GET consistent test ====="
+	GOMAXPROCS=${MAXPROCS} boom -n ${REQ} -c ${CLIENTS} "${ADDR}?consistent"
+
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 000000000..b26b85ee4
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,35 @@
+Consul Benchmark
+================
+
+This repo contains the Packer automation necessary for the Consul benchmarks.
+
+There is a single main Packer file `bench.json`. To use it, the variables
+for `do_client_id` and `do_api_key` must be provided. These correspond to
+your DigitalOcean client id and API key.
+
+When Packer runs, it will generate 3 images:
+* bench-bootstrap - Consul server in bootstrap mode
+* bench-server - Consul server
+* bench-worker - Worker node
+
+For the benchmark you should start 1 bootstrap instance, and 2 normal
+servers. As many workers as desired can be started. Once the nodes are
+up, you must SSH into one of the Consul servers.
+
+Connect all the nodes with:
+
+    $ consul join ...
+
+This will connect all the nodes within the same datacenter.
+
+To run the benchmarks, use the Makefile:
+
+    $ cd /
+    $ make # Runs all the benchmarks
+    $ make put # Runs only the PUT benchmarks
+
+There is no good way to currently cause multiple workers to run at the same
+time, so I just type in the make command and rapidly start the test on all
+workers. It is not perfect, but the test runs long enough that the calls
+overlap.
+
diff --git a/bench/bench.json b/bench/bench.json
new file mode 100644
index 000000000..b02101fbe
--- /dev/null
+++ b/bench/bench.json
@@ -0,0 +1,86 @@
+{
+  "variables": {
+    "do_client_id": "",
+    "do_api_key": ""
+  },
+  "builders": [
+    {
+      "type": "digitalocean",
+      "api_key": "{{ user `do_api_key` }}",
+      "client_id": "{{ user `do_client_id` }}",
+      "region_id": "1",
+      "size_id": "61",
+      "image_id": "3101045",
+      "snapshot_name": "bench-bootstrap-{{ isotime }}",
+      "name": "bootstrap"
+    },
+    {
+      "type": "digitalocean",
+      "api_key": "{{ user `do_api_key` }}",
+      "client_id": "{{ user `do_client_id` }}",
+      "region_id": "1",
+      "size_id": "61",
+      "image_id": "3101045",
+      "snapshot_name": "bench-server-{{ isotime }}",
+      "name": "server"
+    },
+    {
+      "type": "digitalocean",
+      "api_key": "{{ user `do_api_key` }}",
+      "client_id": "{{ user `do_client_id` }}",
+      "region_id": "1",
+      "size_id": "61",
+      "image_id": "3101045",
+      "snapshot_name": "bench-worker-{{ isotime }}",
+      "name": "worker"
+    }
+  ],
+  "provisioners":[
+    {
+      "type": "file",
+      "source": "conf/upstart.conf",
+      "destination": "/etc/init/consul.conf"
+    },
+    {
+      "type": "shell",
+      "inline": [
+        "mkdir -p /etc/consul.d",
+        "apt-get update",
+        "apt-get install -y unzip make",
+        "wget https://dl.bintray.com/mitchellh/consul/0.2.0_linux_amd64.zip",
+        "unzip 0.2.0_linux_amd64.zip",
+        "mv consul /usr/local/bin/consul",
+        "chmod +x /usr/local/bin/consul"
+      ]
+    },
+    {
+      "type": "file",
+      "source": "conf/common.json",
+      "destination": "/etc/consul.d/common.json"
+    },
+    {
+      "type": "file",
+      "source": "conf/bootstrap.json",
+      "destination":
"/etc/consul.d/bootstrap.json", + "only": ["bootstrap"] + }, + { + "type": "file", + "source": "conf/server.json", + "destination": "/etc/consul.d/server.json", + "only": ["server"] + }, + { + "type": "shell", + "inline": [ + "curl https://s3.amazonaws.com/hc-ops/boom_linux_amd64 -o /usr/bin/boom", + "chmod +x /usr/bin/boom" + ] + }, + { + "type": "file", + "source": "Makefile", + "destination": "/Makefile" + } + ] +} diff --git a/bench/conf/bootstrap.json b/bench/conf/bootstrap.json new file mode 100644 index 000000000..7f751b428 --- /dev/null +++ b/bench/conf/bootstrap.json @@ -0,0 +1,4 @@ +{ + "bootstrap": true, + "server": true +} diff --git a/bench/conf/common.json b/bench/conf/common.json new file mode 100644 index 000000000..2a3fa5d6a --- /dev/null +++ b/bench/conf/common.json @@ -0,0 +1,4 @@ +{ + "data_dir": "/var/lib/consul", + "log_level": "info" +} diff --git a/bench/conf/server.json b/bench/conf/server.json new file mode 100644 index 000000000..3e35005af --- /dev/null +++ b/bench/conf/server.json @@ -0,0 +1,3 @@ +{ + "server": true +} diff --git a/bench/conf/upstart.conf b/bench/conf/upstart.conf new file mode 100644 index 000000000..af8260a7b --- /dev/null +++ b/bench/conf/upstart.conf @@ -0,0 +1,24 @@ +description "Consul agent" + +start on runlevel [2345] +stop on runlevel [!2345] + +respawn + +script + if [ -f "/etc/service/consul" ]; then + . 
/etc/service/consul + fi + + # Make sure to use all our CPUs, because Consul can block a scheduler thread + export GOMAXPROCS=`nproc` + + # Get the public IP + BIND=`ifconfig eth0 | grep "inet addr" | awk '{ print substr($2,6) }'` + + exec /usr/local/bin/consul agent \ + -config-dir="/etc/consul.d" \ + -bind=$BIND \ + ${CONSUL_FLAGS} \ + >>/var/log/consul.log 2>&1 +end script diff --git a/bench/results.md b/bench/results.md new file mode 100644 index 000000000..ad28880c3 --- /dev/null +++ b/bench/results.md @@ -0,0 +1,178 @@ +# Consul Benchmark Results + +To run the benchmark, we started a 5-node DigitalOcean cluster. +There are 3 servers, meaning writes must commit to at least 2 servers. +The cluster uses the 16GB DigitalOcean droplet which has the following specs: + + * 8 CPU Cores, 2GHz + * 16GB RAM + * 160GB SSD disk + * 1Gbps NIC + +We used `bonnie++` to benchmark the disk, and the key metrics are: + + * 188MB/s sequential write + * 86MB/s sequential read-write-flush + * 840MB/s sequential read + * 2636 random seeks per second + +# Output + +Below is the output for a test run on a benchmark cluster. We ran the benchmark +several times to warm up the nodes, and this is just a single representative sample. + +Note that a single worker was running the benchmark. This means the "stale" test is +not representative of total throughput, as the client was only routing to a single server. + + ===== PUT test ===== + GOMAXPROCS=4 boom -m PUT -d "74a31e96-1d0f-4fa7-aa14-7212a326986e" -n 20480 -c 64 http://localhost:8500/v1/kv/bench + 20480 / 20480 Booooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo! 100.00 % + + Summary: + Total: 19.4302 secs. + Slowest: 0.1715 secs. + Fastest: 0.0157 secs. + Average: 0.0606 secs. + Requests/sec: 1054.0313 + Total Data Received: 102400 bytes. + Response Size per Request: 5 bytes. 
+ + Status code distribution: + [200] 20480 responses + + Response time histogram: + 0.016 [1] | + 0.031 [233] |∎ + 0.047 [4120] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.062 [8079] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.078 [5082] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.094 [2045] |∎∎∎∎∎∎∎∎∎∎ + 0.109 [656] |∎∎∎ + 0.125 [200] | + 0.140 [12] | + 0.156 [31] | + 0.172 [21] | + + Latency distribution: + 10% in 0.0416 secs. + 25% in 0.0484 secs. + 50% in 0.0579 secs. + 75% in 0.0697 secs. + 90% in 0.0835 secs. + 95% in 0.0919 secs. + 99% in 0.1113 secs. + + ===== GET default test ===== + GOMAXPROCS=4 boom -n 20480 -c 64 http://localhost:8500/v1/kv/bench + 20480 / 20480 Booooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo! 100.00 % + + Summary: + Total: 9.6804 secs. + Slowest: 0.0830 secs. + Fastest: 0.0023 secs. + Average: 0.0302 secs. + Requests/sec: 2115.6096 + Total Data Received: 2560000 bytes. + Response Size per Request: 125 bytes. + + Status code distribution: + [200] 20480 responses + + Response time histogram: + 0.002 [1] | + 0.010 [143] | + 0.018 [1666] |∎∎∎∎∎∎∎∎∎ + 0.026 [6009] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.035 [6732] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.043 [3857] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.051 [1389] |∎∎∎∎∎∎∎∎ + 0.059 [459] |∎∎ + 0.067 [154] | + 0.075 [53] | + 0.083 [17] | + + Latency distribution: + 10% in 0.0189 secs. + 25% in 0.0233 secs. + 50% in 0.0291 secs. + 75% in 0.0358 secs. + 90% in 0.0427 secs. + 95% in 0.0476 secs. + 99% in 0.0597 secs. + + ===== GET stale test ===== + GOMAXPROCS=4 boom -n 20480 -c 64 http://localhost:8500/v1/kv/bench?stale + 20480 / 20480 Booooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo! 100.00 % + + Summary: + Total: 10.3082 secs. + Slowest: 0.0972 secs. + Fastest: 0.0015 secs. + Average: 0.0322 secs. 
+ Requests/sec: 1986.7714 + Total Data Received: 2560000 bytes. + Response Size per Request: 125 bytes. + + Status code distribution: + [200] 20480 responses + + Response time histogram: + 0.002 [1] | + 0.011 [320] |∎ + 0.021 [2558] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.030 [6247] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.040 [6895] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.049 [3174] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.059 [971] |∎∎∎∎∎ + 0.068 [249] |∎ + 0.078 [52] | + 0.088 [11] | + 0.097 [2] | + + Latency distribution: + 10% in 0.0187 secs. + 25% in 0.0246 secs. + 50% in 0.0317 secs. + 75% in 0.0387 secs. + 90% in 0.0461 secs. + 95% in 0.0511 secs. + 99% in 0.0618 secs. + + ===== GET consistent test ===== + GOMAXPROCS=4 boom -n 20480 -c 64 http://localhost:8500/v1/kv/bench?consistent + 20480 / 20480 Booooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo! 100.00 % + + Summary: + Total: 10.4835 secs. + Slowest: 0.0991 secs. + Fastest: 0.0024 secs. + Average: 0.0327 secs. + Requests/sec: 1953.5549 + Total Data Received: 2560000 bytes. + Response Size per Request: 125 bytes. + + Status code distribution: + [200] 20480 responses + + Response time histogram: + 0.002 [1] | + 0.012 [137] | + 0.022 [2405] |∎∎∎∎∎∎∎∎∎∎∎∎ + 0.031 [7754] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.041 [6382] |∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.051 [2629] |∎∎∎∎∎∎∎∎∎∎∎∎∎ + 0.060 [826] |∎∎∎∎ + 0.070 [245] |∎ + 0.080 [81] | + 0.089 [17] | + 0.099 [3] | + + Latency distribution: + 10% in 0.0208 secs. + 25% in 0.0254 secs. + 50% in 0.0314 secs. + 75% in 0.0384 secs. + 90% in 0.0463 secs. + 95% in 0.0518 secs. + 99% in 0.0645 secs. +