Merge pull request #93 from hashicorp/f-getstarted

Getting started guide and various bug fixes and UX cleanups
This commit is contained in:
Armon Dadgar 2015-09-22 22:51:46 -07:00
commit 98d3fc3b42
22 changed files with 833 additions and 166 deletions

6
.gitignore vendored
View file

@@ -42,3 +42,9 @@ ui/dist/
website/.bundle
website/vendor
example.nomad
nomad_linux_amd64
nomad_darwin_amd64
TODO.md

View file

@@ -142,6 +142,13 @@ func (c *Client) init() error {
return fmt.Errorf("failed creating alloc dir: %s", err)
}
}
// Ensure the state dir exists if we have one
if c.config.StateDir != "" {
if err := os.MkdirAll(c.config.StateDir, 0700); err != nil {
return fmt.Errorf("failed creating state dir: %s", err)
}
}
return nil
}
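One property worth noting about the pattern above: `os.MkdirAll` returns nil when the directory already exists, so the client can call it unconditionally on every start. A minimal sketch (the path here is hypothetical):

```go
package main

import (
	"log"
	"os"
)

func main() {
	// MkdirAll creates any missing parents and is a no-op when the
	// directory already exists, so repeated calls across restarts are safe.
	if err := os.MkdirAll("/tmp/nomad-state", 0700); err != nil {
		log.Fatalf("failed creating state dir: %v", err)
	}
	log.Println("state dir ready")
}
```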
@@ -244,8 +251,8 @@ func (c *Client) Stats() map[string]map[string]string {
"client": map[string]string{
"known_servers": toString(uint64(len(c.config.Servers))),
"num_allocations": toString(uint64(numAllocs)),
"last_heartbeat": fmt.Sprintf("%#v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%#v", c.heartbeatTTL),
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat)),
"heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL),
},
"runtime": nomad.RuntimeStats(),
}
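The `%#v` to `%v` change is more than cosmetic: `%v` uses `time.Duration`'s String method, while `%#v` prints the Go-syntax representation of the underlying int64 nanosecond count. A quick sketch of the difference:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	ttl := 5 * time.Second
	// %#v ignores the Stringer and prints the raw int64 value.
	fmt.Printf("%#v\n", ttl) // 5000000000
	// %v calls Duration.String, giving a human-readable stat.
	fmt.Printf("%v\n", ttl) // 5s
}
```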
@@ -265,7 +272,9 @@ func (c *Client) restoreState() error {
// Scan the directory
list, err := ioutil.ReadDir(filepath.Join(c.config.StateDir, "alloc"))
if err != nil {
if err != nil && os.IsNotExist(err) {
return nil
} else if err != nil {
return fmt.Errorf("failed to list alloc state: %v", err)
}
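This turns a missing state directory into a clean no-op rather than an error. Since `os.IsNotExist(nil)` is false, the `err != nil &&` guard in the new condition is technically redundant; a self-contained sketch of the idiom:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

// listAllocState mirrors the pattern above: a missing directory simply
// means there is nothing to restore, so it is not treated as an error.
func listAllocState(dir string) ([]os.FileInfo, error) {
	list, err := ioutil.ReadDir(dir)
	if os.IsNotExist(err) {
		return nil, nil // fresh node, nothing persisted yet
	} else if err != nil {
		return nil, fmt.Errorf("failed to list alloc state: %v", err)
	}
	return list, nil
}

func main() {
	list, err := listAllocState("/tmp/does-not-exist")
	fmt.Println(len(list), err) // 0 <nil>
}
```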
@@ -556,6 +565,7 @@ func (c *Client) watchAllocations(allocUpdates chan []*structs.Allocation) {
for {
// Get the allocations, blocking for updates
resp = structs.NodeAllocsResponse{}
err := c.RPC("Node.GetAllocs", &req, &resp)
if err != nil {
c.logger.Printf("[ERR] client: failed to query for node allocations: %v", err)
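Resetting `resp` to a zero value on each iteration guards against stale data: decoders generally leave struct fields untouched when a reply omits them, so reusing the previous response can carry old allocations forward. A sketch of the failure mode, using encoding/json in place of the RPC codec (the type here is hypothetical):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type nodeAllocsResponse struct {
	Allocs []string
}

func main() {
	var resp nodeAllocsResponse
	// The first reply populates the slice.
	json.Unmarshal([]byte(`{"Allocs":["a","b"]}`), &resp)
	// A later reply that omits the field leaves the old value in place.
	json.Unmarshal([]byte(`{}`), &resp)
	fmt.Println(resp.Allocs) // still [a b]: stale, hence the fresh struct per call
}
```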

View file

@@ -76,7 +76,7 @@ func (c *Command) readConfig() *Config {
flags.StringVar(&cmdConfig.Region, "region", "", "")
flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
flags.StringVar(&cmdConfig.LogLevel, "log-level", "info", "")
flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
flags.StringVar(&cmdConfig.NodeName, "node", "", "")
// Atlas options

View file

@@ -2,8 +2,15 @@ package command
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
const (
// DefaultInitName is the default name we use when
// initializing the example file
DefaultInitName = "example.nomad"
)
// InitCommand generates a new job template that you can customize to your
@@ -13,89 +20,106 @@ type InitCommand struct {
}
func (c *InitCommand) Help() string {
return initUsage
}
helpText := `
Usage: nomad init
func (c *InitCommand) Run(args []string) int {
dir, err := os.Getwd()
if err != nil {
c.Ui.Error("Unable to determine pwd; aborting")
return 1
}
Creates an example job file that can be used as a starting
point to customize further.
// Derive the job name from the pwd folder name, which is our best guess at
// the project's name
jobname := filepath.Base(dir)
jobfile := fmt.Sprintf("%s.nomad", jobname)
jobpath := filepath.Join(dir, jobfile)
if _, err := os.Stat(jobpath); err == nil {
c.Ui.Error(fmt.Sprintf("%s file already exists", jobfile))
return 1
}
file, err := os.Create(jobfile)
defer file.Close()
if err != nil {
c.Ui.Error(fmt.Sprintf("Unable to create file %s: %s", jobfile, err))
return 1
}
_, err = file.WriteString(defaultJob)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to write job template to %s", jobfile))
return 1
}
c.Ui.Output(fmt.Sprintf("Initialized nomad job template in %s", jobfile))
return 0
`
return strings.TrimSpace(helpText)
}
func (c *InitCommand) Synopsis() string {
return "Create a new job template"
return "Create an example job file"
}
const initUsage = ``
func (c *InitCommand) Run(args []string) int {
// Check if the file already exists
_, err := os.Stat(DefaultInitName)
if err == nil {
c.Ui.Error(fmt.Sprintf("Job '%s' already exists", DefaultInitName))
return 1
} else if !os.IsNotExist(err) {
c.Ui.Error(fmt.Sprintf("Failed to stat '%s': %v", DefaultInitName, err))
return 1
}
// Write out the example
err = ioutil.WriteFile(DefaultInitName, []byte(defaultJob), 0660)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to write '%s': %v", DefaultInitName, err))
return 1
}
// Success
c.Ui.Output(fmt.Sprintf("Example job file written to %s", DefaultInitName))
return 0
}
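A sketch of how the rewritten command might be exercised in a test, assuming InitCommand embeds the repo's Meta struct (with its Ui field) like the other commands, and using the mock UI from mitchellh/cli:

```go
package command

import (
	"io/ioutil"
	"os"
	"testing"

	"github.com/mitchellh/cli"
)

func TestInitCommand_Run(t *testing.T) {
	// Work in a temp dir so the test never clobbers a real example.nomad.
	dir, err := ioutil.TempDir("", "nomad-init")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer os.RemoveAll(dir)
	if err := os.Chdir(dir); err != nil {
		t.Fatalf("err: %v", err)
	}

	ui := new(cli.MockUi)
	cmd := &InitCommand{Meta: Meta{Ui: ui}}

	if code := cmd.Run(nil); code != 0 {
		t.Fatalf("expected exit 0, got %d", code)
	}
	if _, err := os.Stat(DefaultInitName); err != nil {
		t.Fatalf("expected %s to exist: %v", DefaultInitName, err)
	}
	// A second run must fail since the file now exists.
	if code := cmd.Run(nil); code != 1 {
		t.Fatalf("expected exit 1 on second run, got %d", code)
	}
}
```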
const defaultJob = `
job "my-app" {
region = "global"
type = "service"
priority = 50
# There can only be a single job definition per file.
# Create a job with ID and Name 'example'
job "example" {
# Run the job in the global region, which is the default.
# region = "global"
// Each task in the group will be scheduled on the same machine(s).
group "app-group" {
// How many copies of this group should we run?
count = 5
# Specify the datacenters within the region this job can run in.
datacenters = ["dc1"]
task "python-webapp" {
driver = "docker"
config {
image = "org/container"
}
resources {
// For CPU 1024 = 1ghz
cpu = 500
// Memory in megabytes
memory = 128
# Service type jobs optimize for long-lived services. This is
# the default but we can change to batch for short-lived tasks.
# type = "service"
network {
dynamic_ports = [
"http",
"https",
]
}
}
}
# Priority controls our access to resources and scheduling priority.
# This can be between 1 and 100, inclusive, and defaults to 50.
# priority = 50
task "logshipper" {
driver = "exec"
}
# Restrict our job to only linux. We can specify multiple
# constraints as needed.
constraint {
attribute = "$attr.kernel.name"
value = "linux"
}
constraint {
attribute = "kernel.os"
value = "linux"
}
}
# Configure the job to do rolling updates
update {
# Stagger updates every 10 seconds
stagger = "10s"
# Update a single task at a time
max_parallel = 1
}
# Create a 'cache' group. Each task in the group will be
# scheduled onto the same machine.
group "cache" {
# Control the number of instances of this group.
# Defaults to 1
# count = 1
# Define a task to run
task "redis" {
# Use Docker to run the task.
driver = "docker"
# Configure Docker driver with the image
config {
image = "redis:latest"
}
# We must specify the resources required for
# this task to ensure it runs on a machine with
# enough capacity.
resources {
cpu = 500 # 500 MHz
memory = 256 # 256MB
network {
mbits = 10
dynamic_ports = ["redis"]
}
}
}
}
}
`
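To sanity-check edits to the generated template before submitting, the job file can be parsed directly. A sketch assuming the repo's jobspec package and its ParseFile helper, which is the same parsing path `nomad run` uses:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/jobspec"
)

func main() {
	// Parse the template the same way `nomad run` would; a syntax error
	// here would surface before the job is ever submitted.
	job, err := jobspec.ParseFile("example.nomad")
	if err != nil {
		log.Fatalf("failed to parse: %v", err)
	}
	fmt.Println("parsed job:", job.ID)
}
```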

View file

@@ -130,7 +130,6 @@ func (c *NodeStatusCommand) Run(args []string) int {
alloc.ID,
alloc.EvalID,
alloc.JobID,
alloc.NodeID,
alloc.TaskGroup,
alloc.DesiredStatus,
alloc.ClientStatus)

View file

@@ -19,8 +19,9 @@ func (c *RunCommand) Help() string {
helpText := `
Usage: nomad run [options] <file>
Starts running a new job using the definition located at <file>.
This is the main command used to invoke new work in Nomad.
Starts running a new job or updates an existing job using
the specification located at <file>. This is the main command
used to interact with Nomad.
Upon successful job submission, this command will immediately
enter an interactive monitor. This is useful to watch Nomad's
@@ -50,7 +51,7 @@ Run Options:
}
func (c *RunCommand) Synopsis() string {
return "Run a new job"
return "Run a new job or update an existing job"
}
func (c *RunCommand) Run(args []string) int {

24
demo/vagrant/README.md Normal file
View file

@@ -0,0 +1,24 @@
# Vagrant Nomad Demo
This Vagrantfile and associated Nomad configuration files are meant
to be used along with the
[getting started guide](https://nomadproject.io/intro/getting-started/install.html).
Follow along with the guide, or just start the Vagrant box with:
$ vagrant up
Once it is finished, you should be able to SSH in and interact with Nomad:
$ vagrant ssh
...
$ nomad
usage: nomad [--version] [--help] <command> [<args>]
Available commands are:
agent Runs a Nomad agent
agent-info Display status information about the local agent
...
To learn more about starting Nomad see the [official site](https://nomadproject.io).

43
demo/vagrant/Vagrantfile vendored Normal file
View file

@@ -0,0 +1,43 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
$script = <<SCRIPT
# Update apt and get dependencies
sudo apt-get update
sudo apt-get install -y unzip curl wget
# Install Docker
sudo curl -sSL https://get.docker.com/ | sh
# Download Nomad
echo Fetching Nomad...
cd /tmp/
wget https://s3.amazonaws.com/hc-public/nomad/0.1.0dev/nomad_linux_amd64 -O nomad
echo Installing Nomad...
#unzip nomad.zip
sudo chmod +x nomad
sudo mv nomad /usr/bin/nomad
sudo mkdir /etc/nomad.d
sudo chmod a+w /etc/nomad.d
SCRIPT
Vagrant.configure(2) do |config|
config.vm.box = "puphpet/ubuntu1404-x64"
config.vm.hostname = "nomad"
config.vm.provision "shell", inline: $script, privileged: false
# Increase memory for Virtualbox
config.vm.provider "virtualbox" do |vb|
vb.memory = "1024"
end
# Increase memory for VMware
["vmware_fusion", "vmware_workstation"].each do |p|
config.vm.provider p do |v|
v.vmx["memsize"] = "1024"
end
end
end

20
demo/vagrant/client1.hcl Normal file
View file

@@ -0,0 +1,20 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client1"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
}
# Modify our port to avoid a collision with server1
ports {
http = 5656
}

25
demo/vagrant/client2.hcl Normal file
View file

@@ -0,0 +1,25 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client2"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
# Set ourselves as thing two
meta {
thing = "two"
}
}
# Modify our port to avoid a collision with server1 and client1
ports {
http = 5657
}

13
demo/vagrant/server.hcl Normal file
View file

@@ -0,0 +1,13 @@
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/server1"
# Enable the server
server {
enabled = true
# Self-elect, should be 3 or 5 for production
bootstrap_expect = 1
}

View file

@@ -289,8 +289,8 @@ func (iter *JobAntiAffinityIterator) Next() *RankedNode {
// Apply a penalty if there are collisions
if collisions > 0 {
scorePenalty := float64(collisions) * iter.penalty
option.Score -= scorePenalty
scorePenalty := -1 * float64(collisions) * iter.penalty
option.Score += scorePenalty
iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty)
}
return option

View file

@@ -88,7 +88,7 @@ func NewGenericStack(batch bool, ctx Context, baseNodes []*structs.Node) *Generi
s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")
// Apply a limit function. This is to avoid scanning *every* possible node.
s.limit = NewLimitIterator(ctx, s.binPack, 2)
s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)
// Select the node with the maximum score for placement
s.maxScore = NewMaxScoreIterator(ctx, s.limit)
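The one-line fix above matters because the rank iterators form a pull pipeline: each stage wraps its source, and a stage that nothing downstream wraps never executes. With the limiter wrapping binPack instead of jobAntiAff, the anti-affinity scoring was silently skipped. A self-contained sketch of the pattern (all names hypothetical):

```go
package main

import "fmt"

type RankedNode struct {
	Name  string
	Score float64
}

type RankIterator interface {
	Next() *RankedNode
}

// sliceSource feeds nodes into the pipeline.
type sliceSource struct{ nodes []*RankedNode }

func (s *sliceSource) Next() *RankedNode {
	if len(s.nodes) == 0 {
		return nil
	}
	n := s.nodes[0]
	s.nodes = s.nodes[1:]
	return n
}

// scoreStage adjusts the score of every node pulled through it. If no
// downstream stage wraps it, Next is never called and it never runs.
type scoreStage struct {
	source RankIterator
	delta  float64
}

func (s *scoreStage) Next() *RankedNode {
	n := s.source.Next()
	if n != nil {
		n.Score += s.delta
	}
	return n
}

func main() {
	src := &sliceSource{nodes: []*RankedNode{{Name: "node1", Score: 5}}}
	antiAff := &scoreStage{source: src, delta: -2}
	// Pulling from antiAff applies the penalty; pulling from src would not.
	for n := antiAff.Next(); n != nil; n = antiAff.Next() {
		fmt.Println(n.Name, n.Score) // node1 3
	}
}
```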

View file

@@ -3,20 +3,23 @@ layout: "docs"
page_title: "Commands: init"
sidebar_current: "docs-commands-init"
description: >
Toggle drain mode for a given node.
Generate a skeleton jobspec template.
---
# Command: init
The `init` command creates a [jobspec](/docs/jobspec/) template in the current
The `init` command creates an example [job specification](/docs/jobspec/) in the current
directory that demonstrates some common configurations for tasks, task groups,
runtime constraints, and resource allocation.
Please refer to the [jobspec](/docs/jobspec/) and [drivers](/docs/drivers/)
pages to learn how to customize the template.
## Usage
## Examples
Generate an example job file:
```
nomad init
$ nomad init
Example job file written to example.nomad
```

View file

@@ -8,10 +8,9 @@ description: >
# Command: run
The `run` command is used to run new jobs in Nomad. Jobs are specified using
[HCL](https://github.com/hashicorp/hcl)-encoded files, and may specify one or
more task groups. More information about jobs and their configuration format
can be found in the [jobs documentation](#).
The `run` command is used to submit new jobs to Nomad or to update existing
jobs. Job files must conform to the [job specification](/docs/jobspec/index.html)
format.
## Usage
@@ -20,8 +19,8 @@ nomad run [options] <file>
```
The run command requires a single argument, specifying the path to a file
containing a valid [job definition](#). This file will be read and the job
will be submitted to the Nomad server for scheduling.
containing a valid [job specification](/docs/jobspec/index.html). This file
will be read and the job will be submitted to Nomad for scheduling.
By default, on successful job submission, the run command will enter an
interactive monitor and display log information detailing the scheduling

View file

@@ -1,21 +0,0 @@
---
layout: "intro"
page_title: "Using the HTTP APIs with Authentication"
sidebar_current: "getting-started-apis"
description: |-
Using the HTTP APIs for authentication and secret access.
---
# Using the HTTP APIs with Authentication
Many of Nomad's capabilities are accessible via the HTTP API in addition to the
CLI.
TODO: Document Nomad's HTTP API
Congratulations! You now know all the basics to get started with Nomad.
## Next
Next, we have a page dedicated to
[next steps](/intro/getting-started/next-steps.html) depending on
what you would like to achieve.

View file

@@ -0,0 +1,203 @@
---
layout: "intro"
page_title: "Clustering"
sidebar_current: "getting-started-cluster"
description: |-
Join another Nomad client to create your first cluster.
---
# Clustering
We have started our first agent and run a job against it in development mode.
This demonstrated the ease of use and the workflow of Nomad, but did not show how
it could be extended to a scalable, production-grade configuration. In this step,
we will create our first real cluster with multiple nodes.
## Starting the Server
The first step is to create the config file for the server. Either download
the file from the [repository here](#), or paste this into a file called
`server.hcl`:
```
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/server1"
# Enable the server
server {
enabled = true
# Self-elect, should be 3 or 5 for production
bootstrap_expect = 1
}
```
This is a fairly minimal server configuration file, but it
is enough to start an agent in server-only mode and have it
elect itself as leader. The major change that should be made for
production is to run more than one server and to change the
corresponding `bootstrap_expect` value.
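The "3 or 5" guidance follows from Raft quorum math: a majority of servers must agree for the cluster to make progress, so fault tolerance only improves at odd sizes. A quick sketch of the arithmetic:

```go
package main

import "fmt"

func main() {
	// Raft needs a majority (N/2 + 1) to commit; the rest is how many
	// server failures the cluster can survive.
	for _, n := range []int{1, 3, 5} {
		quorum := n/2 + 1
		fmt.Printf("servers=%d quorum=%d tolerates=%d failure(s)\n",
			n, quorum, n-quorum)
	}
	// servers=1 quorum=1 tolerates=0 failure(s)
	// servers=3 quorum=2 tolerates=1 failure(s)
	// servers=5 quorum=3 tolerates=2 failure(s)
}
```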
Once the file is created, start the agent in a new tab:
```
$ sudo nomad agent -config server.hcl
==> WARNING: Bootstrap mode enabled! Potentially unsafe operation.
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: false
Log Level: DEBUG
Region: global (DC: dc1)
Server: true
==> Nomad agent started! Log data will stream in below:
[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
[WARN] serf: Failed to re-join any previously known node
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
[WARN] raft: Heartbeat timeout reached, starting election
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
[DEBUG] raft: Votes needed: 1
[DEBUG] raft: Vote granted. Tally: 1
[INFO] raft: Election won. Tally: 1
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
[INFO] nomad: cluster leadership acquired
```
We can see above that client mode is disabled, and that we are
only running as the server. This means that this server will manage
state and make scheduling decisions but will not run any tasks.
Now we need some agents to run tasks!
## Starting the Clients
Similar to the server, we must first configure the clients. Either download
the configuration for client1 and client2 from the [repository here](#), or
paste the following into `client1.hcl`:
```
# Increase log verbosity
log_level = "DEBUG"
# Setup data dir
data_dir = "/tmp/client1"
# Enable the client
client {
enabled = true
# For demo assume we are talking to server1. For production,
# this should be like "nomad.service.consul:4647" and a system
# like Consul used for service discovery.
servers = ["127.0.0.1:4647"]
}
# Modify our port to avoid a collision with server1
ports {
http = 5656
}
```
Copy that file to `client2.hcl` and change the `data_dir` to
be "/tmp/client2" and the `http` port to 5657. Once you've created
both `client1.hcl` and `client2.hcl`, open a tab for each and
start the agents:
```
$ sudo nomad agent -config client1.hcl
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: true
Log Level: DEBUG
Region: global (DC: dc1)
Server: false
==> Nomad agent started! Log data will stream in below:
[DEBUG] client: applied fingerprints [host memory storage arch cpu]
[DEBUG] client: available drivers [docker exec]
[DEBUG] client: node registration complete
...
```
In the output we can see the agent is running in client mode only.
This agent will be available to run tasks but will not participate
in managing the cluster or making scheduling decisions.
Using the [`node-status` command](/docs/commands/node-status.html)
we should see both nodes in the `ready` state:
```
$ nomad node-status
ID DC Name Class Drain Status
e5239796-7285-3ed2-efe1-37cdc2d459d4 dc1 nomad <none> false ready
d12e4ab0-4206-bd33-ff75-e1367590eceb dc1 nomad <none> false ready
```
We now have a simple three node cluster running. The only difference
between a demo and full production cluster is that we are running a
single server instead of three or five.
## Submit a Job
Now that we have a simple cluster, we can use it to schedule a job.
We should still have the `example.nomad` job file from before, but
verify that the `count` is still set to 3.
Then, use the [`run` command](/docs/commands/run.html) to submit the job:
```
$ nomad run example.nomad
==> Monitoring evaluation "2d742049-497f-c602-c56d-ae2a328a5671"
Evaluation triggered by job "example"
Allocation "44d46439-655d-701e-55ce-552ee74fbbd8" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
Allocation "624be24f-5992-0c75-742d-7f8dbd3044a2" created: node "e5239796-7285-3ed2-efe1-37cdc2d459d4", group "cache"
Allocation "a133a2c7-cc3c-2f8c-8664-71d2389c7759" created: node "d12e4ab0-4206-bd33-ff75-e1367590eceb", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "2d742049-497f-c602-c56d-ae2a328a5671" finished with status "complete"
```
We can see in the output that the scheduler assigned two of the
tasks to one of the client nodes and the remaining task to the
second client.
We can again use the [`status` command](/docs/commands/status.html) to verify:
```
$ nomad status example
ID = example
Name = example
Type = service
Priority = 50
Datacenters = dc1
Status =
==> Evaluations
ID Priority TriggeredBy Status
2d742049-497f-c602-c56d-ae2a328a5671 50 job-register complete
==> Allocations
ID EvalID NodeID TaskGroup Desired Status
44d46439-655d-701e-55ce-552ee74fbbd8 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
a133a2c7-cc3c-2f8c-8664-71d2389c7759 2d742049-497f-c602-c56d-ae2a328a5671 d12e4ab0-4206-bd33-ff75-e1367590eceb cache run running
624be24f-5992-0c75-742d-7f8dbd3044a2 2d742049-497f-c602-c56d-ae2a328a5671 e5239796-7285-3ed2-efe1-37cdc2d459d4 cache run running
```
We can see that all our tasks have been allocated and are running.
Once we are satisfied that our job is happily running, we can tear
it down with `nomad stop`.
## Next Steps
We've now concluded the getting started guide; however, there are a number
of [next steps](next-steps.html) to get started with Nomad.

View file

@@ -8,52 +8,63 @@ description: |-
# Install Nomad
Nomad must first be installed on your machine. Nomad is distributed as
a [binary package](/downloads.html) for all supported platforms and
architectures. This page will not cover how to compile Nomad from source,
but compiling from source is covered in the [documentation](/docs/install/index.html)
for those who want to be sure they're compiling source they trust into
the final binary.
The task drivers that are available to Nomad vary by operating system;
for example, Docker is only available on Linux machines. To simplify the
getting started experience, we will be working in a Vagrant environment.
Create a new directory, and download [this `Vagrantfile`](#).
## Installing Nomad
## Vagrant Setup
To install Nomad, find the [appropriate package](/downloads.html) for
your system and download it. Nomad is packaged as a zip archive.
Once you have created a new directory and downloaded the `Vagrantfile`,
you must create the virtual machine:
After downloading Nomad, unzip the package. Nomad runs as a single binary
named `nomad`. Any other files in the package can be safely removed and
Nomad will still function.
$ vagrant up
The final step is to make sure that `nomad` is available on the PATH.
See [this page](https://stackoverflow.com/questions/14637979/how-to-permanently-set-path-on-linux)
for instructions on setting the PATH on Linux and Mac.
[This page](https://stackoverflow.com/questions/1618280/where-can-i-set-path-to-make-exe-on-windows)
contains instructions for setting the PATH on Windows.
This will take a few minutes as the base Ubuntu box must be downloaded
and provisioned with both Docker and Nomad. Once this completes, you should
see output similar to:
Bringing machine 'default' up with 'vmware_fusion' provider...
==> default: Checking if box 'puphpet/ubuntu1404-x64' is up to date...
==> default: Machine is already running.
At this point the Vagrant box is running and ready to go.
## Verifying the Installation
After installing Nomad, verify the installation worked by opening a new
terminal session and checking that `nomad` is available. By executing
After starting the Vagrant box, verify the installation worked by connecting
to the box using SSH and checking that `nomad` is available. By executing
`nomad`, you should see help output similar to the following:
```
$ nomad
$ vagrant ssh
...
vagrant@nomad:~$ nomad
usage: nomad [--version] [--help] <command> [<args>]
Available commands are:
agent Runs a Nomad agent
agent-force-leave Force a member into the 'left' state
agent-info Display status information about the local agent
agent-join Join server nodes together
agent-members Display a list of known members and their status
node-drain Toggle drain mode on a given node
node-status Display status information about nodes
status Display status information about jobs
version Prints the Nomad version
agent Runs a Nomad agent
agent-info Display status information about the local agent
eval-monitor Monitor an evaluation interactively
node-drain Toggle drain mode on a given node
node-status Display status information about nodes
run Run a new job
server-force-leave Force a server into the 'left' state
server-join Join server nodes together
server-members Display a list of known servers and their status
status Display status information about jobs
stop Stop a running job
version Prints the Nomad version
```
If you get an error that Nomad could not be found, then your PATH environment
variable was not setup properly. Please go back and ensure that your PATH
variable contains the directory where Nomad was installed.
If you get an error that Nomad could not be found, then your Vagrant box
may not have provisioned correctly. Check any error messages that may have
occurred during `vagrant up`. You can always destroy the box and
re-create it.
## Next Steps
Vagrant is running and Nomad is installed. Let's [start Nomad](/intro/getting-started/running.html)!
Otherwise, Nomad is installed and ready to go!

View file

@@ -0,0 +1,177 @@
---
layout: "intro"
page_title: "Jobs"
sidebar_current: "getting-started-jobs"
description: |-
Learn how to submit, modify and stop jobs in Nomad.
---
# Jobs
Jobs are the primary configuration that users interact with when using
Nomad. A job is a declarative specification of tasks that Nomad should run.
Jobs have a globally unique name and contain one or many task groups, which are
themselves collections of one or many tasks.
The format of the jobs is [documented here](/docs/jobspec/index.html). They
can be specified in either [HCL](https://github.com/hashicorp/hcl) or JSON;
however, we recommend using JSON only when the configuration is generated by a machine.
## Running a Job
To get started, we will use the [`init` command](/docs/commands/init.html), which
generates a skeleton job file:
```
$ nomad init
Example job file written to example.nomad
$ cat example.nomad
# There can only be a single job definition per file.
# Create a job with ID and Name 'example'
job "example" {
# Run the job in the global region, which is the default.
# region = "global"
...
```
In this example job file, we have declared a single task, 'redis', which uses
the Docker driver. The primary way you interact with Nomad
is with the [`run` command](/docs/commands/run.html). The `run` command takes
a job file and registers it with Nomad. This is used both to register new
jobs and to update existing jobs.
We can register our example job now:
```
$ nomad run example.nomad
==> Monitoring evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6"
Evaluation triggered by job "example"
Allocation "c1d2f085-7049-6c4a-4479-1b2310fdaba9" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6" finished with status "complete"
```
Anytime a job is updated, Nomad creates an evaluation to determine what
actions need to take place. In this case, because this is a new job, Nomad has
determined that an allocation should be created and has scheduled it on our
local agent.
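The same evaluations are visible over the HTTP API. A sketch assuming the agent's default HTTP address (port 4646) and the /v1/job/<id>/evaluations endpoint; verify both against your agent:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// Query the local agent for the evaluations of the 'example' job.
	resp, err := http.Get("http://127.0.0.1:4646/v1/job/example/evaluations")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // JSON array of evaluations for the job
}
```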
To inspect the status of our job we use the [`status` command](/docs/commands/status.html):
```
$ nomad status example
ID = example
Name = example
Type = service
Priority = 50
Datacenters = dc1
Status =
==> Evaluations
ID Priority TriggeredBy Status
f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 50 job-register complete
==> Allocations
ID EvalID NodeID TaskGroup Desired Status
c1d2f085-7049-6c4a-4479-1b2310fdaba9 f119efb5-e2fa-a94f-e4cc-0c9f6c2a07f6 1f43787c-7ab4-8d10-d2d6-1593ed06463a cache run running
```
Here we can see that the evaluation has completed, and that it resulted in the
creation of an allocation that is now running on the local node.
## Modifying a Job
The definition of a job is not static, and is meant to be updated overtime.
You may update a job to change the docker container to update the application version,
or change the count of a task group to scale with load.
For now, edit the `example.nomad` file to uncomment the count and set it to 3:
```
# Control the number of instances of this group.
# Defaults to 1
count = 3
```
Once you have finished modifying the job specification, use `nomad run` to
push the updated version of the job:
```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
Evaluation triggered by job "example"
Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```
Because we set the count of the task group to three, Nomad created two
additional allocations to get to the desired state. Running the same job
specification again is idempotent; no new allocations will be created.
Now, let's try to do an application update. In this case, we will simply change
the version of redis we want to run. Edit the `example.nomad` file and change
the Docker image from "redis:latest" to "redis:2.8":
```
# Configure Docker driver with the image
config {
image = "redis:2.8"
}
```
This time we have not changed the number of task groups we want running,
but we've changed the task itself. This requires stopping the old tasks
and starting new ones. Our example job is configured to do a rolling update,
performing a single update every 10 seconds. Use `run` to push the updated
specification now:
```
$ nomad run example.nomad
==> Monitoring evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee"
Evaluation triggered by job "example"
Allocation "412b58c4-6be3-8ffe-0538-eace7b8a4c08" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Allocation "7147246f-5ddd-5061-0534-ed28ede2d099" created: node "1f43787c-7ab4-8d10-d2d6-1593ed06463a", group "cache"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "f358a19c-e451-acf1-a023-91f5b146e1ee" finished with status "complete"
```
We can see that Nomad handled the update in three phases, updating
only a single allocation at a time. The update strategy
can be configured, but rolling updates make it easy to upgrade
an application at large scale.
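As rough arithmetic, with count = 3, max_parallel = 1, and stagger = "10s", the rollout proceeds in three batches with a ten-second stagger between them. A sketch of the estimate (Nomad's actual timing may differ):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	count, maxParallel := 3, 1
	stagger := 10 * time.Second

	// Ceiling division gives the number of update batches.
	batches := (count + maxParallel - 1) / maxParallel
	// Each batch after the first waits one stagger interval.
	fmt.Println(time.Duration(batches-1) * stagger) // 20s
}
```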
## Stopping a Job
So far we've created, run and modified a job. The final step in a job lifecycle
is stopping the job. This is done with the [`stop` command](/docs/commands/stop.html):
```
$ nomad stop example
==> Monitoring evaluation "4b236340-d5ed-1838-be15-a896095d3ac9"
Evaluation triggered by job "example"
Evaluation status changed: "pending" -> "complete"
==> Evaluation "4b236340-d5ed-1838-be15-a896095d3ac9" finished with status "complete"
```
When we stop a job, it creates an evaluation which is used to stop all
the existing allocations. This also deletes the job definition from Nomad.
If we try to query the job status, we can see it is no longer registered:
```
$ nomad status example
Error querying job: Unexpected response code: 404 (job not found)
```
If we wanted to start the job again, we could simply `run` it again.
## Next Steps
Users of Nomad primarily interact with jobs, and we've now seen
how to create and scale a job, perform an application update,
and tear a job down. Next we will add another Nomad
client to [create our first cluster](cluster.html).

View file

@@ -0,0 +1,144 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
Learn about the Nomad agent, and the lifecycle of running and stopping.
---
# Running Nomad
Nomad relies on a long-running agent on every machine in the cluster.
The agent can run either in server or client mode. Each region must
have at least one server, though a cluster of 3 or 5 servers is recommended.
A single-server deployment is _**highly**_ discouraged, as data loss is inevitable
in a failure scenario.
All other agents run in client mode. A client is a very lightweight
process that registers the host machine, performs heartbeating, and runs any tasks
that are assigned to it by the servers. The agent must be run on every node that
is part of the cluster so that the servers can assign work to those machines.
## Starting the Agent
For simplicity, we will run a single Nomad agent in development mode. This mode
is used to quickly start an agent that is acting as a client and server to test
job configurations or prototype interactions. It should _**not**_ be used in
production as it does not persist state.
```
$ sudo nomad agent -dev
==> Starting Nomad agent...
==> Nomad agent configuration:
Atlas: <disabled>
Client: true
Log Level: debug
Region: global (DC: dc1)
Server: true
==> Nomad agent started! Log data will stream in below:
[INFO] serf: EventMemberJoin: nomad.global 127.0.0.1
[INFO] nomad: starting 4 scheduling worker(s) for [service batch _core]
[INFO] raft: Node at 127.0.0.1:4647 [Follower] entering Follower state
[INFO] nomad: adding server nomad.global (Addr: 127.0.0.1:4647) (DC: dc1)
[DEBUG] client: applied fingerprints [storage arch cpu host memory]
[DEBUG] client: available drivers [exec docker]
[WARN] raft: Heartbeat timeout reached, starting election
[INFO] raft: Node at 127.0.0.1:4647 [Candidate] entering Candidate state
[DEBUG] raft: Votes needed: 1
[DEBUG] raft: Vote granted. Tally: 1
[INFO] raft: Election won. Tally: 1
[INFO] raft: Node at 127.0.0.1:4647 [Leader] entering Leader state
[INFO] raft: Disabling EnableSingleNode (bootstrap)
[DEBUG] raft: Node 127.0.0.1:4647 updated peer set (2): [127.0.0.1:4647]
[INFO] nomad: cluster leadership acquired
[DEBUG] client: node registration complete
[DEBUG] client: updated allocations at index 1 (0 allocs)
[DEBUG] client: allocs: (added 0) (removed 0) (updated 0) (ignore 0)
[DEBUG] client: state updated to ready
```
As you can see, the Nomad agent has started and has output some log
data. From the log data, you can see that our agent is running in both
client and server mode, and has claimed leadership of the cluster.
Additionally, the local client has been registered and marked as ready.
-> **Note:** Typically any agent running in client mode must be run with root-level
privilege. Nomad makes use of operating system primitives for resource isolation
which require elevated permissions. The agent will function as non-root, but
certain task drivers will not be available.
## Cluster Nodes
If you run [`nomad node-status`](/docs/commands/node-status.html) in another terminal, you
can see the registered nodes of the Nomad cluster:
```text
$ vagrant ssh
...
$ nomad node-status
ID DC Name Class Drain Status
72d3af97-144f-1e5f-94e5-df1516fe4add dc1 nomad <none> false ready
```
The output shows our Node ID, which is a randomly generated UUID,
its datacenter, node name, node class, drain mode, and current status.
We can see that our node is in the ready state, and task draining is
currently off.
The agent is also running in server mode, which means it is part of
the [gossip protocol](/docs/internals/gossip.html) used to connect all
the server instances together. We can view the members of the gossip
ring using the [`server-members`](/docs/commands/server-members.html) command:
```text
$ nomad server-members
Name Addr Port Status Proto Build DC Region
nomad.global 127.0.0.1 4648 alive 2 0.1.0dev dc1 global
```
The output shows our own agent, the address it is running on, its
health state, some version information, and the datacenter and region.
Additional metadata can be viewed by providing the `-detailed` flag.
## <a name="stopping"></a>Stopping the Agent
You can use `Ctrl-C` (the interrupt signal) to halt the agent.
By default, all signals will cause the agent to forcefully shutdown.
The agent [can be configured](/docs/agent/config.html) to gracefully
leave on either the interrupt or terminate signals.
After interrupting the agent, you should see it leave the cluster
and shut down:
```
^C==> Caught signal: interrupt
[DEBUG] http: Shutting down http server
[INFO] agent: requesting shutdown
[INFO] client: shutting down
[INFO] nomad: shutting down server
[WARN] serf: Shutdown without a Leave
[INFO] agent: shutdown complete
```
By gracefully leaving, Nomad clients update their status to prevent
further tasks from being scheduled and to start migrating any tasks that are
already assigned. Nomad servers notify their peers that they intend to leave.
When a server leaves, replication to that server stops. If a server fails,
replication continues to be attempted until the node recovers. Nomad will
automatically try to reconnect to _failed_ nodes, allowing it to recover from
certain network conditions, while _left_ nodes are no longer contacted.
If an agent is operating as a server, a graceful leave is important to avoid
causing a potential availability outage affecting the
[consensus protocol](/docs/internals/consensus.html). If a server does
forcefully exit and will not be returning to service, the
[`server-force-leave` command](/docs/commands/server-force-leave.html) should
be used to force the server from a _failed_ to a _left_ state.
## Next Steps
The development Nomad agent is up and running. Let's try to [run a job](jobs.html)!

View file

@@ -1,18 +0,0 @@
---
layout: "intro"
page_title: "Running Nomad"
sidebar_current: "getting-started-running"
description: |-
Learn how to deploy Nomad into production, how to initialize it, configure it, etc.
---
# Running Nomad
This section will detail how to run Nomad on client machines. It should include
a sample upstart script and stuff
## Next
TODO: Fill in text here.
Next, we have a [short tutorial](/intro/getting-started/apis.html) on using
Nomad's HTTP APIs.

View file

@@ -58,8 +58,12 @@
<a href="/intro/getting-started/running.html">Running Nomad</a>
</li>
<li<%= sidebar_current("getting-started-apis") %>>
<a href="/intro/getting-started/apis.html">HTTP API</a>
<li<%= sidebar_current("getting-started-jobs") %>>
<a href="/intro/getting-started/jobs.html">Jobs</a>
</li>
<li<%= sidebar_current("getting-started-cluster") %>>
<a href="/intro/getting-started/cluster.html">Clustering</a>
</li>
<li<%= sidebar_current("getting-started-nextsteps") %>>