From 9d170164e4d77d773ef865fe2467c062f078c20c Mon Sep 17 00:00:00 2001
From: Jack Pearkes <jackpearkes@gmail.com>
Date: Thu, 6 Dec 2018 13:51:49 -0800
Subject: [PATCH] Documentation and changes for `verify_server_hostname`
 (#5069)

* verify_server_hostname implies verify_outgoing

* mention CVE in the docs.
---
 agent/config/builder.go                       | 16 +++++++-
 agent/config/runtime_test.go                  | 18 +++++++++
 tlsutil/config.go                             |  4 --
 tlsutil/config_test.go                        |  4 ++
 website/source/docs/agent/options.html.md     | 40 +++++++++++++------
 .../source/docs/internals/security.html.md    | 26 ++++++++++--
 6 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/agent/config/builder.go b/agent/config/builder.go
index 01040fb76..8c4beff79 100644
--- a/agent/config/builder.go
+++ b/agent/config/builder.go
@@ -613,6 +613,18 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
 	enableRemoteScriptChecks := b.boolVal(c.EnableScriptChecks)
 	enableLocalScriptChecks := b.boolValWithDefault(c.EnableLocalScriptChecks, enableRemoteScriptChecks)
 
+	// VerifyServerHostname implies VerifyOutgoing
+	verifyServerName := b.boolVal(c.VerifyServerHostname)
+	verifyOutgoing := b.boolVal(c.VerifyOutgoing)
+	if verifyServerName {
+		// Setting only verify_server_hostname is documented to imply
+		// verify_outgoing. If it doesn't then we risk sending communication over TCP
+		// when we documented it as forcing TLS for RPCs. Enforce this here rather
+		// than in several different places through the code that need to reason
+		// about it. (See CVE-2018-19653)
+		verifyOutgoing = true
+	}
+
 	// ----------------------------------------------------------------
 	// build runtime config
 	//
@@ -840,8 +852,8 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
 		VerifyIncoming:                          b.boolVal(c.VerifyIncoming),
 		VerifyIncomingHTTPS:                     b.boolVal(c.VerifyIncomingHTTPS),
 		VerifyIncomingRPC:                       b.boolVal(c.VerifyIncomingRPC),
-		VerifyOutgoing:                          b.boolVal(c.VerifyOutgoing),
-		VerifyServerHostname:                    b.boolVal(c.VerifyServerHostname),
+		VerifyOutgoing:                          verifyOutgoing,
+		VerifyServerHostname:                    verifyServerName,
 		Watches:                                 c.Watches,
 	}
 
diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go
index f318448c7..26408d67e 100644
--- a/agent/config/runtime_test.go
+++ b/agent/config/runtime_test.go
@@ -2619,6 +2619,24 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
 				}
 			},
 		},
+		{
+			// This tests checks that VerifyServerHostname implies VerifyOutgoing
+			desc: "verify_server_hostname implies verify_outgoing",
+			args: []string{
+				`-data-dir=` + dataDir,
+			},
+			json: []string{`{
+			  "verify_server_hostname": true
+			}`},
+			hcl: []string{`
+			  verify_server_hostname = true
+			`},
+			patch: func(rt *RuntimeConfig) {
+				rt.DataDir = dataDir
+				rt.VerifyServerHostname = true
+				rt.VerifyOutgoing = true
+			},
+		},
 	}
 
 	testConfig(t, tests, dataDir)
diff --git a/tlsutil/config.go b/tlsutil/config.go
index 62ad91038..29748ddc3 100644
--- a/tlsutil/config.go
+++ b/tlsutil/config.go
@@ -125,10 +125,6 @@ func (c *Config) KeyPair() (*tls.Certificate, error) {
 // requests. It will return a nil config if this configuration should
 // not use TLS for outgoing connections.
 func (c *Config) OutgoingTLSConfig() (*tls.Config, error) {
-	// If VerifyServerHostname is true, that implies VerifyOutgoing
-	if c.VerifyServerHostname {
-		c.VerifyOutgoing = true
-	}
 	if !c.UseTLS && !c.VerifyOutgoing {
 		return nil, nil
 	}
diff --git a/tlsutil/config_test.go b/tlsutil/config_test.go
index 11e1a131f..b646aec2a 100644
--- a/tlsutil/config_test.go
+++ b/tlsutil/config_test.go
@@ -153,6 +153,7 @@ func TestConfig_OutgoingTLS_ServerName(t *testing.T) {
 
 func TestConfig_OutgoingTLS_VerifyHostname(t *testing.T) {
 	conf := &Config{
+		VerifyOutgoing:       true,
 		VerifyServerHostname: true,
 		CAFile:               "../test/ca/root.cer",
 	}
@@ -263,6 +264,7 @@ func TestConfig_outgoingWrapper_OK(t *testing.T) {
 		CertFile:             "../test/hostname/Alice.crt",
 		KeyFile:              "../test/hostname/Alice.key",
 		VerifyServerHostname: true,
+		VerifyOutgoing:       true,
 		Domain:               "consul",
 	}
 
@@ -297,6 +299,7 @@ func TestConfig_outgoingWrapper_BadDC(t *testing.T) {
 		CertFile:             "../test/hostname/Alice.crt",
 		KeyFile:              "../test/hostname/Alice.key",
 		VerifyServerHostname: true,
+		VerifyOutgoing:       true,
 		Domain:               "consul",
 	}
 
@@ -329,6 +332,7 @@ func TestConfig_outgoingWrapper_BadCert(t *testing.T) {
 		CertFile:             "../test/key/ourdomain.cer",
 		KeyFile:              "../test/key/ourdomain.key",
 		VerifyServerHostname: true,
+		VerifyOutgoing:       true,
 		Domain:               "consul",
 	}
 
diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md
index 641412713..2d1907ff6 100644
--- a/website/source/docs/agent/options.html.md
+++ b/website/source/docs/agent/options.html.md
@@ -203,7 +203,7 @@ will exit with an error at startup.
   whether [health checks that execute scripts](/docs/agent/checks.html) are
   enabled on this agent, and defaults to `false` so operators must opt-in to
   allowing these. This was added in Consul 0.9.0.
-  
+
     ~> **Security Warning:** Enabling script checks in some configurations may
   introduce a remote execution vulnerability which is known to be targeted by
   malware. We strongly recommend `-enable-local-script-checks` instead. See [this
@@ -596,9 +596,9 @@ default will automatically work with some tooling.
         The ACL token used to authorize secondary datacenters with the primary datacenter for replication
         operations. This token is required for servers outside the [`primary_datacenter`](#primary_datacenter) when
         ACLs are enabled. This token may be provided later using the [agent token API](/api/agent.html#update-acl-tokens)
-        on each server. This token must have at least "read" permissions on ACL data but if ACL 
-        token replication is enabled then it must have "write" permissions. This also enables 
-        Connect replication in Consul Enterprise, for which the token will require both operator 
+        on each server. This token must have at least "read" permissions on ACL data but if ACL
+        token replication is enabled then it must have "write" permissions. This also enables
+        Connect replication in Consul Enterprise, for which the token will require both operator
         "write" and intention "read" permissions for replicating CA and Intention data.
 
 * <a name="acl_datacenter"></a><a href="#acl_datacenter">`acl_datacenter`</a> - **This field is
@@ -1595,19 +1595,35 @@ default will automatically work with some tooling.
   default, HTTPS is disabled.
 
 * <a name="verify_outgoing"></a><a href="#verify_outgoing">`verify_outgoing`</a> - If set to
-  true, Consul requires that all outgoing connections
+  true, Consul requires that all outgoing connections from this agent
   make use of TLS and that the server provides a certificate that is signed by
   a Certificate Authority from the [`ca_file`](#ca_file) or [`ca_path`](#ca_path). By default,
   this is false, and Consul will not make use of TLS for outgoing connections. This applies to clients
   and servers as both will make outgoing connections.
 
-* <a name="verify_server_hostname"></a><a href="#verify_server_hostname">`verify_server_hostname`</a> - If set to
-  true, Consul verifies for all outgoing connections that the TLS certificate presented by the servers
-  matches "server.&lt;datacenter&gt;.&lt;domain&gt;" hostname. This implies `verify_outgoing`.
-  By default, this is false, and Consul does not verify the hostname of the certificate, only
-  that it is signed by a trusted CA. This setting is important to prevent a compromised
-  client from being restarted as a server, and thus being able to perform a MITM attack
-  or to be added as a Raft peer. This is new in 0.5.1.
+    ~> **Security Note:** Note that servers that specify `verify_outgoing =
+    true` will always talk to other servers over TLS, but they still _accept_
+    non-TLS connections to allow for a transition of all clients to TLS.
+    Currently the only way to enforce that no client can communicate with a
+    server unencrypted is to also enable `verify_incoming` which requires client
+    certificates too.
+
+* <a name="verify_server_hostname"></a><a
+  href="#verify_server_hostname">`verify_server_hostname`</a> - If set to true,
+  Consul verifies for all outgoing TLS connections that the TLS certificate
+  presented by the servers matches "server.&lt;datacenter&gt;.&lt;domain&gt;"
+  hostname. By default, this is false, and Consul does not verify the hostname
+  of the certificate, only that it is signed by a trusted CA. This setting is
+  _critical_ to prevent a compromised client from being restarted as a server
+  and having all cluster state _including all ACL tokens and Connect CA root keys_
+  replicated to it. This is new in 0.5.1.
+
+    ~> **Security Note:** From versions 0.5.1 to 1.4.0, due to a bug, setting
+  this flag alone _does not_ imply `verify_outgoing` and leaves client to server
+  and server to server RPCs unencrypted despite the documentation stating otherwise. See
+  [CVE-2018-19653](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-19653)
+  for more details. For those versions you **must also set `verify_outgoing =
+  true`** to ensure encrypted RPC connections.
 
 * <a name="watches"></a><a href="#watches">`watches`</a> - Watches is a list of watch
   specifications which allow an external process to be automatically invoked when a
diff --git a/website/source/docs/internals/security.html.md b/website/source/docs/internals/security.html.md
index 095923c55..f5a54d22c 100644
--- a/website/source/docs/internals/security.html.md
+++ b/website/source/docs/internals/security.html.md
@@ -47,10 +47,21 @@ items outside of Consul's threat model as noted in sections below.
 
 * **Encryption enabled.** TCP and UDP encryption must be enabled and configured
   to prevent plaintext communication between Consul agents. At a minimum,
-  verify_outgoing should be enabled to verify server authenticity with each
-  server having a unique TLS certificate. verify_incoming provides additional
-  agent verification, but shouldn't directly affect the threat model since
-  requests must also contain a valid ACL token.
+  `verify_outgoing` should be enabled to verify server authenticity with each
+  server having a unique TLS certificate. `verify_server_hostname` is also
+  required to prevent a compromised agent restarting as a server and being given
+  access to all secrets.
+
+    `verify_incoming` provides additional agent verification via mutual
+  authentication, but isn't _strictly_ necessary to enforce the threat model
+  since requests must also contain a valid ACL token. The subtlety is that
+  currently `verify_incoming = false` will allow servers to still accept
+  un-encrypted connections from clients (to allow for gradual TLS rollout). That
+  alone doesn't violate the threat model, but any misconfigured client that
+  chooses not to use TLS will violate the model. We recommend setting this to
+  true. If it is left as false care must be taken to ensure all consul clients
+  use `verify_outgoing = true` as noted above, but also all external API/UI
+  access must be via HTTPS with HTTP listeners disabled.
 
 ### Known Insecure Configurations
 
@@ -74,6 +85,13 @@ non-default options that potentially present additional security risks.
   ACLs restrict access, for example any management token grants access to
   execute arbitrary code on the cluster.
 
+* **Verify Server Hostname Used Alone.** From version 0.5.1 to 1.4.0 we documented that
+  `verify_server_hostname` being `true` _implied_ `verify_outgoing` however due
+  to a bug this was not the case so setting _only_ `verify_server_hostname`
+  results in plaintext communciation between client and server. See
+  [CVE-2018-19653](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-19653)
+  for more details. This is fixed in 1.4.1.
+
 ## Threat Model
 
 The following are parts of the Consul threat model: