diff --git a/audit/format.go b/audit/format.go index cbc7f8a06..f0f1b6873 100644 --- a/audit/format.go +++ b/audit/format.go @@ -192,7 +192,24 @@ func (f *AuditFormatter) FormatResponse(ctx context.Context, w io.Writer, config connState = in.Request.Connection.ConnState } - if !config.Raw { + elideListResponseData := config.ElideListResponses && req.Operation == logical.ListOperation + + var respData map[string]interface{} + if config.Raw { + // In the non-raw case, elision of list response data occurs inside HashResponse, to avoid redundant deep + // copies and hashing of data only to elide it later. In the raw case, we need to do it here. + if elideListResponseData && resp.Data != nil { + // Copy the data map before making changes, but we only need to go one level deep in this case + respData = make(map[string]interface{}, len(resp.Data)) + for k, v := range resp.Data { + respData[k] = v + } + + doElideListResponseData(respData) + } else { + respData = resp.Data + } + } else { auth, err = HashAuth(salt, auth, config.HMACAccessor) if err != nil { return err @@ -203,10 +220,12 @@ func (f *AuditFormatter) FormatResponse(ctx context.Context, w io.Writer, config return err } - resp, err = HashResponse(salt, resp, config.HMACAccessor, in.NonHMACRespDataKeys) + resp, err = HashResponse(salt, resp, config.HMACAccessor, in.NonHMACRespDataKeys, elideListResponseData) if err != nil { return err } + + respData = resp.Data } var errString string @@ -315,7 +334,7 @@ func (f *AuditFormatter) FormatResponse(ctx context.Context, w io.Writer, config MountAccessor: req.MountAccessor, Auth: respAuth, Secret: respSecret, - Data: resp.Data, + Data: respData, Warnings: resp.Warnings, Redirect: resp.Redirect, WrapInfo: respWrapInfo, @@ -495,7 +514,7 @@ func parseVaultTokenFromJWT(token string) *string { return &claims.ID } -// Create a formatter not backed by a persistent salt. +// NewTemporaryFormatter creates a formatter not backed by a persistent salt func NewTemporaryFormatter(format, prefix string) *AuditFormatter { temporarySalt := func(ctx context.Context) (*salt.Salt, error) { return salt.NewNonpersistentSalt(), nil @@ -516,3 +535,22 @@ func NewTemporaryFormatter(format, prefix string) *AuditFormatter { } return ret } + +// doElideListResponseData performs the actual elision of list operation response data, once surrounding code has +// determined it should apply to a particular request. The data map that is passed in must be a copy that is safe to +// modify in place, but need not be a full recursive deep copy, as only top-level keys are changed. +// +// See the documentation of the controlling option in FormatterConfig for more information on the purpose. +func doElideListResponseData(data map[string]interface{}) { + for k, v := range data { + if k == "keys" { + if vSlice, ok := v.([]string); ok { + data[k] = len(vSlice) + } + } else if k == "key_info" { + if vMap, ok := v.(map[string]interface{}); ok { + data[k] = len(vMap) + } + } + } +} diff --git a/audit/format_test.go b/audit/format_test.go index 4f3cc5cbb..bc0610c37 100644 --- a/audit/format_test.go +++ b/audit/format_test.go @@ -3,45 +3,74 @@ package audit import ( "context" "io" - "io/ioutil" "testing" + "github.com/hashicorp/vault/helper/namespace" "github.com/hashicorp/vault/sdk/helper/salt" "github.com/hashicorp/vault/sdk/logical" + "github.com/mitchellh/copystructure" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -type noopFormatWriter struct { - salt *salt.Salt - SaltFunc func() (*salt.Salt, error) +type testingFormatWriter struct { + salt *salt.Salt + lastRequest *AuditRequestEntry + lastResponse *AuditResponseEntry } -func (n *noopFormatWriter) WriteRequest(_ io.Writer, _ *AuditRequestEntry) error { +func (fw *testingFormatWriter) WriteRequest(_ io.Writer, entry *AuditRequestEntry) error { + fw.lastRequest = entry return nil } -func (n *noopFormatWriter) WriteResponse(_ io.Writer, _ *AuditResponseEntry) error { +func (fw *testingFormatWriter) WriteResponse(_ io.Writer, entry *AuditResponseEntry) error { + fw.lastResponse = entry return nil } -func (n *noopFormatWriter) Salt(ctx context.Context) (*salt.Salt, error) { - if n.salt != nil { - return n.salt, nil +func (fw *testingFormatWriter) Salt(ctx context.Context) (*salt.Salt, error) { + if fw.salt != nil { + return fw.salt, nil } var err error - n.salt, err = salt.NewSalt(ctx, nil, nil) + fw.salt, err = salt.NewSalt(ctx, nil, nil) if err != nil { return nil, err } - return n.salt, nil + return fw.salt, nil +} + +// hashExpectedValueForComparison replicates enough of the audit HMAC process on a piece of expected data in a test, +// so that we can use assert.Equal to compare the expected and output values. +func (fw *testingFormatWriter) hashExpectedValueForComparison(input map[string]interface{}) map[string]interface{} { + // Copy input before modifying, since we may re-use the same data in another test + copied, err := copystructure.Copy(input) + if err != nil { + panic(err) + } + copiedAsMap := copied.(map[string]interface{}) + + salter, err := fw.Salt(context.Background()) + if err != nil { + panic(err) + } + + err = hashMap(salter.GetIdentifiedHMAC, copiedAsMap, nil) + if err != nil { + panic(err) + } + + return copiedAsMap } func TestFormatRequestErrors(t *testing.T) { config := FormatterConfig{} formatter := AuditFormatter{ - AuditFormatWriter: &noopFormatWriter{}, + AuditFormatWriter: &testingFormatWriter{}, } - if err := formatter.FormatRequest(context.Background(), ioutil.Discard, config, &logical.LogInput{}); err == nil { + if err := formatter.FormatRequest(context.Background(), io.Discard, config, &logical.LogInput{}); err == nil { t.Fatal("expected error due to nil request") } @@ -56,10 +85,10 @@ func TestFormatRequestErrors(t *testing.T) { func TestFormatResponseErrors(t *testing.T) { config := FormatterConfig{} formatter := AuditFormatter{ - AuditFormatWriter: &noopFormatWriter{}, + AuditFormatWriter: &testingFormatWriter{}, } - if err := formatter.FormatResponse(context.Background(), ioutil.Discard, config, &logical.LogInput{}); err == nil { + if err := formatter.FormatResponse(context.Background(), io.Discard, config, &logical.LogInput{}); err == nil { t.Fatal("expected error due to nil request") } @@ -70,3 +99,123 @@ func TestFormatResponseErrors(t *testing.T) { t.Fatal("expected error due to nil writer") } } + +func TestElideListResponses(t *testing.T) { + tfw := testingFormatWriter{} + formatter := AuditFormatter{&tfw} + ctx := namespace.RootContext(context.Background()) + + type test struct { + name string + inputData map[string]interface{} + expectedData map[string]interface{} + } + + tests := []test{ + { + "nil data", + nil, + nil, + }, + { + "Normal list (keys only)", + map[string]interface{}{ + "keys": []string{"foo", "bar", "baz"}, + }, + map[string]interface{}{ + "keys": 3, + }, + }, + { + "Enhanced list (has key_info)", + map[string]interface{}{ + "keys": []string{"foo", "bar", "baz", "quux"}, + "key_info": map[string]interface{}{ + "foo": "alpha", + "bar": "beta", + "baz": "gamma", + "quux": "delta", + }, + }, + map[string]interface{}{ + "keys": 4, + "key_info": 4, + }, + }, + { + "Unconventional other values in a list response are not touched", + map[string]interface{}{ + "keys": []string{"foo", "bar"}, + "something_else": "baz", + }, + map[string]interface{}{ + "keys": 2, + "something_else": "baz", + }, + }, + { + "Conventional values in a list response are not elided if their data types are unconventional", + map[string]interface{}{ + "keys": map[string]interface{}{ + "You wouldn't expect keys to be a map": nil, + }, + "key_info": []string{ + "You wouldn't expect key_info to be a slice", + }, + }, + map[string]interface{}{ + "keys": map[string]interface{}{ + "You wouldn't expect keys to be a map": nil, + }, + "key_info": []string{ + "You wouldn't expect key_info to be a slice", + }, + }, + }, + } + oneInterestingTestCase := tests[2] + + formatResponse := func( + t *testing.T, + config FormatterConfig, + operation logical.Operation, + inputData map[string]interface{}, + ) { + err := formatter.FormatResponse(ctx, io.Discard, config, &logical.LogInput{ + Request: &logical.Request{Operation: operation}, + Response: &logical.Response{Data: inputData}, + }) + require.Nil(t, err) + } + + t.Run("Default case", func(t *testing.T) { + config := FormatterConfig{ElideListResponses: true} + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + formatResponse(t, config, logical.ListOperation, tc.inputData) + assert.Equal(t, tfw.hashExpectedValueForComparison(tc.expectedData), tfw.lastResponse.Response.Data) + }) + } + }) + + t.Run("When Operation is not list, eliding does not happen", func(t *testing.T) { + config := FormatterConfig{ElideListResponses: true} + tc := oneInterestingTestCase + formatResponse(t, config, logical.ReadOperation, tc.inputData) + assert.Equal(t, tfw.hashExpectedValueForComparison(tc.inputData), tfw.lastResponse.Response.Data) + }) + + t.Run("When ElideListResponses is false, eliding does not happen", func(t *testing.T) { + config := FormatterConfig{ElideListResponses: false} + tc := oneInterestingTestCase + formatResponse(t, config, logical.ListOperation, tc.inputData) + assert.Equal(t, tfw.hashExpectedValueForComparison(tc.inputData), tfw.lastResponse.Response.Data) + }) + + t.Run("When Raw is true, eliding still happens", func(t *testing.T) { + config := FormatterConfig{ElideListResponses: true, Raw: true} + tc := oneInterestingTestCase + formatResponse(t, config, logical.ListOperation, tc.inputData) + assert.Equal(t, tc.expectedData, tfw.lastResponse.Response.Data) + }) +} diff --git a/audit/formatter.go b/audit/formatter.go index c27035768..df82057e3 100644 --- a/audit/formatter.go +++ b/audit/formatter.go @@ -7,7 +7,7 @@ import ( "github.com/hashicorp/vault/sdk/logical" ) -// Formatter is an interface that is responsible for formating a +// Formatter is an interface that is responsible for formatting a // request/response into some format. Formatters write their output // to an io.Writer. // @@ -21,6 +21,28 @@ type FormatterConfig struct { Raw bool HMACAccessor bool + // Vault lacks pagination in its APIs. As a result, certain list operations can return **very** large responses. + // The user's chosen audit sinks may experience difficulty consuming audit records that swell to tens of megabytes + // of JSON. The responses of list operations are typically not very interesting, as they are mostly lists of keys, + // or, even when they include a "key_info" field, are not returning confidential information. They become even less + // interesting once HMAC-ed by the audit system. + // + // Some example Vault "list" operations that are prone to becoming very large in an active Vault installation are: + // auth/token/accessors/ + // identity/entity/id/ + // identity/entity-alias/id/ + // pki/certs/ + // + // This option exists to provide such users with the option to have response data elided from audit logs, only when + // the operation type is "list". For added safety, the elision only applies to the "keys" and "key_info" fields + // within the response data - these are conventionally the only fields present in a list response - see + // logical.ListResponse, and logical.ListResponseWithInfo. However, other fields are technically possible if a + // plugin author writes unusual code, and these will be preserved in the audit log even with this option enabled. + // The elision replaces the values of the "keys" and "key_info" fields with an integer count of the number of + // entries. This allows even the elided audit logs to still be useful for answering questions like + // "Was any data returned?" or "How many records were listed?". + ElideListResponses bool + // This should only ever be used in a testing context OmitTime bool } diff --git a/audit/hashstructure.go b/audit/hashstructure.go index 11c6214ff..9040f8c2b 100644 --- a/audit/hashstructure.go +++ b/audit/hashstructure.go @@ -98,7 +98,13 @@ func hashMap(fn func(string) string, data map[string]interface{}, nonHMACDataKey } // HashResponse returns a hashed copy of the logical.Request input. -func HashResponse(salter *salt.Salt, in *logical.Response, HMACAccessor bool, nonHMACDataKeys []string) (*logical.Response, error) { +func HashResponse( + salter *salt.Salt, + in *logical.Response, + HMACAccessor bool, + nonHMACDataKeys []string, + elideListResponseData bool, +) (*logical.Response, error) { if in == nil { return nil, nil } @@ -129,12 +135,20 @@ func HashResponse(salter *salt.Salt, in *logical.Response, HMACAccessor bool, no mapCopy[logical.HTTPRawBody] = string(b) } + // Processing list response data elision takes place at this point in the code for performance reasons: + // - take advantage of the deep copy of resp.Data that was going to be done anyway for hashing + // - but elide data before potentially spending time hashing it + if elideListResponseData { + doElideListResponseData(mapCopy) + } + err = hashMap(fn, mapCopy, nonHMACDataKeys) if err != nil { return nil, err } resp.Data = mapCopy } + if resp.WrapInfo != nil { var err error resp.WrapInfo, err = HashWrapInfo(salter, resp.WrapInfo, HMACAccessor) diff --git a/audit/hashstructure_test.go b/audit/hashstructure_test.go index 7f6c5a869..3b080a526 100644 --- a/audit/hashstructure_test.go +++ b/audit/hashstructure_test.go @@ -293,7 +293,7 @@ func TestHashResponse(t *testing.T) { } for _, tc := range cases { input := fmt.Sprintf("%#v", tc.Input) - out, err := HashResponse(localSalt, tc.Input, tc.HMACAccessor, tc.NonHMACDataKeys) + out, err := HashResponse(localSalt, tc.Input, tc.HMACAccessor, tc.NonHMACDataKeys, false) if err != nil { t.Fatalf("err: %s\n\n%s", err, input) } diff --git a/builtin/audit/file/backend.go b/builtin/audit/file/backend.go index 9e7d7c36c..43dd26c25 100644 --- a/builtin/audit/file/backend.go +++ b/builtin/audit/file/backend.go @@ -71,6 +71,15 @@ func Factory(ctx context.Context, conf *audit.BackendConfig) (audit.Backend, err logRaw = b } + elideListResponses := false + if elideListResponsesRaw, ok := conf.Config["elide_list_responses"]; ok { + value, err := strconv.ParseBool(elideListResponsesRaw) + if err != nil { + return nil, err + } + elideListResponses = value + } + // Check if mode is provided mode := os.FileMode(0o600) if modeRaw, ok := conf.Config["mode"]; ok { @@ -102,8 +111,9 @@ func Factory(ctx context.Context, conf *audit.BackendConfig) (audit.Backend, err saltView: conf.SaltView, salt: new(atomic.Value), formatConfig: audit.FormatterConfig{ - Raw: logRaw, - HMACAccessor: hmacAccessor, + Raw: logRaw, + HMACAccessor: hmacAccessor, + ElideListResponses: elideListResponses, }, } diff --git a/builtin/audit/socket/backend.go b/builtin/audit/socket/backend.go index 7a000b2c7..be6f583fc 100644 --- a/builtin/audit/socket/backend.go +++ b/builtin/audit/socket/backend.go @@ -9,7 +9,7 @@ import ( "sync" "time" - multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-secure-stdlib/parseutil" "github.com/hashicorp/vault/audit" "github.com/hashicorp/vault/sdk/helper/salt" @@ -73,12 +73,22 @@ func Factory(ctx context.Context, conf *audit.BackendConfig) (audit.Backend, err logRaw = b } + elideListResponses := false + if elideListResponsesRaw, ok := conf.Config["elide_list_responses"]; ok { + value, err := strconv.ParseBool(elideListResponsesRaw) + if err != nil { + return nil, err + } + elideListResponses = value + } + b := &Backend{ saltConfig: conf.SaltConfig, saltView: conf.SaltView, formatConfig: audit.FormatterConfig{ - Raw: logRaw, - HMACAccessor: hmacAccessor, + Raw: logRaw, + HMACAccessor: hmacAccessor, + ElideListResponses: elideListResponses, }, writeDuration: writeDuration, diff --git a/builtin/audit/syslog/backend.go b/builtin/audit/syslog/backend.go index 9c7b775b8..1d3bce893 100644 --- a/builtin/audit/syslog/backend.go +++ b/builtin/audit/syslog/backend.go @@ -63,6 +63,15 @@ func Factory(ctx context.Context, conf *audit.BackendConfig) (audit.Backend, err logRaw = b } + elideListResponses := false + if elideListResponsesRaw, ok := conf.Config["elide_list_responses"]; ok { + value, err := strconv.ParseBool(elideListResponsesRaw) + if err != nil { + return nil, err + } + elideListResponses = value + } + // Get the logger logger, err := gsyslog.NewLogger(gsyslog.LOG_INFO, facility, tag) if err != nil { @@ -74,8 +83,9 @@ func Factory(ctx context.Context, conf *audit.BackendConfig) (audit.Backend, err saltConfig: conf.SaltConfig, saltView: conf.SaltView, formatConfig: audit.FormatterConfig{ - Raw: logRaw, - HMACAccessor: hmacAccessor, + Raw: logRaw, + HMACAccessor: hmacAccessor, + ElideListResponses: elideListResponses, }, } diff --git a/changelog/18128.txt b/changelog/18128.txt new file mode 100644 index 000000000..32dc53766 --- /dev/null +++ b/changelog/18128.txt @@ -0,0 +1,3 @@ +```release-note:improvement +audit: Add `elide_list_responses` option, providing a countermeasure for a common source of oversized audit log entries +``` diff --git a/website/content/docs/audit/file.mdx b/website/content/docs/audit/file.mdx index 6c8af50eb..d282df0dd 100644 --- a/website/content/docs/audit/file.mdx +++ b/website/content/docs/audit/file.mdx @@ -41,9 +41,10 @@ $ vault audit enable file file_path=stdout Note the difference between `audit enable` command options and the `file` backend configuration options. Use `vault audit enable -help` to see the command options. -Following are the configuration options available for the backend. -## Configuration +The `file` audit device supports the common configuration options documented on +the [main Audit Devices page](/docs/audit#common-configuration-options), and +these device-specific options: - `file_path` `(string: )` - The path to where the audit log will be written. If a file already exists at the given path, the audit backend will @@ -53,21 +54,11 @@ Following are the configuration options available for the backend. - `discard` writes output (useful in testing scenarios) -- `log_raw` `(bool: false)` - If enabled, logs the security sensitive - information without hashing, in the raw format. - -- `hmac_accessor` `(bool: true)` - If enabled, enables the hashing of token - accessor. - - `mode` `(string: "0600")` - A string containing an octal number representing the bit pattern for the file mode, similar to `chmod`. Set to `"0000"` to prevent Vault from modifying the file mode. -- `format` `(string: "json")` - Allows selecting the output format. Valid values - are `"json"` and `"jsonx"`, which formats the normal log entries as XML. -- `prefix` `(string: "")` - A customizable string prefix to write before the - actual log line. ## Log File Rotation diff --git a/website/content/docs/audit/index.mdx b/website/content/docs/audit/index.mdx index bd63d7617..c8a3bffa6 100644 --- a/website/content/docs/audit/index.mdx +++ b/website/content/docs/audit/index.mdx @@ -105,3 +105,69 @@ Refer to [Blocked Audit Devices](https://learn.hashicorp.com/tutorials/vault/blo Audit devices also have a full HTTP API. Please see the [Audit device API docs](/api-docs/system/audit) for more details. + +## Common configuration options + +- `elide_list_responses` `(bool: false)` - See [Eliding list response + bodies](docs/audit#eliding-list-response-bodies) below. + +- `format` `(string: "json")` - Allows selecting the output format. Valid values + are `"json"` and `"jsonx"`, which formats the normal log entries as XML. + +- `hmac_accessor` `(bool: true)` - If enabled, enables the hashing of token + accessor. + +- `log_raw` `(bool: false)` - If enabled, logs the security sensitive + information without hashing, in the raw format. + +- `prefix` `(string: "")` - A customizable string prefix to write before the + actual log line. + +## Eliding list response bodies + +Some Vault responses can be very large. Primarily, this affects list operations - +as Vault lacks pagination in its APIs, listing a very large collection can result +in a response that is tens of megabytes long. Some audit backends are unable to +process individual audit records of larger sizes. + +The contents of the response for a list operation is often not very interesting; +most contain only a "keys" field, containing a list of IDs. Select API endpoints +additionally return a "key_info" field, a map from ID to some additional +information about the list entry - `identity/entity/id/` is an example of this. +Even in this case, the response to a list operation is usually less-confidential +or public information, for which having the full response in the audit logs is of +lesser importance. + +The `elide_list_responses` audit option provides the flexibility to not write the +full list response data from the audit log, to mitigate the creation of very long +individual audit records. + +When enabled, it affects only audit records of `type=response` and +`request.operation=list`. The values of `response.data.keys` and +`response.data.key_info` will be replaced with a simple integer, recording how +many entries were contained in the list (`keys`) or map (`key_info`) - therefore +even with this feature enabled, it is still possible to see how many items were +returned by a list operation. + +This extra processing only affects the response data fields `keys` and `key_info`, +and only when they have the expected data types - in the event a list response +contains data outside of the usual conventions that apply to Vault list responses, +it will be left as is by this feature. + +Here is an example of an audit record that has been processed by this feature +(formatted with extra whitespace, and with fields not relevant to the example +omitted): +```json +{ + "type": "response", + "request": { + "operation": "list" + }, + "response": { + "data": { + "key_info": 4, + "keys": 4 + } + } +} +``` diff --git a/website/content/docs/audit/socket.mdx b/website/content/docs/audit/socket.mdx index 68fd3006a..5a03cd3ce 100644 --- a/website/content/docs/audit/socket.mdx +++ b/website/content/docs/audit/socket.mdx @@ -28,6 +28,10 @@ $ vault audit enable socket address=127.0.0.1:9090 socket_type=tcp ## Configuration +The `socket` audit device supports the common configuration options documented on +the [main Audit Devices page](/docs/audit#common-configuration-options), and +these device-specific options: + - `address` `(string: "")` - The socket server address to use. Example `127.0.0.1:9090` or `/tmp/audit.sock`. @@ -35,18 +39,3 @@ $ vault audit enable socket address=127.0.0.1:9090 socket_type=tcp with net.Dial is acceptable. It's important to note if TCP is used and the destination socket becomes unavailable Vault may become unresponsive per [Blocked Audit Devices](docs/audit/#blocked-audit-devices). - -- `log_raw` `(bool: false)` - If enabled, logs the security sensitive - information without hashing, in the raw format. - -- `hmac_accessor` `(bool: true)` - If enabled, enables the hashing of token - accessor. - -- `mode` `(string: "0600")` - A string containing an octal number representing - the bit pattern for the file mode, similar to `chmod`. - -- `format` `(string: "json")` - Allows selecting the output format. Valid values - are `"json"` and `"jsonx"`, which formats the normal log entries as XML. - -- `prefix` `(string: "")` - A customizable string prefix to write before the - actual log line. diff --git a/website/content/docs/audit/syslog.mdx b/website/content/docs/audit/syslog.mdx index 2238a869a..12f715791 100644 --- a/website/content/docs/audit/syslog.mdx +++ b/website/content/docs/audit/syslog.mdx @@ -36,21 +36,10 @@ $ vault audit enable syslog tag="vault" facility="AUTH" ## Configuration +The `syslog` audit device supports the common configuration options documented on +the [main Audit Devices page](/docs/audit#common-configuration-options), and +these device-specific options: + - `facility` `(string: "AUTH")` - The syslog facility to use. - `tag` `(string: "vault")` - The syslog tag to use. - -- `log_raw` `(bool: false)` - If enabled, logs the security sensitive - information without hashing, in the raw format. - -- `hmac_accessor` `(bool: true)` - If enabled, enables the hashing of token - accessor. - -- `mode` `(string: "0600")` - A string containing an octal number representing - the bit pattern for the file mode, similar to `chmod`. - -- `format` `(string: "json")` - Allows selecting the output format. Valid values - are `"json"` and `"jsonx"`, which formats the normal log entries as XML. - -- `prefix` `(string: "")` - A customizable string prefix to write before the - actual log line.