Make generic utility for compression and decompression
This commit is contained in:
parent
55ecad83bc
commit
29989fa4c1
|
@ -2,45 +2,111 @@ package compressutil
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"compress/lzw"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
)
|
||||
|
||||
const (
|
||||
CompressionCanaryJSON byte = 'Z'
|
||||
// A byte value used as a canary prefix for the compressed information
|
||||
// which is used to distinguish if a JSON input is compressed or not.
|
||||
// The value of this constant should not be the first character of any
|
||||
// valid JSON string.
|
||||
CompressionCanary byte = 'Z'
|
||||
|
||||
CompressionTypeLzw = "lzw"
|
||||
|
||||
CompressionTypeGzip = "gzip"
|
||||
)
|
||||
|
||||
func Compress(data []byte, canary byte) ([]byte, error) {
|
||||
// Create a buffer and place the canary as its first byte
|
||||
// CompressionConfig is used to select a compression type to be performed by
|
||||
// Compress and Decompress utilities.
|
||||
// Supported types are:
|
||||
// * CompressionTypeLzw
|
||||
// * CompressionTypeGzip
|
||||
//
|
||||
// When using CompressionTypeGzip, the compression levels can also be chosen:
|
||||
// * gzip.DefaultCompression
|
||||
// * gzip.BestSpeed
|
||||
// * gzip.BestCompression
|
||||
type CompressionConfig struct {
|
||||
// Type of the compression algorithm to be used
|
||||
Type string
|
||||
|
||||
// When using Gzip format, the compression level to employ
|
||||
GzipCompressionLevel int
|
||||
}
|
||||
|
||||
// Compress places the canary byte in a buffer and uses the same buffer to fill
|
||||
// in the compressed information of the given input. The configuration supports
|
||||
// two types of compression: LZW and Gzip. When using Gzip compression format,
|
||||
// if GzipCompressionLevel is not specified, the 'gzip.DefaultCompression' will
|
||||
// be assumed.
|
||||
func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
buf.Write([]byte{canary})
|
||||
var writer io.WriteCloser
|
||||
var err error
|
||||
|
||||
// Create writer to compress the JSON encoded bytes
|
||||
writer := lzw.NewWriter(&buf, lzw.LSB, 8)
|
||||
if config == nil {
|
||||
return nil, fmt.Errorf("config is nil")
|
||||
}
|
||||
|
||||
// Compress the JSON bytes
|
||||
if _, err := writer.Write(data); err != nil {
|
||||
// Write the canary into the buffer first
|
||||
buf.Write([]byte{CompressionCanary})
|
||||
|
||||
// Create writer to compress the input data based on the configured type
|
||||
switch config.Type {
|
||||
case CompressionTypeLzw:
|
||||
writer = lzw.NewWriter(&buf, lzw.LSB, 8)
|
||||
case CompressionTypeGzip:
|
||||
level := gzip.DefaultCompression
|
||||
if config.GzipCompressionLevel != gzip.NoCompression {
|
||||
level = config.GzipCompressionLevel
|
||||
}
|
||||
writer, err = gzip.NewWriterLevel(&buf, level)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported compression type")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create a compression writer; err: %v", err)
|
||||
}
|
||||
|
||||
if writer == nil {
|
||||
return nil, fmt.Errorf("failed to create a compression writer")
|
||||
}
|
||||
|
||||
// Compress the input and place it in the same buffer containing the
|
||||
// canary byte.
|
||||
if _, err = writer.Write(data); err != nil {
|
||||
return nil, fmt.Errorf("failed to compress JSON string; err: %v", err)
|
||||
}
|
||||
|
||||
// Close the io.WriteCloser
|
||||
if err := writer.Close(); err != nil {
|
||||
if err = writer.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Printf("compressutil.Compress: len(compressedBytes): %d\n", len(buf.Bytes()))
|
||||
|
||||
// Return the compressed bytes with canary byte at the start
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
func Decompress(data []byte, canary byte) ([]byte, bool, error) {
|
||||
if data == nil || len(data) < 2 {
|
||||
// Decompress checks if the first byte in the input matches the canary byte.
|
||||
// If the first byte is a canary byte, then the input past the canary byte
|
||||
// will be decompressed using the method specified in the given configuration.
|
||||
// If the first byte isn't a canary byte, then the utility returns a boolean
|
||||
// return value indicating that the input was not compressed.
|
||||
func Decompress(data []byte, config *CompressionConfig) ([]byte, bool, error) {
|
||||
var err error
|
||||
var reader io.ReadCloser
|
||||
if data == nil || len(data) == 0 {
|
||||
return nil, false, fmt.Errorf("'data' being decompressed is invalid")
|
||||
}
|
||||
|
||||
if config == nil {
|
||||
return nil, false, fmt.Errorf("config is nil")
|
||||
}
|
||||
|
||||
// Read the first byte
|
||||
bytesReader := bytes.NewReader(data)
|
||||
firstByte, err := bytesReader.ReadByte()
|
||||
|
@ -48,25 +114,44 @@ func Decompress(data []byte, canary byte) ([]byte, bool, error) {
|
|||
return nil, false, fmt.Errorf("failed to read the first byte from the input")
|
||||
}
|
||||
|
||||
// If the first byte doesn't match the canaryByte, it means that the
|
||||
// content was not compressed in the first place. Try JSON decoding it.
|
||||
if canary != firstByte {
|
||||
// If the first byte doesn't match the canary byte, it means that the
|
||||
// content was not compressed in the first place.
|
||||
if CompressionCanary != firstByte {
|
||||
// Indicate to the caller that the input was not compressed
|
||||
return nil, true, nil
|
||||
} else {
|
||||
// If the first byte matches the canaryByte, remove the canary
|
||||
// If the first byte matches the canary byte, remove the canary
|
||||
// byte and try to decompress the data before JSON decoding it.
|
||||
if len(data) < 2 {
|
||||
return nil, false, fmt.Errorf("invalid 'data' after the canary")
|
||||
}
|
||||
data = data[1:]
|
||||
}
|
||||
|
||||
// Create a reader to read the compressed data
|
||||
reader := lzw.NewReader(bytes.NewReader(data), lzw.LSB, 8)
|
||||
// Create a reader to read the compressed data based on the configured
|
||||
// compression type
|
||||
switch config.Type {
|
||||
case CompressionTypeLzw:
|
||||
reader = lzw.NewReader(bytes.NewReader(data), lzw.LSB, 8)
|
||||
case CompressionTypeGzip:
|
||||
reader, err = gzip.NewReader(bytes.NewReader(data))
|
||||
default:
|
||||
return nil, false, fmt.Errorf("invalid 'data' being decompressed is invalid")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("failed to create a compression reader; err: %v", err)
|
||||
}
|
||||
|
||||
if reader == nil {
|
||||
return nil, false, fmt.Errorf("failed to create a compression reader")
|
||||
}
|
||||
|
||||
// Close the io.ReadCloser
|
||||
defer reader.Close()
|
||||
|
||||
// Read all the decompressed data into a buffer
|
||||
var jsonBuf bytes.Buffer
|
||||
if _, err := io.Copy(&jsonBuf, reader); err != nil {
|
||||
if _, err = io.Copy(&jsonBuf, reader); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
|
|
|
@ -2,10 +2,10 @@ package jsonutil
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
|
||||
"github.com/hashicorp/vault/helper/compressutil"
|
||||
)
|
||||
|
@ -53,42 +53,48 @@ func DecodeJSONFromReader(r io.Reader, out interface{}) error {
|
|||
return dec.Decode(out)
|
||||
}
|
||||
|
||||
// DecompressAndDecodeJSON checks if the first byte in the input matches the
|
||||
// canary byte. If it does, the input will be decompressed (lzw) before being
|
||||
// JSON decoded. If it does not, the input will be JSON decoded without
|
||||
// attempting to decompress it.
|
||||
func DecompressAndDecodeJSON(data []byte, out interface{}) error {
|
||||
if data == nil || len(data) < 2 {
|
||||
return fmt.Errorf("'data' being decoded is invalid")
|
||||
// DecompressAndDecodeJSON tries to decompress the given data. The call to
|
||||
// decompress indicates whether the content was compressed in the first place,
|
||||
// which is identified by a canary byte before the compressed data. If the data
|
||||
// is not compressed, it is JSON decoded directly. Otherwise the decompressed
|
||||
// data will be JSON decoded.
|
||||
func DecompressAndDecodeJSON(dataBytes []byte, out interface{}) error {
|
||||
if dataBytes == nil || len(dataBytes) == 0 {
|
||||
return fmt.Errorf("'dataBytes' being decoded is invalid")
|
||||
}
|
||||
if out == nil {
|
||||
return fmt.Errorf("output parameter 'out' is nil")
|
||||
}
|
||||
|
||||
decompressedBytes, unencrypted, err := compressutil.Decompress(data, compressutil.CompressionCanaryJSON)
|
||||
// Decompress the dataBytes using Gzip format. Decompression when using Gzip
|
||||
// is agnostic of the compression levels used during compression.
|
||||
decompressedBytes, unencrypted, err :=
|
||||
compressutil.Decompress(dataBytes, &compressutil.CompressionConfig{
|
||||
Type: compressutil.CompressionTypeGzip,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decompress JSON: err: %v", err)
|
||||
}
|
||||
|
||||
// If the data supplied failed to contain the JSON compression canary,
|
||||
// it can be inferred that it was not compressed in the first place.
|
||||
// Try to JSON decode it.
|
||||
// If the dataBytes supplied failed to contain the compression canary, it
|
||||
// can be inferred that it was not compressed in the first place. Try
|
||||
// to decode it.
|
||||
if unencrypted {
|
||||
return DecodeJSON(data, out)
|
||||
return DecodeJSON(dataBytes, out)
|
||||
}
|
||||
|
||||
if decompressedBytes == nil || len(decompressedBytes) == 0 {
|
||||
return fmt.Errorf("decompressed data being decoded is invalid")
|
||||
}
|
||||
|
||||
// JSON decode the read out bytes
|
||||
// JSON decode the decompressed data
|
||||
return DecodeJSON(decompressedBytes, out)
|
||||
}
|
||||
|
||||
// EncodeJSONAndCompress encodes the given input into JSON and compresses the
|
||||
// encoded value (lzw). A canary byte is placed at the beginning of the
|
||||
// returned bytes for the logic in decompression method to identify compressed
|
||||
// input.
|
||||
// encoded value using Gzip format (BestCompression level). A canary byte is
|
||||
// placed at the beginning of the returned bytes for the logic in decompression
|
||||
// method to identify compressed input.
|
||||
func EncodeJSONAndCompress(in interface{}) ([]byte, error) {
|
||||
if in == nil {
|
||||
return nil, fmt.Errorf("input for encoding is nil")
|
||||
|
@ -99,7 +105,10 @@ func EncodeJSONAndCompress(in interface{}) ([]byte, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Printf("EncodeJSONAndCompress: len(encodedBytes): %d\n", len(encodedBytes))
|
||||
|
||||
return compressutil.Compress(encodedBytes, compressutil.CompressionCanaryJSON)
|
||||
// For compression, use Gzip format with 'BestCompression' level.
|
||||
return compressutil.Compress(encodedBytes, &compressutil.CompressionConfig{
|
||||
Type: compressutil.CompressionTypeGzip,
|
||||
GzipCompressionLevel: gzip.BestCompression,
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue