2016-08-05 17:34:23 +00:00
|
|
|
package compressutil
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2016-08-05 22:04:30 +00:00
|
|
|
"compress/gzip"
|
2016-08-05 17:34:23 +00:00
|
|
|
"compress/lzw"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2017-07-07 18:43:45 +00:00
|
|
|
|
|
|
|
"github.com/golang/snappy"
|
2016-08-05 17:34:23 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Canary bytes. A canary is the one-byte prefix that Compress puts in front
// of compressed output, which is how compressed data is told apart from an
// uncompressed JSON input. For that to work, none of these values may be the
// first character of a valid JSON string.
const (
	// CompressionCanaryGzip is the canary for the Gzip format.
	CompressionCanaryGzip byte = 'G'

	// CompressionCanaryLzw is the canary for the Lzw format.
	CompressionCanaryLzw byte = 'L'

	// CompressionCanarySnappy is the canary for the Snappy format.
	CompressionCanarySnappy byte = 'S'
)

// Names of the supported compression types, as accepted in
// CompressionConfig.Type.
const (
	CompressionTypeLzw    = "lzw"
	CompressionTypeGzip   = "gzip"
	CompressionTypeSnappy = "snappy"
)
|
|
|
|
|
2017-07-07 18:43:45 +00:00
|
|
|
// SnappyReadCloser embeds the snappy reader which implements the io.Reader
|
2018-03-20 18:54:10 +00:00
|
|
|
// interface. The decompress procedure in this utility expects an
|
2017-07-07 18:43:45 +00:00
|
|
|
// io.ReadCloser. This type implements the io.Closer interface to retain the
|
|
|
|
// generic way of decompression.
|
|
|
|
type SnappyReadCloser struct {
|
|
|
|
*snappy.Reader
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close is a noop method implemented only to satisfy the io.Closer interface
|
|
|
|
func (s *SnappyReadCloser) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
// CompressionConfig selects the compression type that the Compress utility
// applies.
//
// Supported values for Type are:
//   - CompressionTypeLzw
//   - CompressionTypeGzip
//   - CompressionTypeSnappy
//
// With CompressionTypeGzip, a compression level can be chosen as well:
//   - gzip.DefaultCompression
//   - gzip.BestSpeed
//   - gzip.BestCompression
type CompressionConfig struct {
	// Type names the compression algorithm to be used.
	Type string

	// GzipCompressionLevel is the compression level to employ when the Gzip
	// format is used.
	GzipCompressionLevel int
}
|
|
|
|
|
|
|
|
// Compress places the canary byte in a buffer and uses the same buffer to fill
|
|
|
|
// in the compressed information of the given input. The configuration supports
|
|
|
|
// two type of compression: LZW and Gzip. When using Gzip compression format,
|
|
|
|
// if GzipCompressionLevel is not specified, the 'gzip.DefaultCompression' will
|
|
|
|
// be assumed.
|
|
|
|
func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
|
2016-08-05 17:34:23 +00:00
|
|
|
var buf bytes.Buffer
|
2016-08-05 22:04:30 +00:00
|
|
|
var writer io.WriteCloser
|
|
|
|
var err error
|
2016-08-05 17:34:23 +00:00
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
if config == nil {
|
|
|
|
return nil, fmt.Errorf("config is nil")
|
|
|
|
}
|
|
|
|
|
2016-08-09 14:33:41 +00:00
|
|
|
// Write the canary into the buffer and create writer to compress the
|
|
|
|
// input data based on the configured type
|
2016-08-05 22:04:30 +00:00
|
|
|
switch config.Type {
|
|
|
|
case CompressionTypeLzw:
|
2016-08-09 14:33:41 +00:00
|
|
|
buf.Write([]byte{CompressionCanaryLzw})
|
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
writer = lzw.NewWriter(&buf, lzw.LSB, 8)
|
|
|
|
case CompressionTypeGzip:
|
2016-08-09 14:33:41 +00:00
|
|
|
buf.Write([]byte{CompressionCanaryGzip})
|
|
|
|
|
2016-08-09 07:43:03 +00:00
|
|
|
switch {
|
|
|
|
case config.GzipCompressionLevel == gzip.BestCompression,
|
|
|
|
config.GzipCompressionLevel == gzip.BestSpeed,
|
|
|
|
config.GzipCompressionLevel == gzip.DefaultCompression:
|
|
|
|
// These are valid compression levels
|
|
|
|
default:
|
|
|
|
// If compression level is set to NoCompression or to
|
|
|
|
// any invalid value, fallback to Defaultcompression
|
|
|
|
config.GzipCompressionLevel = gzip.DefaultCompression
|
2016-08-05 22:04:30 +00:00
|
|
|
}
|
2016-08-09 07:43:03 +00:00
|
|
|
writer, err = gzip.NewWriterLevel(&buf, config.GzipCompressionLevel)
|
2017-07-07 18:43:45 +00:00
|
|
|
case CompressionTypeSnappy:
|
|
|
|
buf.Write([]byte{CompressionCanarySnappy})
|
|
|
|
writer = snappy.NewBufferedWriter(&buf)
|
2016-08-05 22:04:30 +00:00
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unsupported compression type")
|
|
|
|
}
|
2017-07-07 18:43:45 +00:00
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create a compression writer; err: %v", err)
|
|
|
|
}
|
2016-08-05 17:34:23 +00:00
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
if writer == nil {
|
|
|
|
return nil, fmt.Errorf("failed to create a compression writer")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compress the input and place it in the same buffer containing the
|
|
|
|
// canary byte.
|
|
|
|
if _, err = writer.Write(data); err != nil {
|
2016-08-09 07:43:03 +00:00
|
|
|
return nil, fmt.Errorf("failed to compress input data; err: %v", err)
|
2016-08-05 17:34:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close the io.WriteCloser
|
2016-08-05 22:04:30 +00:00
|
|
|
if err = writer.Close(); err != nil {
|
2016-08-05 17:34:23 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the compressed bytes with canary byte at the start
|
|
|
|
return buf.Bytes(), nil
|
|
|
|
}
|
|
|
|
|
2016-08-05 22:04:30 +00:00
|
|
|
// Decompress checks if the first byte in the input matches the canary byte.
|
|
|
|
// If the first byte is a canary byte, then the input past the canary byte
|
|
|
|
// will be decompressed using the method specified in the given configuration.
|
|
|
|
// If the first byte isn't a canary byte, then the utility returns a boolean
|
2016-08-09 07:43:03 +00:00
|
|
|
// value indicating that the input was not compressed.
|
2016-08-09 14:33:41 +00:00
|
|
|
func Decompress(data []byte) ([]byte, bool, error) {
|
2016-08-05 22:04:30 +00:00
|
|
|
var err error
|
|
|
|
var reader io.ReadCloser
|
|
|
|
if data == nil || len(data) == 0 {
|
2016-08-09 15:01:59 +00:00
|
|
|
return nil, false, fmt.Errorf("'data' being decompressed is empty")
|
2016-08-05 17:34:23 +00:00
|
|
|
}
|
|
|
|
|
2016-08-09 14:33:41 +00:00
|
|
|
switch {
|
2017-07-07 18:43:45 +00:00
|
|
|
// If the first byte matches the canary byte, remove the canary
|
|
|
|
// byte and try to decompress the data that is after the canary.
|
2016-08-09 15:01:59 +00:00
|
|
|
case data[0] == CompressionCanaryGzip:
|
2016-08-05 22:04:30 +00:00
|
|
|
if len(data) < 2 {
|
|
|
|
return nil, false, fmt.Errorf("invalid 'data' after the canary")
|
|
|
|
}
|
2016-08-05 17:34:23 +00:00
|
|
|
data = data[1:]
|
2016-08-05 22:04:30 +00:00
|
|
|
reader, err = gzip.NewReader(bytes.NewReader(data))
|
2016-08-09 15:01:59 +00:00
|
|
|
case data[0] == CompressionCanaryLzw:
|
2016-08-09 14:33:41 +00:00
|
|
|
if len(data) < 2 {
|
|
|
|
return nil, false, fmt.Errorf("invalid 'data' after the canary")
|
|
|
|
}
|
|
|
|
data = data[1:]
|
|
|
|
reader = lzw.NewReader(bytes.NewReader(data), lzw.LSB, 8)
|
2017-07-07 18:43:45 +00:00
|
|
|
|
|
|
|
case data[0] == CompressionCanarySnappy:
|
|
|
|
if len(data) < 2 {
|
|
|
|
return nil, false, fmt.Errorf("invalid 'data' after the canary")
|
|
|
|
}
|
|
|
|
data = data[1:]
|
|
|
|
reader = &SnappyReadCloser{
|
|
|
|
Reader: snappy.NewReader(bytes.NewReader(data)),
|
|
|
|
}
|
2016-08-05 22:04:30 +00:00
|
|
|
default:
|
2016-08-09 14:33:41 +00:00
|
|
|
// If the first byte doesn't match the canary byte, it means
|
|
|
|
// that the content was not compressed at all. Indicate the
|
|
|
|
// caller that the input was not compressed.
|
|
|
|
return nil, true, nil
|
2016-08-05 22:04:30 +00:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return nil, false, fmt.Errorf("failed to create a compression reader; err: %v", err)
|
|
|
|
}
|
|
|
|
if reader == nil {
|
|
|
|
return nil, false, fmt.Errorf("failed to create a compression reader")
|
|
|
|
}
|
2016-08-05 17:34:23 +00:00
|
|
|
|
|
|
|
// Close the io.ReadCloser
|
|
|
|
defer reader.Close()
|
|
|
|
|
|
|
|
// Read all the compressed data into a buffer
|
2016-08-09 07:43:03 +00:00
|
|
|
var buf bytes.Buffer
|
|
|
|
if _, err = io.Copy(&buf, reader); err != nil {
|
2016-08-05 17:34:23 +00:00
|
|
|
return nil, false, err
|
|
|
|
}
|
|
|
|
|
2016-08-09 07:43:03 +00:00
|
|
|
return buf.Bytes(), false, nil
|
2016-08-05 17:34:23 +00:00
|
|
|
}
|