From 2306167d30543841b63257446150405b66eb6bc6 Mon Sep 17 00:00:00 2001 From: omegaga Date: Tue, 2 Aug 2016 15:16:39 -0700 Subject: [PATCH] Fix clang build failure and refactor unit test Summary: is not platform independent. Switch to our own endianness transformation function instead. Test Plan: Pass Travis CI. Refactor tests to make sure endianness transformation runs properly. Reviewers: IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D61389 --- util/coding.h | 11 ++ utilities/col_buf_decoder.cc | 18 +-- utilities/col_buf_decoder.h | 1 - utilities/col_buf_encoder.cc | 13 +- utilities/col_buf_encoder.h | 1 - utilities/column_aware_encoding_test.cc | 171 ++++++++++++++++-------- 6 files changed, 141 insertions(+), 74 deletions(-) diff --git a/util/coding.h b/util/coding.h index cff896d36c..bb355d8ecc 100644 --- a/util/coding.h +++ b/util/coding.h @@ -295,6 +295,17 @@ inline bool GetVarint64(Slice* input, uint64_t* value) { } } +// Provide an interface for platform independent endianness transformation +inline uint64_t EndianTransform(uint64_t input, size_t size) { + char* pos = reinterpret_cast(&input); + uint64_t ret_val = 0; + for (size_t i = 0; i < size; ++i) { + ret_val |= (static_cast(static_cast(pos[i])) + << ((size - i - 1) << 3)); + } + return ret_val; +} + inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) { uint32_t len = 0; if (GetVarint32(input, &len) && input->size() >= len) { diff --git a/utilities/col_buf_decoder.cc b/utilities/col_buf_decoder.cc index c2f0bd861c..e38dfef3f0 100644 --- a/utilities/col_buf_decoder.cc +++ b/utilities/col_buf_decoder.cc @@ -14,18 +14,18 @@ ColBufDecoder::~ColBufDecoder() {} namespace { -inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian) { +inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian, + size_t size) { if (big_endian && port::kLittleEndian) { - val = le64toh(val); - val = htobe64(val); + val = EndianTransform(val, size); } else if (!big_endian && !port::kLittleEndian) { - val = be64toh(val); - val = htole64(val); + val = EndianTransform(val, size); } return val; } } // namespace + ColBufDecoder* ColBufDecoder::NewColBufDecoder( const ColDeclaration& col_declaration) { if (col_declaration.col_type == "FixedLength") { @@ -73,7 +73,7 @@ size_t FixedLengthColBufDecoder::Init(const char* src) { // Bypass limit ReadVarint64(&src, &dict_key); - dict_key = EncodeFixed64WithEndian(dict_key, big_endian_); + dict_key = EncodeFixed64WithEndian(dict_key, big_endian_, size_); dict_vec_.push_back(dict_key); } } @@ -111,7 +111,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) { if (col_compression_type_ != kColRleDeltaVarint && col_compression_type_ != kColRleDict) { - run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_); + run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_, size_); } } read_val = run_val_; @@ -127,7 +127,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) { } if (col_compression_type_ != kColDeltaVarint && col_compression_type_ != kColDict) { - read_val = EncodeFixed64WithEndian(read_val, big_endian_); + read_val = EncodeFixed64WithEndian(read_val, big_endian_, size_); } } @@ -141,7 +141,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) { write_val = last_val_ + delta; uint64_t tmp = write_val; - write_val = EncodeFixed64WithEndian(write_val, big_endian_); + write_val = EncodeFixed64WithEndian(write_val, big_endian_, size_); last_val_ = tmp; } else if (col_compression_type_ == kColRleDict || col_compression_type_ == kColDict) { diff --git a/utilities/col_buf_decoder.h b/utilities/col_buf_decoder.h index 1351ecbd0c..8231770472 100644 --- a/utilities/col_buf_decoder.h +++ b/utilities/col_buf_decoder.h @@ -4,7 +4,6 @@ // of patent rights can be found in the PATENTS file in the same directory. #pragma once -#include #include #include #include diff --git a/utilities/col_buf_encoder.cc b/utilities/col_buf_encoder.cc index 64d7a43bfe..159917edae 100644 --- a/utilities/col_buf_encoder.cc +++ b/utilities/col_buf_encoder.cc @@ -14,13 +14,12 @@ ColBufEncoder::~ColBufEncoder() {} namespace { -inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian) { +inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian, + size_t size) { if (big_endian && port::kLittleEndian) { - val = be64toh(val); - val = htole64(val); + val = EndianTransform(val, size); } else if (!big_endian && !port::kLittleEndian) { - val = le64toh(val); - val = htobe64(val); + val = EndianTransform(val, size); } return val; } @@ -58,9 +57,7 @@ size_t FixedLengthColBufEncoder::Append(const char *buf) { } uint64_t read_val = 0; memcpy(&read_val, buf, size_); - if (big_endian_) { - read_val = DecodeFixed64WithEndian(read_val, big_endian_); - } + read_val = DecodeFixed64WithEndian(read_val, big_endian_, size_); // Determine write value uint64_t write_val = read_val; diff --git a/utilities/col_buf_encoder.h b/utilities/col_buf_encoder.h index 441943153e..38962e2e48 100644 --- a/utilities/col_buf_encoder.h +++ b/utilities/col_buf_encoder.h @@ -4,7 +4,6 @@ // of patent rights can be found in the PATENTS file in the same directory. #pragma once -#include #include #include #include diff --git a/utilities/column_aware_encoding_test.cc b/utilities/column_aware_encoding_test.cc index acfc650130..d1b5af81e7 100644 --- a/utilities/column_aware_encoding_test.cc +++ b/utilities/column_aware_encoding_test.cc @@ -20,135 +20,196 @@ class ColumnAwareEncodingTest : public testing::Test { ~ColumnAwareEncodingTest() {} }; -TEST_F(ColumnAwareEncodingTest, NoCompressionEncodeDecode) { +class ColumnAwareEncodingTestWithSize + : public ColumnAwareEncodingTest, + public testing::WithParamInterface { + public: + ColumnAwareEncodingTestWithSize() {} + + ~ColumnAwareEncodingTestWithSize() {} + + static std::vector GetValues() { return {4, 8}; } +}; + +INSTANTIATE_TEST_CASE_P( + ColumnAwareEncodingTestWithSize, ColumnAwareEncodingTestWithSize, + ::testing::ValuesIn(ColumnAwareEncodingTestWithSize::GetValues())); + +TEST_P(ColumnAwareEncodingTestWithSize, NoCompressionEncodeDecode) { + size_t col_size = GetParam(); std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(8, kColNoCompression, false, true)); + new FixedLengthColBufEncoder(col_size, kColNoCompression, false, true)); std::string str_buf; - uint64_t val = 0x0102030405060708; - for (int i = 0; i < 4; ++i) { - str_buf.append(reinterpret_cast(&val), 8); + uint64_t base_val = 0x0102030405060708; + uint64_t val = 0; + memcpy(&val, &base_val, col_size); + const int row_count = 4; + for (int i = 0; i < row_count; ++i) { + str_buf.append(reinterpret_cast(&val), col_size); } const char* str_buf_ptr = str_buf.c_str(); - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { col_buf_encoder->Append(str_buf_ptr); } col_buf_encoder->Finish(); const std::string& encoded_data = col_buf_encoder->GetData(); - ASSERT_EQ(encoded_data.size(), 32); + // Check correctness of encoded string length + ASSERT_EQ(row_count * col_size, encoded_data.size()); const char* encoded_data_ptr = encoded_data.c_str(); - uint64_t encoded_val; - for (int i = 0; i < 4; ++i) { - memcpy(&encoded_val, encoded_data_ptr, 8); - ASSERT_EQ(encoded_val, 0x0807060504030201); - encoded_data_ptr += 8; + uint64_t expected_encoded_val; + if (col_size == 8) { + expected_encoded_val = 0x0807060504030201; + } else if (col_size == 4) { + expected_encoded_val = 0x08070605; + } + uint64_t encoded_val = 0; + for (int i = 0; i < row_count; ++i) { + memcpy(&encoded_val, encoded_data_ptr, col_size); + // Check correctness of encoded value + ASSERT_EQ(expected_encoded_val, encoded_val); + encoded_data_ptr += col_size; } std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(8, kColNoCompression, false, true)); + new FixedLengthColBufDecoder(col_size, kColNoCompression, false, true)); encoded_data_ptr = encoded_data.c_str(); encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); char* decoded_data = new char[100]; char* decoded_data_base = decoded_data; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { encoded_data_ptr += col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); } - ASSERT_EQ(4 * 8, decoded_data - decoded_data_base); + // Check correctness of decoded string length + ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base); decoded_data = decoded_data_base; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { uint64_t decoded_val; decoded_val = 0; - memcpy(&decoded_val, decoded_data, 8); - ASSERT_EQ(decoded_val, val); - decoded_data += 8; + memcpy(&decoded_val, decoded_data, col_size); + // Check correctness of decoded value + ASSERT_EQ(val, decoded_val); + decoded_data += col_size; } delete[] decoded_data_base; } -TEST_F(ColumnAwareEncodingTest, RleEncodeDecode) { +TEST_P(ColumnAwareEncodingTestWithSize, RleEncodeDecode) { + size_t col_size = GetParam(); std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(8, kColRle, false, true)); + new FixedLengthColBufEncoder(col_size, kColRle, false, true)); std::string str_buf; - uint64_t val = 0x0102030405060708; - for (int i = 0; i < 4; ++i) { - str_buf.append(reinterpret_cast(&val), 8); + uint64_t base_val = 0x0102030405060708; + uint64_t val = 0; + memcpy(&val, &base_val, col_size); + const int row_count = 4; + for (int i = 0; i < row_count; ++i) { + str_buf.append(reinterpret_cast(&val), col_size); } const char* str_buf_ptr = str_buf.c_str(); - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { str_buf_ptr += col_buf_encoder->Append(str_buf_ptr); } col_buf_encoder->Finish(); const std::string& encoded_data = col_buf_encoder->GetData(); - ASSERT_EQ(encoded_data.size(), 9); + // Check correctness of encoded string length + ASSERT_EQ(col_size + 1, encoded_data.size()); const char* encoded_data_ptr = encoded_data.c_str(); - uint64_t encoded_val; - memcpy(&encoded_val, encoded_data_ptr, 8); - ASSERT_EQ(encoded_val, 0x0807060504030201); + uint64_t encoded_val = 0; + memcpy(&encoded_val, encoded_data_ptr, col_size); + uint64_t expected_encoded_val; + if (col_size == 8) { + expected_encoded_val = 0x0807060504030201; + } else if (col_size == 4) { + expected_encoded_val = 0x08070605; + } + // Check correctness of encoded value + ASSERT_EQ(expected_encoded_val, encoded_val); std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(8, kColRle, false, true)); + new FixedLengthColBufDecoder(col_size, kColRle, false, true)); char* decoded_data = new char[100]; char* decoded_data_base = decoded_data; encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { encoded_data_ptr += col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); } - ASSERT_EQ(4 * 8, decoded_data - decoded_data_base); + // Check correctness of decoded string length + ASSERT_EQ(decoded_data - decoded_data_base, row_count * col_size); decoded_data = decoded_data_base; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { uint64_t decoded_val; decoded_val = 0; - memcpy(&decoded_val, decoded_data, 8); - ASSERT_EQ(decoded_val, val); - decoded_data += 8; + memcpy(&decoded_val, decoded_data, col_size); + // Check correctness of decoded value + ASSERT_EQ(val, decoded_val); + decoded_data += col_size; } delete[] decoded_data_base; } -TEST_F(ColumnAwareEncodingTest, DeltaEncodeDecode) { +TEST_P(ColumnAwareEncodingTestWithSize, DeltaEncodeDecode) { + size_t col_size = GetParam(); + int row_count = 4; std::unique_ptr col_buf_encoder( - new FixedLengthColBufEncoder(8, kColDeltaVarint, false, true)); + new FixedLengthColBufEncoder(col_size, kColDeltaVarint, false, true)); std::string str_buf; - uint64_t val = 0x0102030405060708; - uint64_t val2 = 0x0202030405060708; + uint64_t base_val1 = 0x0102030405060708; + uint64_t base_val2 = 0x0202030405060708; + uint64_t val1 = 0, val2 = 0; + memcpy(&val1, &base_val1, col_size); + memcpy(&val2, &base_val2, col_size); const char* str_buf_ptr; - for (int i = 0; i < 2; ++i) { - str_buf = std::string(reinterpret_cast(&val), 8); + for (int i = 0; i < row_count / 2; ++i) { + str_buf = std::string(reinterpret_cast(&val1), col_size); str_buf_ptr = str_buf.c_str(); col_buf_encoder->Append(str_buf_ptr); - str_buf = std::string(reinterpret_cast(&val2), 8); + str_buf = std::string(reinterpret_cast(&val2), col_size); str_buf_ptr = str_buf.c_str(); col_buf_encoder->Append(str_buf_ptr); } col_buf_encoder->Finish(); const std::string& encoded_data = col_buf_encoder->GetData(); - ASSERT_EQ(encoded_data.size(), 9 + 3); + // Check encoded string length + int varint_len = 0; + if (col_size == 8) { + varint_len = 9; + } else if (col_size == 4) { + varint_len = 5; + } + // Check encoded string length: first value is original one (val - 0), the + // coming three are encoded as 1, -1, 1, so they should take 1 byte in varint. + ASSERT_EQ(varint_len + 3 * 1, encoded_data.size()); std::unique_ptr col_buf_decoder( - new FixedLengthColBufDecoder(8, kColDeltaVarint, false, true)); + new FixedLengthColBufDecoder(col_size, kColDeltaVarint, false, true)); char* decoded_data = new char[100]; char* decoded_data_base = decoded_data; const char* encoded_data_ptr = encoded_data.c_str(); encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr); - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < row_count; ++i) { encoded_data_ptr += col_buf_decoder->Decode(encoded_data_ptr, &decoded_data); } - ASSERT_EQ(4 * 8, decoded_data - decoded_data_base); + + // Check correctness of decoded string length + ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base); decoded_data = decoded_data_base; - for (int i = 0; i < 2; ++i) { - uint64_t decoded_val; - memcpy(&decoded_val, decoded_data, 8); - ASSERT_EQ(decoded_val, val); - decoded_data += 8; - memcpy(&decoded_val, decoded_data, 8); - ASSERT_EQ(decoded_val, val2); - decoded_data += 8; + + // Check correctness of decoded data + for (int i = 0; i < row_count / 2; ++i) { + uint64_t decoded_val = 0; + memcpy(&decoded_val, decoded_data, col_size); + ASSERT_EQ(val1, decoded_val); + decoded_data += col_size; + memcpy(&decoded_val, decoded_data, col_size); + ASSERT_EQ(val2, decoded_val); + decoded_data += col_size; } delete[] decoded_data_base; }