mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 07:30:54 +00:00
Fix clang build failure and refactor unit test
Summary: <endian.h> is not platform independent. Switch to our own endianness transformation function instead. Test Plan: Pass Travis CI. Refactor tests to make sure endianness transformation runs properly. Reviewers: IslamAbdelRahman, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D61389
This commit is contained in:
parent
343304e1d3
commit
2306167d30
|
@ -295,6 +295,17 @@ inline bool GetVarint64(Slice* input, uint64_t* value) {
|
|||
}
|
||||
}
|
||||
|
||||
// Provide an interface for platform independent endianness transformation
|
||||
inline uint64_t EndianTransform(uint64_t input, size_t size) {
|
||||
char* pos = reinterpret_cast<char*>(&input);
|
||||
uint64_t ret_val = 0;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
ret_val |= (static_cast<uint64_t>(static_cast<unsigned char>(pos[i]))
|
||||
<< ((size - i - 1) << 3));
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
|
||||
uint32_t len = 0;
|
||||
if (GetVarint32(input, &len) && input->size() >= len) {
|
||||
|
|
|
@ -14,18 +14,18 @@ ColBufDecoder::~ColBufDecoder() {}
|
|||
|
||||
namespace {
|
||||
|
||||
inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian) {
|
||||
inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian,
|
||||
size_t size) {
|
||||
if (big_endian && port::kLittleEndian) {
|
||||
val = le64toh(val);
|
||||
val = htobe64(val);
|
||||
val = EndianTransform(val, size);
|
||||
} else if (!big_endian && !port::kLittleEndian) {
|
||||
val = be64toh(val);
|
||||
val = htole64(val);
|
||||
val = EndianTransform(val, size);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ColBufDecoder* ColBufDecoder::NewColBufDecoder(
|
||||
const ColDeclaration& col_declaration) {
|
||||
if (col_declaration.col_type == "FixedLength") {
|
||||
|
@ -73,7 +73,7 @@ size_t FixedLengthColBufDecoder::Init(const char* src) {
|
|||
// Bypass limit
|
||||
ReadVarint64(&src, &dict_key);
|
||||
|
||||
dict_key = EncodeFixed64WithEndian(dict_key, big_endian_);
|
||||
dict_key = EncodeFixed64WithEndian(dict_key, big_endian_, size_);
|
||||
dict_vec_.push_back(dict_key);
|
||||
}
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) {
|
|||
|
||||
if (col_compression_type_ != kColRleDeltaVarint &&
|
||||
col_compression_type_ != kColRleDict) {
|
||||
run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_);
|
||||
run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_, size_);
|
||||
}
|
||||
}
|
||||
read_val = run_val_;
|
||||
|
@ -127,7 +127,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) {
|
|||
}
|
||||
if (col_compression_type_ != kColDeltaVarint &&
|
||||
col_compression_type_ != kColDict) {
|
||||
read_val = EncodeFixed64WithEndian(read_val, big_endian_);
|
||||
read_val = EncodeFixed64WithEndian(read_val, big_endian_, size_);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) {
|
|||
write_val = last_val_ + delta;
|
||||
|
||||
uint64_t tmp = write_val;
|
||||
write_val = EncodeFixed64WithEndian(write_val, big_endian_);
|
||||
write_val = EncodeFixed64WithEndian(write_val, big_endian_, size_);
|
||||
last_val_ = tmp;
|
||||
} else if (col_compression_type_ == kColRleDict ||
|
||||
col_compression_type_ == kColDict) {
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#pragma once
|
||||
#include <endian.h>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
|
|
@ -14,13 +14,12 @@ ColBufEncoder::~ColBufEncoder() {}
|
|||
|
||||
namespace {
|
||||
|
||||
inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian) {
|
||||
inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian,
|
||||
size_t size) {
|
||||
if (big_endian && port::kLittleEndian) {
|
||||
val = be64toh(val);
|
||||
val = htole64(val);
|
||||
val = EndianTransform(val, size);
|
||||
} else if (!big_endian && !port::kLittleEndian) {
|
||||
val = le64toh(val);
|
||||
val = htobe64(val);
|
||||
val = EndianTransform(val, size);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
@ -58,9 +57,7 @@ size_t FixedLengthColBufEncoder::Append(const char *buf) {
|
|||
}
|
||||
uint64_t read_val = 0;
|
||||
memcpy(&read_val, buf, size_);
|
||||
if (big_endian_) {
|
||||
read_val = DecodeFixed64WithEndian(read_val, big_endian_);
|
||||
}
|
||||
read_val = DecodeFixed64WithEndian(read_val, big_endian_, size_);
|
||||
|
||||
// Determine write value
|
||||
uint64_t write_val = read_val;
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#pragma once
|
||||
#include <endian.h>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
|
|
@ -20,135 +20,196 @@ class ColumnAwareEncodingTest : public testing::Test {
|
|||
~ColumnAwareEncodingTest() {}
|
||||
};
|
||||
|
||||
TEST_F(ColumnAwareEncodingTest, NoCompressionEncodeDecode) {
|
||||
class ColumnAwareEncodingTestWithSize
|
||||
: public ColumnAwareEncodingTest,
|
||||
public testing::WithParamInterface<size_t> {
|
||||
public:
|
||||
ColumnAwareEncodingTestWithSize() {}
|
||||
|
||||
~ColumnAwareEncodingTestWithSize() {}
|
||||
|
||||
static std::vector<size_t> GetValues() { return {4, 8}; }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
ColumnAwareEncodingTestWithSize, ColumnAwareEncodingTestWithSize,
|
||||
::testing::ValuesIn(ColumnAwareEncodingTestWithSize::GetValues()));
|
||||
|
||||
TEST_P(ColumnAwareEncodingTestWithSize, NoCompressionEncodeDecode) {
|
||||
size_t col_size = GetParam();
|
||||
std::unique_ptr<ColBufEncoder> col_buf_encoder(
|
||||
new FixedLengthColBufEncoder(8, kColNoCompression, false, true));
|
||||
new FixedLengthColBufEncoder(col_size, kColNoCompression, false, true));
|
||||
std::string str_buf;
|
||||
uint64_t val = 0x0102030405060708;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
str_buf.append(reinterpret_cast<char*>(&val), 8);
|
||||
uint64_t base_val = 0x0102030405060708;
|
||||
uint64_t val = 0;
|
||||
memcpy(&val, &base_val, col_size);
|
||||
const int row_count = 4;
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
str_buf.append(reinterpret_cast<char*>(&val), col_size);
|
||||
}
|
||||
const char* str_buf_ptr = str_buf.c_str();
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
col_buf_encoder->Append(str_buf_ptr);
|
||||
}
|
||||
col_buf_encoder->Finish();
|
||||
const std::string& encoded_data = col_buf_encoder->GetData();
|
||||
ASSERT_EQ(encoded_data.size(), 32);
|
||||
// Check correctness of encoded string length
|
||||
ASSERT_EQ(row_count * col_size, encoded_data.size());
|
||||
|
||||
const char* encoded_data_ptr = encoded_data.c_str();
|
||||
uint64_t encoded_val;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
memcpy(&encoded_val, encoded_data_ptr, 8);
|
||||
ASSERT_EQ(encoded_val, 0x0807060504030201);
|
||||
encoded_data_ptr += 8;
|
||||
uint64_t expected_encoded_val;
|
||||
if (col_size == 8) {
|
||||
expected_encoded_val = 0x0807060504030201;
|
||||
} else if (col_size == 4) {
|
||||
expected_encoded_val = 0x08070605;
|
||||
}
|
||||
uint64_t encoded_val = 0;
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
memcpy(&encoded_val, encoded_data_ptr, col_size);
|
||||
// Check correctness of encoded value
|
||||
ASSERT_EQ(expected_encoded_val, encoded_val);
|
||||
encoded_data_ptr += col_size;
|
||||
}
|
||||
|
||||
std::unique_ptr<ColBufDecoder> col_buf_decoder(
|
||||
new FixedLengthColBufDecoder(8, kColNoCompression, false, true));
|
||||
new FixedLengthColBufDecoder(col_size, kColNoCompression, false, true));
|
||||
encoded_data_ptr = encoded_data.c_str();
|
||||
encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
|
||||
char* decoded_data = new char[100];
|
||||
char* decoded_data_base = decoded_data;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
encoded_data_ptr +=
|
||||
col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
|
||||
}
|
||||
|
||||
ASSERT_EQ(4 * 8, decoded_data - decoded_data_base);
|
||||
// Check correctness of decoded string length
|
||||
ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base);
|
||||
decoded_data = decoded_data_base;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
uint64_t decoded_val;
|
||||
decoded_val = 0;
|
||||
memcpy(&decoded_val, decoded_data, 8);
|
||||
ASSERT_EQ(decoded_val, val);
|
||||
decoded_data += 8;
|
||||
memcpy(&decoded_val, decoded_data, col_size);
|
||||
// Check correctness of decoded value
|
||||
ASSERT_EQ(val, decoded_val);
|
||||
decoded_data += col_size;
|
||||
}
|
||||
delete[] decoded_data_base;
|
||||
}
|
||||
|
||||
TEST_F(ColumnAwareEncodingTest, RleEncodeDecode) {
|
||||
TEST_P(ColumnAwareEncodingTestWithSize, RleEncodeDecode) {
|
||||
size_t col_size = GetParam();
|
||||
std::unique_ptr<ColBufEncoder> col_buf_encoder(
|
||||
new FixedLengthColBufEncoder(8, kColRle, false, true));
|
||||
new FixedLengthColBufEncoder(col_size, kColRle, false, true));
|
||||
std::string str_buf;
|
||||
uint64_t val = 0x0102030405060708;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
str_buf.append(reinterpret_cast<char*>(&val), 8);
|
||||
uint64_t base_val = 0x0102030405060708;
|
||||
uint64_t val = 0;
|
||||
memcpy(&val, &base_val, col_size);
|
||||
const int row_count = 4;
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
str_buf.append(reinterpret_cast<char*>(&val), col_size);
|
||||
}
|
||||
const char* str_buf_ptr = str_buf.c_str();
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
str_buf_ptr += col_buf_encoder->Append(str_buf_ptr);
|
||||
}
|
||||
col_buf_encoder->Finish();
|
||||
const std::string& encoded_data = col_buf_encoder->GetData();
|
||||
ASSERT_EQ(encoded_data.size(), 9);
|
||||
// Check correctness of encoded string length
|
||||
ASSERT_EQ(col_size + 1, encoded_data.size());
|
||||
|
||||
const char* encoded_data_ptr = encoded_data.c_str();
|
||||
uint64_t encoded_val;
|
||||
memcpy(&encoded_val, encoded_data_ptr, 8);
|
||||
ASSERT_EQ(encoded_val, 0x0807060504030201);
|
||||
uint64_t encoded_val = 0;
|
||||
memcpy(&encoded_val, encoded_data_ptr, col_size);
|
||||
uint64_t expected_encoded_val;
|
||||
if (col_size == 8) {
|
||||
expected_encoded_val = 0x0807060504030201;
|
||||
} else if (col_size == 4) {
|
||||
expected_encoded_val = 0x08070605;
|
||||
}
|
||||
// Check correctness of encoded value
|
||||
ASSERT_EQ(expected_encoded_val, encoded_val);
|
||||
|
||||
std::unique_ptr<ColBufDecoder> col_buf_decoder(
|
||||
new FixedLengthColBufDecoder(8, kColRle, false, true));
|
||||
new FixedLengthColBufDecoder(col_size, kColRle, false, true));
|
||||
char* decoded_data = new char[100];
|
||||
char* decoded_data_base = decoded_data;
|
||||
encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
encoded_data_ptr +=
|
||||
col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
|
||||
}
|
||||
ASSERT_EQ(4 * 8, decoded_data - decoded_data_base);
|
||||
// Check correctness of decoded string length
|
||||
ASSERT_EQ(decoded_data - decoded_data_base, row_count * col_size);
|
||||
decoded_data = decoded_data_base;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
uint64_t decoded_val;
|
||||
decoded_val = 0;
|
||||
memcpy(&decoded_val, decoded_data, 8);
|
||||
ASSERT_EQ(decoded_val, val);
|
||||
decoded_data += 8;
|
||||
memcpy(&decoded_val, decoded_data, col_size);
|
||||
// Check correctness of decoded value
|
||||
ASSERT_EQ(val, decoded_val);
|
||||
decoded_data += col_size;
|
||||
}
|
||||
delete[] decoded_data_base;
|
||||
}
|
||||
|
||||
TEST_F(ColumnAwareEncodingTest, DeltaEncodeDecode) {
|
||||
TEST_P(ColumnAwareEncodingTestWithSize, DeltaEncodeDecode) {
|
||||
size_t col_size = GetParam();
|
||||
int row_count = 4;
|
||||
std::unique_ptr<ColBufEncoder> col_buf_encoder(
|
||||
new FixedLengthColBufEncoder(8, kColDeltaVarint, false, true));
|
||||
new FixedLengthColBufEncoder(col_size, kColDeltaVarint, false, true));
|
||||
std::string str_buf;
|
||||
uint64_t val = 0x0102030405060708;
|
||||
uint64_t val2 = 0x0202030405060708;
|
||||
uint64_t base_val1 = 0x0102030405060708;
|
||||
uint64_t base_val2 = 0x0202030405060708;
|
||||
uint64_t val1 = 0, val2 = 0;
|
||||
memcpy(&val1, &base_val1, col_size);
|
||||
memcpy(&val2, &base_val2, col_size);
|
||||
const char* str_buf_ptr;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
str_buf = std::string(reinterpret_cast<char*>(&val), 8);
|
||||
for (int i = 0; i < row_count / 2; ++i) {
|
||||
str_buf = std::string(reinterpret_cast<char*>(&val1), col_size);
|
||||
str_buf_ptr = str_buf.c_str();
|
||||
col_buf_encoder->Append(str_buf_ptr);
|
||||
|
||||
str_buf = std::string(reinterpret_cast<char*>(&val2), 8);
|
||||
str_buf = std::string(reinterpret_cast<char*>(&val2), col_size);
|
||||
str_buf_ptr = str_buf.c_str();
|
||||
col_buf_encoder->Append(str_buf_ptr);
|
||||
}
|
||||
col_buf_encoder->Finish();
|
||||
const std::string& encoded_data = col_buf_encoder->GetData();
|
||||
ASSERT_EQ(encoded_data.size(), 9 + 3);
|
||||
// Check encoded string length
|
||||
int varint_len = 0;
|
||||
if (col_size == 8) {
|
||||
varint_len = 9;
|
||||
} else if (col_size == 4) {
|
||||
varint_len = 5;
|
||||
}
|
||||
// Check encoded string length: first value is original one (val - 0), the
|
||||
// coming three are encoded as 1, -1, 1, so they should take 1 byte in varint.
|
||||
ASSERT_EQ(varint_len + 3 * 1, encoded_data.size());
|
||||
|
||||
std::unique_ptr<ColBufDecoder> col_buf_decoder(
|
||||
new FixedLengthColBufDecoder(8, kColDeltaVarint, false, true));
|
||||
new FixedLengthColBufDecoder(col_size, kColDeltaVarint, false, true));
|
||||
char* decoded_data = new char[100];
|
||||
char* decoded_data_base = decoded_data;
|
||||
const char* encoded_data_ptr = encoded_data.c_str();
|
||||
encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
encoded_data_ptr +=
|
||||
col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
|
||||
}
|
||||
ASSERT_EQ(4 * 8, decoded_data - decoded_data_base);
|
||||
|
||||
// Check correctness of decoded string length
|
||||
ASSERT_EQ(row_count * col_size, decoded_data - decoded_data_base);
|
||||
decoded_data = decoded_data_base;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
uint64_t decoded_val;
|
||||
memcpy(&decoded_val, decoded_data, 8);
|
||||
ASSERT_EQ(decoded_val, val);
|
||||
decoded_data += 8;
|
||||
memcpy(&decoded_val, decoded_data, 8);
|
||||
ASSERT_EQ(decoded_val, val2);
|
||||
decoded_data += 8;
|
||||
|
||||
// Check correctness of decoded data
|
||||
for (int i = 0; i < row_count / 2; ++i) {
|
||||
uint64_t decoded_val = 0;
|
||||
memcpy(&decoded_val, decoded_data, col_size);
|
||||
ASSERT_EQ(val1, decoded_val);
|
||||
decoded_data += col_size;
|
||||
memcpy(&decoded_val, decoded_data, col_size);
|
||||
ASSERT_EQ(val2, decoded_val);
|
||||
decoded_data += col_size;
|
||||
}
|
||||
delete[] decoded_data_base;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue