rocksdb/third-party/fbson/FbsonDocument.h
Dmitri Smirnov 18285c1e2f Windows Port from Microsoft
Summary: Make RocksDb build and run on Windows to be functionally
 complete and performant. All existing test cases run with no
 regressions. Performance numbers are in the pull-request.

 Test plan: make all of the existing unit tests pass, obtain perf numbers.

 Co-authored-by: Praveen Rao praveensinghrao@outlook.com
 Co-authored-by: Sherlock Huang baihan.huang@gmail.com
 Co-authored-by: Alex Zinoviev alexander.zinoviev@me.com
 Co-authored-by: Dmitri Smirnov dmitrism@microsoft.com
2015-07-01 16:13:56 -07:00

899 lines
23 KiB
C++

/*
* Copyright (c) 2014, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
/*
* This header defines FbsonDocument, FbsonKeyValue, and various value classes
* which are derived from FbsonValue, and a forward iterator for container
* values - essentially everything that is related to FBSON binary data
* structures.
*
* Implementation notes:
*
* None of the classes in this header file can be instantiated directly (i.e.
* you cannot create a FbsonKeyValue or FbsonValue object - all constructors
* are declared non-public). We use the classes as wrappers on the packed FBSON
* bytes (serialized), and cast the classes (types) to the underlying packed
* byte array.
*
* For the same reason, we cannot define any FBSON value class to be virtual,
* since we never call constructors, and will not instantiate vtbl and vptrs.
*
* Therefore, the classes are defined as packed structures (i.e. no data
* alignment and padding), and the private member variables of the classes are
* defined precisely in the same order as the FBSON spec. This ensures we
* access the packed FBSON bytes correctly.
*
* The packed structures are highly optimized for in-place operations with low
* overhead. The reads (and in-place writes) are performed directly on packed
* bytes. There is no memory allocation at all at runtime.
*
* For updates/writes of values that will expand the original FBSON size, the
* write will fail, and the caller needs to handle buffer increase.
*
* ** Iterator **
* Both ObjectVal class and ArrayVal class have iterator type that you can use
* to declare an iterator on a container object to go through the key-value
* pairs or value list. The iterator has both non-const and const types.
*
* Note: iterators are forward direction only.
*
* ** Query **
* Querying into containers is through the member functions find (for key/value
* pairs) and get (for array elements), and is in streaming style. We don't
* need to read/scan the whole FBSON packed bytes in order to return results.
* Once the key/index is found, we will stop search. You can use text to query
* both objects and array (for array, text will be converted to integer index),
* and use index to retrieve from array. Array index is 0-based.
*
* ** External dictionary **
* During query processing, you can also pass a call-back function, so the
* search will first try to check if the key string exists in the dictionary.
* If so, search will be based on the id instead of the key string.
*
* @author Tian Xia <tianx@fb.com>
*/
#ifndef FBSON_FBSONDOCUMENT_H
#define FBSON_FBSONDOCUMENT_H
#include <stdlib.h>
#include <string.h>
#include <assert.h>
namespace fbson {
#pragma pack(push, 1)
#define FBSON_VER 1
// forward declaration
class FbsonValue;
class ObjectVal;
/*
* FbsonDocument is the main object that accesses and queries FBSON packed
* bytes. NOTE: FbsonDocument only allows object container as the top level
* FBSON value. However, you can use the static method "createValue" to get any
* FbsonValue object from the packed bytes.
*
* FbsonDocument object also dereferences to an object container value
* (ObjectVal) once FBSON is loaded.
*
* ** Load **
* FbsonDocument is usable after loading packed bytes (memory location) into
* the object. We only need the header and first few bytes of the payload after
* header to verify the FBSON.
*
* Note: creating an FbsonDocument (through createDocument) does not allocate
* any memory. The document object is an efficient wrapper on the packed bytes
* which is accessed directly.
*
* ** Query **
* Query is through dereferencing into ObjectVal.
*/
class FbsonDocument {
public:
// create an FbsonDocument object from FBSON packed bytes
static FbsonDocument* createDocument(const char* pb, uint32_t size);
// create an FbsonValue from FBSON packed bytes
static FbsonValue* createValue(const char* pb, uint32_t size);
uint8_t version() { return header_.ver_; }
FbsonValue* getValue() { return ((FbsonValue*)payload_); }
ObjectVal* operator->() { return ((ObjectVal*)payload_); }
const ObjectVal* operator->() const { return ((const ObjectVal*)payload_); }
private:
/*
* FbsonHeader class defines FBSON header (internal to FbsonDocument).
*
* Currently it only contains version information (1-byte). We may expand the
* header to include checksum of the FBSON binary for more security.
*/
struct FbsonHeader {
uint8_t ver_;
} header_;
char payload_[1];
FbsonDocument();
FbsonDocument(const FbsonDocument&) = delete;
FbsonDocument& operator=(const FbsonDocument&) = delete;
};
/*
* FbsonFwdIteratorT implements FBSON's iterator template.
*
* Note: it is an FORWARD iterator only due to the design of FBSON format.
*/
template <class Iter_Type, class Cont_Type>
class FbsonFwdIteratorT {
typedef Iter_Type iterator;
typedef typename std::iterator_traits<Iter_Type>::pointer pointer;
typedef typename std::iterator_traits<Iter_Type>::reference reference;
public:
explicit FbsonFwdIteratorT(const iterator& i) : current_(i) {}
// allow non-const to const iterator conversion (same container type)
template <class Iter_Ty>
FbsonFwdIteratorT(const FbsonFwdIteratorT<Iter_Ty, Cont_Type>& rhs)
: current_(rhs.base()) {}
bool operator==(const FbsonFwdIteratorT& rhs) const {
return (current_ == rhs.current_);
}
bool operator!=(const FbsonFwdIteratorT& rhs) const {
return !operator==(rhs);
}
bool operator<(const FbsonFwdIteratorT& rhs) const {
return (current_ < rhs.current_);
}
bool operator>(const FbsonFwdIteratorT& rhs) const { return !operator<(rhs); }
FbsonFwdIteratorT& operator++() {
current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
return *this;
}
FbsonFwdIteratorT operator++(int) {
auto tmp = *this;
current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
return tmp;
}
explicit operator pointer() { return current_; }
reference operator*() const { return *current_; }
pointer operator->() const { return current_; }
iterator base() const { return current_; }
private:
iterator current_;
};
typedef int (*hDictInsert)(const char* key, unsigned len);
typedef int (*hDictFind)(const char* key, unsigned len);
/*
* FbsonType defines 10 primitive types and 2 container types, as described
* below.
*
* primitive_value ::=
* 0x00 //null value (0 byte)
* | 0x01 //boolean true (0 byte)
* | 0x02 //boolean false (0 byte)
* | 0x03 int8 //char/int8 (1 byte)
* | 0x04 int16 //int16 (2 bytes)
* | 0x05 int32 //int32 (4 bytes)
* | 0x06 int64 //int64 (8 bytes)
* | 0x07 double //floating point (8 bytes)
* | 0x08 string //variable length string
* | 0x09 binary //variable length binary
*
* container ::=
* 0x0A int32 key_value_list //object, int32 is the total bytes of the object
* | 0x0B int32 value_list //array, int32 is the total bytes of the array
*/
enum class FbsonType : char {
T_Null = 0x00,
T_True = 0x01,
T_False = 0x02,
T_Int8 = 0x03,
T_Int16 = 0x04,
T_Int32 = 0x05,
T_Int64 = 0x06,
T_Double = 0x07,
T_String = 0x08,
T_Binary = 0x09,
T_Object = 0x0A,
T_Array = 0x0B,
NUM_TYPES,
};
typedef std::underlying_type<FbsonType>::type FbsonTypeUnder;
/*
* FbsonKeyValue class defines FBSON key type, as described below.
*
* key ::=
* 0x00 int8 //1-byte dictionary id
* | int8 (byte*) //int8 (>0) is the size of the key string
*
* value ::= primitive_value | container
*
* FbsonKeyValue can be either an id mapping to the key string in an external
* dictionary, or it is the original key string. Whether to read an id or a
* string is decided by the first byte (size_).
*
* Note: a key object must be followed by a value object. Therefore, a key
* object implicitly refers to a key-value pair, and you can get the value
* object right after the key object. The function numPackedBytes hence
* indicates the total size of the key-value pair, so that we will be able go
* to next pair from the key.
*
* ** Dictionary size **
* By default, the dictionary size is 255 (1-byte). Users can define
* "USE_LARGE_DICT" to increase the dictionary size to 655535 (2-byte).
*/
class FbsonKeyValue {
public:
#ifdef USE_LARGE_DICT
static const int sMaxKeyId = 65535;
typedef uint16_t keyid_type;
#else
static const int sMaxKeyId = 255;
typedef uint8_t keyid_type;
#endif // #ifdef USE_LARGE_DICT
static const uint8_t sMaxKeyLen = 64;
// size of the key. 0 indicates it is stored as id
uint8_t klen() const { return size_; }
// get the key string. Note the string may not be null terminated.
const char* getKeyStr() const { return key_.str_; }
keyid_type getKeyId() const { return key_.id_; }
unsigned int keyPackedBytes() const {
return size_ ? (sizeof(size_) + size_)
: (sizeof(size_) + sizeof(keyid_type));
}
FbsonValue* value() const {
return (FbsonValue*)(((char*)this) + keyPackedBytes());
}
// size of the total packed bytes (key+value)
unsigned int numPackedBytes() const;
private:
uint8_t size_;
union key_ {
keyid_type id_;
char str_[1];
} key_;
FbsonKeyValue();
};
/*
* FbsonValue is the base class of all FBSON types. It contains only one member
* variable - type info, which can be retrieved by member functions is[Type]()
* or type().
*/
class FbsonValue {
public:
static const uint32_t sMaxValueLen = 1 << 24; // 16M
bool isNull() const { return (type_ == FbsonType::T_Null); }
bool isTrue() const { return (type_ == FbsonType::T_True); }
bool isFalse() const { return (type_ == FbsonType::T_False); }
bool isInt8() const { return (type_ == FbsonType::T_Int8); }
bool isInt16() const { return (type_ == FbsonType::T_Int16); }
bool isInt32() const { return (type_ == FbsonType::T_Int32); }
bool isInt64() const { return (type_ == FbsonType::T_Int64); }
bool isDouble() const { return (type_ == FbsonType::T_Double); }
bool isString() const { return (type_ == FbsonType::T_String); }
bool isBinary() const { return (type_ == FbsonType::T_Binary); }
bool isObject() const { return (type_ == FbsonType::T_Object); }
bool isArray() const { return (type_ == FbsonType::T_Array); }
FbsonType type() const { return type_; }
// size of the total packed bytes
unsigned int numPackedBytes() const;
// size of the value in bytes
unsigned int size() const;
// get the raw byte array of the value
const char* getValuePtr() const;
// find the FBSON value by a key path string (null terminated)
FbsonValue* findPath(const char* key_path,
const char* delim = ".",
hDictFind handler = nullptr) {
return findPath(key_path, (unsigned int)strlen(key_path), delim, handler);
}
// find the FBSON value by a key path string (with length)
FbsonValue* findPath(const char* key_path,
unsigned int len,
const char* delim,
hDictFind handler);
protected:
FbsonType type_; // type info
FbsonValue();
};
/*
* NumerValT is the template class (derived from FbsonValue) of all number
* types (integers and double).
*/
template <class T>
class NumberValT : public FbsonValue {
public:
T val() const { return num_; }
unsigned int numPackedBytes() const { return sizeof(FbsonValue) + sizeof(T); }
// catch all unknow specialization of the template class
bool setVal(T value) { return false; }
private:
T num_;
NumberValT();
};
typedef NumberValT<int8_t> Int8Val;
// override setVal for Int8Val
template <>
inline bool Int8Val::setVal(int8_t value) {
if (!isInt8()) {
return false;
}
num_ = value;
return true;
}
typedef NumberValT<int16_t> Int16Val;
// override setVal for Int16Val
template <>
inline bool Int16Val::setVal(int16_t value) {
if (!isInt16()) {
return false;
}
num_ = value;
return true;
}
typedef NumberValT<int32_t> Int32Val;
// override setVal for Int32Val
template <>
inline bool Int32Val::setVal(int32_t value) {
if (!isInt32()) {
return false;
}
num_ = value;
return true;
}
typedef NumberValT<int64_t> Int64Val;
// override setVal for Int64Val
template <>
inline bool Int64Val::setVal(int64_t value) {
if (!isInt64()) {
return false;
}
num_ = value;
return true;
}
typedef NumberValT<double> DoubleVal;
// override setVal for DoubleVal
template <>
inline bool DoubleVal::setVal(double value) {
if (!isDouble()) {
return false;
}
num_ = value;
return true;
}
/*
* BlobVal is the base class (derived from FbsonValue) for string and binary
* types. The size_ indicates the total bytes of the payload_.
*/
class BlobVal : public FbsonValue {
public:
// size of the blob payload only
unsigned int getBlobLen() const { return size_; }
// return the blob as byte array
const char* getBlob() const { return payload_; }
// size of the total packed bytes
unsigned int numPackedBytes() const {
return sizeof(FbsonValue) + sizeof(size_) + size_;
}
protected:
uint32_t size_;
char payload_[1];
// set new blob bytes
bool internalSetVal(const char* blob, uint32_t blobSize) {
// if we cannot fit the new blob, fail the operation
if (blobSize > size_) {
return false;
}
memcpy(payload_, blob, blobSize);
// Set the reset of the bytes to 0. Note we cannot change the size_ of the
// current payload, as all values are packed.
memset(payload_ + blobSize, 0, size_ - blobSize);
return true;
}
BlobVal();
private:
// Disable as this class can only be allocated dynamically
BlobVal(const BlobVal&) = delete;
BlobVal& operator=(const BlobVal&) = delete;
};
/*
* Binary type
*/
class BinaryVal : public BlobVal {
public:
bool setVal(const char* blob, uint32_t blobSize) {
if (!isBinary()) {
return false;
}
return internalSetVal(blob, blobSize);
}
private:
BinaryVal();
};
/*
* String type
* Note: FBSON string may not be a c-string (NULL-terminated)
*/
class StringVal : public BlobVal {
public:
bool setVal(const char* str, uint32_t blobSize) {
if (!isString()) {
return false;
}
return internalSetVal(str, blobSize);
}
private:
StringVal();
};
/*
* ContainerVal is the base class (derived from FbsonValue) for object and
* array types. The size_ indicates the total bytes of the payload_.
*/
class ContainerVal : public FbsonValue {
public:
// size of the container payload only
unsigned int getContainerSize() const { return size_; }
// return the container payload as byte array
const char* getPayload() const { return payload_; }
// size of the total packed bytes
unsigned int numPackedBytes() const {
return sizeof(FbsonValue) + sizeof(size_) + size_;
}
protected:
uint32_t size_;
char payload_[1];
ContainerVal();
ContainerVal(const ContainerVal&) = delete;
ContainerVal& operator=(const ContainerVal&) = delete;
};
/*
* Object type
*/
class ObjectVal : public ContainerVal {
public:
// find the FBSON value by a key string (null terminated)
FbsonValue* find(const char* key, hDictFind handler = nullptr) const {
if (!key)
return nullptr;
return find(key, (unsigned int)strlen(key), handler);
}
// find the FBSON value by a key string (with length)
FbsonValue* find(const char* key,
unsigned int klen,
hDictFind handler = nullptr) const {
if (!key || !klen)
return nullptr;
int key_id = -1;
if (handler && (key_id = handler(key, klen)) >= 0) {
return find(key_id);
}
return internalFind(key, klen);
}
// find the FBSON value by a key dictionary ID
FbsonValue* find(int key_id) const {
if (key_id < 0 || key_id > FbsonKeyValue::sMaxKeyId)
return nullptr;
const char* pch = payload_;
const char* fence = payload_ + size_;
while (pch < fence) {
FbsonKeyValue* pkey = (FbsonKeyValue*)(pch);
if (!pkey->klen() && key_id == pkey->getKeyId()) {
return pkey->value();
}
pch += pkey->numPackedBytes();
}
assert(pch == fence);
return nullptr;
}
typedef FbsonKeyValue value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef FbsonFwdIteratorT<pointer, ObjectVal> iterator;
typedef FbsonFwdIteratorT<const_pointer, ObjectVal> const_iterator;
iterator begin() { return iterator((pointer)payload_); }
const_iterator begin() const { return const_iterator((pointer)payload_); }
iterator end() { return iterator((pointer)(payload_ + size_)); }
const_iterator end() const {
return const_iterator((pointer)(payload_ + size_));
}
private:
FbsonValue* internalFind(const char* key, unsigned int klen) const {
const char* pch = payload_;
const char* fence = payload_ + size_;
while (pch < fence) {
FbsonKeyValue* pkey = (FbsonKeyValue*)(pch);
if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
return pkey->value();
}
pch += pkey->numPackedBytes();
}
assert(pch == fence);
return nullptr;
}
private:
ObjectVal();
};
/*
* Array type
*/
class ArrayVal : public ContainerVal {
public:
// get the FBSON value at index
FbsonValue* get(int idx) const {
if (idx < 0)
return nullptr;
const char* pch = payload_;
const char* fence = payload_ + size_;
while (pch < fence && idx-- > 0)
pch += ((FbsonValue*)pch)->numPackedBytes();
if (idx == -1)
return (FbsonValue*)pch;
else {
assert(pch == fence);
return nullptr;
}
}
// Get number of elements in array
unsigned int numElem() const {
const char* pch = payload_;
const char* fence = payload_ + size_;
unsigned int num = 0;
while (pch < fence) {
++num;
pch += ((FbsonValue*)pch)->numPackedBytes();
}
assert(pch == fence);
return num;
}
typedef FbsonValue value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef FbsonFwdIteratorT<pointer, ArrayVal> iterator;
typedef FbsonFwdIteratorT<const_pointer, ArrayVal> const_iterator;
iterator begin() { return iterator((pointer)payload_); }
const_iterator begin() const { return const_iterator((pointer)payload_); }
iterator end() { return iterator((pointer)(payload_ + size_)); }
const_iterator end() const {
return const_iterator((pointer)(payload_ + size_));
}
private:
ArrayVal();
};
inline FbsonDocument* FbsonDocument::createDocument(const char* pb,
uint32_t size) {
if (!pb || size < sizeof(FbsonHeader) + sizeof(FbsonValue)) {
return nullptr;
}
FbsonDocument* doc = (FbsonDocument*)pb;
if (doc->header_.ver_ != FBSON_VER) {
return nullptr;
}
FbsonValue* val = (FbsonValue*)doc->payload_;
if (!val->isObject() || size != sizeof(FbsonHeader) + val->numPackedBytes()) {
return nullptr;
}
return doc;
}
inline FbsonValue* FbsonDocument::createValue(const char* pb, uint32_t size) {
if (!pb || size < sizeof(FbsonHeader) + sizeof(FbsonValue)) {
return nullptr;
}
FbsonDocument* doc = (FbsonDocument*)pb;
if (doc->header_.ver_ != FBSON_VER) {
return nullptr;
}
FbsonValue* val = (FbsonValue*)doc->payload_;
if (size != sizeof(FbsonHeader) + val->numPackedBytes()) {
return nullptr;
}
return val;
}
inline unsigned int FbsonKeyValue::numPackedBytes() const {
unsigned int ks = keyPackedBytes();
FbsonValue* val = (FbsonValue*)(((char*)this) + ks);
return ks + val->numPackedBytes();
}
// Poor man's "virtual" function FbsonValue::numPackedBytes
inline unsigned int FbsonValue::numPackedBytes() const {
switch (type_) {
case FbsonType::T_Null:
case FbsonType::T_True:
case FbsonType::T_False: {
return sizeof(type_);
}
case FbsonType::T_Int8: {
return sizeof(type_) + sizeof(int8_t);
}
case FbsonType::T_Int16: {
return sizeof(type_) + sizeof(int16_t);
}
case FbsonType::T_Int32: {
return sizeof(type_) + sizeof(int32_t);
}
case FbsonType::T_Int64: {
return sizeof(type_) + sizeof(int64_t);
}
case FbsonType::T_Double: {
return sizeof(type_) + sizeof(double);
}
case FbsonType::T_String:
case FbsonType::T_Binary: {
return ((BlobVal*)(this))->numPackedBytes();
}
case FbsonType::T_Object:
case FbsonType::T_Array: {
return ((ContainerVal*)(this))->numPackedBytes();
}
default:
return 0;
}
}
inline unsigned int FbsonValue::size() const {
switch (type_) {
case FbsonType::T_Int8: {
return sizeof(int8_t);
}
case FbsonType::T_Int16: {
return sizeof(int16_t);
}
case FbsonType::T_Int32: {
return sizeof(int32_t);
}
case FbsonType::T_Int64: {
return sizeof(int64_t);
}
case FbsonType::T_Double: {
return sizeof(double);
}
case FbsonType::T_String:
case FbsonType::T_Binary: {
return ((BlobVal*)(this))->getBlobLen();
}
case FbsonType::T_Object:
case FbsonType::T_Array: {
return ((ContainerVal*)(this))->getContainerSize();
}
case FbsonType::T_Null:
case FbsonType::T_True:
case FbsonType::T_False:
default:
return 0;
}
}
inline const char* FbsonValue::getValuePtr() const {
switch (type_) {
case FbsonType::T_Int8:
case FbsonType::T_Int16:
case FbsonType::T_Int32:
case FbsonType::T_Int64:
case FbsonType::T_Double:
return ((char*)this) + sizeof(FbsonType);
case FbsonType::T_String:
case FbsonType::T_Binary:
return ((BlobVal*)(this))->getBlob();
case FbsonType::T_Object:
case FbsonType::T_Array:
return ((ContainerVal*)(this))->getPayload();
case FbsonType::T_Null:
case FbsonType::T_True:
case FbsonType::T_False:
default:
return nullptr;
}
}
inline FbsonValue* FbsonValue::findPath(const char* key_path,
unsigned int kp_len,
const char* delim = ".",
hDictFind handler = nullptr) {
if (!key_path || !kp_len)
return nullptr;
if (!delim)
delim = "."; // default delimiter
FbsonValue* pval = this;
const char* fence = key_path + kp_len;
char idx_buf[21]; // buffer to parse array index (integer value)
while (pval && key_path < fence) {
const char* key = key_path;
unsigned int klen = 0;
// find the current key
for (; key_path != fence && *key_path != *delim; ++key_path, ++klen)
;
if (!klen)
return nullptr;
switch (pval->type_) {
case FbsonType::T_Object: {
pval = ((ObjectVal*)pval)->find(key, klen, handler);
break;
}
case FbsonType::T_Array: {
// parse string into an integer (array index)
if (klen >= sizeof(idx_buf))
return nullptr;
memcpy(idx_buf, key, klen);
idx_buf[klen] = 0;
char* end = nullptr;
int index = (int)strtol(idx_buf, &end, 10);
if (end && !*end)
pval = ((fbson::ArrayVal*)pval)->get(index);
else
// incorrect index string
return nullptr;
break;
}
default:
return nullptr;
}
// skip the delimiter
if (key_path < fence) {
++key_path;
if (key_path == fence)
// we have a trailing delimiter at the end
return nullptr;
}
}
return pval;
}
#pragma pack(pop)
} // namespace fbson
#endif // FBSON_FBSONDOCUMENT_H