mirror of https://github.com/facebook/rocksdb.git
Port folly/synchronization/DistributedMutex to rocksdb (#5642)
Summary: This ports `folly::DistributedMutex` into RocksDB. The PR includes everything else needed to compile and use DistributedMutex as a component within folly. Most files are unchanged except for some portability stuff and includes. For now, I've put this under `rocksdb/third-party`, but if there is a better folder to put this under, let me know. I also am not sure how or where to put unit tests for third-party stuff like this. It seems like gtest is included already, but I need to link with it from another third-party folder. This also includes some other common components from folly - folly/Optional - folly/ScopeGuard (In particular `SCOPE_EXIT`) - folly/synchronization/ParkingLot (A portable futex-like interface) - folly/synchronization/AtomicNotification (The standard C++ interface for futexes) - folly/Indestructible (For singletons that don't get destroyed without allocations) Pull Request resolved: https://github.com/facebook/rocksdb/pull/5642 Differential Revision: D16544439 fbshipit-source-id: 179b98b5dcddc3075926d31a30f92fd064245731
This commit is contained in:
parent
6e78fe3c8d
commit
38b03c840e
|
@ -60,6 +60,13 @@ option(WITH_WINDOWS_UTF8_FILENAMES "use UTF8 as characterset for opening files,
|
|||
if (WITH_WINDOWS_UTF8_FILENAMES)
|
||||
add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES)
|
||||
endif()
|
||||
# third-party/folly is only validated to work on Linux and Windows for now.
|
||||
# So only turn it on there by default.
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
|
||||
else()
|
||||
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
|
||||
endif()
|
||||
if(MSVC)
|
||||
# Defaults currently different for GFLAGS.
|
||||
# We will address find_package work a little later
|
||||
|
@ -462,6 +469,9 @@ endif()
|
|||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src)
|
||||
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/third-party/folly)
|
||||
endif()
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
# Main library source code
|
||||
|
@ -738,6 +748,15 @@ else()
|
|||
env/io_posix.cc)
|
||||
endif()
|
||||
|
||||
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
|
||||
list(APPEND SOURCES
|
||||
third-party/folly/folly/detail/Futex.cpp
|
||||
third-party/folly/folly/synchronization/AtomicNotification.cpp
|
||||
third-party/folly/folly/synchronization/DistributedMutex.cpp
|
||||
third-party/folly/folly/synchronization/ParkingLot.cpp
|
||||
third-party/folly/folly/synchronization/WaitOptions.cpp)
|
||||
endif()
|
||||
|
||||
set(ROCKSDB_STATIC_LIB rocksdb${ARTIFACT_SUFFIX})
|
||||
set(ROCKSDB_SHARED_LIB rocksdb-shared${ARTIFACT_SUFFIX})
|
||||
set(ROCKSDB_IMPORT_LIB ${ROCKSDB_SHARED_LIB})
|
||||
|
@ -1009,6 +1028,10 @@ if(WITH_TESTS)
|
|||
list(APPEND TESTS utilities/env_librados_test.cc)
|
||||
endif()
|
||||
|
||||
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
|
||||
list(APPEND TESTS third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp)
|
||||
endif()
|
||||
|
||||
set(BENCHMARKS
|
||||
cache/cache_bench.cc
|
||||
memtable/memtablerep_bench.cc
|
||||
|
|
30
Makefile
30
Makefile
|
@ -89,7 +89,7 @@ endif
|
|||
|
||||
ifeq ($(MAKECMDGOALS),rocksdbjavastaticreleasedocker)
|
||||
ifneq ($(DEBUG_LEVEL),2)
|
||||
DEBUG_LEVEL=0
|
||||
DEBUG_LEVEL=0
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -304,6 +304,10 @@ ifndef DISABLE_JEMALLOC
|
|||
PLATFORM_CCFLAGS += $(JEMALLOC_INCLUDE)
|
||||
endif
|
||||
|
||||
ifndef USE_FOLLY_DISTRIBUTED_MUTEX
|
||||
USE_FOLLY_DISTRIBUTED_MUTEX=0
|
||||
endif
|
||||
|
||||
export GTEST_THROW_ON_FAILURE=1
|
||||
export GTEST_HAS_EXCEPTIONS=1
|
||||
GTEST_DIR = ./third-party/gtest-1.7.0/fused-src
|
||||
|
@ -316,6 +320,18 @@ else
|
|||
PLATFORM_CXXFLAGS += -isystem $(GTEST_DIR)
|
||||
endif
|
||||
|
||||
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
|
||||
FOLLY_DIR = ./third-party/folly
|
||||
# AIX: pre-defined system headers are surrounded by an extern "C" block
|
||||
ifeq ($(PLATFORM), OS_AIX)
|
||||
PLATFORM_CCFLAGS += -I$(FOLLY_DIR)
|
||||
PLATFORM_CXXFLAGS += -I$(FOLLY_DIR)
|
||||
else
|
||||
PLATFORM_CCFLAGS += -isystem $(FOLLY_DIR)
|
||||
PLATFORM_CXXFLAGS += -isystem $(FOLLY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
# This (the first rule) must depend on "all".
|
||||
default: all
|
||||
|
||||
|
@ -402,6 +418,9 @@ endif
|
|||
|
||||
LIBOBJECTS += $(TOOL_LIB_SOURCES:.cc=.o)
|
||||
MOCKOBJECTS = $(MOCK_LIB_SOURCES:.cc=.o)
|
||||
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
|
||||
FOLLYOBJECTS = $(FOLLY_SOURCES:.cpp=.o)
|
||||
endif
|
||||
|
||||
GTEST = $(GTEST_DIR)/gtest/gtest-all.o
|
||||
TESTUTIL = ./test_util/testutil.o
|
||||
|
@ -569,6 +588,10 @@ TESTS = \
|
|||
block_cache_tracer_test \
|
||||
block_cache_trace_analyzer_test \
|
||||
|
||||
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
|
||||
TESTS += folly_synchronization_distributed_mutex_test
|
||||
endif
|
||||
|
||||
PARALLEL_TEST = \
|
||||
backupable_db_test \
|
||||
db_bloom_filter_test \
|
||||
|
@ -1120,6 +1143,11 @@ trace_analyzer: tools/trace_analyzer.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
|
|||
block_cache_trace_analyzer: tools/block_cache_analyzer/block_cache_trace_analyzer_tool.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
|
||||
$(AM_LINK)
|
||||
|
||||
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
|
||||
folly_synchronization_distributed_mutex_test: $(LIBOBJECTS) $(TESTHARNESS) $(FOLLYOBJECTS) third-party/folly/folly/synchronization/test/DistributedMutexTest.o
|
||||
$(AM_LINK)
|
||||
endif
|
||||
|
||||
cache_bench: cache/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(AM_LINK)
|
||||
|
||||
|
|
|
@ -150,6 +150,9 @@ case "$TARGET_OS" in
|
|||
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
|
||||
fi
|
||||
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt"
|
||||
if test -z "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
|
||||
USE_FOLLY_DISTRIBUTED_MUTEX=1
|
||||
fi
|
||||
# PORT_FILES=port/linux/linux_specific.cc
|
||||
;;
|
||||
SunOS)
|
||||
|
@ -661,3 +664,6 @@ if test -n "$WITH_JEMALLOC_FLAG"; then
|
|||
echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT"
|
||||
fi
|
||||
echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT"
|
||||
if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
|
||||
echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT"
|
||||
fi
|
||||
|
|
|
@ -159,4 +159,6 @@ else
|
|||
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
|
||||
fi
|
||||
|
||||
USE_FOLLY_DISTRIBUTED_MUTEX=1
|
||||
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
|
||||
|
|
|
@ -155,4 +155,6 @@ VALGRIND_VER="$VALGRIND_BASE/bin/"
|
|||
LUA_PATH=
|
||||
LUA_LIB=
|
||||
|
||||
USE_FOLLY_DISTRIBUTED_MUTEX=1
|
||||
|
||||
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
|
||||
|
|
7
src.mk
7
src.mk
|
@ -263,6 +263,13 @@ TEST_LIB_SOURCES = \
|
|||
test_util/testutil.cc \
|
||||
utilities/cassandra/test_utils.cc \
|
||||
|
||||
FOLLY_SOURCES = \
|
||||
third-party/folly/folly/detail/Futex.cpp \
|
||||
third-party/folly/folly/synchronization/AtomicNotification.cpp \
|
||||
third-party/folly/folly/synchronization/DistributedMutex.cpp \
|
||||
third-party/folly/folly/synchronization/ParkingLot.cpp \
|
||||
third-party/folly/folly/synchronization/WaitOptions.cpp \
|
||||
|
||||
MAIN_SOURCES = \
|
||||
cache/cache_bench.cc \
|
||||
cache/cache_test.cc \
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
 * FOLLY_EXPORT
 *
 * Marks a symbol as having default (public) visibility so it stays
 * exported from shared objects even when the build hides symbols by
 * default. On toolchains without GCC-style attributes it expands to
 * nothing.
 */
#if defined(__GNUC__)
#define FOLLY_EXPORT __attribute__((__visibility__("default")))
#else
#define FOLLY_EXPORT
#endif
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace folly {
// Compile-time maximum of one or more values of the same type T.
// Base case: the maximum of a single value is the value itself.
template <typename T>
constexpr T constexpr_max(T only) {
  return only;
}
// Recursive case: fold the first two arguments into their larger one,
// then recurse over the remainder.
template <typename T, typename... Ts>
constexpr T constexpr_max(T first, T second, Ts... rest) {
  return constexpr_max(second < first ? first : second, rest...);
}
} // namespace folly
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include <folly/Traits.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
/***
|
||||
* Indestructible
|
||||
*
|
||||
* When you need a Meyers singleton that will not get destructed, even at
|
||||
* shutdown, and you also want the object stored inline.
|
||||
*
|
||||
* Use like:
|
||||
*
|
||||
* void doSomethingWithExpensiveData();
|
||||
*
|
||||
* void doSomethingWithExpensiveData() {
|
||||
* static const Indestructible<map<string, int>> data{
|
||||
* map<string, int>{{"key1", 17}, {"key2", 19}, {"key3", 23}},
|
||||
* };
|
||||
* callSomethingTakingAMapByRef(*data);
|
||||
* }
|
||||
*
|
||||
* This should be used only for Meyers singletons, and, even then, only when
|
||||
* the instance does not need to be destructed ever.
|
||||
*
|
||||
* This should not be used more generally, e.g., as member fields, etc.
|
||||
*
|
||||
* This is designed as an alternative, but with one fewer allocation at
|
||||
* construction time and one fewer pointer dereference at access time, to the
|
||||
* Meyers singleton pattern of:
|
||||
*
|
||||
* void doSomethingWithExpensiveData() {
|
||||
* static const auto data = // never `delete`d
|
||||
* new map<string, int>{{"key1", 17}, {"key2", 19}, {"key3", 23}};
|
||||
* callSomethingTakingAMapByRef(*data);
|
||||
* }
|
||||
*/
|
||||
|
||||
template <typename T>
|
||||
class Indestructible final {
|
||||
public:
|
||||
template <typename S = T, typename = decltype(S())>
|
||||
constexpr Indestructible() noexcept(noexcept(T())) {}
|
||||
|
||||
/**
|
||||
* Constructor accepting a single argument by forwarding reference, this
|
||||
* allows using list initialzation without the overhead of things like
|
||||
* in_place, etc and also works with std::initializer_list constructors
|
||||
* which can't be deduced, the default parameter helps there.
|
||||
*
|
||||
* auto i = folly::Indestructible<std::map<int, int>>{{{1, 2}}};
|
||||
*
|
||||
* This provides convenience
|
||||
*
|
||||
* There are two versions of this constructor - one for when the element is
|
||||
* implicitly constructible from the given argument and one for when the
|
||||
* type is explicitly but not implicitly constructible from the given
|
||||
* argument.
|
||||
*/
|
||||
template <
|
||||
typename U = T,
|
||||
_t<std::enable_if<std::is_constructible<T, U&&>::value>>* = nullptr,
|
||||
_t<std::enable_if<
|
||||
!std::is_same<Indestructible<T>, remove_cvref_t<U>>::value>>* =
|
||||
nullptr,
|
||||
_t<std::enable_if<!std::is_convertible<U&&, T>::value>>* = nullptr>
|
||||
explicit constexpr Indestructible(U&& u) noexcept(
|
||||
noexcept(T(std::declval<U>())))
|
||||
: storage_(std::forward<U>(u)) {}
|
||||
template <
|
||||
typename U = T,
|
||||
_t<std::enable_if<std::is_constructible<T, U&&>::value>>* = nullptr,
|
||||
_t<std::enable_if<
|
||||
!std::is_same<Indestructible<T>, remove_cvref_t<U>>::value>>* =
|
||||
nullptr,
|
||||
_t<std::enable_if<std::is_convertible<U&&, T>::value>>* = nullptr>
|
||||
/* implicit */ constexpr Indestructible(U&& u) noexcept(
|
||||
noexcept(T(std::declval<U>())))
|
||||
: storage_(std::forward<U>(u)) {}
|
||||
|
||||
template <typename... Args, typename = decltype(T(std::declval<Args>()...))>
|
||||
explicit constexpr Indestructible(Args&&... args) noexcept(
|
||||
noexcept(T(std::declval<Args>()...)))
|
||||
: storage_(std::forward<Args>(args)...) {}
|
||||
template <
|
||||
typename U,
|
||||
typename... Args,
|
||||
typename = decltype(
|
||||
T(std::declval<std::initializer_list<U>&>(),
|
||||
std::declval<Args>()...))>
|
||||
explicit constexpr Indestructible(std::initializer_list<U> il, Args... args) noexcept(
|
||||
noexcept(
|
||||
T(std::declval<std::initializer_list<U>&>(),
|
||||
std::declval<Args>()...)))
|
||||
: storage_(il, std::forward<Args>(args)...) {}
|
||||
|
||||
~Indestructible() = default;
|
||||
|
||||
Indestructible(Indestructible const&) = delete;
|
||||
Indestructible& operator=(Indestructible const&) = delete;
|
||||
|
||||
Indestructible(Indestructible&& other) noexcept(
|
||||
noexcept(T(std::declval<T>())))
|
||||
: storage_(std::move(other.storage_.value)) {
|
||||
other.erased_ = true;
|
||||
}
|
||||
Indestructible& operator=(Indestructible&& other) noexcept(
|
||||
noexcept(T(std::declval<T>()))) {
|
||||
storage_.value = std::move(other.storage_.value);
|
||||
other.erased_ = true;
|
||||
}
|
||||
|
||||
T* get() noexcept {
|
||||
check();
|
||||
return &storage_.value;
|
||||
}
|
||||
T const* get() const noexcept {
|
||||
check();
|
||||
return &storage_.value;
|
||||
}
|
||||
T& operator*() noexcept {
|
||||
return *get();
|
||||
}
|
||||
T const& operator*() const noexcept {
|
||||
return *get();
|
||||
}
|
||||
T* operator->() noexcept {
|
||||
return get();
|
||||
}
|
||||
T const* operator->() const noexcept {
|
||||
return get();
|
||||
}
|
||||
|
||||
private:
|
||||
void check() const noexcept {
|
||||
assert(!erased_);
|
||||
}
|
||||
|
||||
union Storage {
|
||||
T value;
|
||||
|
||||
template <typename S = T, typename = decltype(S())>
|
||||
constexpr Storage() noexcept(noexcept(T())) : value() {}
|
||||
|
||||
template <typename... Args, typename = decltype(T(std::declval<Args>()...))>
|
||||
explicit constexpr Storage(Args&&... args) noexcept(
|
||||
noexcept(T(std::declval<Args>()...)))
|
||||
: value(std::forward<Args>(args)...) {}
|
||||
|
||||
~Storage() {}
|
||||
};
|
||||
|
||||
Storage storage_{};
|
||||
bool erased_{false};
|
||||
};
|
||||
} // namespace folly
|
|
@ -0,0 +1,570 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Optional - For conditional initialization of values, like boost::optional,
|
||||
* but with support for move semantics and emplacement. Reference type support
|
||||
* has not been included due to limited use cases and potential confusion with
|
||||
* semantics of assignment: Assigning to an optional reference could quite
|
||||
* reasonably copy its value or redirect the reference.
|
||||
*
|
||||
* Optional can be useful when a variable might or might not be needed:
|
||||
*
|
||||
* Optional<Logger> maybeLogger = ...;
|
||||
* if (maybeLogger) {
|
||||
* maybeLogger->log("hello");
|
||||
* }
|
||||
*
|
||||
* Optional enables a 'null' value for types which do not otherwise have
|
||||
* nullability, especially useful for parameter passing:
|
||||
*
|
||||
* void testIterator(const unique_ptr<Iterator>& it,
|
||||
* initializer_list<int> idsExpected,
|
||||
* Optional<initializer_list<int>> ranksExpected = none) {
|
||||
* for (int i = 0; it->next(); ++i) {
|
||||
* EXPECT_EQ(it->doc().id(), idsExpected[i]);
|
||||
* if (ranksExpected) {
|
||||
* EXPECT_EQ(it->doc().rank(), (*ranksExpected)[i]);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* Optional models OptionalPointee, so calling 'get_pointer(opt)' will return a
|
||||
* pointer to nullptr if the 'opt' is empty, and a pointer to the value if it is
|
||||
* not:
|
||||
*
|
||||
* Optional<int> maybeInt = ...;
|
||||
* if (int* v = get_pointer(maybeInt)) {
|
||||
* cout << *v << endl;
|
||||
* }
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <new>
|
||||
#include <stdexcept>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#include <folly/CPortability.h>
|
||||
#include <folly/Traits.h>
|
||||
#include <folly/Utility.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
template <class Value>
|
||||
class Optional;
|
||||
|
||||
namespace detail {
|
||||
template <class Value>
|
||||
struct OptionalPromiseReturn;
|
||||
} // namespace detail
|
||||
|
||||
struct None {
|
||||
enum class _secret { _token };
|
||||
|
||||
/**
|
||||
* No default constructor to support both `op = {}` and `op = none`
|
||||
* as syntax for clearing an Optional, just like std::nullopt_t.
|
||||
*/
|
||||
constexpr explicit None(_secret) {}
|
||||
};
|
||||
constexpr None none{None::_secret::_token};
|
||||
|
||||
class FOLLY_EXPORT OptionalEmptyException : public std::runtime_error {
|
||||
public:
|
||||
OptionalEmptyException()
|
||||
: std::runtime_error("Empty Optional cannot be unwrapped") {}
|
||||
};
|
||||
|
||||
template <class Value>
|
||||
class Optional {
|
||||
public:
|
||||
typedef Value value_type;
|
||||
|
||||
static_assert(
|
||||
!std::is_reference<Value>::value,
|
||||
"Optional may not be used with reference types");
|
||||
static_assert(
|
||||
!std::is_abstract<Value>::value,
|
||||
"Optional may not be used with abstract types");
|
||||
|
||||
Optional() noexcept {}
|
||||
|
||||
Optional(const Optional& src) noexcept(
|
||||
std::is_nothrow_copy_constructible<Value>::value) {
|
||||
if (src.hasValue()) {
|
||||
construct(src.value());
|
||||
}
|
||||
}
|
||||
|
||||
Optional(Optional&& src) noexcept(
|
||||
std::is_nothrow_move_constructible<Value>::value) {
|
||||
if (src.hasValue()) {
|
||||
construct(std::move(src.value()));
|
||||
src.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/* implicit */ Optional(const None&) noexcept {}
|
||||
|
||||
/* implicit */ Optional(Value&& newValue) noexcept(
|
||||
std::is_nothrow_move_constructible<Value>::value) {
|
||||
construct(std::move(newValue));
|
||||
}
|
||||
|
||||
/* implicit */ Optional(const Value& newValue) noexcept(
|
||||
std::is_nothrow_copy_constructible<Value>::value) {
|
||||
construct(newValue);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
explicit Optional(in_place_t, Args&&... args) noexcept(
|
||||
std::is_nothrow_constructible<Value, Args...>::value)
|
||||
: Optional{PrivateConstructor{}, std::forward<Args>(args)...} {}
|
||||
|
||||
template <typename U, typename... Args>
|
||||
explicit Optional(
|
||||
in_place_t,
|
||||
std::initializer_list<U> il,
|
||||
Args&&... args) noexcept(std::
|
||||
is_nothrow_constructible<
|
||||
Value,
|
||||
std::initializer_list<U>,
|
||||
Args...>::value)
|
||||
: Optional{PrivateConstructor{}, il, std::forward<Args>(args)...} {}
|
||||
|
||||
// Used only when an Optional is used with coroutines on MSVC
|
||||
/* implicit */ Optional(const detail::OptionalPromiseReturn<Value>& p)
|
||||
: Optional{} {
|
||||
p.promise_->value_ = this;
|
||||
}
|
||||
|
||||
void assign(const None&) {
|
||||
clear();
|
||||
}
|
||||
|
||||
void assign(Optional&& src) {
|
||||
if (this != &src) {
|
||||
if (src.hasValue()) {
|
||||
assign(std::move(src.value()));
|
||||
src.clear();
|
||||
} else {
|
||||
clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void assign(const Optional& src) {
|
||||
if (src.hasValue()) {
|
||||
assign(src.value());
|
||||
} else {
|
||||
clear();
|
||||
}
|
||||
}
|
||||
|
||||
void assign(Value&& newValue) {
|
||||
if (hasValue()) {
|
||||
storage_.value = std::move(newValue);
|
||||
} else {
|
||||
construct(std::move(newValue));
|
||||
}
|
||||
}
|
||||
|
||||
void assign(const Value& newValue) {
|
||||
if (hasValue()) {
|
||||
storage_.value = newValue;
|
||||
} else {
|
||||
construct(newValue);
|
||||
}
|
||||
}
|
||||
|
||||
Optional& operator=(None) noexcept {
|
||||
reset();
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class Arg>
|
||||
Optional& operator=(Arg&& arg) {
|
||||
assign(std::forward<Arg>(arg));
|
||||
return *this;
|
||||
}
|
||||
|
||||
Optional& operator=(Optional&& other) noexcept(
|
||||
std::is_nothrow_move_assignable<Value>::value) {
|
||||
assign(std::move(other));
|
||||
return *this;
|
||||
}
|
||||
|
||||
Optional& operator=(const Optional& other) noexcept(
|
||||
std::is_nothrow_copy_assignable<Value>::value) {
|
||||
assign(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class... Args>
|
||||
Value& emplace(Args&&... args) {
|
||||
clear();
|
||||
construct(std::forward<Args>(args)...);
|
||||
return value();
|
||||
}
|
||||
|
||||
template <class U, class... Args>
|
||||
typename std::enable_if<
|
||||
std::is_constructible<Value, std::initializer_list<U>&, Args&&...>::value,
|
||||
Value&>::type
|
||||
emplace(std::initializer_list<U> ilist, Args&&... args) {
|
||||
clear();
|
||||
construct(ilist, std::forward<Args>(args)...);
|
||||
return value();
|
||||
}
|
||||
|
||||
void reset() noexcept {
|
||||
storage_.clear();
|
||||
}
|
||||
|
||||
void clear() noexcept {
|
||||
reset();
|
||||
}
|
||||
|
||||
void swap(Optional& that) noexcept(IsNothrowSwappable<Value>::value) {
|
||||
if (hasValue() && that.hasValue()) {
|
||||
using std::swap;
|
||||
swap(value(), that.value());
|
||||
} else if (hasValue()) {
|
||||
that.emplace(std::move(value()));
|
||||
reset();
|
||||
} else if (that.hasValue()) {
|
||||
emplace(std::move(that.value()));
|
||||
that.reset();
|
||||
}
|
||||
}
|
||||
|
||||
const Value& value() const& {
|
||||
require_value();
|
||||
return storage_.value;
|
||||
}
|
||||
|
||||
Value& value() & {
|
||||
require_value();
|
||||
return storage_.value;
|
||||
}
|
||||
|
||||
Value&& value() && {
|
||||
require_value();
|
||||
return std::move(storage_.value);
|
||||
}
|
||||
|
||||
const Value&& value() const&& {
|
||||
require_value();
|
||||
return std::move(storage_.value);
|
||||
}
|
||||
|
||||
const Value* get_pointer() const& {
|
||||
return storage_.hasValue ? &storage_.value : nullptr;
|
||||
}
|
||||
Value* get_pointer() & {
|
||||
return storage_.hasValue ? &storage_.value : nullptr;
|
||||
}
|
||||
Value* get_pointer() && = delete;
|
||||
|
||||
bool has_value() const noexcept {
|
||||
return storage_.hasValue;
|
||||
}
|
||||
|
||||
bool hasValue() const noexcept {
|
||||
return has_value();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return has_value();
|
||||
}
|
||||
|
||||
const Value& operator*() const& {
|
||||
return value();
|
||||
}
|
||||
Value& operator*() & {
|
||||
return value();
|
||||
}
|
||||
const Value&& operator*() const&& {
|
||||
return std::move(value());
|
||||
}
|
||||
Value&& operator*() && {
|
||||
return std::move(value());
|
||||
}
|
||||
|
||||
const Value* operator->() const {
|
||||
return &value();
|
||||
}
|
||||
Value* operator->() {
|
||||
return &value();
|
||||
}
|
||||
|
||||
// Return a copy of the value if set, or a given default if not.
|
||||
template <class U>
|
||||
Value value_or(U&& dflt) const& {
|
||||
if (storage_.hasValue) {
|
||||
return storage_.value;
|
||||
}
|
||||
|
||||
return std::forward<U>(dflt);
|
||||
}
|
||||
|
||||
template <class U>
|
||||
Value value_or(U&& dflt) && {
|
||||
if (storage_.hasValue) {
|
||||
return std::move(storage_.value);
|
||||
}
|
||||
|
||||
return std::forward<U>(dflt);
|
||||
}
|
||||
|
||||
private:
|
||||
template <class T>
|
||||
friend Optional<_t<std::decay<T>>> make_optional(T&&);
|
||||
template <class T, class... Args>
|
||||
friend Optional<T> make_optional(Args&&... args);
|
||||
template <class T, class U, class... As>
|
||||
friend Optional<T> make_optional(std::initializer_list<U>, As&&...);
|
||||
|
||||
/**
|
||||
* Construct the optional in place, this is duplicated as a non-explicit
|
||||
* constructor to allow returning values that are non-movable from
|
||||
* make_optional using list initialization.
|
||||
*
|
||||
* Until C++17, at which point this will become unnecessary because of
|
||||
* specified prvalue elision.
|
||||
*/
|
||||
struct PrivateConstructor {
|
||||
explicit PrivateConstructor() = default;
|
||||
};
|
||||
template <typename... Args>
|
||||
Optional(PrivateConstructor, Args&&... args) noexcept(
|
||||
std::is_constructible<Value, Args&&...>::value) {
|
||||
construct(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
void require_value() const {
|
||||
if (!storage_.hasValue) {
|
||||
throw OptionalEmptyException{};
|
||||
}
|
||||
}
|
||||
|
||||
template <class... Args>
|
||||
void construct(Args&&... args) {
|
||||
const void* ptr = &storage_.value;
|
||||
// For supporting const types.
|
||||
new (const_cast<void*>(ptr)) Value(std::forward<Args>(args)...);
|
||||
storage_.hasValue = true;
|
||||
}
|
||||
|
||||
struct StorageTriviallyDestructible {
|
||||
union {
|
||||
char emptyState;
|
||||
Value value;
|
||||
};
|
||||
bool hasValue;
|
||||
|
||||
StorageTriviallyDestructible()
|
||||
: emptyState('\0'), hasValue{false} {}
|
||||
void clear() {
|
||||
hasValue = false;
|
||||
}
|
||||
};
|
||||
|
||||
struct StorageNonTriviallyDestructible {
|
||||
union {
|
||||
char emptyState;
|
||||
Value value;
|
||||
};
|
||||
bool hasValue;
|
||||
|
||||
StorageNonTriviallyDestructible() : hasValue{false} {}
|
||||
~StorageNonTriviallyDestructible() {
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
if (hasValue) {
|
||||
hasValue = false;
|
||||
value.~Value();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using Storage = typename std::conditional<
|
||||
std::is_trivially_destructible<Value>::value,
|
||||
StorageTriviallyDestructible,
|
||||
StorageNonTriviallyDestructible>::type;
|
||||
|
||||
Storage storage_;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
const T* get_pointer(const Optional<T>& opt) {
|
||||
return opt.get_pointer();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T* get_pointer(Optional<T>& opt) {
|
||||
return opt.get_pointer();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void swap(Optional<T>& a, Optional<T>& b) noexcept(noexcept(a.swap(b))) {
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
Optional<_t<std::decay<T>>> make_optional(T&& v) {
|
||||
using PrivateConstructor =
|
||||
typename folly::Optional<_t<std::decay<T>>>::PrivateConstructor;
|
||||
return {PrivateConstructor{}, std::forward<T>(v)};
|
||||
}
|
||||
|
||||
template <class T, class... Args>
|
||||
folly::Optional<T> make_optional(Args&&... args) {
|
||||
using PrivateConstructor = typename folly::Optional<T>::PrivateConstructor;
|
||||
return {PrivateConstructor{}, std::forward<Args>(args)...};
|
||||
}
|
||||
|
||||
template <class T, class U, class... Args>
|
||||
folly::Optional<T> make_optional(
|
||||
std::initializer_list<U> il,
|
||||
Args&&... args) {
|
||||
using PrivateConstructor = typename folly::Optional<T>::PrivateConstructor;
|
||||
return {PrivateConstructor{}, il, std::forward<Args>(args)...};
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Comparisons.
|
||||
|
||||
template <class U, class V>
|
||||
bool operator==(const Optional<U>& a, const V& b) {
|
||||
return a.hasValue() && a.value() == b;
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator!=(const Optional<U>& a, const V& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator==(const U& a, const Optional<V>& b) {
|
||||
return b.hasValue() && b.value() == a;
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator!=(const U& a, const Optional<V>& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator==(const Optional<U>& a, const Optional<V>& b) {
|
||||
if (a.hasValue() != b.hasValue()) {
|
||||
return false;
|
||||
}
|
||||
if (a.hasValue()) {
|
||||
return a.value() == b.value();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator!=(const Optional<U>& a, const Optional<V>& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator<(const Optional<U>& a, const Optional<V>& b) {
|
||||
if (a.hasValue() != b.hasValue()) {
|
||||
return a.hasValue() < b.hasValue();
|
||||
}
|
||||
if (a.hasValue()) {
|
||||
return a.value() < b.value();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator>(const Optional<U>& a, const Optional<V>& b) {
|
||||
return b < a;
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator<=(const Optional<U>& a, const Optional<V>& b) {
|
||||
return !(b < a);
|
||||
}
|
||||
|
||||
template <class U, class V>
|
||||
bool operator>=(const Optional<U>& a, const Optional<V>& b) {
|
||||
return !(a < b);
|
||||
}
|
||||
|
||||
// Suppress comparability of Optional<T> with T, despite implicit conversion.
|
||||
template <class V>
|
||||
bool operator<(const Optional<V>&, const V& other) = delete;
|
||||
template <class V>
|
||||
bool operator<=(const Optional<V>&, const V& other) = delete;
|
||||
template <class V>
|
||||
bool operator>=(const Optional<V>&, const V& other) = delete;
|
||||
template <class V>
|
||||
bool operator>(const Optional<V>&, const V& other) = delete;
|
||||
template <class V>
|
||||
bool operator<(const V& other, const Optional<V>&) = delete;
|
||||
template <class V>
|
||||
bool operator<=(const V& other, const Optional<V>&) = delete;
|
||||
template <class V>
|
||||
bool operator>=(const V& other, const Optional<V>&) = delete;
|
||||
template <class V>
|
||||
bool operator>(const V& other, const Optional<V>&) = delete;
|
||||
|
||||
// Comparisons with none
|
||||
template <class V>
|
||||
bool operator==(const Optional<V>& a, None) noexcept {
|
||||
return !a.hasValue();
|
||||
}
|
||||
template <class V>
|
||||
bool operator==(None, const Optional<V>& a) noexcept {
|
||||
return !a.hasValue();
|
||||
}
|
||||
template <class V>
|
||||
bool operator<(const Optional<V>&, None) noexcept {
|
||||
return false;
|
||||
}
|
||||
template <class V>
|
||||
bool operator<(None, const Optional<V>& a) noexcept {
|
||||
return a.hasValue();
|
||||
}
|
||||
template <class V>
|
||||
bool operator>(const Optional<V>& a, None) noexcept {
|
||||
return a.hasValue();
|
||||
}
|
||||
template <class V>
|
||||
bool operator>(None, const Optional<V>&) noexcept {
|
||||
return false;
|
||||
}
|
||||
template <class V>
|
||||
bool operator<=(None, const Optional<V>&) noexcept {
|
||||
return true;
|
||||
}
|
||||
template <class V>
|
||||
bool operator<=(const Optional<V>& a, None) noexcept {
|
||||
return !a.hasValue();
|
||||
}
|
||||
template <class V>
|
||||
bool operator>=(const Optional<V>&, None) noexcept {
|
||||
return true;
|
||||
}
|
||||
template <class V>
|
||||
bool operator>=(None, const Optional<V>& a) noexcept {
|
||||
return !a.hasValue();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

// Architecture probes: each macro expands to 1 on the matching target
// and 0 everywhere else, so they are usable both in #if expressions
// and in the constexpr mirrors below.
#ifdef __arm__
#define FOLLY_ARM 1
#else
#define FOLLY_ARM 0
#endif

#if defined(__x86_64__) || defined(_M_X64)
#define FOLLY_X64 1
#else
#define FOLLY_X64 0
#endif

#ifdef __aarch64__
#define FOLLY_AARCH64 1
#else
#define FOLLY_AARCH64 0
#endif

#ifdef __powerpc64__
#define FOLLY_PPC64 1
#else
#define FOLLY_PPC64 0
#endif

// Portable wrapper around the clang/gcc __has_builtin probe; expands
// to 0 on compilers that lack it.
#if defined(__has_builtin)
#define FOLLY_HAS_BUILTIN(...) __has_builtin(__VA_ARGS__)
#else
#define FOLLY_HAS_BUILTIN(...) 0
#endif

// FOLLY_NODISCARD: prefer the standard [[nodiscard]] attribute, then
// fall back to compiler-specific spellings, then to nothing.
#if defined(__has_cpp_attribute)
#if __has_cpp_attribute(nodiscard)
#define FOLLY_NODISCARD [[nodiscard]]
#endif
#endif
#ifndef FOLLY_NODISCARD
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
#define FOLLY_NODISCARD _Check_return_
#elif defined(__GNUC__)
#define FOLLY_NODISCARD __attribute__((__warn_unused_result__))
#else
#define FOLLY_NODISCARD
#endif
#endif

namespace folly {

// constexpr mirrors of the architecture macros, usable in ordinary
// C++ expressions (if statements, template arguments, ...).
constexpr bool kIsArchArm = FOLLY_ARM == 1;
constexpr bool kIsArchAmd64 = FOLLY_X64 == 1;
constexpr bool kIsArchAArch64 = FOLLY_AARCH64 == 1;
constexpr bool kIsArchPPC64 = FOLLY_PPC64 == 1;

// True in debug builds (i.e. when NDEBUG is not defined).
#ifdef NDEBUG
constexpr auto kIsDebug = false;
#else
constexpr auto kIsDebug = true;
#endif

// True when compiling with MSVC.
#ifdef _MSC_VER
constexpr bool kIsMsvc = true;
#else
constexpr bool kIsMsvc = false;
#endif

} // namespace folly
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <folly/Traits.h>

#include <utility>
#include <type_traits>

namespace folly {
namespace scope_guard_detail {
// RAII holder that runs the stored callable exactly once, when the last
// live guard in a move chain is destroyed.
template <typename F>
class ScopeGuardImpl {
 public:
  explicit ScopeGuardImpl(F&& f) : f_{std::forward<F>(f)} {}
  // Moving transfers responsibility for invoking the callable and
  // disarms the source. Without this, a non-elided return-by-value from
  // operator+ below (copy elision is optional before C++17) would
  // destroy both the temporary and the final guard and run the callable
  // twice.
  ScopeGuardImpl(ScopeGuardImpl&& other) noexcept(
      std::is_nothrow_move_constructible<F>::value)
      : f_{std::move(other.f_)}, armed_{other.armed_} {
    other.armed_ = false;
  }
  ScopeGuardImpl(const ScopeGuardImpl&) = delete;
  ScopeGuardImpl& operator=(const ScopeGuardImpl&) = delete;
  ScopeGuardImpl& operator=(ScopeGuardImpl&&) = delete;
  ~ScopeGuardImpl() {
    if (armed_) {
      f_();
    }
  }

 private:
  F f_;
  bool armed_{true};  // false once ownership has been moved away
};

// Tag type used only so that `ScopeGuardEnum{} + lambda` (see
// SCOPE_EXIT) can construct a guard without naming the lambda's type.
enum class ScopeGuardEnum {};
template <typename Func, typename DecayedFunc = _t<std::decay<Func>>>
ScopeGuardImpl<DecayedFunc> operator+(ScopeGuardEnum, Func&& func) {
  return ScopeGuardImpl<DecayedFunc>{std::forward<Func>(func)};
}
} // namespace scope_guard_detail
} // namespace folly

/**
 * FB_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
 * str and ending with a number that varies with the line.
 */
#ifndef FB_ANONYMOUS_VARIABLE
#define FB_CONCATENATE_IMPL(s1, s2) s1##s2
#define FB_CONCATENATE(s1, s2) FB_CONCATENATE_IMPL(s1, s2)
#ifdef __COUNTER__
#define FB_ANONYMOUS_VARIABLE(str) \
  FB_CONCATENATE(FB_CONCATENATE(FB_CONCATENATE(str, __COUNTER__), _), __LINE__)
#else
#define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __LINE__)
#endif
#endif

// SCOPE_EXIT { ... }; runs the trailing lambda body when the current
// scope is exited (normally or via exception). The lambda is declared
// noexcept: cleanup code must not throw.
#ifndef SCOPE_EXIT
#define SCOPE_EXIT                               \
  auto FB_ANONYMOUS_VARIABLE(SCOPE_EXIT_STATE) = \
      ::folly::scope_guard_detail::ScopeGuardEnum{} + [&]() noexcept
#endif
|
|
@ -0,0 +1,152 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <type_traits>
#include <utility>

namespace folly {

// Trivially-copyable probe: MSVC's std::is_trivially_copyable is used
// directly; the non-MSVC branch here goes through the compiler builtin
// __has_trivial_copy instead.
#if !defined(_MSC_VER)
template <class T>
struct is_trivially_copyable
    : std::integral_constant<bool, __has_trivial_copy(T)> {};
#else
template <class T>
using is_trivially_copyable = std::is_trivially_copyable<T>;
#endif

// _t<Trait> is shorthand for `typename Trait::type`, e.g.
// `_t<std::decay<T>>` instead of `typename std::decay<T>::type` — a
// C++11-friendly stand-in for the C++14 *_t trait aliases. Works for
// any library type with a dependent member type named `type`.
template <typename T>
using _t = typename T::type;

// type_t<T, Ts...> is always T but requires the trailing Ts... to be
// well-formed, which makes it useful for controlling class-template and
// function-template partial specialization. void_t<Ts...> is the common
// `void` special case.
//
// Implemented through a class template rather than a direct alias to
// work around CWG defect 1558
// (http://open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#1558):
// libstdc++, libc++ and MSVC's STL may ignore unused template arguments
// in alias templates instead of producing a substitution failure, which
// makes a naive `template <typename...> using void_t = void;` useless
// for SFINAE — orthogonal specializations would collide as
// redefinitions.
namespace traits_detail {
template <class T, class...>
struct type_t_ {
  using type = T;
};
} // namespace traits_detail

template <class T, class... Ts>
using type_t = typename traits_detail::type_t_<T, Ts...>::type;
template <class... Ts>
using void_t = type_t<void, Ts...>;

// Strips reference, const and volatile qualifiers from T.
template <typename T>
struct remove_cvref {
  using type =
      typename std::remove_cv<typename std::remove_reference<T>::type>::type;
};
template <typename T>
using remove_cvref_t = typename remove_cvref<T>::type;

// True when two Ts can be swapped via std::swap without throwing.
template <class T>
struct IsNothrowSwappable
    : std::integral_constant<
          bool,
          std::is_nothrow_move_constructible<T>::value&& noexcept(
              std::swap(std::declval<T&>(), std::declval<T&>()))> {};

// Logical AND over a pack of type traits (std::conjunction backport):
// short-circuits at the first false trait.
template <typename...>
struct Conjunction : std::true_type {};
template <typename T>
struct Conjunction<T> : T {};
template <typename T, typename... TList>
struct Conjunction<T, TList...>
    : std::conditional<T::value, Conjunction<TList...>, T>::type {};

// Logical NOT of a type trait (std::negation backport).
template <typename T>
struct Negation : std::integral_constant<bool, !T::value> {};

// index_constant<I> names std::integral_constant<std::size_t, I>.
template <std::size_t I>
using index_constant = std::integral_constant<std::size_t, I>;

} // namespace folly
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <type_traits>

namespace folly {

/// The "unit" type of functional programming: a type with exactly one
/// value. In C++ the closest analogue of the degenerate case is void,
/// but void's syntactic special-casing (no values, no variables) makes
/// it a liability for template metaprogramming. Instead of writing
/// specializations for SomeContainer<void>, a library can rule that out
/// and have users write SomeContainer<Unit>; the contained values carry
/// no information and may be ignored.
///
/// Contrast with void, which admits no values at all: a Unit value can
/// always be constructed, but it is always the same value, so it is
/// uninteresting.
struct Unit {
  // Every Unit equals every other Unit...
  constexpr bool operator==(const Unit& /*other*/) const {
    return true;
  }
  // ...so inequality is always false.
  constexpr bool operator!=(const Unit& /*other*/) const {
    return false;
  }
};

// A ready-made Unit value for convenience.
constexpr Unit unit{};

// lift_unit<T>::type maps void -> Unit and leaves all other T alone.
template <typename T>
struct lift_unit {
  using type = T;
};
template <>
struct lift_unit<void> {
  using type = Unit;
};
template <typename T>
using lift_unit_t = typename lift_unit<T>::type;

// drop_unit<T>::type is the inverse mapping: Unit -> void, every other
// T unchanged.
template <typename T>
struct drop_unit {
  using type = T;
};
template <>
struct drop_unit<Unit> {
  using type = void;
};
template <typename T>
using drop_unit_t = typename drop_unit<T>::type;

} // namespace folly
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <type_traits>
#include <utility>

namespace folly {

/**
 * Backports from C++17 of:
 *   std::in_place_t / std::in_place
 *   std::in_place_type_t / std::in_place_type
 *   std::in_place_index_t / std::in_place_index
 *
 * The tag "types" are function-reference types, so the in_place*
 * function templates themselves can be passed wherever a tag value is
 * expected, mirroring the C++17 interface in C++11.
 */

struct in_place_tag {};
template <class>
struct in_place_type_tag {};
template <std::size_t>
struct in_place_index_tag {};

using in_place_t = in_place_tag (&)(in_place_tag);
template <class T>
using in_place_type_t = in_place_type_tag<T> (&)(in_place_type_tag<T>);
template <std::size_t I>
using in_place_index_t = in_place_index_tag<I> (&)(in_place_index_tag<I>);

inline in_place_tag in_place(in_place_tag = {}) {
  return {};
}
template <class T>
inline in_place_type_tag<T> in_place_type(in_place_type_tag<T> = {}) {
  return {};
}
template <std::size_t I>
inline in_place_index_tag<I> in_place_index(in_place_index_tag<I> = {}) {
  return {};
}

// Backport of C++14 std::exchange: stores new_value in obj and returns
// the value obj previously held.
template <class T, class U = T>
T exchange(T& obj, U&& new_value) {
  T previous = std::move(obj);
  obj = std::forward<U>(new_value);
  return previous;
}

namespace utility_detail {
// Concatenates three integer sequences, offsetting the second pack by
// the length of the first and the third pack by the combined length, so
// that chained concatenations of [0] produce a contiguous 0..N-1 run.
template <typename...>
struct make_seq_cat;
template <
    template <typename T, T...> class S,
    typename T,
    T... As,
    T... Bs,
    T... Cs>
struct make_seq_cat<S<T, As...>, S<T, Bs...>, S<T, Cs...>> {
  using type =
      S<T,
        As...,
        (sizeof...(As) + Bs)...,
        (sizeof...(As) + sizeof...(Bs) + Cs)...>;
};

// Builds 0..Size-1 by recursive halving, keeping template instantiation
// depth logarithmic in Size.
//
// Not parameterizing by `template <typename T, T...> class, typename`
// because clang precisely v4.0 fails to compile that. (clang v3.9 and
// v5.0 handle that code correctly.)
//
// For this to work, `EmptySeq` is required to be `Sequence<T>` and
// `UnitSeq` is required to be `Sequence<T, 0>`.
template <std::size_t Size>
struct make_seq {
  template <typename EmptySeq, typename UnitSeq>
  using apply = typename make_seq_cat<
      typename make_seq<Size / 2>::template apply<EmptySeq, UnitSeq>,
      typename make_seq<Size / 2>::template apply<EmptySeq, UnitSeq>,
      typename make_seq<Size % 2>::template apply<EmptySeq, UnitSeq>>::type;
};
template <>
struct make_seq<1> {
  template <typename EmptySeq, typename UnitSeq>
  using apply = UnitSeq;
};
template <>
struct make_seq<0> {
  template <typename EmptySeq, typename UnitSeq>
  using apply = EmptySeq;
};
} // namespace utility_detail

// C++14 integer-sequence machinery, backported for the C++11 build.
// TODO: Remove after upgrading to C++14 baseline

template <class T, T... Ints>
struct integer_sequence {
  using value_type = T;

  static constexpr std::size_t size() noexcept {
    return sizeof...(Ints);
  }
};

template <std::size_t... Ints>
using index_sequence = integer_sequence<std::size_t, Ints...>;

template <typename T, std::size_t Size>
using make_integer_sequence = typename utility_detail::make_seq<
    Size>::template apply<integer_sequence<T>, integer_sequence<T, 0>>;

template <std::size_t Size>
using make_index_sequence = make_integer_sequence<std::size_t, Size>;
template <class... T>
using index_sequence_for = make_index_sequence<sizeof...(T)>;

/**
 * A simple helper for getting a constant reference to an object, like
 * C++17's std::as_const (see http://wg21.link/p0007).
 *
 * Example:
 *
 *   std::vector<int> v{1,2,3};
 *   // The following two lines are equivalent:
 *   auto a = const_cast<const std::vector<int>&>(v).begin();
 *   auto b = folly::as_const(v).begin();
 */
template <class T>
T const& as_const(T& t) noexcept {
  return t;
}

// A const rvalue cannot be usefully observed through as_const; reject.
template <class T>
void as_const(T const&&) = delete;

} // namespace folly
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <folly/Portability.h>

#include <chrono>
#include <cstdint>

// MSVC exposes rdtsc as a compiler intrinsic rather than via a header,
// so declare it here and ask the compiler to treat it as intrinsic.
#if _MSC_VER
extern "C" std::uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif

namespace folly {

// Returns a cheap hardware tick count for relative time measurements
// (e.g. pacing spin loops). On x86 this is the TSC via rdtsc; elsewhere
// it falls back to std::chrono::steady_clock. The unit is therefore
// platform-dependent (TSC ticks vs. clock duration counts), and TSC
// readings are presumably not comparable across cores/sockets — treat
// values as relative only.
inline std::uint64_t hardware_timestamp() {
#if _MSC_VER
  return __rdtsc();
#elif __GNUC__ && (__i386__ || FOLLY_X64)
  return __builtin_ia32_rdtsc();
#else
  // use steady_clock::now() as an approximation for the timestamp counter on
  // non-x86 systems
  return std::chrono::steady_clock::now().time_since_epoch().count();
#endif
}

} // namespace folly
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <array>
#include <type_traits>
#include <utility>

#include <folly/Traits.h>
#include <folly/Utility.h>

namespace folly {

namespace array_detail {
// Detects std::reference_wrapper<T> so that make_array<void> can reject
// it: a reference_wrapper argument would silently decay into the
// deduced common_type and lose its reference semantics.
template <typename>
struct is_ref_wrapper : std::false_type {};
template <typename T>
struct is_ref_wrapper<std::reference_wrapper<T>> : std::true_type {};

template <typename T>
using not_ref_wrapper =
    folly::Negation<is_ref_wrapper<typename std::decay<T>::type>>;

// Element-type selection for make_array: D when explicitly supplied;
// otherwise (D = void) the common_type of all argument types.
template <typename D, typename...>
struct return_type_helper {
  using type = D;
};
template <typename... TList>
struct return_type_helper<void, TList...> {
  static_assert(
      folly::Conjunction<not_ref_wrapper<TList>...>::value,
      "TList cannot contain reference_wrappers when D is void");
  using type = typename std::common_type<TList...>::type;
};

// std::array<element, count> produced by make_array.
template <typename D, typename... TList>
using return_type = std::
    array<typename return_type_helper<D, TList...>::type, sizeof...(TList)>;
} // namespace array_detail

// Builds a std::array from the given values, deducing the array size
// and (unless D is supplied) the element type.
template <typename D = void, typename... TList>
constexpr array_detail::return_type<D, TList...> make_array(TList&&... t) {
  using value_type =
      typename array_detail::return_type_helper<D, TList...>::type;
  return {{static_cast<value_type>(std::forward<TList>(t))...}};
}

namespace array_detail {
// Expands make(0), make(1), ..., make(Size-1) over an index_sequence.
template <typename MakeItem, std::size_t... Index>
inline constexpr auto make_array_with(
    MakeItem const& make,
    folly::index_sequence<Index...>)
    -> std::array<decltype(make(0)), sizeof...(Index)> {
  return std::array<decltype(make(0)), sizeof...(Index)>{{make(Index)...}};
}
} // namespace array_detail

// make_array_with
//
// Constructs a std::array<..., Size> with elements m(i) for i in [0, Size).
template <std::size_t Size, typename MakeItem>
constexpr auto make_array_with(MakeItem const& make)
    -> decltype(array_detail::make_array_with(
        make,
        folly::make_index_sequence<Size>{})) {
  return array_detail::make_array_with(
      make,
      folly::make_index_sequence<Size>{});
}

} // namespace folly
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <folly/detail/Futex.h>
#include <folly/synchronization/ParkingLot.h>

namespace folly {
namespace detail {

/** Optimal when TargetClock is the same type as Clock.
 *
 * Otherwise, both Clock::now() and TargetClock::now() must be invoked. */
template <typename TargetClock, typename Clock, typename Duration>
typename TargetClock::time_point time_point_conv(
    std::chrono::time_point<Clock, Duration> const& time) {
  using std::chrono::duration_cast;
  using TimePoint = std::chrono::time_point<Clock, Duration>;
  using TargetDuration = typename TargetClock::duration;
  using TargetTimePoint = typename TargetClock::time_point;
  if (time == TimePoint::max()) {
    // the "wait forever" sentinel must survive conversion exactly
    return TargetTimePoint::max();
  } else if (std::is_same<Clock, TargetClock>::value) {
    // in place of time_point_cast, which cannot compile without if-constexpr
    auto const delta = time.time_since_epoch();
    return TargetTimePoint(duration_cast<TargetDuration>(delta));
  } else {
    // different clocks with different epochs, so non-optimal case
    auto const delta = time - Clock::now();
    return TargetClock::now() + duration_cast<TargetDuration>(delta);
  }
}

/**
 * Available overloads, with definitions elsewhere
 *
 * These functions are treated as ADL-extension points, the templates above
 * call these functions without them having being pre-declared. This works
 * because ADL lookup finds the definitions of these functions when you pass
 * the relevant arguments
 */
int futexWakeImpl(
    const Futex<std::atomic>* futex,
    int count,
    uint32_t wakeMask);
FutexResult futexWaitImpl(
    const Futex<std::atomic>* futex,
    uint32_t expected,
    std::chrono::system_clock::time_point const* absSystemTime,
    std::chrono::steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask);

int futexWakeImpl(
    const Futex<EmulatedFutexAtomic>* futex,
    int count,
    uint32_t wakeMask);
FutexResult futexWaitImpl(
    const Futex<EmulatedFutexAtomic>* futex,
    uint32_t expected,
    std::chrono::system_clock::time_point const* absSystemTime,
    std::chrono::steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask);

// Deadline measured on a steady clock: forward it in the
// steady-time slot of the five-argument overload above.
template <typename Futex, typename Deadline>
typename std::enable_if<Deadline::clock::is_steady, FutexResult>::type
futexWaitImpl(
    Futex* futex,
    uint32_t expected,
    Deadline const& deadline,
    uint32_t waitMask) {
  return futexWaitImpl(futex, expected, nullptr, &deadline, waitMask);
}

// Deadline measured on a non-steady (system) clock: forward it in the
// system-time slot instead.
template <typename Futex, typename Deadline>
typename std::enable_if<!Deadline::clock::is_steady, FutexResult>::type
futexWaitImpl(
    Futex* futex,
    uint32_t expected,
    Deadline const& deadline,
    uint32_t waitMask) {
  return futexWaitImpl(futex, expected, &deadline, nullptr, waitMask);
}

// Blocks until the futex word no longer holds `expected` or a wake
// whose wakeMask overlaps waitMask arrives. Never times out, hence the
// assert below.
template <typename Futex>
FutexResult
futexWait(const Futex* futex, uint32_t expected, uint32_t waitMask) {
  auto rv = futexWaitImpl(futex, expected, nullptr, nullptr, waitMask);
  assert(rv != FutexResult::TIMEDOUT);
  return rv;
}

// Wakes up to `count` waiters whose waitMask overlaps wakeMask; returns
// the number of waiters woken.
template <typename Futex>
int futexWake(const Futex* futex, int count, uint32_t wakeMask) {
  return futexWakeImpl(futex, count, wakeMask);
}

// futexWait with an absolute deadline. The deadline is converted to
// steady_clock or system_clock to match Clock's steadiness;
// time_point::max() is treated as "wait with no timeout".
template <typename Futex, class Clock, class Duration>
FutexResult futexWaitUntil(
    const Futex* futex,
    uint32_t expected,
    std::chrono::time_point<Clock, Duration> const& deadline,
    uint32_t waitMask) {
  using Target = typename std::conditional<
      Clock::is_steady,
      std::chrono::steady_clock,
      std::chrono::system_clock>::type;
  auto const converted = time_point_conv<Target>(deadline);
  return converted == Target::time_point::max()
      ? futexWaitImpl(futex, expected, nullptr, nullptr, waitMask)
      : futexWaitImpl(futex, expected, converted, waitMask);
}

} // namespace detail
} // namespace folly
|
|
@ -0,0 +1,263 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include <folly/detail/Futex.h>
#include <folly/portability/SysSyscall.h>
#include <stdint.h>
#include <string.h>
#include <array>
#include <cerrno>

#include <folly/synchronization/ParkingLot.h>

#ifdef __linux__
#include <linux/futex.h>
#endif

#ifndef _WIN32
#include <unistd.h>
#endif

using namespace std::chrono;

namespace folly {
namespace detail {

namespace {

////////////////////////////////////////////////////
// native implementation using the futex() syscall

#ifdef __linux__

/// Certain toolchains (like Android's) don't include the full futex API in
/// their headers even though they support it. Make sure we have our constants
/// even if the headers don't have them.
#ifndef FUTEX_WAIT_BITSET
#define FUTEX_WAIT_BITSET 9
#endif
#ifndef FUTEX_WAKE_BITSET
#define FUTEX_WAKE_BITSET 10
#endif
#ifndef FUTEX_PRIVATE_FLAG
#define FUTEX_PRIVATE_FLAG 128
#endif
#ifndef FUTEX_CLOCK_REALTIME
#define FUTEX_CLOCK_REALTIME 256
#endif

// Wakes up to `count` waiters on addr whose wait bitset intersects
// wakeMask; returns the number of waiters actually woken.
int nativeFutexWake(const void* addr, int count, uint32_t wakeMask) {
  int rv = syscall(
      __NR_futex,
      addr, /* addr1 */
      FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */
      count, /* val */
      nullptr, /* timeout */
      nullptr, /* addr2 */
      wakeMask); /* val3 */

  /* NOTE: we ignore errors on wake for the case of a futex
     guarding its own destruction, similar to this
     glibc bug with sem_post/sem_wait:
     https://sourceware.org/bugzilla/show_bug.cgi?id=12674 */
  if (rv < 0) {
    return 0;
  }
  return rv;
}

// Converts an absolute time_point into the struct timespec the kernel
// expects, clamping pre-epoch (negative) values to zero since the
// kernel rejects negative seconds.
template <class Clock>
struct timespec timeSpecFromTimePoint(time_point<Clock> absTime) {
  auto epoch = absTime.time_since_epoch();
  if (epoch.count() < 0) {
    // kernel timespec_valid requires non-negative seconds and nanos in [0,1G)
    epoch = Clock::duration::zero();
  }

  // timespec-safe seconds and nanoseconds;
  // chrono::{nano,}seconds are `long long int`
  // whereas timespec uses smaller types
  using time_t_seconds = duration<std::time_t, seconds::period>;
  using long_nanos = duration<long int, nanoseconds::period>;

  auto secs = duration_cast<time_t_seconds>(epoch);
  auto nanos = duration_cast<long_nanos>(epoch - secs);
  struct timespec result = {secs.count(), nanos.count()};
  return result;
}

// Waits on addr until it stops holding `expected`, a matching wake
// arrives, a signal interrupts, or the optional absolute deadline
// expires. At most one of the two deadline pointers may be non-null.
FutexResult nativeFutexWaitImpl(
    const void* addr,
    uint32_t expected,
    system_clock::time_point const* absSystemTime,
    steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask) {
  assert(absSystemTime == nullptr || absSteadyTime == nullptr);

  int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG;
  struct timespec ts;
  struct timespec* timeout = nullptr;

  if (absSystemTime != nullptr) {
    // system_clock deadlines are measured against CLOCK_REALTIME;
    // steady deadlines use the kernel's default (monotonic) clock
    op |= FUTEX_CLOCK_REALTIME;
    ts = timeSpecFromTimePoint(*absSystemTime);
    timeout = &ts;
  } else if (absSteadyTime != nullptr) {
    ts = timeSpecFromTimePoint(*absSteadyTime);
    timeout = &ts;
  }

  // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
  // value - http://locklessinc.com/articles/futex_cheat_sheet/
  int rv = syscall(
      __NR_futex,
      addr, /* addr1 */
      op, /* op */
      expected, /* val */
      timeout, /* timeout */
      nullptr, /* addr2 */
      waitMask); /* val3 */

  if (rv == 0) {
    return FutexResult::AWOKEN;
  } else {
    switch (errno) {
      case ETIMEDOUT:
        assert(timeout != nullptr);
        return FutexResult::TIMEDOUT;
      case EINTR:
        return FutexResult::INTERRUPTED;
      case EWOULDBLOCK:
        return FutexResult::VALUE_CHANGED;
      default:
        assert(false);
        // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid
        // op (should be impossible) or an invalid timeout (should have
        // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT
        // means *addr points to invalid memory, which is unlikely because
        // the caller should have segfaulted already. We can either
        // crash, or return a value that lets the process continue for
        // a bit. We choose the latter. VALUE_CHANGED probably turns the
        // caller into a spin lock.
        return FutexResult::VALUE_CHANGED;
    }
  }
}

#endif // __linux__

///////////////////////////////////////////////////////
// compatibility implementation using standard C++ API

using Lot = ParkingLot<uint32_t>;
// Process-wide parking lot shared by every emulated futex word.
Lot parkingLot;

// Portable futexWake fallback built on ParkingLot.
int emulatedFutexWake(const void* addr, int count, uint32_t waitMask) {
  int woken = 0;
  parkingLot.unpark(addr, [&](const uint32_t& mask) {
    if ((mask & waitMask) == 0) {
      // this waiter isn't listening for any of these bits; leave it parked
      return UnparkControl::RetainContinue;
    }
    assert(count > 0);
    count--;
    woken++;
    return count > 0 ? UnparkControl::RemoveContinue
                     : UnparkControl::RemoveBreak;
  });
  return woken;
}

// Portable futexWait fallback built on ParkingLot. The park predicate
// re-checks `*futex == expected` under the lot's internal lock, which
// is what prevents lost wakeups between check and sleep.
template <typename F>
FutexResult emulatedFutexWaitImpl(
    F* futex,
    uint32_t expected,
    system_clock::time_point const* absSystemTime,
    steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask) {
  static_assert(
      std::is_same<F, const Futex<std::atomic>>::value ||
          std::is_same<F, const Futex<EmulatedFutexAtomic>>::value,
      "Type F must be either Futex<std::atomic> or Futex<EmulatedFutexAtomic>");
  ParkResult res;
  if (absSystemTime) {
    res = parkingLot.park_until(
        futex,
        waitMask,
        [&] { return *futex == expected; },
        [] {},
        *absSystemTime);
  } else if (absSteadyTime) {
    res = parkingLot.park_until(
        futex,
        waitMask,
        [&] { return *futex == expected; },
        [] {},
        *absSteadyTime);
  } else {
    res = parkingLot.park(
        futex, waitMask, [&] { return *futex == expected; }, [] {});
  }
  switch (res) {
    case ParkResult::Skip:
      return FutexResult::VALUE_CHANGED;
    case ParkResult::Unpark:
      return FutexResult::AWOKEN;
    case ParkResult::Timeout:
      return FutexResult::TIMEDOUT;
  }

  // unreachable: the switch above is exhaustive; this placates
  // compilers that cannot see that
  return FutexResult::INTERRUPTED;
}

} // namespace

/////////////////////////////////
// Futex<> overloads

// std::atomic futexes use the native syscall on Linux and the
// ParkingLot emulation elsewhere.
int futexWakeImpl(
    const Futex<std::atomic>* futex,
    int count,
    uint32_t wakeMask) {
#ifdef __linux__
  return nativeFutexWake(futex, count, wakeMask);
#else
  return emulatedFutexWake(futex, count, wakeMask);
#endif
}

// EmulatedFutexAtomic futexes always take the emulated path, even on
// Linux — that is their purpose.
int futexWakeImpl(
    const Futex<EmulatedFutexAtomic>* futex,
    int count,
    uint32_t wakeMask) {
  return emulatedFutexWake(futex, count, wakeMask);
}

FutexResult futexWaitImpl(
    const Futex<std::atomic>* futex,
    uint32_t expected,
    system_clock::time_point const* absSystemTime,
    steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask) {
#ifdef __linux__
  return nativeFutexWaitImpl(
      futex, expected, absSystemTime, absSteadyTime, waitMask);
#else
  return emulatedFutexWaitImpl(
      futex, expected, absSystemTime, absSteadyTime, waitMask);
#endif
}

FutexResult futexWaitImpl(
    const Futex<EmulatedFutexAtomic>* futex,
    uint32_t expected,
    system_clock::time_point const* absSystemTime,
    steady_clock::time_point const* absSteadyTime,
    uint32_t waitMask) {
  return emulatedFutexWaitImpl(
      futex, expected, absSystemTime, absSteadyTime, waitMask);
}

} // namespace detail
} // namespace folly
|
|
@ -0,0 +1,96 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
|
||||
// Outcome of a futexWait()/futexWaitUntil() call.
enum class FutexResult {
  VALUE_CHANGED, /* futex value didn't match expected */
  AWOKEN, /* wakeup by matching futex wake, or spurious wakeup */
  INTERRUPTED, /* wakeup by interrupting signal */
  TIMEDOUT, /* wakeup by expiring deadline */
};

/**
 * Futex is an atomic 32 bit unsigned integer that provides access to the
 * futex() syscall on that value. It is templated in such a way that it
 * can interact properly with DeterministicSchedule testing.
 *
 * If you don't know how to use futex(), you probably shouldn't be using
 * this class. Even if you do know how, you should have a good reason
 * (and benchmarks to back you up).
 *
 * Because of the semantics of the futex syscall, the futex family of
 * functions are available as free functions rather than member functions
 */
template <template <typename> class Atom = std::atomic>
using Futex = Atom<std::uint32_t>;
|
||||
|
||||
/**
 * Puts the thread to sleep if this->load() == expected. Returns
 * FutexResult::AWOKEN when the thread consumed a wake() event; the other
 * enumerators describe why it returned otherwise (value mismatch, signal,
 * or spurious wakeup).
 *
 * The default waitMask of -1 (all bits set) matches any wake mask.
 */
template <typename Futex>
FutexResult
futexWait(const Futex* futex, uint32_t expected, uint32_t waitMask = -1);

/**
 * Similar to futexWait but also accepts a deadline until when the wait call
 * may block.
 *
 * Optimal clock types: std::chrono::system_clock, std::chrono::steady_clock.
 * NOTE: On some systems steady_clock is just an alias for system_clock,
 * and is not actually steady.
 *
 * For any other clock type, now() will be invoked twice.
 */
template <typename Futex, class Clock, class Duration>
FutexResult futexWaitUntil(
    const Futex* futex,
    uint32_t expected,
    std::chrono::time_point<Clock, Duration> const& deadline,
    uint32_t waitMask = -1);

/**
 * Wakes up to count waiters where (waitMask & wakeMask) != 0, returning the
 * number of awoken threads, or -1 if an error occurred. Note that when
 * constructing a concurrency primitive that can guard its own destruction, it
 * is likely that you will want to ignore EINVAL here (as well as making sure
 * that you never touch the object after performing the memory store that is
 * the linearization point for unlock or control handoff). See
 * https://sourceware.org/bugzilla/show_bug.cgi?id=13690
 */
template <typename Futex>
int futexWake(
    const Futex* futex,
    int count = std::numeric_limits<int>::max(),
    uint32_t wakeMask = -1);
|
||||
|
||||
/** A std::atomic subclass that can be used to force Futex to emulate
 * the underlying futex() syscall. This is primarily useful to test or
 * benchmark the emulated implementation on systems that don't need it. */
template <typename T>
struct EmulatedFutexAtomic : public std::atomic<T> {
  EmulatedFutexAtomic() noexcept = default;
  constexpr /* implicit */ EmulatedFutexAtomic(T init) noexcept
      : std::atomic<T>(init) {}
  // It doesn't copy or move: the user-declared (deleted) move constructor
  // also suppresses the implicitly-declared copy operations.
  EmulatedFutexAtomic(EmulatedFutexAtomic&& rhs) = delete;
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
||||
|
||||
#include <folly/detail/Futex-inl.h>
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Traits.h>
|
||||
|
||||
#include <functional>
|
||||
#include <type_traits>
|
||||
|
||||
namespace folly {
|
||||
namespace invoke_detail {
|
||||
// Deduced result type of invoking F with Args... via function-call syntax.
// (Not a full std::invoke_result backport: member-pointer callables are not
// handled here.)
template <typename F, typename... Args>
using invoke_result_ = decltype(std::declval<F>()(std::declval<Args>()...));

// Primary template: selected when the call expression is ill-formed.
template <typename Void, typename F, typename... Args>
struct is_invocable : std::false_type {};

// Partial specialization chosen (via void_t SFINAE) when F(Args...) is a
// valid expression.
template <typename F, typename... Args>
struct is_invocable<void_t<invoke_result_<F, Args...>>, F, Args...>
    : std::true_type {};

// Primary template: selected when the call expression is ill-formed.
template <typename Void, typename R, typename F, typename... Args>
struct is_invocable_r : std::false_type {};

// Valid call: additionally requires the call's result to convert to R.
template <typename R, typename F, typename... Args>
struct is_invocable_r<void_t<invoke_result_<F, Args...>>, R, F, Args...>
    : std::is_convertible<invoke_result_<F, Args...>, R> {};
} // namespace invoke_detail

// mimic: std::is_invocable, C++17
template <typename F, typename... Args>
struct is_invocable : invoke_detail::is_invocable<void, F, Args...> {};

// mimic: std::is_invocable_r, C++17
template <typename R, typename F, typename... Args>
struct is_invocable_r : invoke_detail::is_invocable_r<void, R, F, Args...> {};
|
||||
} // namespace folly
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
namespace hash {
|
||||
|
||||
/*
 * Thomas Wang 64 bit mix hash function.
 *
 * A sequence of shift/xor/add mixing steps that diffuses every input bit
 * across the whole 64-bit result; all arithmetic is modulo 2^64.
 */

inline uint64_t twang_mix64(uint64_t key) noexcept {
  uint64_t k = key;
  k = (k << 21) + ~k; // k = (k * (1 << 21)) - k - 1
  k ^= k >> 24;
  k += (k << 3) + (k << 8); // k *= 0x109 == 1 + (1 << 3) + (1 << 8)
  k ^= k >> 14;
  k += (k << 2) + (k << 4); // k *= 0x15 == 1 + (1 << 2) + (1 << 4)
  k ^= k >> 28;
  k += k << 31; // k *= 1 + (1 << 31)
  return k;
}
|
||||
|
||||
} // namespace hash
|
||||
} // namespace folly
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
|
||||
// Memory locations within the same cache line are subject to destructive
// interference, also known as false sharing, which is when concurrent
// accesses to these different memory locations from different cores, where at
// least one of the concurrent accesses is or involves a store operation,
// induce contention and harm performance.
//
// Microbenchmarks indicate that pairs of cache lines also see destructive
// interference under heavy use of atomic operations, as observed for atomic
// increment on Sandy Bridge.
//
// We assume a cache line size of 64, so we use a cache line pair size of 128
// to avoid destructive interference.
//
// mimic: std::hardware_destructive_interference_size, C++17
constexpr std::size_t hardware_destructive_interference_size = 128;

// Memory locations within the same cache line are subject to constructive
// interference, also known as true sharing, which is when accesses to some
// memory locations induce all memory locations within the same cache line to
// be cached, benefiting subsequent accesses to different memory locations
// within the same cache line and helping performance.
//
// mimic: std::hardware_constructive_interference_size, C++17
constexpr std::size_t hardware_constructive_interference_size = 64;
|
||||
|
||||
} // namespace folly
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Traits.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
|
||||
namespace folly {
|
||||
|
||||
// Reinterprets the object representation of `src` as a value of type To
// (mimics C++20 std::bit_cast, minus constexpr). The memcpy is the only
// portable, defined-behavior way to type-pun; the enable_if keeps this out
// of overload resolution unless the sizes match, To is trivial, and From is
// trivially copyable.
template <
    typename To,
    typename From,
    _t<std::enable_if<
        sizeof(From) == sizeof(To) && std::is_trivial<To>::value &&
            is_trivially_copyable<From>::value,
        int>> = 0>
To bit_cast(const From& src) noexcept {
  To to;
  std::memcpy(&to, &src, sizeof(From));
  return to;
}
|
||||
|
||||
} // namespace folly
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
|
||||
#include <folly/Portability.h>
|
||||
|
||||
/***
|
||||
* include or backport:
|
||||
* * std::launder
|
||||
*/
|
||||
|
||||
namespace folly {
|
||||
|
||||
/**
 * Approximate backport from C++17 of std::launder. It should be `constexpr`
 * but that can't be done without specific support from the compiler.
 *
 * Used to obtain a pointer to an object created in storage that previously
 * held a different (or const-qualified) object, without the compiler reusing
 * stale knowledge about the old object's value.
 */
template <typename T>
FOLLY_NODISCARD inline T* launder(T* in) noexcept {
#if FOLLY_HAS_BUILTIN(__builtin_launder) || __GNUC__ >= 7
  // The builtin has no unwanted side-effects.
  return __builtin_launder(in);
#elif __GNUC__
  // This inline assembler block declares that `in` is an input and an output,
  // so the compiler has to assume that it has been changed inside the block.
  __asm__("" : "+r"(in));
  return in;
#elif defined(_WIN32)
  // MSVC does not currently have optimizations around const members of structs.
  // _ReadWriteBarrier() will prevent compiler reordering memory accesses.
  _ReadWriteBarrier();
  return in;
#else
  static_assert(
      false, "folly::launder is not implemented for this environment");
#endif
}

/* The standard explicitly forbids laundering these */
void launder(void*) = delete;
void launder(void const*) = delete;
void launder(void volatile*) = delete;
void launder(void const volatile*) = delete;
template <typename T, typename... Args>
void launder(T (*)(Args...)) = delete;
|
||||
} // namespace folly
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Portability.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace folly {
|
||||
// Emits the architecture's spin-loop hint instruction (x86 `pause`,
// ARM/AArch64 `yield`, PPC64 priority nop `or 27,27,27`). Intended for
// busy-wait loops; compiles to nothing on unrecognized architectures.
inline void asm_volatile_pause() {
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
  ::_mm_pause();
#elif defined(__i386__) || FOLLY_X64
  asm volatile("pause");
#elif FOLLY_AARCH64 || defined(__arm__)
  asm volatile("yield");
#elif FOLLY_PPC64
  asm volatile("or 27,27,27");
#endif
}
|
||||
} // namespace folly
|
|
@ -0,0 +1,10 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <basetsd.h> // @manual
|
||||
|
||||
#define HAVE_MODE_T 1
|
||||
|
||||
// This is a massive pain to have be an `int` due to the pthread implementation
|
||||
// we support, but it's far more compatible with the rest of the windows world
|
||||
// as an `int` than it would be as a `void*`
|
||||
using pid_t = int;
|
||||
// This isn't actually supposed to be defined here, but it's the most
|
||||
// appropriate place without defining a portability header for stdint.h
|
||||
// with just this single typedef.
|
||||
using ssize_t = SSIZE_T;
|
||||
// The Windows headers don't define this anywhere, nor do any of the libs
|
||||
// that Folly depends on, so define it here.
|
||||
using mode_t = unsigned short;
|
||||
#endif
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/detail/Futex.h>
|
||||
#include <folly/synchronization/ParkingLot.h>
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
namespace atomic_notification {
|
||||
/**
|
||||
* We use Futex<std::atomic> as the alias that has the lowest performance
|
||||
* overhead with respect to atomic notifications. Assert that
|
||||
* atomic_uint_fast_wait_t is the same as Futex<std::atomic>
|
||||
*/
|
||||
static_assert(std::is_same<atomic_uint_fast_wait_t, Futex<std::atomic>>{}, "");
|
||||
|
||||
/**
|
||||
* Implementation and specializations for the atomic_wait() family of
|
||||
* functions
|
||||
*/
|
||||
// Collapse a FutexResult into the std::cv_status vocabulary: only a timeout
// is distinguished; every other outcome maps to no_timeout.
inline std::cv_status toCvStatus(FutexResult result) {
  if (result == FutexResult::TIMEDOUT) {
    return std::cv_status::timeout;
  }
  return std::cv_status::no_timeout;
}
|
||||
// Collapse a ParkResult into std::cv_status: only a timeout is distinguished.
inline std::cv_status toCvStatus(ParkResult result) {
  if (result == ParkResult::Timeout) {
    return std::cv_status::timeout;
  }
  return std::cv_status::no_timeout;
}
|
||||
|
||||
// ParkingLot instantiation for futex management
|
||||
extern ParkingLot<std::uint32_t> parkingLot;
|
||||
|
||||
// Fast path: a 32-bit atomic can be waited on directly through the futex API
// (native futex() on Linux, ParkingLot emulation elsewhere).
template <template <typename...> class Atom, typename... Args>
void atomic_wait_impl(
    const Atom<std::uint32_t, Args...>* atomic,
    std::uint32_t expected) {
  futexWait(atomic, expected);
  return;
}

// Generic path for non-32-bit integers: park in the global ParkingLot keyed
// on the atomic's address. The waiter's stored data is -1 (all bits set) and
// the predicate re-checks the value under the lot's bucket lock so a
// concurrent notify cannot be missed.
template <template <typename...> class Atom, typename Integer, typename... Args>
void atomic_wait_impl(const Atom<Integer, Args...>* atomic, Integer expected) {
  static_assert(!std::is_same<Integer, std::uint32_t>{}, "");
  parkingLot.park(
      atomic, -1, [&] { return atomic->load() == expected; }, [] {});
}

// Timed fast path: map the futex wait result onto std::cv_status.
template <
    template <typename...> class Atom,
    typename... Args,
    typename Clock,
    typename Duration>
std::cv_status atomic_wait_until_impl(
    const Atom<std::uint32_t, Args...>* atomic,
    std::uint32_t expected,
    const std::chrono::time_point<Clock, Duration>& deadline) {
  return toCvStatus(futexWaitUntil(atomic, expected, deadline));
}

// Timed generic path: ParkingLot park_until with the same data/predicate
// protocol as the untimed overload above.
template <
    template <typename...> class Atom,
    typename Integer,
    typename... Args,
    typename Clock,
    typename Duration>
std::cv_status atomic_wait_until_impl(
    const Atom<Integer, Args...>* atomic,
    Integer expected,
    const std::chrono::time_point<Clock, Duration>& deadline) {
  static_assert(!std::is_same<Integer, std::uint32_t>{}, "");
  return toCvStatus(parkingLot.park_until(
      atomic, -1, [&] { return atomic->load() == expected; }, [] {}, deadline));
}

// Fast path: wake at most one futex waiter.
template <template <typename...> class Atom, typename... Args>
void atomic_notify_one_impl(const Atom<std::uint32_t, Args...>* atomic) {
  futexWake(atomic, 1);
  return;
}

// Generic path: waiters park with data == -1 (see atomic_wait_impl), so each
// waiter's stored word must be UINT32_MAX; RemoveBreak unparks the first
// waiter found and stops iterating, i.e. wakes exactly one.
template <template <typename...> class Atom, typename Integer, typename... Args>
void atomic_notify_one_impl(const Atom<Integer, Args...>* atomic) {
  static_assert(!std::is_same<Integer, std::uint32_t>{}, "");
  parkingLot.unpark(atomic, [&](std::uint32_t data) {
    assert(data == std::numeric_limits<std::uint32_t>::max());
    return UnparkControl::RemoveBreak;
  });
}

// Fast path: futexWake() with its default count wakes all waiters.
template <template <typename...> class Atom, typename... Args>
void atomic_notify_all_impl(const Atom<std::uint32_t, Args...>* atomic) {
  futexWake(atomic);
  return;
}

// Generic path: RemoveContinue keeps iterating, unparking every waiter.
template <template <typename...> class Atom, typename Integer, typename... Args>
void atomic_notify_all_impl(const Atom<Integer, Args...>* atomic) {
  static_assert(!std::is_same<Integer, std::uint32_t>{}, "");
  parkingLot.unpark(atomic, [&](std::uint32_t data) {
    assert(data == std::numeric_limits<std::uint32_t>::max());
    return UnparkControl::RemoveContinue;
  });
}
|
||||
} // namespace atomic_notification
|
||||
} // namespace detail
|
||||
|
||||
// Public p1135r0-style entry points. Each forwards to the detail overloads,
// which pick the futex fast path for 32-bit values and the ParkingLot path
// for every other integer width.
template <typename Integer>
void atomic_wait(const std::atomic<Integer>* atomic, Integer expected) {
  detail::atomic_notification::atomic_wait_impl(atomic, expected);
}

// Timed wait; returns std::cv_status::timeout iff the deadline expired.
template <typename Integer, typename Clock, typename Duration>
std::cv_status atomic_wait_until(
    const std::atomic<Integer>* atomic,
    Integer expected,
    const std::chrono::time_point<Clock, Duration>& deadline) {
  return detail::atomic_notification::atomic_wait_until_impl(
      atomic, expected, deadline);
}

// Wake at most one thread blocked in atomic_wait()/atomic_wait_until().
template <typename Integer>
void atomic_notify_one(const std::atomic<Integer>* atomic) {
  detail::atomic_notification::atomic_notify_one_impl(atomic);
}

// Wake every thread blocked in atomic_wait()/atomic_wait_until().
template <typename Integer>
void atomic_notify_all(const std::atomic<Integer>* atomic) {
  detail::atomic_notification::atomic_notify_all_impl(atomic);
}
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include <folly/synchronization/AtomicNotification.h>
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
namespace atomic_notification {
|
||||
|
||||
// ParkingLot instance used for the atomic_wait() family of functions.
//
// Defined as a plain static-storage object (rather than heap-allocated) to
// avoid destruction-order problems, because uses may come from
// allocation-sensitive contexts.
ParkingLot<std::uint32_t> parkingLot;
|
||||
|
||||
} // namespace atomic_notification
|
||||
} // namespace detail
|
||||
} // namespace folly
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
|
||||
namespace folly {
|
||||
|
||||
/**
|
||||
* The behavior of the atomic_wait() family of functions is semantically
|
||||
* identical to futex(). Correspondingly, calling atomic_notify_one(),
|
||||
* atomic_notify_all() is identical to futexWake() with 1 and
|
||||
* std::numeric_limits<int>::max() respectively
|
||||
*
|
||||
* The difference here compared to the futex API above is that it works with
|
||||
* all types of atomic widths. When a 32 bit atomic integer is used, the
|
||||
* implementation falls back to using futex() if possible, and the
|
||||
* compatibility implementation for non-linux systems otherwise. For all
|
||||
* other integer widths, the compatibility implementation is used
|
||||
*
|
||||
* The templating of this API is changed from the standard in the following
|
||||
* ways
|
||||
*
|
||||
* - At the time of writing, libstdc++'s implementation of std::atomic<> does
|
||||
* not include the value_type alias. So we rely on the atomic type being a
|
||||
* template class such that the first type is the underlying value type
|
||||
* - The Atom parameter allows this API to be compatible with
|
||||
* DeterministicSchedule testing.
|
||||
* - atomic_wait_until() does not exist in the linked paper, the version here
|
||||
* is identical to futexWaitUntil() and returns std::cv_status
|
||||
*/
|
||||
// mimic: std::atomic_wait, p1135r0
// Block until *atomic no longer equals expected (futex semantics).
template <typename Integer>
void atomic_wait(const std::atomic<Integer>* atomic, Integer expected);
// Timed variant; returns std::cv_status::timeout iff the deadline expired.
template <typename Integer, typename Clock, typename Duration>
std::cv_status atomic_wait_until(
    const std::atomic<Integer>* atomic,
    Integer expected,
    const std::chrono::time_point<Clock, Duration>& deadline);

// mimic: std::atomic_notify_one, p1135r0
template <typename Integer>
void atomic_notify_one(const std::atomic<Integer>* atomic);
// mimic: std::atomic_notify_all, p1135r0
template <typename Integer>
void atomic_notify_all(const std::atomic<Integer>* atomic);

// mimic: std::atomic_uint_fast_wait_t, p1135r0
// The width with the lowest-overhead wait/notify implementation (futex-sized).
using atomic_uint_fast_wait_t = std::atomic<std::uint32_t>;
|
||||
|
||||
} // namespace folly
|
||||
|
||||
#include <folly/synchronization/AtomicNotification-inl.h>
|
|
@ -0,0 +1,258 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Portability.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#if _WIN32
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
|
||||
// TODO: Remove the non-default implementations when both gcc and clang
|
||||
// can recognize single bit set/reset patterns and compile them down to locked
|
||||
// bts and btr instructions.
|
||||
//
|
||||
// Currently, at the time of writing it seems like gcc7 and greater can make
|
||||
// this optimization and clang cannot - https://gcc.godbolt.org/z/Q83rxX
|
||||
|
||||
// Portable fallback for atomic_fetch_set(): atomically set bit `bit` via a
// single fetch_or under `order`, and report whether the bit was already set.
template <typename Atomic>
bool atomic_fetch_set_default(
    Atomic& atomic,
    std::size_t bit,
    std::memory_order order) {
  using Integer = decltype(atomic.load());
  auto const selected = Integer{1} << static_cast<Integer>(bit);
  auto const previous = atomic.fetch_or(selected, order);
  return (previous & selected) != 0;
}
|
||||
|
||||
// Portable fallback for atomic_fetch_reset(): atomically clear bit `bit` via
// a single fetch_and under `order`, and report whether the bit had been set.
template <typename Atomic>
bool atomic_fetch_reset_default(
    Atomic& atomic,
    std::size_t bit,
    std::memory_order order) {
  using Integer = decltype(atomic.load());
  auto const selected = Integer{1} << static_cast<Integer>(bit);
  auto const previous = atomic.fetch_and(static_cast<Integer>(~selected), order);
  return (previous & selected) != 0;
}
|
||||
|
||||
/**
 * A simple trait to determine if the given type is an instantiation of
 * std::atomic
 */
// Primary template: anything that is not std::atomic<...>.
template <typename T>
struct is_atomic : std::false_type {};
// Specialization: exactly std::atomic<Integer>.
template <typename Integer>
struct is_atomic<std::atomic<Integer>> : std::true_type {};
|
||||
|
||||
#if FOLLY_X64
|
||||
|
||||
#if _MSC_VER
|
||||
|
||||
// MSVC: use the _interlockedbittestandset family (lock bts) for 4- and
// 8-byte std::atomic integers. The `order` argument is not consulted on the
// intrinsic path; it is only forwarded on the default-path fallback.
template <typename Integer>
inline bool atomic_fetch_set_x86(
    std::atomic<Integer>& atomic,
    std::size_t bit,
    std::memory_order order) {
  // The reinterpret_casts below require std::atomic<Integer> to share the
  // size and alignment of Integer itself.
  static_assert(alignof(std::atomic<Integer>) == alignof(Integer), "");
  static_assert(sizeof(std::atomic<Integer>) == sizeof(Integer), "");
  assert(atomic.is_lock_free());

  if /* constexpr */ (sizeof(Integer) == 4) {
    return _interlockedbittestandset(
        reinterpret_cast<volatile long*>(&atomic), static_cast<long>(bit));
  } else if /* constexpr */ (sizeof(Integer) == 8) {
    return _interlockedbittestandset64(
        reinterpret_cast<volatile long long*>(&atomic),
        static_cast<long long>(bit));
  } else {
    assert(sizeof(Integer) != 4 && sizeof(Integer) != 8);
    return atomic_fetch_set_default(atomic, bit, order);
  }
}

// Non-std::atomic atomic types fall back to the portable fetch_or path.
template <typename Atomic>
inline bool
atomic_fetch_set_x86(Atomic& atomic, std::size_t bit, std::memory_order order) {
  static_assert(!std::is_same<Atomic, std::atomic<std::uint32_t>>{}, "");
  static_assert(!std::is_same<Atomic, std::atomic<std::uint64_t>>{}, "");
  return atomic_fetch_set_default(atomic, bit, order);
}

// MSVC: lock btr via the _interlockedbittestandreset family, mirroring the
// set variant above.
template <typename Integer>
inline bool atomic_fetch_reset_x86(
    std::atomic<Integer>& atomic,
    std::size_t bit,
    std::memory_order order) {
  static_assert(alignof(std::atomic<Integer>) == alignof(Integer), "");
  static_assert(sizeof(std::atomic<Integer>) == sizeof(Integer), "");
  assert(atomic.is_lock_free());

  if /* constexpr */ (sizeof(Integer) == 4) {
    return _interlockedbittestandreset(
        reinterpret_cast<volatile long*>(&atomic), static_cast<long>(bit));
  } else if /* constexpr */ (sizeof(Integer) == 8) {
    return _interlockedbittestandreset64(
        reinterpret_cast<volatile long long*>(&atomic),
        static_cast<long long>(bit));
  } else {
    assert(sizeof(Integer) != 4 && sizeof(Integer) != 8);
    return atomic_fetch_reset_default(atomic, bit, order);
  }
}

// Non-std::atomic atomic types fall back to the portable fetch_and path.
template <typename Atomic>
inline bool
atomic_fetch_reset_x86(Atomic& atomic, std::size_t bit, std::memory_order mo) {
  static_assert(!std::is_same<Atomic, std::atomic<std::uint32_t>>{}, "");
  static_assert(!std::is_same<Atomic, std::atomic<std::uint64_t>>{}, "");
  return atomic_fetch_reset_default(atomic, bit, mo);
}
|
||||
|
||||
#else
|
||||
|
||||
// GCC/Clang on x86-64: emit `lock bts` directly for 2/4/8-byte std::atomic
// integers. `setc` captures the carry flag, which bts sets to the bit's
// previous value; the "memory" clobber orders the access against surrounding
// code, and `order` is only consulted on the 1-byte fallback path.
template <typename Integer>
inline bool atomic_fetch_set_x86(
    std::atomic<Integer>& atomic,
    std::size_t bit,
    std::memory_order order) {
  auto previous = false;

  if /* constexpr */ (sizeof(Integer) == 2) {
    auto pointer = reinterpret_cast<std::uint16_t*>(&atomic);
    asm volatile("lock; btsw %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint16_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 4) {
    auto pointer = reinterpret_cast<std::uint32_t*>(&atomic);
    asm volatile("lock; btsl %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint32_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 8) {
    auto pointer = reinterpret_cast<std::uint64_t*>(&atomic);
    asm volatile("lock; btsq %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint64_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else {
    // bts has no byte-sized form; fall back to fetch_or.
    assert(sizeof(Integer) == 1);
    return atomic_fetch_set_default(atomic, bit, order);
  }

  return previous;
}

// Non-std::atomic atomic types fall back to the portable fetch_or path.
template <typename Atomic>
inline bool
atomic_fetch_set_x86(Atomic& atomic, std::size_t bit, std::memory_order order) {
  static_assert(!is_atomic<Atomic>::value, "");
  return atomic_fetch_set_default(atomic, bit, order);
}

// GCC/Clang on x86-64: emit `lock btr`, mirroring the set variant above.
template <typename Integer>
inline bool atomic_fetch_reset_x86(
    std::atomic<Integer>& atomic,
    std::size_t bit,
    std::memory_order order) {
  auto previous = false;

  if /* constexpr */ (sizeof(Integer) == 2) {
    auto pointer = reinterpret_cast<std::uint16_t*>(&atomic);
    asm volatile("lock; btrw %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint16_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 4) {
    auto pointer = reinterpret_cast<std::uint32_t*>(&atomic);
    asm volatile("lock; btrl %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint32_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 8) {
    auto pointer = reinterpret_cast<std::uint64_t*>(&atomic);
    asm volatile("lock; btrq %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint64_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else {
    // btr has no byte-sized form; fall back to fetch_and.
    assert(sizeof(Integer) == 1);
    return atomic_fetch_reset_default(atomic, bit, order);
  }

  return previous;
}

// Non-std::atomic atomic types fall back to the portable fetch_and path.
template <typename Atomic>
bool atomic_fetch_reset_x86(
    Atomic& atomic,
    std::size_t bit,
    std::memory_order order) {
  static_assert(!is_atomic<Atomic>::value, "");
  return atomic_fetch_reset_default(atomic, bit, order);
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
// Stubs for non-x86-64 builds. atomic_fetch_set()/atomic_fetch_reset() only
// dispatch here when folly::kIsArchAmd64 is false, so these must never run.
template <typename Atomic>
bool atomic_fetch_set_x86(Atomic&, std::size_t, std::memory_order) noexcept {
  // This should never be called on non x86_64 platforms.
  std::terminate();
}
template <typename Atomic>
bool atomic_fetch_reset_x86(Atomic&, std::size_t, std::memory_order) noexcept {
  // This should never be called on non x86_64 platforms.
  std::terminate();
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename Atomic>
|
||||
bool atomic_fetch_set(Atomic& atomic, std::size_t bit, std::memory_order mo) {
|
||||
using Integer = decltype(atomic.load());
|
||||
static_assert(std::is_unsigned<Integer>{}, "");
|
||||
static_assert(!std::is_const<Atomic>{}, "");
|
||||
assert(bit < (sizeof(Integer) * 8));
|
||||
|
||||
if (folly::kIsArchAmd64) {
|
||||
// do the optimized thing on x86 builds
|
||||
return detail::atomic_fetch_set_x86(atomic, bit, mo);
|
||||
} else {
|
||||
// otherwise default to the default implementation using fetch_or()
|
||||
return detail::atomic_fetch_set_default(atomic, bit, mo);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Atomic>
|
||||
bool atomic_fetch_reset(Atomic& atomic, std::size_t bit, std::memory_order mo) {
|
||||
using Integer = decltype(atomic.load());
|
||||
static_assert(std::is_unsigned<Integer>{}, "");
|
||||
static_assert(!std::is_const<Atomic>{}, "");
|
||||
assert(bit < (sizeof(Integer) * 8));
|
||||
|
||||
if (folly::kIsArchAmd64) {
|
||||
// do the optimized thing on x86 builds
|
||||
return detail::atomic_fetch_reset_x86(atomic, bit, mo);
|
||||
} else {
|
||||
// otherwise default to the default implementation using fetch_and()
|
||||
return detail::atomic_fetch_reset_default(atomic, bit, mo);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
|
||||
/**
|
||||
* Sets a bit at the given index in the binary representation of the integer
|
||||
* to 1. Returns the previous value of the bit, so true if the bit was not
|
||||
* changed, false otherwise
|
||||
*
|
||||
* On some architectures, using this is more efficient than the corresponding
|
||||
* std::atomic::fetch_or() with a mask. For example to set the first (least
|
||||
* significant) bit of an integer, you could do atomic.fetch_or(0b1)
|
||||
*
|
||||
* The efficiency win is only visible in x86 (yet) and comes from the
|
||||
* implementation using the x86 bts instruction when possible.
|
||||
*
|
||||
 * When something other than std::atomic is passed, the implementation assumes
 * incompatibility with this interface and calls Atomic::fetch_or()
|
||||
*/
|
||||
template <typename Atomic>
|
||||
bool atomic_fetch_set(
|
||||
Atomic& atomic,
|
||||
std::size_t bit,
|
||||
std::memory_order order = std::memory_order_seq_cst);
|
||||
|
||||
/**
|
||||
* Resets a bit at the given index in the binary representation of the integer
|
||||
* to 0. Returns the previous value of the bit, so true if the bit was
|
||||
* changed, false otherwise
|
||||
*
|
||||
* This follows the same underlying principle and implementation as
|
||||
* fetch_set(). Using the optimized implementation when possible and falling
|
||||
* back to std::atomic::fetch_and() when in debug mode or in an architecture
|
||||
* where an optimization is not possible
|
||||
*/
|
||||
template <typename Atomic>
|
||||
bool atomic_fetch_reset(
|
||||
Atomic& atomic,
|
||||
std::size_t bit,
|
||||
std::memory_order order = std::memory_order_seq_cst);
|
||||
|
||||
} // namespace folly
|
||||
|
||||
#include <folly/synchronization/AtomicUtil-inl.h>
|
|
@ -0,0 +1,327 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include <folly/detail/Futex.h>
|
||||
#include <folly/portability/Asm.h>
|
||||
#include <folly/synchronization/WaitOptions.h>
|
||||
#include <folly/synchronization/detail/Spin.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
/// A Baton allows a thread to block once and be awoken. Captures a
|
||||
/// single handoff, and during its lifecycle (from construction/reset
|
||||
/// to destruction/reset) a baton must either be post()ed and wait()ed
|
||||
/// exactly once each, or not at all.
|
||||
///
|
||||
/// Baton includes no internal padding, and is only 4 bytes in size.
|
||||
/// Any alignment or padding to avoid false sharing is up to the user.
|
||||
///
|
||||
/// This is basically a stripped-down semaphore that supports only a
|
||||
/// single call to sem_post and a single call to sem_wait.
|
||||
///
|
||||
/// The non-blocking version (MayBlock == false) provides more speed
|
||||
/// by using only load acquire and store release operations in the
|
||||
/// critical path, at the cost of disallowing blocking.
|
||||
///
|
||||
/// The current posix semaphore sem_t isn't too bad, but this provides
|
||||
/// more a bit more speed, inlining, smaller size, a guarantee that
|
||||
/// the implementation won't change, and compatibility with
|
||||
/// DeterministicSchedule. By having a much more restrictive
|
||||
/// lifecycle we can also add a bunch of assertions that can help to
|
||||
/// catch race conditions ahead of time.
|
||||
template <bool MayBlock = true, template <typename> class Atom = std::atomic>
class Baton {
 public:
  /// Default WaitOptions used by the waiting functions below.
  static constexpr WaitOptions wait_options() {
    return {};
  }

  constexpr Baton() noexcept : state_(INIT) {}

  /// Batons are neither copyable nor movable.
  Baton(Baton const&) = delete;
  Baton& operator=(Baton const&) = delete;

  /// It is an error to destroy a Baton on which a thread is currently
  /// wait()ing. In practice this means that the waiter usually takes
  /// responsibility for destroying the Baton.
  ~Baton() noexcept {
    // The docblock for this function says that it can't be called when
    // there is a concurrent waiter. We assume a strong version of this
    // requirement in which the caller must _know_ that this is true, they
    // are not allowed to be merely lucky. If two threads are involved,
    // the destroying thread must actually have synchronized with the
    // waiting thread after wait() returned. To convey causality the
    // waiting thread must have used release semantics and the destroying
    // thread must have used acquire semantics for that communication,
    // so we are guaranteed to see the post-wait() value of state_,
    // which cannot be WAITING.
    //
    // Note that since we only care about a single memory location,
    // the only two plausible memory orders here are relaxed and seq_cst.
    assert(state_.load(std::memory_order_relaxed) != WAITING);
  }

  /// Returns true iff post() has already been delivered in this lifetime.
  /// Note the assert below: ready() may only be observed while state_ is
  /// INIT or EARLY_DELIVERY (i.e. before a waiter has consumed the post).
  bool ready() const noexcept {
    auto s = state_.load(std::memory_order_acquire);
    assert(s == INIT || s == EARLY_DELIVERY);
    return (s == EARLY_DELIVERY);
  }

  /// Equivalent to destroying the Baton and creating a new one. It is
  /// a bug to call this while there is a waiting thread, so in practice
  /// the waiter will be the one that resets the baton.
  void reset() noexcept {
    // See ~Baton for a discussion about why relaxed is okay here
    assert(state_.load(std::memory_order_relaxed) != WAITING);

    // We use a similar argument to justify the use of a relaxed store
    // here. Since both wait() and post() are required to be called
    // only once per lifetime, no thread can actually call those methods
    // correctly after a reset() unless it synchronizes with the thread
    // that performed the reset(). If a post() or wait() on another thread
    // didn't synchronize, then regardless of what operation we performed
    // here there would be a race on proper use of the Baton's spec
    // (although not on any particular load and store). Put another way,
    // we don't need to synchronize here because anybody that might rely
    // on such synchronization is required by the baton rules to perform
    // an additional synchronization that has the desired effect anyway.
    //
    // There is actually a similar argument to be made about the
    // constructor, in which the fenceless constructor initialization
    // of state_ is piggybacked on whatever synchronization mechanism
    // distributes knowledge of the Baton's existence
    state_.store(INIT, std::memory_order_relaxed);
  }

  /// Causes wait() to wake up. For each lifetime of a Baton (where a
  /// lifetime starts at construction or reset() and ends at
  /// destruction or reset()) there can be at most one call to post(),
  /// in the single poster version. Any thread may call post().
  void post() noexcept {
    if (!MayBlock) {
      /// Spin-only version
      ///
      // A waiter can never be asleep, so a plain release store suffices.
      // The assert checks (via a bitmask of the observed state) that the
      // state is still INIT or EARLY_DELIVERY.
      assert(
          ((1 << state_.load(std::memory_order_relaxed)) &
           ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
      state_.store(EARLY_DELIVERY, std::memory_order_release);
      return;
    }

    /// May-block versions
    ///
    uint32_t before = state_.load(std::memory_order_acquire);

    assert(before == INIT || before == WAITING || before == TIMED_OUT);

    // Fast path: nobody is waiting yet, try to deliver without a syscall.
    if (before == INIT &&
        state_.compare_exchange_strong(
            before,
            EARLY_DELIVERY,
            std::memory_order_release,
            std::memory_order_relaxed)) {
      return;
    }

    assert(before == WAITING || before == TIMED_OUT);

    // The waiter already gave up; the post is dropped on the floor.
    if (before == TIMED_OUT) {
      return;
    }

    // A waiter is (or is about to be) blocked on the futex; publish the
    // delivery then wake exactly one waiter.
    assert(before == WAITING);
    state_.store(LATE_DELIVERY, std::memory_order_release);
    detail::futexWake(&state_, 1);
  }

  /// Waits until post() has been called in the current Baton lifetime.
  /// May be called at most once during a Baton lifetime (construction
  /// |reset until destruction|reset). If post is called before wait in
  /// the current lifetime then this method returns immediately.
  ///
  /// The restriction that there can be at most one wait() per lifetime
  /// could be relaxed somewhat without any perf or size regressions,
  /// but by making this condition very restrictive we can provide better
  /// checking in debug builds.
  void wait(const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return;
    }

    // time_point::max() means "no deadline": tryWaitSlow blocks forever.
    auto const deadline = std::chrono::steady_clock::time_point::max();
    tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but doesn't block the thread if it hasn't been posted.
  ///
  /// try_wait has the following semantics:
  /// - It is ok to call try_wait any number times on the same baton until
  ///   try_wait reports that the baton has been posted.
  /// - It is ok to call timed_wait or wait on the same baton if try_wait
  ///   reports that baton hasn't been posted.
  /// - If try_wait indicates that the baton has been posted, it is invalid to
  ///   call wait, try_wait or timed_wait on the same baton without resetting
  ///
  /// @return true if baton has been posted, false otherwise
  bool try_wait() const noexcept {
    return ready();
  }

  /// Similar to wait, but with a timeout. The thread is unblocked if the
  /// timeout expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_for the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  timeout       Time until which the thread can block
  /// @return               true if the baton was posted to before timeout,
  ///                       false otherwise
  template <typename Rep, typename Period>
  bool try_wait_for(
      const std::chrono::duration<Rep, Period>& timeout,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    auto const deadline = std::chrono::steady_clock::now() + timeout;
    return tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but with a deadline. The thread is unblocked if the
  /// deadline expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_until the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  deadline      Time until which the thread can block
  /// @return               true if the baton was posted to before deadline,
  ///                       false otherwise
  template <typename Clock, typename Duration>
  bool try_wait_until(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    return tryWaitSlow(deadline, opt);
  }

  /// Alias to try_wait_for. Deprecated.
  template <typename Rep, typename Period>
  bool timed_wait(
      const std::chrono::duration<Rep, Period>& timeout) noexcept {
    return try_wait_for(timeout);
  }

  /// Alias to try_wait_until. Deprecated.
  template <typename Clock, typename Duration>
  bool timed_wait(
      const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
    return try_wait_until(deadline);
  }

 private:
  // Lifecycle states stored in state_ (a 4-byte futex word):
  //   INIT           -- neither post() nor wait() has happened yet
  //   EARLY_DELIVERY -- post() arrived before any waiter blocked
  //   WAITING        -- a waiter has committed to sleeping on the futex
  //   LATE_DELIVERY  -- post() arrived after the waiter committed
  //   TIMED_OUT      -- the waiter gave up before any delivery
  enum State : uint32_t {
    INIT = 0,
    EARLY_DELIVERY = 1,
    WAITING = 2,
    LATE_DELIVERY = 3,
    TIMED_OUT = 4,
  };

  // Slow path shared by wait()/try_wait_for()/try_wait_until(): spin first,
  // then (if MayBlock) sleep on the futex until delivery or deadline.
  // Returns true if the post was observed, false on timeout.
  template <typename Clock, typename Duration>
  bool tryWaitSlow(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt) noexcept {
    switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) {
      case detail::spin_result::success:
        return true;
      case detail::spin_result::timeout:
        return false;
      case detail::spin_result::advance:
        break;
    }

    if (!MayBlock) {
      // Blocking is disallowed, so keep spinning (yielding the CPU) until
      // delivery or deadline instead of sleeping on the futex.
      switch (detail::spin_yield_until(deadline, [=] { return ready(); })) {
        case detail::spin_result::success:
          return true;
        case detail::spin_result::timeout:
          return false;
        case detail::spin_result::advance:
          break;
      }
    }

    // guess we have to block :(
    uint32_t expected = INIT;
    if (!state_.compare_exchange_strong(
            expected,
            WAITING,
            std::memory_order_relaxed,
            std::memory_order_relaxed)) {
      // CAS failed, last minute reprieve
      assert(expected == EARLY_DELIVERY);
      // TODO: move the acquire to the compare_exchange failure load after C++17
      std::atomic_thread_fence(std::memory_order_acquire);
      return true;
    }

    while (true) {
      auto rv = detail::futexWaitUntil(&state_, WAITING, deadline);

      // Awoken by the deadline passing.
      if (rv == detail::FutexResult::TIMEDOUT) {
        assert(deadline != (std::chrono::time_point<Clock, Duration>::max()));
        state_.store(TIMED_OUT, std::memory_order_release);
        return false;
      }

      // Probably awoken by a matching wake event, but could also be awoken
      // by an asynchronous signal or by a spurious wakeup.
      //
      // state_ is the truth even if FUTEX_WAIT reported a matching
      // FUTEX_WAKE, since we aren't using type-stable storage and we
      // don't guarantee reuse.  The scenario goes like this: thread
      // A's last touch of a Baton is a call to wake(), which stores
      // LATE_DELIVERY and gets an unlucky context switch before delivering
      // the corresponding futexWake.  Thread B sees LATE_DELIVERY
      // without consuming a futex event, because it calls futexWait
      // with an expected value of WAITING and hence doesn't go to sleep.
      // B returns, so the Baton's memory is reused and becomes another
      // Baton (or a reuse of this one).  B calls futexWait on the new
      // Baton lifetime, then A wakes up and delivers a spurious futexWake
      // to the same memory location.  B's futexWait will then report a
      // consumed wake event even though state_ is still WAITING.
      //
      // It would be possible to add an extra state_ dance to communicate
      // that the futexWake has been sent so that we can be sure to consume
      // it before returning, but that would be a perf and complexity hit.
      uint32_t s = state_.load(std::memory_order_acquire);
      assert(s == WAITING || s == LATE_DELIVERY);
      if (s == LATE_DELIVERY) {
        return true;
      }
    }
  }

  // The 4-byte futex word holding the State enum; Baton has no other members.
  detail::Futex<Atom> state_;
};
|
||||
|
||||
} // namespace folly
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,16 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include <folly/synchronization/DistributedMutex.h>

namespace folly {
namespace detail {
namespace distributed_mutex {

// Explicit instantiation of the default configuration (std::atomic with
// timestamp publishing enabled) so its member definitions are emitted in
// this translation unit instead of being re-instantiated in every user.
template class DistributedMutex<std::atomic, true>;

} // namespace distributed_mutex
} // namespace detail
} // namespace folly
|
|
@ -0,0 +1,304 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
namespace distributed_mutex {
|
||||
|
||||
/**
|
||||
* DistributedMutex is a small, exclusive-only mutex that distributes the
|
||||
* bookkeeping required for mutual exclusion in the stacks of threads that are
|
||||
* contending for it. It has a mode that can combine critical sections when
|
||||
* the mutex experiences contention; this allows the implementation to elide
|
||||
* several expensive coherence and synchronization operations to boost
|
||||
* throughput, surpassing even atomic instructions in some cases. It has a
|
||||
* smaller memory footprint than std::mutex, a similar level of fairness
|
||||
* (better in some cases) and no dependencies on heap allocation. It is the
|
||||
* same width as a single pointer (8 bytes on most platforms), where on the
|
||||
* other hand, std::mutex and pthread_mutex_t are both 40 bytes. It is larger
|
||||
* than some of the other smaller locks, but the wide majority of cases using
|
||||
* the small locks are wasting the difference in alignment padding anyway
|
||||
*
|
||||
* Benchmark results are good - at the time of writing, in the contended case,
|
||||
* for lock/unlock based critical sections, it is about 4-5x faster than the
|
||||
* smaller locks and about ~2x faster than std::mutex. When used in
|
||||
* combinable mode, it is much faster than the alternatives, going more than
|
||||
* 10x faster than the small locks, about 6x faster than std::mutex, 2-3x
|
||||
* faster than flat combining and even faster than std::atomic<> in some
|
||||
* cases, allowing more work with higher throughput. In the uncontended case,
|
||||
* it is a few cycles faster than folly::MicroLock but a bit slower than
|
||||
 * std::mutex. DistributedMutex is also resistant to tail latency pathologies
|
||||
* unlike many of the other mutexes in use, which sleep for large time
|
||||
* quantums to reduce spin churn, this causes elevated latencies for threads
|
||||
* that enter the sleep cycle. The tail latency of lock acquisition can go up
|
||||
* to 10x lower because of a more deterministic scheduling algorithm that is
|
||||
* managed almost entirely in userspace. Detailed results comparing the
|
||||
* throughput and latencies of different mutex implementations and atomics are
|
||||
* at the bottom of folly/synchronization/test/SmallLocksBenchmark.cpp
|
||||
*
|
||||
* Theoretically, write locks promote concurrency when the critical sections
|
||||
* are small as most of the work is done outside the lock. And indeed,
|
||||
* performant concurrent applications go through several pains to limit the
|
||||
* amount of work they do while holding a lock. However, most times, the
|
||||
* synchronization and scheduling overhead of a write lock in the critical
|
||||
* path is so high, that after a certain point, making critical sections
|
||||
* smaller does not actually increase the concurrency of the application and
|
||||
* throughput plateaus. DistributedMutex moves this breaking point to the
|
||||
* level of hardware atomic instructions, so applications keep getting
|
||||
* concurrency even under very high contention. It does this by reducing
|
||||
* cache misses and contention in userspace and in the kernel by making each
|
||||
* thread wait on a thread local node and futex. When combined critical
|
||||
* sections are used DistributedMutex leverages template metaprogramming to
|
||||
* allow the mutex to make better synchronization decisions based on the
|
||||
* layout of the input and output data. This allows threads to keep working
|
||||
* only on their own cache lines without requiring cache coherence operations
|
||||
* when a mutex experiences heavy contention
|
||||
*
|
||||
* Non-timed mutex acquisitions are scheduled through intrusive LIFO
|
||||
* contention chains. Each thread starts by spinning for a short quantum and
|
||||
* falls back to two phased sleeping. Enqueue operations are lock free and
|
||||
* are piggybacked off mutex acquisition attempts. The LIFO behavior of a
|
||||
* contention chain is good in the case where the mutex is held for a short
|
||||
* amount of time, as the head of the chain is likely to not have slept on
|
||||
* futex() after exhausting its spin quantum. This allow us to avoid
|
||||
* unnecessary traversal and syscalls in the fast path with a higher
|
||||
* probability. Even though the contention chains are LIFO, the mutex itself
|
||||
* does not adhere to that scheduling policy globally. During contention,
|
||||
* threads that fail to lock the mutex form a LIFO chain on the central mutex
|
||||
* state, this chain is broken when a wakeup is scheduled, and future enqueue
|
||||
* operations form a new chain. This makes the chains themselves LIFO, but
|
||||
* preserves global fairness through a constant factor which is limited to the
|
||||
* number of concurrent failed mutex acquisition attempts. This binds the
|
||||
* last in first out behavior to the number of contending threads and helps
|
||||
* prevent starvation and latency outliers
|
||||
*
|
||||
* This strategy of waking up wakers one by one in a queue does not scale well
|
||||
* when the number of threads goes past the number of cores. At which point
|
||||
* preemption causes elevated lock acquisition latencies. DistributedMutex
|
||||
* implements a hardware timestamp publishing heuristic to detect and adapt to
|
||||
* preemption.
|
||||
*
|
||||
* DistributedMutex does not have the typical mutex API - it does not satisfy
|
||||
* the Lockable concept. It requires the user to maintain ephemeral bookkeeping
|
||||
* and pass that bookkeeping around to unlock() calls. The API overhead,
|
||||
* however, comes for free when you wrap this mutex for usage with
|
||||
* std::unique_lock, which is the recommended usage (std::lock_guard, in
|
||||
* optimized mode, has no performance benefit over std::unique_lock, so has been
|
||||
* omitted). A benefit of this API is that it disallows incorrect usage where a
|
||||
* thread unlocks a mutex that it does not own, thinking a mutex is functionally
|
||||
* identical to a binary semaphore, which, unlike a mutex, is a suitable
|
||||
* primitive for that usage
|
||||
*
|
||||
* Combined critical sections allow the implementation to elide several
|
||||
* expensive operations during the lifetime of a critical section that cause
|
||||
* slowdowns with regular lock/unlock based usage. DistributedMutex resolves
|
||||
* contention through combining up to a constant factor of 2 contention chains
|
||||
* to prevent issues with fairness and latency outliers, so we retain the
|
||||
* fairness benefits of the lock/unlock implementation with no noticeable
|
||||
* regression when switching between the lock methods. Despite the efficiency
|
||||
* benefits, combined critical sections can only be used when the critical
|
||||
* section does not depend on thread local state and does not introduce new
|
||||
* dependencies between threads when the critical section gets combined. For
|
||||
* example, locking or unlocking an unrelated mutex in a combined critical
|
||||
* section might lead to unexpected results or even undefined behavior. This
|
||||
* can happen if, for example, a different thread unlocks a mutex locked by
|
||||
* the calling thread, leading to undefined behavior as the mutex might not
|
||||
* allow locking and unlocking from unrelated threads (the posix and C++
|
||||
* standard disallow this usage for their mutexes)
|
||||
*
|
||||
* Timed locking through DistributedMutex is implemented through a centralized
|
||||
* algorithm. The underlying contention-chains framework used in
|
||||
* DistributedMutex is not abortable so we build abortability on the side.
|
||||
* All waiters wait on the central mutex state, by setting and resetting bits
|
||||
* within the pointer-length word. Since pointer length atomic integers are
|
||||
* incompatible with futex(FUTEX_WAIT) on most systems, a non-standard
|
||||
* implementation of futex() is used, where wait queues are managed in
|
||||
* user-space (see p1135r0 and folly::ParkingLot for more)
|
||||
*/
|
||||
template <
|
||||
template <typename> class Atomic = std::atomic,
|
||||
bool TimePublishing = true>
|
||||
class DistributedMutex {
|
||||
public:
|
||||
class DistributedMutexStateProxy;
|
||||
|
||||
/**
|
||||
* DistributedMutex is only default constructible, it can neither be moved
|
||||
* nor copied
|
||||
*/
|
||||
DistributedMutex();
|
||||
DistributedMutex(DistributedMutex&&) = delete;
|
||||
DistributedMutex(const DistributedMutex&) = delete;
|
||||
DistributedMutex& operator=(DistributedMutex&&) = delete;
|
||||
DistributedMutex& operator=(const DistributedMutex&) = delete;
|
||||
|
||||
/**
|
||||
* Acquires the mutex in exclusive mode
|
||||
*
|
||||
* This returns an ephemeral proxy that contains internal mutex state. This
|
||||
* must be kept around for the duration of the critical section and passed
|
||||
* subsequently to unlock() as an rvalue
|
||||
*
|
||||
* The proxy has no public API and is intended to be for internal usage only
|
||||
*
|
||||
* There are three notable cases where this method causes undefined
|
||||
* behavior:
|
||||
*
|
||||
* - This is not a recursive mutex. Trying to acquire the mutex twice from
|
||||
* the same thread without unlocking it results in undefined behavior
|
||||
* - Thread, coroutine or fiber migrations from within a critical section
|
||||
* are disallowed. This is because the implementation requires owning the
|
||||
* stack frame through the execution of the critical section for both
|
||||
* lock/unlock or combined critical sections. This also means that you
|
||||
* cannot allow another thread, fiber or coroutine to unlock the mutex
|
||||
* - This mutex cannot be used in a program compiled with segmented stacks,
|
||||
* there is currently no way to detect the presence of segmented stacks
|
||||
* at compile time or runtime, so we have no checks against this
|
||||
*/
|
||||
DistributedMutexStateProxy lock();
|
||||
|
||||
/**
|
||||
* Unlocks the mutex
|
||||
*
|
||||
* The proxy returned by lock must be passed to unlock as an rvalue. No
|
||||
* other option is possible here, since the proxy is only movable and not
|
||||
* copyable
|
||||
*
|
||||
* It is undefined behavior to unlock from a thread that did not lock the
|
||||
* mutex
|
||||
*/
|
||||
void unlock(DistributedMutexStateProxy);
|
||||
|
||||
/**
|
||||
* Try to acquire the mutex
|
||||
*
|
||||
* A non blocking version of the lock() function. The returned object is
|
||||
* contextually convertible to bool. And has the value true when the mutex
|
||||
* was successfully acquired, false otherwise
|
||||
*
|
||||
* This is allowed to return false spuriously, i.e. this is not guaranteed
|
||||
* to return true even when the mutex is currently unlocked. In the event
|
||||
* of a failed acquisition, this does not impose any memory ordering
|
||||
* constraints for other threads
|
||||
*/
|
||||
DistributedMutexStateProxy try_lock();
|
||||
|
||||
/**
|
||||
* Try to acquire the mutex, blocking for the given time
|
||||
*
|
||||
* Like try_lock(), this is allowed to fail spuriously and is not guaranteed
|
||||
* to return false even when the mutex is currently unlocked. But only
|
||||
* after the given time has elapsed
|
||||
*
|
||||
* try_lock_for() accepts a duration to block for, and try_lock_until()
|
||||
* accepts an absolute wall clock time point
|
||||
*/
|
||||
template <typename Rep, typename Period>
|
||||
DistributedMutexStateProxy try_lock_for(
|
||||
const std::chrono::duration<Rep, Period>& duration);
|
||||
|
||||
/**
|
||||
* Try to acquire the lock, blocking until the given deadline
|
||||
*
|
||||
* Other than the difference in the meaning of the second argument, the
|
||||
* semantics of this function are identical to try_lock_for()
|
||||
*/
|
||||
template <typename Clock, typename Duration>
|
||||
DistributedMutexStateProxy try_lock_until(
|
||||
const std::chrono::time_point<Clock, Duration>& deadline);
|
||||
|
||||
/**
|
||||
* Execute a task as a combined critical section
|
||||
*
|
||||
* Unlike traditional lock and unlock methods, lock_combine() enqueues the
|
||||
* passed task for execution on any arbitrary thread. This allows the
|
||||
* implementation to prevent cache line invalidations originating from
|
||||
* expensive synchronization operations. The thread holding the lock is
|
||||
* allowed to execute the task before unlocking, thereby forming a "combined
|
||||
* critical section".
|
||||
*
|
||||
* This idea is inspired by Flat Combining. Flat Combining was introduced
|
||||
* in the SPAA 2010 paper titled "Flat Combining and the
|
||||
* Synchronization-Parallelism Tradeoff", by Danny Hendler, Itai Incze, Nir
|
||||
* Shavit, and Moran Tzafrir -
|
||||
* https://www.cs.bgu.ac.il/~hendlerd/papers/flat-combining.pdf. The
|
||||
* implementation used here is significantly different from that described
|
||||
* in the paper. The high-level goal of reducing the overhead of
|
||||
* synchronization, however, is the same.
|
||||
*
|
||||
* Combined critical sections work best when kept simple. Since the
|
||||
* critical section might be executed on any arbitrary thread, relying on
|
||||
* things like thread local state or mutex locking and unlocking might cause
|
||||
* incorrectness. Associativity is important. For example
|
||||
*
|
||||
* auto one = std::unique_lock{one_};
|
||||
* two_.lock_combine([&]() {
|
||||
* if (bar()) {
|
||||
* one.unlock();
|
||||
* }
|
||||
* });
|
||||
*
|
||||
* This has the potential to cause undefined behavior because mutexes are
|
||||
* only meant to be acquired and released from the owning thread. Similar
|
||||
* errors can arise from a combined critical section introducing implicit
|
||||
* dependencies based on the state of the combining thread. For example
|
||||
*
|
||||
* // thread 1
|
||||
* auto one = std::unique_lock{one_};
|
||||
* auto two = std::unique_lock{two_};
|
||||
*
|
||||
* // thread 2
|
||||
* two_.lock_combine([&]() {
|
||||
* auto three = std::unique_lock{three_};
|
||||
* });
|
||||
*
|
||||
* Here, because we used a combined critical section, we have introduced a
|
||||
* dependency from one -> three that might not obvious to the reader
|
||||
*
|
||||
* This function is exception-safe. If the passed task throws an exception,
|
||||
* it will be propagated to the caller, even if the task is running on
|
||||
* another thread
|
||||
*
|
||||
* There are three notable cases where this method causes undefined
|
||||
* behavior:
|
||||
*
|
||||
* - This is not a recursive mutex. Trying to acquire the mutex twice from
|
||||
* the same thread without unlocking it results in undefined behavior
|
||||
* - Thread, coroutine or fiber migrations from within a critical section
|
||||
* are disallowed. This is because the implementation requires owning the
|
||||
* stack frame through the execution of the critical section for both
|
||||
* lock/unlock or combined critical sections. This also means that you
|
||||
* cannot allow another thread, fiber or coroutine to unlock the mutex
|
||||
* - This mutex cannot be used in a program compiled with segmented stacks,
|
||||
* there is currently no way to detect the presence of segmented stacks
|
||||
* at compile time or runtime, so we have no checks against this
|
||||
*/
|
||||
template <typename Task>
|
||||
auto lock_combine(Task task) -> decltype(std::declval<const Task&>()());
|
||||
|
||||
private:
|
||||
Atomic<std::uintptr_t> state_{0};
|
||||
};
|
||||
|
||||
} // namespace distributed_mutex
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* Bring the default instantiation of DistributedMutex into the folly
|
||||
* namespace without requiring any template arguments for public usage
|
||||
*/
|
||||
extern template class detail::distributed_mutex::DistributedMutex<>;
|
||||
using DistributedMutex = detail::distributed_mutex::DistributedMutex<>;
|
||||
|
||||
} // namespace folly
|
||||
|
||||
#include <folly/synchronization/DistributedMutex-inl.h>
|
||||
#include <folly/synchronization/DistributedMutexSpecializations.h>
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/synchronization/DistributedMutex.h>
|
||||
#include <folly/synchronization/detail/ProxyLockable.h>
|
||||
|
||||
/**
|
||||
* Specializations for DistributedMutex allow us to use it like a normal
|
||||
* mutex. Even though it has a non-usual interface
|
||||
*/
|
||||
namespace std {
// Specialize std::unique_lock for DistributedMutex. DistributedMutex is a
// "proxy lockable": its lock() returns a state proxy that must be handed
// back to unlock() (see the class documentation above), so the generic
// std::unique_lock cannot drive it. ProxyLockableUniqueLock adapts the
// proxy-based interface to the familiar unique_lock one.
template <template <typename> class Atom, bool TimePublishing>
class unique_lock<
    ::folly::detail::distributed_mutex::DistributedMutex<Atom, TimePublishing>>
    : public ::folly::detail::ProxyLockableUniqueLock<
          ::folly::detail::distributed_mutex::
              DistributedMutex<Atom, TimePublishing>> {
 public:
  // Inherit all constructors (default, locking, defer/try/timed variants).
  using ::folly::detail::ProxyLockableUniqueLock<
      ::folly::detail::distributed_mutex::
          DistributedMutex<Atom, TimePublishing>>::ProxyLockableUniqueLock;
};

// Same idea for std::lock_guard: a scoped lock/unlock over the proxy API.
template <template <typename> class Atom, bool TimePublishing>
class lock_guard<
    ::folly::detail::distributed_mutex::DistributedMutex<Atom, TimePublishing>>
    : public ::folly::detail::ProxyLockableLockGuard<
          ::folly::detail::distributed_mutex::
              DistributedMutex<Atom, TimePublishing>> {
 public:
  // Inherit the locking constructor.
  using ::folly::detail::ProxyLockableLockGuard<
      ::folly::detail::distributed_mutex::
          DistributedMutex<Atom, TimePublishing>>::ProxyLockableLockGuard;
};
} // namespace std
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include <folly/synchronization/ParkingLot.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace folly {
|
||||
namespace parking_lot_detail {
|
||||
|
||||
// Map a (already hashed) key to one of the kNumBuckets global buckets that
// back every ParkingLot instance.
Bucket& Bucket::bucketFor(uint64_t key) {
  constexpr size_t const kNumBuckets = 4096;

  // Statically allocating this lets us use this in allocation-sensitive
  // contexts. This relies on the assumption that std::mutex won't dynamically
  // allocate memory, which we assume to be the case on Linux and iOS.
  static Indestructible<std::array<Bucket, kNumBuckets>> gBuckets;
  return (*gBuckets)[key % kNumBuckets];
}

// Source of unique ids for ParkingLot instances (consumed by the
// ParkingLot constructor to initialize lotid_).
std::atomic<uint64_t> idallocator{0};
|
||||
|
||||
} // namespace parking_lot_detail
|
||||
} // namespace folly
|
|
@ -0,0 +1,318 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

#include <folly/hash/Hash.h>
#include <folly/Indestructible.h>
#include <folly/Unit.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
namespace parking_lot_detail {
|
||||
|
||||
// One parked thread's entry in a bucket's intrusive wait list. The bucket
// mutex protects the list links; the per-node mutex/condvar implement the
// actual sleep/wake handshake so wakeups can be precise (no thundering
// herd).
struct WaitNodeBase {
  const uint64_t key_;
  const uint64_t lotid_;
  WaitNodeBase* next_{nullptr};
  WaitNodeBase* prev_{nullptr};

  // tricky: hold both bucket and node mutex to write, either to read
  bool signaled_;
  std::mutex mutex_;
  std::condition_variable cond_;

  WaitNodeBase(uint64_t key, uint64_t lotid)
      : key_(key), lotid_(lotid), signaled_(false) {}

  // Sleep until wake() marks this node signaled, or until the deadline
  // passes. The sentinel time_point::max() means "no deadline": we do an
  // untimed wait. Returns std::cv_status::timeout only when the deadline
  // elapsed before a signal.
  template <typename Clock, typename Duration>
  std::cv_status wait(std::chrono::time_point<Clock, Duration> deadline) {
    const auto kNoDeadline = std::chrono::time_point<Clock, Duration>::max();
    auto result = std::cv_status::no_timeout;
    std::unique_lock<std::mutex> guard{mutex_};
    // Loop to absorb spurious wakeups: only a real signal or a timeout
    // terminates the wait.
    while (!signaled_ && result != std::cv_status::timeout) {
      if (deadline == kNoDeadline) {
        cond_.wait(guard);
      } else {
        result = cond_.wait_until(guard, deadline);
      }
    }
    return result;
  }

  // Mark the node signaled and wake its owning thread.
  void wake() {
    std::lock_guard<std::mutex> guard{mutex_};
    signaled_ = true;
    cond_.notify_one();
  }

  bool signaled() {
    return signaled_;
  }
};
|
||||
|
||||
extern std::atomic<uint64_t> idallocator;
|
||||
|
||||
// Our emulated futex uses 4096 lists of wait nodes. There are two levels
|
||||
// of locking: a per-list mutex that controls access to the list and a
|
||||
// per-node mutex, condvar, and bool that are used for the actual wakeups.
|
||||
// The per-node mutex allows us to do precise wakeups without thundering
|
||||
// herds.
|
||||
struct Bucket {
  std::mutex mutex_; // guards head_/tail_ and the links of enqueued nodes
  WaitNodeBase* head_;
  WaitNodeBase* tail_;
  // Number of nodes in this bucket; read without the mutex on the unpark()
  // fast path, hence atomic.
  std::atomic<uint64_t> count_;
  // NOTE(review): members carry no explicit initializers; the only Bucket
  // instances visible here are the function-local static array in
  // bucketFor(), whose static storage is zero-initialized before
  // construction -- confirm no other instances are ever created.

  static Bucket& bucketFor(uint64_t key);

  // Append node at the tail of the list. Caller must hold mutex_; count_ is
  // adjusted by the caller (see ParkingLot::park_until), not here.
  void push_back(WaitNodeBase* node) {
    if (tail_) {
      assert(head_);
      node->prev_ = tail_;
      tail_->next_ = node;
      tail_ = node;
    } else {
      tail_ = node;
      head_ = node;
    }
  }

  // Unlink node from the doubly-linked list and decrement count_. Caller
  // must hold mutex_; node must currently be linked into this bucket.
  void erase(WaitNodeBase* node) {
    assert(count_.load(std::memory_order_relaxed) >= 1);
    if (head_ == node && tail_ == node) {
      // Sole element: list becomes empty.
      assert(node->prev_ == nullptr);
      assert(node->next_ == nullptr);
      head_ = nullptr;
      tail_ = nullptr;
    } else if (head_ == node) {
      // First of several.
      assert(node->prev_ == nullptr);
      assert(node->next_);
      head_ = node->next_;
      head_->prev_ = nullptr;
    } else if (tail_ == node) {
      // Last of several.
      assert(node->next_ == nullptr);
      assert(node->prev_);
      tail_ = node->prev_;
      tail_->next_ = nullptr;
    } else {
      // Interior node.
      assert(node->next_);
      assert(node->prev_);
      node->next_->prev_ = node->prev_;
      node->prev_->next_ = node->next_;
    }
    count_.fetch_sub(1, std::memory_order_relaxed);
  }
};
|
||||
|
||||
} // namespace parking_lot_detail
|
||||
|
||||
enum class UnparkControl {
|
||||
RetainContinue,
|
||||
RemoveContinue,
|
||||
RetainBreak,
|
||||
RemoveBreak,
|
||||
};
|
||||
|
||||
enum class ParkResult {
|
||||
Skip,
|
||||
Unpark,
|
||||
Timeout,
|
||||
};
|
||||
|
||||
/*
|
||||
* ParkingLot provides an interface that is similar to Linux's futex
|
||||
* system call, but with additional functionality. It is implemented
|
||||
* in a portable way on top of std::mutex and std::condition_variable.
|
||||
*
|
||||
* Additional reading:
|
||||
* https://webkit.org/blog/6161/locking-in-webkit/
|
||||
* https://github.com/WebKit/webkit/blob/master/Source/WTF/wtf/ParkingLot.h
|
||||
* https://locklessinc.com/articles/futex_cheat_sheet/
|
||||
*
|
||||
* The main difference from futex is that park/unpark take lambdas,
|
||||
* such that nearly anything can be done while holding the bucket
|
||||
* lock. Unpark() lambda can also be used to wake up any number of
|
||||
* waiters.
|
||||
*
|
||||
* ParkingLot is templated on the data type, however, all ParkingLot
|
||||
* implementations are backed by a single static array of buckets to
|
||||
* avoid large memory overhead. Lambdas will only ever be called on
|
||||
* the specific ParkingLot's nodes.
|
||||
*/
|
||||
template <typename Data = Unit>
|
||||
class ParkingLot {
|
||||
const uint64_t lotid_;
|
||||
ParkingLot(const ParkingLot&) = delete;
|
||||
|
||||
struct WaitNode : public parking_lot_detail::WaitNodeBase {
|
||||
const Data data_;
|
||||
|
||||
template <typename D>
|
||||
WaitNode(uint64_t key, uint64_t lotid, D&& data)
|
||||
: WaitNodeBase(key, lotid), data_(std::forward<D>(data)) {}
|
||||
};
|
||||
|
||||
public:
|
||||
ParkingLot() : lotid_(parking_lot_detail::idallocator++) {}
|
||||
|
||||
/* Park API
|
||||
*
|
||||
* Key is almost always the address of a variable.
|
||||
*
|
||||
* ToPark runs while holding the bucket lock: usually this
|
||||
* is a check to see if we can sleep, by checking waiter bits.
|
||||
*
|
||||
* PreWait is usually used to implement condition variable like
|
||||
* things, such that you can unlock the condition variable's lock at
|
||||
* the appropriate time.
|
||||
*/
|
||||
template <typename Key, typename D, typename ToPark, typename PreWait>
|
||||
ParkResult park(const Key key, D&& data, ToPark&& toPark, PreWait&& preWait) {
|
||||
return park_until(
|
||||
key,
|
||||
std::forward<D>(data),
|
||||
std::forward<ToPark>(toPark),
|
||||
std::forward<PreWait>(preWait),
|
||||
std::chrono::steady_clock::time_point::max());
|
||||
}
|
||||
|
||||
template <
|
||||
typename Key,
|
||||
typename D,
|
||||
typename ToPark,
|
||||
typename PreWait,
|
||||
typename Clock,
|
||||
typename Duration>
|
||||
ParkResult park_until(
|
||||
const Key key,
|
||||
D&& data,
|
||||
ToPark&& toPark,
|
||||
PreWait&& preWait,
|
||||
std::chrono::time_point<Clock, Duration> deadline);
|
||||
|
||||
template <
|
||||
typename Key,
|
||||
typename D,
|
||||
typename ToPark,
|
||||
typename PreWait,
|
||||
typename Rep,
|
||||
typename Period>
|
||||
ParkResult park_for(
|
||||
const Key key,
|
||||
D&& data,
|
||||
ToPark&& toPark,
|
||||
PreWait&& preWait,
|
||||
std::chrono::duration<Rep, Period>& timeout) {
|
||||
return park_until(
|
||||
key,
|
||||
std::forward<D>(data),
|
||||
std::forward<ToPark>(toPark),
|
||||
std::forward<PreWait>(preWait),
|
||||
timeout + std::chrono::steady_clock::now());
|
||||
}
|
||||
|
||||
/*
|
||||
* Unpark API
|
||||
*
|
||||
* Key is the same uniqueaddress used in park(), and is used as a
|
||||
* hash key for lookup of waiters.
|
||||
*
|
||||
* Unparker is a function that is given the Data parameter, and
|
||||
* returns an UnparkControl. The Remove* results will remove and
|
||||
* wake the waiter, the Ignore/Stop results will not, while stopping
|
||||
* or continuing iteration of the waiter list.
|
||||
*/
|
||||
template <typename Key, typename Unparker>
|
||||
void unpark(const Key key, Unparker&& func);
|
||||
};
|
||||
|
||||
template <typename Data>
template <
    typename Key,
    typename D,
    typename ToPark,
    typename PreWait,
    typename Clock,
    typename Duration>
ParkResult ParkingLot<Data>::park_until(
    const Key bits,
    D&& data,
    ToPark&& toPark,
    PreWait&& preWait,
    std::chrono::time_point<Clock, Duration> deadline) {
  // Hash the key so waiters spread evenly across the shared bucket array.
  auto key = hash::twang_mix64(uint64_t(bits));
  auto& bucket = parking_lot_detail::Bucket::bucketFor(key);
  // The wait node lives on this thread's stack; it is guaranteed to be
  // unlinked before this frame returns on every path (skip, timeout, or
  // unpark -- the waker unlinks before calling wake()).
  WaitNode node(key, lotid_, std::forward<D>(data));

  {
    // A: Must be seq_cst. Matches B. The increment happens before the
    // toPark() check so that a concurrent unpark() cannot observe a zero
    // count and skip the bucket while we are deciding whether to sleep.
    bucket.count_.fetch_add(1, std::memory_order_seq_cst);

    std::unique_lock<std::mutex> bucketLock(bucket.mutex_);

    if (!std::forward<ToPark>(toPark)()) {
      // Caller decided not to sleep: undo the count and bail out.
      bucketLock.unlock();
      bucket.count_.fetch_sub(1, std::memory_order_relaxed);
      return ParkResult::Skip;
    }

    // count_ was already bumped above; only the list link is added here.
    bucket.push_back(&node);
  } // bucketLock scope

  // Run the caller's pre-wait hook outside the bucket lock, e.g. to release
  // a user-level lock before sleeping (condition-variable style usage).
  std::forward<PreWait>(preWait)();

  auto status = node.wait(deadline);

  if (status == std::cv_status::timeout) {
    // it's not really a timeout until we unlink the unsignaled node
    std::lock_guard<std::mutex> bucketLock(bucket.mutex_);
    if (!node.signaled()) {
      bucket.erase(&node);
      return ParkResult::Timeout;
    }
  }

  // Either we were signaled directly, or we raced with a waker at the
  // deadline and it already unlinked us -- both count as an unpark.
  return ParkResult::Unpark;
}
|
||||
|
||||
template <typename Data>
template <typename Key, typename Func>
void ParkingLot<Data>::unpark(const Key bits, Func&& func) {
  // Same hash as park_until() so we land on the same bucket.
  auto key = hash::twang_mix64(uint64_t(bits));
  auto& bucket = parking_lot_detail::Bucket::bucketFor(key);
  // B: Must be seq_cst. Matches A. If true, A *must* see in seq_cst
  // order any atomic updates in toPark() (and matching updates that
  // happen before unpark is called)
  if (bucket.count_.load(std::memory_order_seq_cst) == 0) {
    // Fast path: no waiters registered in this bucket, nothing to do.
    return;
  }

  std::lock_guard<std::mutex> bucketLock(bucket.mutex_);

  for (auto iter = bucket.head_; iter != nullptr;) {
    auto node = static_cast<WaitNode*>(iter);
    // Advance before the callback possibly unlinks the current node.
    iter = iter->next_;
    // Buckets are shared across keys and lots; only visit our own waiters.
    if (node->key_ == key && node->lotid_ == lotid_) {
      auto result = std::forward<Func>(func)(node->data_);
      if (result == UnparkControl::RemoveBreak ||
          result == UnparkControl::RemoveContinue) {
        // we unlink, but waiter destroys the node
        bucket.erase(node);

        node->wake();
      }
      if (result == UnparkControl::RemoveBreak ||
          result == UnparkControl::RetainBreak) {
        return;
      }
    }
  }
}
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,12 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include <folly/synchronization/WaitOptions.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
// Out-of-line definition of the static constexpr data member, required for
// odr-use under pre-C++17 rules (redundant but harmless with C++17's
// implicit inline for constexpr static data members).
constexpr std::chrono::nanoseconds WaitOptions::Defaults::spin_max;
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
|
||||
namespace folly {
|
||||
|
||||
/// WaitOptions
|
||||
///
|
||||
/// Various synchronization primitives as well as various concurrent data
|
||||
/// structures built using them have operations which might wait. This type
|
||||
/// represents a set of options for controlling such waiting.
|
||||
class WaitOptions {
 public:
  struct Defaults {
    /// spin_max
    ///
    /// Upper bound on how long a waiting operation will busy-wait (spin)
    /// before falling back to blocking. When wait and wake events arrive
    /// nearly simultaneously -- a common pattern when a primitive is under
    /// active use by several threads -- a short spin often lets the waiter
    /// observe the wake without paying the full cost of blocking and being
    /// rescheduled.
    ///
    /// Spinning is a trade-off: it costs cpu time and power, and can worsen
    /// priority inversion when the waiter ends up blocking anyway, but it
    /// can sharply reduce latency when the wake arrives quickly. Where the
    /// pause instruction is available, folly's spin loop uses it, giving a
    /// small speed boost to the colocated hyperthread as partial
    /// consolation.
    ///
    /// The 2 usec default was calibrated against circa-2013 devbox hardware,
    /// where a FUTEX_WAIT plus wakeup costs about 7 usec and one spin
    /// iteration about 7 nsec (almost all of it the pause instruction).
    static constexpr std::chrono::nanoseconds spin_max =
        std::chrono::microseconds(2);
  };

  /// Set the maximum spin duration; returns *this to allow chaining.
  WaitOptions& spin_max(std::chrono::nanoseconds duration) {
    spin_max_ = duration;
    return *this;
  }

  /// Current maximum spin duration applied before blocking.
  std::chrono::nanoseconds spin_max() const {
    return spin_max_;
  }

 private:
  std::chrono::nanoseconds spin_max_ = Defaults::spin_max;
};
|
||||
|
||||
} // namespace folly
|
|
@ -0,0 +1,219 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
#include <folly/Traits.h>
|
||||
#include <folly/Utility.h>
|
||||
#include <folly/functional/Invoke.h>
|
||||
#include <folly/lang/Launder.h>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
|
||||
/**
|
||||
* InlineFunctionRef is similar to folly::FunctionRef but has the additional
|
||||
* benefit of being able to store the function it was instantiated with inline
|
||||
* in a buffer of the given capacity. Inline storage is only used if the
|
||||
* function object and a pointer (for type-erasure) are small enough to fit in
|
||||
* the templated size. If there is not enough in-situ capacity for the
|
||||
* callable, this just stores a reference to the function object like
|
||||
* FunctionRef.
|
||||
*
|
||||
* This helps give a perf boost in the case where the data gets separated from
|
||||
* the point of invocation. If, for example, at the point of invocation, the
|
||||
* InlineFunctionRef object is not cached, a remote memory/cache read might be
|
||||
* required to invoke the original callable. Customizable inline storage
|
||||
* helps tune storage so we can store a type-erased callable with better
|
||||
* performance and locality. A real-life example of this might be a
|
||||
* folly::FunctionRef with a function pointer. The folly::FunctionRef would
|
||||
* point to the function pointer object in a remote location. This causes a
|
||||
* double-indirection at the point of invocation, and if that memory is dirty,
|
||||
* or not cached, it would cause additional cache misses. On the other hand
|
||||
* with InlineFunctionRef, inline storage would store the value of the
|
||||
* function pointer, avoiding the need to do a remote lookup to fetch the
|
||||
* value of the function pointer.
|
||||
*
|
||||
* To prevent misuse, InlineFunctionRef disallows construction from an lvalue
|
||||
* callable. This is to prevent usage where a user relies on the callable's
|
||||
* state after invocation through InlineFunctionRef. This has the potential
|
||||
* to copy the callable into inline storage when the callable is small, so we
|
||||
* might not use the same function when invoking, but rather a copy of it.
|
||||
*
|
||||
* Also note that InlineFunctionRef will always invoke the const qualified
|
||||
* version of the call operator for any callable that is passed. Regardless
|
||||
* of whether it has a non-const version. This is done to enforce the logical
|
||||
* constraint of function state being immutable.
|
||||
*
|
||||
* This class is always trivially-copyable (and therefore
|
||||
* trivially-destructible), making it suitable for use in a union without
|
||||
* requiring manual destruction.
|
||||
*/
|
||||
template <typename FunctionType, std::size_t Size>
class InlineFunctionRef;

template <typename ReturnType, typename... Args, std::size_t Size>
class InlineFunctionRef<ReturnType(Args...), Size> {
  // Raw byte buffer that holds either the callable itself (in-situ mode) or
  // a pointer to it (ref mode). It is Size minus one word because call_
  // occupies the first word of the object.
  using Storage =
      _t<std::aligned_storage<Size - sizeof(uintptr_t), sizeof(uintptr_t)>>;
  // Type-erased trampoline: knows how to recover the callable from Storage
  // and invoke it with the given arguments.
  using Call = ReturnType (*)(const Storage&, Args&&...);

  struct InSituTag {};
  struct RefTag {};

  static_assert(
      (Size % sizeof(uintptr_t)) == 0,
      "Size has to be a multiple of sizeof(uintptr_t)");
  static_assert(Size >= 2 * sizeof(uintptr_t), "This doesn't work");
  static_assert(alignof(Call) == alignof(Storage), "Mismatching alignments");

  // This defines a mode tag that is used in the construction of
  // InlineFunctionRef to determine the storage and indirection method for the
  // passed callable.
  //
  // This requires that the we pass in a type that is not ref-qualified.
  template <typename Func>
  using ConstructMode = _t<std::conditional<
      folly::is_trivially_copyable<Func>{} &&
          (sizeof(Func) <= sizeof(Storage)) &&
          (alignof(Func) <= alignof(Storage)),
      InSituTag,
      RefTag>>;

 public:
  /**
   * InlineFunctionRef can be constructed from a nullptr, callable or another
   * InlineFunctionRef with the same size.  These are the constructors that
   * don't take a callable.
   *
   * InlineFunctionRef is meant to be trivially copyable so we default the
   * constructors and assignment operators.
   */
  InlineFunctionRef(std::nullptr_t) : call_{nullptr} {}
  InlineFunctionRef() : call_{nullptr} {}
  InlineFunctionRef(const InlineFunctionRef& other) = default;
  InlineFunctionRef(InlineFunctionRef&&) = default;
  InlineFunctionRef& operator=(const InlineFunctionRef&) = default;
  InlineFunctionRef& operator=(InlineFunctionRef&&) = default;

  /**
   * Constructors from callables.
   *
   * If all of the following conditions are satisfied, then we store the
   * callable in the inline storage:
   *
   *  1) The function has been passed as an rvalue, meaning that there is no
   *     use of the original in the user's code after it has been passed to
   *     us.
   *  2) Size of the callable is less than the size of the inline storage
   *     buffer.
   *  3) The callable is trivially constructible and destructible.
   *
   * If any one of the above conditions is not satisfied, we fall back to
   * reference semantics and store the function as a pointer, and add a level
   * of indirection through type erasure.
   */
  template <
      typename Func,
      _t<std::enable_if<
          !std::is_same<_t<std::decay<Func>>, InlineFunctionRef>{} &&
          !std::is_reference<Func>{} &&
          std::is_convertible<
              decltype(std::declval<Func&&>()(std::declval<Args&&>()...)),
              ReturnType>{}>>* = nullptr>
  InlineFunctionRef(Func&& func) {
    // We disallow construction from lvalues, so assert that this is not a
    // reference type.  When invoked with an lvalue, Func is a lvalue
    // reference type, when invoked with an rvalue, Func is not ref-qualified.
    static_assert(
        !std::is_reference<Func>{},
        "InlineFunctionRef cannot be used with lvalues");
    static_assert(std::is_rvalue_reference<Func&&>{}, "");
    // Dispatch on ConstructMode; the callable is always passed (and later
    // invoked) as const to enforce immutable function state.
    construct(ConstructMode<Func>{}, folly::as_const(func));
  }

  /**
   * The call operator uses the function pointer and a reference to the
   * storage to do the dispatch.  The function pointer takes care of the
   * appropriate casting.
   */
  ReturnType operator()(Args... args) const {
    return call_(storage_, static_cast<Args&&>(args)...);
  }

  /**
   * We have a function engaged if the call function points to anything other
   * than null.
   */
  operator bool() const noexcept {
    return call_;
  }

 private:
  friend class InlineFunctionRefTest;

  /**
   * Inline storage constructor implementation.
   */
  template <typename Func>
  void construct(InSituTag, Func& func) {
    using Value = _t<std::remove_reference<Func>>;

    // Assert that the following two assumptions are valid
    //    1) fit in the storage space we have and match alignments, and
    //    2) be invocable in a const context, it does not make sense to copy a
    //       callable into inline storage if it makes state local
    //       modifications.
    static_assert(alignof(Value) <= alignof(Storage), "");
    static_assert(is_invocable<const _t<std::decay<Func>>, Args&&...>{}, "");
    static_assert(folly::is_trivially_copyable<Value>{}, "");

    new (&storage_) Value{func};
    call_ = &callInline<Value>;
  }

  /**
   * Ref storage constructor implementation.  This is identical to
   * folly::FunctionRef.
   */
  template <typename Func>
  void construct(RefTag, Func& func) {
    // store a pointer to the function
    using Pointer = _t<std::add_pointer<_t<std::remove_reference<Func>>>>;
    new (&storage_) Pointer{&func};
    call_ = &callPointer<Pointer>;
  }

  // Trampoline for in-situ mode: the callable itself lives in `object`.
  template <typename Func>
  static ReturnType callInline(const Storage& object, Args&&... args) {
    // The only type of pointer allowed is a function pointer, no other
    // pointer types are invocable.
    static_assert(
        !std::is_pointer<Func>::value ||
            std::is_function<_t<std::remove_pointer<Func>>>::value,
        "");
    // launder() is required to safely reuse the storage bytes as a Func.
    return (*folly::launder(reinterpret_cast<const Func*>(&object)))(
        static_cast<Args&&>(args)...);
  }

  // Trampoline for ref mode: `object` holds a pointer to the callable.
  template <typename Func>
  static ReturnType callPointer(const Storage& object, Args&&... args) {
    // When the function we were instantiated with was not trivial, the given
    // pointer points to a pointer, which pointers to the callable.  So we
    // cast to a pointer and then to the pointee.
    static_assert(std::is_pointer<Func>::value, "");
    return (**folly::launder(reinterpret_cast<const Func*>(&object)))(
        static_cast<Args&&>(args)...);
  }

  // call_ first, then the storage buffer; with the matching alignments
  // asserted above the two together occupy exactly Size bytes.
  Call call_;
  Storage storage_;
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
|
@ -0,0 +1,207 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Optional.h>
#include <folly/Portability.h>
#include <folly/Utility.h>

#include <cassert>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <system_error>
#include <utility>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
namespace proxylockable_detail {
// Debug-only sanity checks shared by the ProxyLockable wrappers. When
// kIsDebug is false these reduce to no-ops, so misuse is only diagnosed in
// debug builds.

// Throws if a lock operation is attempted while the proxy is already held
// (attempted recursive/double lock).
template <typename Bool>
void throwIfAlreadyLocked(Bool&& locked) {
  if (kIsDebug && locked) {
    throw std::system_error{
        std::make_error_code(std::errc::resource_deadlock_would_occur)};
  }
}

// Throws if an unlock (or similar) is attempted while the proxy is not held.
template <typename Bool>
void throwIfNotLocked(Bool&& locked) {
  if (kIsDebug && !locked) {
    throw std::system_error{
        std::make_error_code(std::errc::operation_not_permitted)};
  }
}

// Throws if the wrapper has no associated mutex (default-constructed or
// moved-from state).
template <typename Bool>
void throwIfNoMutex(Bool&& mutex) {
  if (kIsDebug && !mutex) {
    throw std::system_error{
        std::make_error_code(std::errc::operation_not_permitted)};
  }
}
} // namespace proxylockable_detail
|
||||
|
||||
template <typename Mutex>
ProxyLockableUniqueLock<Mutex>::~ProxyLockableUniqueLock() {
  // Release the mutex if this wrapper still owns it; a deferred, failed-try
  // or moved-from wrapper owns nothing and needs no unlock.
  if (owns_lock()) {
    unlock();
  }
}

// Locking constructor: acquires mtx immediately, storing the returned proxy,
// and remembers the mutex for later unlock/relock.
template <typename Mutex>
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
    mutex_type& mtx) noexcept {
  proxy_.emplace(mtx.lock());
  mutex_ = std::addressof(mtx);
}

// Move constructor: implemented in terms of move assignment.
template <typename Mutex>
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
    ProxyLockableUniqueLock&& a) noexcept {
  *this = std::move(a);
}

// Move assignment: steals the proxy and mutex pointer, leaving the source
// with a null mutex so its destructor performs no unlock.
// NOTE(review): unlike std::unique_lock, this does not release a lock
// already held by *this before overwriting it -- confirm callers never
// assign over an owned lock.
template <typename Mutex>
ProxyLockableUniqueLock<Mutex>& ProxyLockableUniqueLock<Mutex>::operator=(
    ProxyLockableUniqueLock&& other) noexcept {
  proxy_ = std::move(other.proxy_);
  mutex_ = folly::exchange(other.mutex_, nullptr);
  return *this;
}
|
||||
|
||||
template <typename Mutex>
|
||||
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
std::defer_lock_t) noexcept {
|
||||
mutex_ = std::addressof(mtx);
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
std::try_to_lock_t) {
|
||||
mutex_ = std::addressof(mtx);
|
||||
if (auto state = mtx.try_lock()) {
|
||||
proxy_.emplace(std::move(state));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
template <typename Rep, typename Period>
|
||||
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
const std::chrono::duration<Rep, Period>& duration) {
|
||||
mutex_ = std::addressof(mtx);
|
||||
if (auto state = mtx.try_lock_for(duration)) {
|
||||
proxy_.emplace(std::move(state));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
template <typename Clock, typename Duration>
|
||||
ProxyLockableUniqueLock<Mutex>::ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
const std::chrono::time_point<Clock, Duration>& time) {
|
||||
mutex_ = std::addressof(mtx);
|
||||
if (auto state = mtx.try_lock_until(time)) {
|
||||
proxy_.emplace(std::move(state));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
void ProxyLockableUniqueLock<Mutex>::lock() {
|
||||
proxylockable_detail::throwIfAlreadyLocked(proxy_);
|
||||
proxylockable_detail::throwIfNoMutex(mutex_);
|
||||
|
||||
proxy_.emplace(mutex_->lock());
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
void ProxyLockableUniqueLock<Mutex>::unlock() {
|
||||
proxylockable_detail::throwIfNoMutex(mutex_);
|
||||
proxylockable_detail::throwIfNotLocked(proxy_);
|
||||
|
||||
mutex_->unlock(std::move(*proxy_));
|
||||
proxy_.reset();
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
bool ProxyLockableUniqueLock<Mutex>::try_lock() {
|
||||
proxylockable_detail::throwIfNoMutex(mutex_);
|
||||
proxylockable_detail::throwIfAlreadyLocked(proxy_);
|
||||
|
||||
if (auto state = mutex_->try_lock()) {
|
||||
proxy_.emplace(std::move(state));
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
template <typename Rep, typename Period>
|
||||
bool ProxyLockableUniqueLock<Mutex>::try_lock_for(
|
||||
const std::chrono::duration<Rep, Period>& duration) {
|
||||
proxylockable_detail::throwIfNoMutex(mutex_);
|
||||
proxylockable_detail::throwIfAlreadyLocked(proxy_);
|
||||
|
||||
if (auto state = mutex_->try_lock_for(duration)) {
|
||||
proxy_.emplace(std::move(state));
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
template <typename Clock, typename Duration>
|
||||
bool ProxyLockableUniqueLock<Mutex>::try_lock_until(
|
||||
const std::chrono::time_point<Clock, Duration>& time) {
|
||||
proxylockable_detail::throwIfNoMutex(mutex_);
|
||||
proxylockable_detail::throwIfAlreadyLocked(proxy_);
|
||||
|
||||
if (auto state = mutex_->try_lock_until(time)) {
|
||||
proxy_.emplace(std::move(state));
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
void ProxyLockableUniqueLock<Mutex>::swap(
|
||||
ProxyLockableUniqueLock& other) noexcept {
|
||||
std::swap(mutex_, other.mutex_);
|
||||
std::swap(proxy_, other.proxy_);
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
typename ProxyLockableUniqueLock<Mutex>::mutex_type*
|
||||
ProxyLockableUniqueLock<Mutex>::mutex() const noexcept {
|
||||
return mutex_;
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
typename ProxyLockableUniqueLock<Mutex>::proxy_type*
|
||||
ProxyLockableUniqueLock<Mutex>::proxy() const noexcept {
|
||||
return proxy_ ? std::addressof(proxy_.value()) : nullptr;
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
bool ProxyLockableUniqueLock<Mutex>::owns_lock() const noexcept {
|
||||
return proxy_.has_value();
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
ProxyLockableUniqueLock<Mutex>::operator bool() const noexcept {
|
||||
return owns_lock();
|
||||
}
|
||||
|
||||
template <typename Mutex>
|
||||
ProxyLockableLockGuard<Mutex>::ProxyLockableLockGuard(mutex_type& mtx)
|
||||
: ProxyLockableUniqueLock<Mutex>{mtx} {}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
|
@ -0,0 +1,164 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <folly/Optional.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
|
||||
/**
|
||||
* ProxyLockable is a "concept" that is used usually for mutexes that don't
|
||||
* return void, but rather a proxy object that contains data that should be
|
||||
* passed to the unlock function.
|
||||
*
|
||||
* This is in contrast with the normal Lockable concept that imposes no
|
||||
* requirement on the return type of lock(), and requires an unlock() with no
|
||||
* parameters. Here we require that lock() returns non-void and that unlock()
|
||||
* accepts the return type of lock() by value, rvalue-reference or
|
||||
* const-reference
|
||||
*
|
||||
* Here we define two classes, that can be used by the top level to implement
|
||||
* specializations for std::unique_lock and std::lock_guard. Both
|
||||
* ProxyLockableUniqueLock and ProxyLockableLockGuard implement the entire
|
||||
* interface of std::unique_lock and std::lock_guard respectively
|
||||
*/
|
||||
template <typename Mutex>
|
||||
class ProxyLockableUniqueLock {
|
||||
public:
|
||||
using mutex_type = Mutex;
|
||||
using proxy_type =
|
||||
_t<std::decay<decltype(std::declval<mutex_type>().lock())>>;
|
||||
|
||||
/**
|
||||
* Default constructor initializes the unique_lock to an empty state
|
||||
*/
|
||||
ProxyLockableUniqueLock() = default;
|
||||
|
||||
/**
|
||||
* Destructor releases the mutex if it is locked
|
||||
*/
|
||||
~ProxyLockableUniqueLock();
|
||||
|
||||
/**
|
||||
* Move constructor and move assignment operators take state from the other
|
||||
* lock
|
||||
*/
|
||||
ProxyLockableUniqueLock(ProxyLockableUniqueLock&& other) noexcept;
|
||||
ProxyLockableUniqueLock& operator=(ProxyLockableUniqueLock&&) noexcept;
|
||||
|
||||
/**
|
||||
* Locks the mutex, blocks until the mutex can be acquired.
|
||||
*
|
||||
* The mutex is guaranteed to be acquired after this function returns.
|
||||
*/
|
||||
ProxyLockableUniqueLock(mutex_type&) noexcept;
|
||||
|
||||
/**
|
||||
* Explicit locking constructors to control how the lock() method is called
|
||||
*
|
||||
* std::defer_lock_t causes the mutex to get tracked, but not locked
|
||||
* std::try_to_lock_t causes try_lock() to be called. The current object is
|
||||
* converts to true if the lock was successful
|
||||
*/
|
||||
ProxyLockableUniqueLock(mutex_type& mtx, std::defer_lock_t) noexcept;
|
||||
ProxyLockableUniqueLock(mutex_type& mtx, std::try_to_lock_t);
|
||||
|
||||
/**
|
||||
* Timed locking constructors
|
||||
*/
|
||||
template <typename Rep, typename Period>
|
||||
ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
const std::chrono::duration<Rep, Period>& duration);
|
||||
template <typename Clock, typename Duration>
|
||||
ProxyLockableUniqueLock(
|
||||
mutex_type& mtx,
|
||||
const std::chrono::time_point<Clock, Duration>& time);
|
||||
|
||||
/**
|
||||
* Lock and unlock methods
|
||||
*
|
||||
* lock() and try_lock() throw if the mutex is already locked, or there is
|
||||
* no mutex. unlock() throws if there is no mutex or if the mutex was not
|
||||
* locked
|
||||
*/
|
||||
void lock();
|
||||
void unlock();
|
||||
bool try_lock();
|
||||
|
||||
/**
|
||||
* Timed locking methods
|
||||
*
|
||||
* These throw if there was no mutex, or if the mutex was already locked
|
||||
*/
|
||||
template <typename Rep, typename Period>
|
||||
bool try_lock_for(const std::chrono::duration<Rep, Period>& duration);
|
||||
template <typename Clock, typename Duration>
|
||||
bool try_lock_until(const std::chrono::time_point<Clock, Duration>& time);
|
||||
|
||||
/**
|
||||
* Swap this unique lock with the other one
|
||||
*/
|
||||
void swap(ProxyLockableUniqueLock& other) noexcept;
|
||||
|
||||
/**
|
||||
* Returns true if the unique lock contains a lock and also has acquired an
|
||||
* exclusive lock successfully
|
||||
*/
|
||||
bool owns_lock() const noexcept;
|
||||
explicit operator bool() const noexcept;
|
||||
|
||||
/**
|
||||
* mutex() return a pointer to the mutex if there is a contained mutex and
|
||||
* proxy() returns a pointer to the contained proxy if the mutex is locked
|
||||
*
|
||||
* If the unique lock was not constructed with a mutex, then mutex() returns
|
||||
* nullptr. If the mutex is not locked, then proxy() returns nullptr
|
||||
*/
|
||||
mutex_type* mutex() const noexcept;
|
||||
proxy_type* proxy() const noexcept;
|
||||
|
||||
private:
|
||||
friend class ProxyLockableTest;
|
||||
|
||||
/**
|
||||
* If the optional has a value, the mutex is locked, if it is empty, it is
|
||||
* not
|
||||
*/
|
||||
mutable folly::Optional<proxy_type> proxy_{};
|
||||
mutex_type* mutex_{nullptr};
|
||||
};
|
||||
|
||||
template <typename Mutex>
|
||||
class ProxyLockableLockGuard : private ProxyLockableUniqueLock<Mutex> {
|
||||
public:
|
||||
using mutex_type = Mutex;
|
||||
|
||||
/**
|
||||
* Constructor locks the mutex, and destructor unlocks
|
||||
*/
|
||||
ProxyLockableLockGuard(mutex_type& mtx);
|
||||
~ProxyLockableLockGuard() = default;
|
||||
|
||||
/**
|
||||
* This class is not movable or assignable
|
||||
*
|
||||
* For more complicated usecases, consider the UniqueLock variant, which
|
||||
* provides more options
|
||||
*/
|
||||
ProxyLockableLockGuard(const ProxyLockableLockGuard&) = delete;
|
||||
ProxyLockableLockGuard(ProxyLockableLockGuard&&) = delete;
|
||||
ProxyLockableLockGuard& operator=(ProxyLockableLockGuard&&) = delete;
|
||||
ProxyLockableLockGuard& operator=(const ProxyLockableLockGuard&) = delete;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
||||
|
||||
#include <folly/synchronization/detail/ProxyLockable-inl.h>
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* @author Keith Adams <kma@fb.com>
|
||||
* @author Jordan DeLong <delong.j@fb.com>
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
#include <folly/portability/Asm.h>
|
||||
|
||||
namespace folly {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace detail {
|
||||
|
||||
/*
|
||||
* A helper object for the contended case. Starts off with eager
|
||||
* spinning, and falls back to sleeping for small quantums.
|
||||
*/
|
||||
class Sleeper {
|
||||
static const uint32_t kMaxActiveSpin = 4000;
|
||||
|
||||
uint32_t spinCount;
|
||||
|
||||
public:
|
||||
Sleeper() noexcept : spinCount(0) {}
|
||||
|
||||
static void sleep() noexcept {
|
||||
/*
|
||||
* Always sleep 0.5ms, assuming this will make the kernel put
|
||||
* us down for whatever its minimum timer resolution is (in
|
||||
* linux this varies by kernel version from 1ms to 10ms).
|
||||
*/
|
||||
std::this_thread::sleep_for(std::chrono::microseconds{500});
|
||||
}
|
||||
|
||||
void wait() noexcept {
|
||||
if (spinCount < kMaxActiveSpin) {
|
||||
++spinCount;
|
||||
asm_volatile_pause();
|
||||
} else {
|
||||
sleep();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include <folly/portability/Asm.h>
|
||||
#include <folly/synchronization/WaitOptions.h>
|
||||
|
||||
namespace folly {
|
||||
namespace detail {
|
||||
|
||||
// Outcome of a bounded spin-wait attempt.
enum class spin_result {
  success, // the awaited condition became true
  timeout, // the overall deadline was exceeded
  advance, // the current wait-options spin budget was exhausted
};
|
||||
|
||||
template <typename Clock, typename Duration, typename F>
|
||||
spin_result spin_pause_until(
|
||||
std::chrono::time_point<Clock, Duration> const& deadline,
|
||||
WaitOptions const& opt,
|
||||
F f) {
|
||||
if (opt.spin_max() <= opt.spin_max().zero()) {
|
||||
return spin_result::advance;
|
||||
}
|
||||
|
||||
auto tbegin = Clock::now();
|
||||
while (true) {
|
||||
if (f()) {
|
||||
return spin_result::success;
|
||||
}
|
||||
|
||||
auto const tnow = Clock::now();
|
||||
if (tnow >= deadline) {
|
||||
return spin_result::timeout;
|
||||
}
|
||||
|
||||
// Backward time discontinuity in Clock? revise pre_block starting point
|
||||
tbegin = std::min(tbegin, tnow);
|
||||
if (tnow >= tbegin + opt.spin_max()) {
|
||||
return spin_result::advance;
|
||||
}
|
||||
|
||||
// The pause instruction is the polite way to spin, but it doesn't
|
||||
// actually affect correctness to omit it if we don't have it. Pausing
|
||||
// donates the full capabilities of the current core to its other
|
||||
// hyperthreads for a dozen cycles or so.
|
||||
asm_volatile_pause();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Clock, typename Duration, typename F>
|
||||
spin_result spin_yield_until(
|
||||
std::chrono::time_point<Clock, Duration> const& deadline,
|
||||
F f) {
|
||||
while (true) {
|
||||
if (f()) {
|
||||
return spin_result::success;
|
||||
}
|
||||
|
||||
auto const max = std::chrono::time_point<Clock, Duration>::max();
|
||||
if (deadline != max && Clock::now() >= deadline) {
|
||||
return spin_result::timeout;
|
||||
}
|
||||
|
||||
std::this_thread::yield();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace folly
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue