diff --git a/.github/workflows/sanity_check.yml b/.github/workflows/sanity_check.yml index 92b25bb7bc..3ee92fc85c 100644 --- a/.github/workflows/sanity_check.yml +++ b/.github/workflows/sanity_check.yml @@ -35,7 +35,7 @@ jobs: args: https://raw.githubusercontent.com/llvm-mirror/clang/master/tools/clang-format/clang-format-diff.py - name: Check format - run: make check-format + run: VERBOSE_CHECK=1 make check-format - name: Compare buckify output run: make check-buck-targets diff --git a/build_tools/format-diff.sh b/build_tools/format-diff.sh index 6e3cbbd479..70da9a5782 100755 --- a/build_tools/format-diff.sh +++ b/build_tools/format-diff.sh @@ -124,6 +124,10 @@ then elif [ $CHECK_ONLY ] then echo "Your change has unformatted code. Please run make format!" + if [ $VERBOSE_CHECK ]; then + clang-format --version + echo "$diffs" + fi exit 1 fi diff --git a/file/writable_file_writer.cc b/file/writable_file_writer.cc index 2f44f7f58c..66fcf95405 100644 --- a/file/writable_file_writer.cc +++ b/file/writable_file_writer.cc @@ -31,7 +31,7 @@ IOStatus WritableFileWriter::Append(const Slice& data) { rocksdb_kill_odds * REDUCE_ODDS2); // Calculate the checksum of appended data - CalculateFileChecksum(data); + UpdateFileChecksum(data); { IOSTATS_TIMER_GUARD(prepare_write_nanos); @@ -225,6 +225,7 @@ IOStatus WritableFileWriter::Flush() { std::string WritableFileWriter::GetFileChecksum() { if (checksum_generator_ != nullptr) { + assert(checksum_finalized_); return checksum_generator_->GetChecksum(); } else { return kUnknownFileChecksum; @@ -346,7 +347,7 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) { return s; } -void WritableFileWriter::CalculateFileChecksum(const Slice& data) { +void WritableFileWriter::UpdateFileChecksum(const Slice& data) { if (checksum_generator_ != nullptr) { checksum_generator_->Update(data.data(), data.size()); } diff --git a/file/writable_file_writer.h b/file/writable_file_writer.h index 4a422a6bbf..8fa5d53f6c 100644 --- a/file/writable_file_writer.h +++ b/file/writable_file_writer.h @@ -51,7 +51,7 @@ class WritableFileWriter { #endif // ROCKSDB_LITE bool ShouldNotifyListeners() const { return !listeners_.empty(); } - void CalculateFileChecksum(const Slice& data); + void UpdateFileChecksum(const Slice& data); std::unique_ptr writable_file_; std::string file_name_; diff --git a/include/rocksdb/file_checksum.h b/include/rocksdb/file_checksum.h index e7b27fb785..3a75ef234c 100644 --- a/include/rocksdb/file_checksum.h +++ b/include/rocksdb/file_checksum.h @@ -24,6 +24,10 @@ struct FileChecksumGenContext { // FileChecksumGenerator is the class to generates the checksum value // for each file when the file is written to the file system. +// Implementations may assume that +// * Finalize is called at most once during the life of the object +// * All calls to Update come before Finalize +// * All calls to GetChecksum come after Finalize class FileChecksumGenerator { public: virtual ~FileChecksumGenerator() {} @@ -36,7 +40,8 @@ class FileChecksumGenerator { // Generate the final results if no further new data will be updated. virtual void Finalize() = 0; - // Get the checksum + // Get the checksum. The result should not be the empty string and may + // include arbitrary bytes, including non-printable characters. virtual std::string GetChecksum() const = 0; // Returns a name that identifies the current file checksum function. @@ -97,9 +102,13 @@ class FileChecksumList { // Create a new file checksum list. extern FileChecksumList* NewFileChecksumList(); -// Return a shared_ptr of the builtin Crc32 based file checksum generatory +// Return a shared_ptr of the builtin Crc32c based file checksum generatory // factory object, which can be shared to create the Crc32c based checksum // generator object. +// Note: this implementation is compatible with many other crc32c checksum +// implementations and uses big-endian encoding of the result, unlike most +// other crc32c checksums in RocksDB, which alter the result with +// crc32c::Mask and use little-endian encoding. extern std::shared_ptr GetFileChecksumGenCrc32cFactory(); diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 89a8a9d5a2..3f7ec9965d 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -89,7 +89,7 @@ struct BackupableDBOptions { // Only used if share_table_files is set to true. If true, will consider that // backups can come from different databases, hence a sst is not uniquely - // identifed by its name, but by the triple (file name, crc32, file length) + // identifed by its name, but by the triple (file name, crc32c, file length) // Default: false // Note: this is an experimental option, and you'll need to set it manually // *turn it on only if you know what you're doing* diff --git a/table/table_test.cc b/table/table_test.cc index 8e1d971330..8846331384 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3297,7 +3297,7 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) { ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum.c_str()); } -TEST_P(BlockBasedTableTest, Crc32FileChecksum) { +TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { FileChecksumGenCrc32cFactory* file_checksum_gen_factory = new FileChecksumGenCrc32cFactory(); Options options; @@ -3322,12 +3322,12 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) { FileChecksumGenContext gen_context; gen_context.file_name = "db/tmp"; - std::unique_ptr checksum_crc32_gen1 = + std::unique_ptr checksum_crc32c_gen1 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); - f.SetFileChecksumGenerator(checksum_crc32_gen1.release()); + f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); std::unique_ptr builder; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, *comparator, @@ -3342,12 +3342,22 @@ TEST_P(BlockBasedTableTest, Crc32FileChecksum) { f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); - std::unique_ptr checksum_crc32_gen2 = + std::unique_ptr checksum_crc32c_gen2 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); std::string checksum; - ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum)); + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); + + // Unit test the generator itself for schema stability + std::unique_ptr checksum_crc32c_gen3 = + options.file_checksum_gen_factory->CreateFileChecksumGenerator( + gen_context); + const char data[] = "here is some data"; + checksum_crc32c_gen3->Update(data, sizeof(data)); + checksum_crc32c_gen3->Finalize(); + checksum = checksum_crc32c_gen3->GetChecksum(); + ASSERT_STREQ(checksum.c_str(), "\345\245\277\110"); } // Plain table is not supported in ROCKSDB_LITE @@ -3441,7 +3451,7 @@ TEST_F(PlainTableTest, NoFileChecksum) { EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum.c_str()); } -TEST_F(PlainTableTest, Crc32FileChecksum) { +TEST_F(PlainTableTest, Crc32cFileChecksum) { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 20; plain_table_options.bloom_bits_per_key = 8; @@ -3462,12 +3472,12 @@ TEST_F(PlainTableTest, Crc32FileChecksum) { FileChecksumGenContext gen_context; gen_context.file_name = "db/tmp"; - std::unique_ptr checksum_crc32_gen1 = + std::unique_ptr checksum_crc32c_gen1 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); - f.SetFileChecksumGenerator(checksum_crc32_gen1.release()); + f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); std::unique_ptr builder(factory.NewTableBuilder( TableBuilderOptions( @@ -3481,11 +3491,11 @@ TEST_F(PlainTableTest, Crc32FileChecksum) { f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); - std::unique_ptr checksum_crc32_gen2 = + std::unique_ptr checksum_crc32c_gen2 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); std::string checksum; - ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum)); + ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); } diff --git a/util/coding.h b/util/coding.h index d46654ec4d..0dd10ee5f6 100644 --- a/util/coding.h +++ b/util/coding.h @@ -7,8 +7,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // -// Endian-neutral encoding: +// Encoding independent of machine byte order: // * Fixed-length numbers are encoded with least-significant byte first +// (little endian, native order on Intel and others) // * In addition we support variable length "varint" encoding // * Strings are encoded prefixed by their length in varint format @@ -402,13 +403,34 @@ inline bool GetVarsignedint64(Slice* input, int64_t* value) { } } -// Provide an interface for platform independent endianness transformation -inline uint64_t EndianTransform(uint64_t input, size_t size) { - char* pos = reinterpret_cast(&input); - uint64_t ret_val = 0; - for (size_t i = 0; i < size; ++i) { - ret_val |= (static_cast(static_cast(pos[i])) - << ((size - i - 1) << 3)); +// Swaps between big and little endian. Can be used to in combination +// with the little-endian encoding/decoding functions to encode/decode +// big endian. +template +inline T EndianSwapValue(T v) { + static_assert(std::is_integral::value, "non-integral type"); + +#ifdef _MSC_VER + if (sizeof(T) == 2) { + return static_cast(_byteswap_ushort(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(_byteswap_ulong(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(_byteswap_uint64(static_cast(v))); + } +#else + if (sizeof(T) == 2) { + return static_cast(__builtin_bswap16(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(__builtin_bswap32(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(__builtin_bswap64(static_cast(v))); + } +#endif + // Recognized by clang as bswap, but not by gcc :( + T ret_val = 0; + for (size_t i = 0; i < sizeof(T); ++i) { + ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i)); } return ret_val; } diff --git a/util/file_checksum_helper.h b/util/file_checksum_helper.h index f83b80912d..d9a3c8e477 100644 --- a/util/file_checksum_helper.h +++ b/util/file_checksum_helper.h @@ -6,11 +6,12 @@ #pragma once #include #include + #include "port/port.h" #include "rocksdb/file_checksum.h" #include "rocksdb/status.h" +#include "util/coding.h" #include "util/crc32c.h" -#include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -26,60 +27,19 @@ class FileChecksumGenCrc32c : public FileChecksumGenerator { checksum_ = crc32c::Extend(checksum_, data, n); } - void Finalize() override { checksum_str_ = Uint32ToString(checksum_); } + void Finalize() override { + assert(checksum_str_.empty()); + // Store as big endian raw bytes + PutFixed32(&checksum_str_, EndianSwapValue(checksum_)); + } - std::string GetChecksum() const override { return checksum_str_; } + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } const char* Name() const override { return "FileChecksumCrc32c"; } - // Convert a uint32_t type data into a 4 bytes string. - static std::string Uint32ToString(uint32_t v) { - std::string s; - if (port::kLittleEndian) { - s.append(reinterpret_cast(&v), sizeof(v)); - } else { - char buf[sizeof(v)]; - buf[0] = v & 0xff; - buf[1] = (v >> 8) & 0xff; - buf[2] = (v >> 16) & 0xff; - buf[3] = (v >> 24) & 0xff; - s.append(buf, sizeof(v)); - } - size_t i = 0, j = s.size() - 1; - while (i < j) { - char tmp = s[i]; - s[i] = s[j]; - s[j] = tmp; - ++i; - --j; - } - return s; - } - - // Convert a 4 bytes size string into a uint32_t type data. - static uint32_t StringToUint32(std::string s) { - assert(s.size() == sizeof(uint32_t)); - size_t i = 0, j = s.size() - 1; - while (i < j) { - char tmp = s[i]; - s[i] = s[j]; - s[j] = tmp; - ++i; - --j; - } - uint32_t v = 0; - if (port::kLittleEndian) { - memcpy(&v, s.c_str(), sizeof(uint32_t)); - } else { - const char* buf = s.c_str(); - v |= static_cast(buf[0]); - v |= (static_cast(buf[1]) << 8); - v |= (static_cast(buf[2]) << 16); - v |= (static_cast(buf[3]) << 24); - } - return v; - } - private: uint32_t checksum_; std::string checksum_str_; diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index a2058b75cc..e4eb8e19cd 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -1197,7 +1197,7 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options, std::string dst; // 1. extract the filename size_t slash = file.find_last_of('/'); - // file will either be shared/, shared_checksum/ + // file will either be shared/, shared_checksum/ // or private// assert(slash != std::string::npos); dst = file.substr(slash + 1); @@ -1759,8 +1759,8 @@ Slice kMetaDataPrefix("metadata "); // // (optional) // -// -// +// +// // ... Status BackupEngineImpl::BackupMeta::LoadFromFile( const std::string& backup_dir, @@ -1808,6 +1808,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( std::vector> files; + // WART: The checksums are crc32c, not original crc32 Slice checksum_prefix("crc32 "); for (uint32_t i = 0; s.ok() && i < num_files; ++i) { @@ -1921,7 +1922,8 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { } for (const auto& file : files_) { - // use crc32 for now, switch to something else if needed + // use crc32c for now, switch to something else if needed + // WART: The checksums are crc32c, not original crc32 size_t newlen = len + file->filename.length() + snprintf(writelen_temp, sizeof(writelen_temp), " crc32 %u\n", file->checksum_value);