From fd43ee09dae5a1280efd34586fa2fea5b4fe2d3a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 21 Nov 2016 12:07:09 -0800 Subject: [PATCH] Range deletion microoptimizations Summary: - Made RangeDelAggregator's InternalKeyComparator member a reference-to-const so we don't need to copy-construct it. Also added InternalKeyComparator to ImmutableCFOptions so we don't need to construct one for each DBIter. - Made MemTable::NewRangeTombstoneIterator and the table readers' NewRangeTombstoneIterator() functions return nullptr instead of NewEmptyInternalIterator to avoid the allocation. Updated callers accordingly. Closes https://github.com/facebook/rocksdb/pull/1548 Differential Revision: D4208169 Pulled By: ajkr fbshipit-source-id: 2fd65cf --- db/builder.cc | 1 - db/db_compaction_filter_test.cc | 12 ++++++------ db/db_iter.cc | 2 +- db/db_test_util.cc | 8 ++++---- db/flush_job.cc | 6 ++++-- db/memtable.cc | 8 ++++++-- db/memtable.h | 1 + db/range_del_aggregator.h | 2 +- db/table_cache.cc | 16 ++++++++++++---- db/write_batch_test.cc | 3 +++ table/block_based_table_reader.cc | 24 +++++++++++++----------- table/table_reader.h | 2 +- table/table_test.cc | 3 +++ tools/ldb_cmd.cc | 4 ++-- util/cf_options.cc | 1 + util/cf_options.h | 2 ++ 16 files changed, 60 insertions(+), 35 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index e31371d1b6..d59146c3d5 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -82,7 +82,6 @@ Status BuildTable( Status s; meta->fd.file_size = 0; iter->SeekToFirst(); - range_del_iter->SeekToFirst(); std::unique_ptr range_del_agg( new RangeDelAggregator(internal_comparator, snapshots)); s = range_del_agg->AddTombstones(std::move(range_del_iter)); diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index cba0b34679..8b6f50bfa1 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -261,8 +261,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) { int total = 0; Arena arena; { - RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), - {} /* snapshots */); + InternalKeyComparator icmp(options.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); ScopedArenaIterator iter( dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1])); iter->SeekToFirst(); @@ -351,8 +351,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) { // level Lmax because this record is at the tip count = 0; { - RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), - {} /* snapshots */); + InternalKeyComparator icmp(options.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); ScopedArenaIterator iter( dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[1])); iter->SeekToFirst(); @@ -570,8 +570,8 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { int count = 0; int total = 0; Arena arena; - RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), - {} /* snapshots */); + InternalKeyComparator icmp(options.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); ScopedArenaIterator iter( dbfull()->NewInternalIterator(&arena, &range_del_agg)); iter->SeekToFirst(); diff --git a/db/db_iter.cc b/db/db_iter.cc index 1f49bb3d4f..a97a5af74d 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -123,7 +123,7 @@ class DBIter: public Iterator { prefix_same_as_start_(prefix_same_as_start), pin_thru_lifetime_(pin_data), total_order_seek_(total_order_seek), - range_del_agg_(InternalKeyComparator(cmp), s) { + range_del_agg_(ioptions.internal_comparator, s) { RecordTick(statistics_, NO_ITERATORS); prefix_extractor_ = ioptions.prefix_extractor; max_skip_ = max_sequential_skip_in_iterations; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 91fc87d1a4..c54dfcc27b 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -590,8 +590,8 @@ std::string DBTestBase::Contents(int cf) { std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) { Arena arena; auto options = CurrentOptions(); - RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), - {} /* snapshots */); + InternalKeyComparator icmp(options.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); ScopedArenaIterator iter; if (cf == 0) { iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg)); @@ -999,8 +999,8 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) { ScopedArenaIterator iter; Arena arena; auto options = CurrentOptions(); - RangeDelAggregator range_del_agg(InternalKeyComparator(options.comparator), - {} /* snapshots */); + InternalKeyComparator icmp(options.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); if (cf != 0) { iter.set( dbfull()->NewInternalIterator(&arena, &range_del_agg, handles_[cf])); diff --git a/db/flush_job.cc b/db/flush_job.cc index ff09a8a161..80914e06fc 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -256,12 +256,14 @@ Status FlushJob::WriteLevel0Table() { "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n", cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber()); memtables.push_back(m->NewIterator(ro, &arena)); - range_del_iters.push_back(m->NewRangeTombstoneIterator(ro)); + auto* range_del_iter = m->NewRangeTombstoneIterator(ro); + if (range_del_iter != nullptr) { + range_del_iters.push_back(range_del_iter); + } total_num_entries += m->num_entries(); total_num_deletes += m->num_deletes(); total_memory_usage += m->ApproximateMemoryUsage(); } - assert(memtables.size() == range_del_iters.size()); event_logger_->Log() << "job" << job_context_->job_id << "event" << "flush_started" diff --git a/db/memtable.cc b/db/memtable.cc index e31a69a0e8..6de1bfff97 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -75,6 +75,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, range_del_table_(SkipListFactory().CreateMemTableRep( comparator_, &allocator_, nullptr /* transform */, ioptions.info_log)), + is_range_del_table_empty_(true), data_size_(0), num_entries_(0), num_deletes_(0), @@ -375,8 +376,8 @@ InternalIterator* MemTable::NewIterator(const ReadOptions& read_options, InternalIterator* MemTable::NewRangeTombstoneIterator( const ReadOptions& read_options) { - if (read_options.ignore_range_deletions) { - return NewEmptyInternalIterator(); + if (read_options.ignore_range_deletions || is_range_del_table_empty_) { + return nullptr; } return new MemTableIterator(*this, read_options, nullptr /* arena */, true /* use_range_del_table */); @@ -508,6 +509,9 @@ void MemTable::Add(SequenceNumber s, ValueType type, !first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) { } } + if (is_range_del_table_empty_ && type == kTypeRangeDeletion) { + is_range_del_table_empty_ = false; + } } // Callback from MemTable::Get() diff --git a/db/memtable.h b/db/memtable.h index 8b68bd8a41..a6ad3f2431 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -352,6 +352,7 @@ class MemTable { MemTableAllocator allocator_; unique_ptr table_; unique_ptr range_del_table_; + bool is_range_del_table_empty_; // Total data size of all data inserted std::atomic data_size_; diff --git a/db/range_del_aggregator.h b/db/range_del_aggregator.h index 8781fb2975..403a6033f8 100644 --- a/db/range_del_aggregator.h +++ b/db/range_del_aggregator.h @@ -105,7 +105,7 @@ class RangeDelAggregator { SequenceNumber upper_bound_; std::unique_ptr rep_; - const InternalKeyComparator icmp_; + const InternalKeyComparator& icmp_; }; } // namespace rocksdb diff --git a/db/table_cache.cc b/db/table_cache.cc index bee2565b18..6688dd1244 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -178,7 +178,9 @@ InternalIterator* TableCache::NewIterator( if (range_del_agg != nullptr && !options.ignore_range_deletions) { std::unique_ptr range_del_iter(NewRangeDeletionIterator( options, icomparator, fd, file_read_hist, skip_filters, level)); - s = range_del_iter->status(); + if (range_del_iter != nullptr) { + s = range_del_iter->status(); + } if (s.ok()) { s = range_del_agg->AddTombstones(std::move(range_del_iter)); } @@ -253,7 +255,7 @@ InternalIterator* TableCache::NewRangeDeletionIterator( const FileDescriptor& fd, HistogramImpl* file_read_hist, bool skip_filters, int level) { if (options.ignore_range_deletions) { - return NewEmptyInternalIterator(); + return nullptr; } Status s; TableReader* table_reader = fd.table_reader; @@ -270,7 +272,11 @@ InternalIterator* TableCache::NewRangeDeletionIterator( if (s.ok()) { auto* result = table_reader->NewRangeTombstoneIterator(options); if (cache_handle != nullptr) { - result->RegisterCleanup(&UnrefEntry, cache_, cache_handle); + if (result == nullptr) { + ReleaseHandle(cache_handle); + } else { + result->RegisterCleanup(&UnrefEntry, cache_, cache_handle); + } } return result; } @@ -287,7 +293,9 @@ Status TableCache::Get(const ReadOptions& options, !options.ignore_range_deletions) { std::unique_ptr range_del_iter(NewRangeDeletionIterator( options, internal_comparator, fd, file_read_hist, skip_filters, level)); - s = range_del_iter->status(); + if (range_del_iter != nullptr) { + s = range_del_iter->status(); + } if (s.ok()) { s = get_context->range_del_agg()->AddTombstones( std::move(range_del_iter)); diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index a680692370..5a4bfb6916 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -55,6 +55,9 @@ static std::string PrintContents(WriteBatch* b) { iter = mem->NewRangeTombstoneIterator(ReadOptions()); iter_guard.reset(iter); } + if (iter == nullptr) { + continue; + } for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey ikey; memset((void*)&ikey, 0, sizeof(ikey)); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 1a6622158e..6617d2ec25 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -1489,7 +1489,7 @@ InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options, InternalIterator* BlockBasedTable::NewRangeTombstoneIterator( const ReadOptions& read_options) { if (rep_->range_del_handle.IsNull()) { - return NewEmptyInternalIterator(); + return nullptr; } std::string str; rep_->range_del_handle.EncodeTo(&str); @@ -1967,18 +1967,20 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { } // Output range deletions block auto* range_del_iter = NewRangeTombstoneIterator(ReadOptions()); - range_del_iter->SeekToFirst(); - if (range_del_iter->Valid()) { - out_file->Append( - "Range deletions:\n" - "--------------------------------------\n" - " "); - for (; range_del_iter->Valid(); range_del_iter->Next()) { - DumpKeyValue(range_del_iter->key(), range_del_iter->value(), out_file); + if (range_del_iter != nullptr) { + range_del_iter->SeekToFirst(); + if (range_del_iter->Valid()) { + out_file->Append( + "Range deletions:\n" + "--------------------------------------\n" + " "); + for (; range_del_iter->Valid(); range_del_iter->Next()) { + DumpKeyValue(range_del_iter->key(), range_del_iter->value(), out_file); + } + out_file->Append("\n"); } - out_file->Append("\n"); + delete range_del_iter; } - delete range_del_iter; // Output Data blocks s = DumpDataBlocks(out_file); diff --git a/table/table_reader.h b/table/table_reader.h index 4c2faf3659..3e4ba585b7 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -44,7 +44,7 @@ class TableReader { virtual InternalIterator* NewRangeTombstoneIterator( const ReadOptions& read_options) { - return NewEmptyInternalIterator(); + return nullptr; } // Given a key, return an approximate byte offset in the file where diff --git a/table/table_test.cc b/table/table_test.cc index f2670acafe..841fd9e024 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2459,6 +2459,9 @@ TEST_F(MemTableTest, Simple) { iter = memtable->NewRangeTombstoneIterator(ReadOptions()); iter_guard.reset(iter); } + if (iter == nullptr) { + continue; + } iter->SeekToFirst(); while (iter->Valid()) { fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index da170776f5..eed8f20997 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1187,8 +1187,8 @@ void InternalDumpCommand::DoCommand() { uint64_t s1=0,s2=0; // Setup internal key iterator Arena arena; - RangeDelAggregator range_del_agg(InternalKeyComparator(options_.comparator), - {} /* snapshots */); + auto icmp = InternalKeyComparator(options_.comparator); + RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); ScopedArenaIterator iter(idb->NewInternalIterator(&arena, &range_del_agg)); Status st = iter->status(); if (!st.ok()) { diff --git a/util/cf_options.cc b/util/cf_options.cc index 19685d156a..e91bdc85cd 100644 --- a/util/cf_options.cc +++ b/util/cf_options.cc @@ -31,6 +31,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options, compaction_options_fifo(cf_options.compaction_options_fifo), prefix_extractor(cf_options.prefix_extractor.get()), user_comparator(cf_options.comparator), + internal_comparator(InternalKeyComparator(cf_options.comparator)), merge_operator(cf_options.merge_operator.get()), compaction_filter(cf_options.compaction_filter), compaction_filter_factory(cf_options.compaction_filter_factory.get()), diff --git a/util/cf_options.h b/util/cf_options.h index f16d738696..46e093cdff 100644 --- a/util/cf_options.h +++ b/util/cf_options.h @@ -8,6 +8,7 @@ #include #include +#include "db/dbformat.h" #include "rocksdb/options.h" #include "util/compression.h" #include "util/db_options.h" @@ -35,6 +36,7 @@ struct ImmutableCFOptions { const SliceTransform* prefix_extractor; const Comparator* user_comparator; + InternalKeyComparator internal_comparator; MergeOperator* merge_operator;