From 009034cf12770ea087181c161d3ceeed8e8d83b2 Mon Sep 17 00:00:00 2001 From: Abhishek Kona Date: Tue, 29 Jan 2013 12:23:31 -0800 Subject: [PATCH] Performant util/histogram. Summary: Earlier way to record in histogram=> Linear search BucketLimit array to find the bucket and increment the counter Current way to record in histogram=> Store a HistMap statically which points the buckets of each value in the range [kFirstValue, kLastValue); In the proccess use vectors instead of array's and refactor some code to HistogramHelper class. Test Plan: run db_bench with histogram=1 and see a histogram being printed. Reviewers: dhruba, chip, heyongqiang Reviewed By: chip CC: leveldb Differential Revision: https://reviews.facebook.net/D8265 --- Makefile | 4 ++ util/histogram.cc | 90 ++++++++++++++++++++++++++++-------------- util/histogram.h | 54 ++++++++++++++++++++----- util/histogram_test.cc | 57 ++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 39 deletions(-) create mode 100644 util/histogram_test.cc diff --git a/Makefile b/Makefile index 86beb05e88..8c31a8c8e3 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,7 @@ TESTS = \ c_test \ cache_test \ coding_test \ + histogram_test \ corruption_test \ crc32c_test \ db_test \ @@ -149,6 +150,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) +histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) + corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) diff --git a/util/histogram.cc b/util/histogram.cc index bb95f583ea..4e73c69a12 100644 --- a/util/histogram.cc +++ b/util/histogram.cc @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style 
license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include #include #include #include "port/port.h" @@ -9,7 +10,10 @@ namespace leveldb { -const double Histogram::kBucketLimit[kNumBuckets] = { +HistogramBucketMapper::HistogramBucketMapper() : + // Add newer bucket index here. + // Should be alwyas added in sorted order. + bucketValues_({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, @@ -24,30 +28,50 @@ const double Histogram::kBucketLimit[kNumBuckets] = { 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, - 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, - 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, - 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, - 1e200, -}; + 1000000000}), + maxBucketValue_(bucketValues_.back()), + minBucketValue_(bucketValues_.front()) { + for (size_t i =0; i < bucketValues_.size(); ++i) { + valueIndexMap_[bucketValues_[i]] = i; + } +} + +const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { + if (value >= maxBucketValue_) { + return bucketValues_.size() - 1; + } else if ( value >= minBucketValue_ ) { + std::map::const_iterator lowerBound = + valueIndexMap_.lower_bound(value); + if (lowerBound != valueIndexMap_.end()) { + return lowerBound->second; + } else { + return 0; + } + } else { + return 0; + } +} + +namespace { + const HistogramBucketMapper bucketMapper; +} + + +Histogram::Histogram() : + buckets_(std::vector(bucketMapper.BucketCount(), 0)) {} void Histogram::Clear() { - min_ = kBucketLimit[kNumBuckets-1]; + min_ = bucketMapper.LastValue(); max_ = 0; num_ = 0; sum_ = 0; 
sum_squares_ = 0; - for (int i = 0; i < kNumBuckets; i++) { - buckets_[i] = 0; - } + buckets_.resize(bucketMapper.BucketCount(), 0); } -void Histogram::Add(double value) { - // Linear search is fast enough for our usage in db_bench - int b = 0; - while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) { - b++; - } - buckets_[b] += 1.0; +void Histogram::Add(uint64_t value) { + const size_t index = bucketMapper.IndexForValue(value); + buckets_[index] += 1; if (min_ > value) min_ = value; if (max_ < value) max_ = value; num_++; @@ -55,13 +79,17 @@ void Histogram::Add(double value) { sum_squares_ += (value * value); } +void Histogram::Add(double value) { + Add(static_cast(value)); +} + void Histogram::Merge(const Histogram& other) { if (other.min_ < min_) min_ = other.min_; if (other.max_ > max_) max_ = other.max_; num_ += other.num_; sum_ += other.sum_; sum_squares_ += other.sum_squares_; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { buckets_[b] += other.buckets_[b]; } } @@ -73,15 +101,19 @@ double Histogram::Median() const { double Histogram::Percentile(double p) const { double threshold = num_ * (p / 100.0); double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { sum += buckets_[b]; if (sum >= threshold) { // Scale linearly within this bucket - double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; - double right_point = kBucketLimit[b]; + double left_point = (b == 0) ? 
0 : bucketMapper.BucketLimit(b-1); + double right_point = bucketMapper.BucketLimit(b); double left_sum = sum - buckets_[b]; double right_sum = sum; - double pos = (threshold - left_sum) / (right_sum - left_sum); + double pos = 0; + double right_left_diff = right_sum - left_sum; + if (right_left_diff != 0) { + pos = (threshold - left_sum) / (right_sum - left_sum); + } double r = left_point + (right_point - left_point) * pos; if (r < min_) r = min_; if (r > max_) r = max_; @@ -116,16 +148,16 @@ std::string Histogram::ToString() const { r.append("------------------------------------------------------\n"); const double mult = 100.0 / num_; double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { + for (int b = 0; b < bucketMapper.BucketCount(); b++) { if (buckets_[b] <= 0.0) continue; sum += buckets_[b]; snprintf(buf, sizeof(buf), - "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", - ((b == 0) ? 0.0 : kBucketLimit[b-1]), // left - kBucketLimit[b], // right - buckets_[b], // count - mult * buckets_[b], // percentage - mult * sum); // cumulative percentage + "[ %ld, %ld ) %ld %7.3f%% %7.3f%% ", + ((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)), // left + bucketMapper.BucketLimit(b), // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. diff --git a/util/histogram.h b/util/histogram.h index 1ef9f3c8ab..03d7c6a8bd 100644 --- a/util/histogram.h +++ b/util/histogram.h @@ -5,36 +5,70 @@ #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ +#include #include +#include +#include namespace leveldb { +class HistogramBucketMapper { + public: + + HistogramBucketMapper(); + + // converts a value to the bucket index. + const size_t IndexForValue(const uint64_t value) const; + // number of buckets required. 
+ + const size_t BucketCount() const { + return bucketValues_.size(); + } + + uint64_t LastValue() const { + return maxBucketValue_; + } + + uint64_t FirstValue() const { + return minBucketValue_; + } + + uint64_t BucketLimit(const uint64_t bucketNumber) const { + assert(bucketNumber < BucketCount()); + return bucketValues_[bucketNumber]; + } + + private: + const std::vector bucketValues_; + const uint64_t maxBucketValue_; + const uint64_t minBucketValue_; + std::map valueIndexMap_; +}; + class Histogram { public: - Histogram() { } - ~Histogram() { } + Histogram(); void Clear(); + void Add(uint64_t value); void Add(double value); void Merge(const Histogram& other); std::string ToString() const; + double Median() const; + double Percentile(double p) const; + double Average() const; + double StandardDeviation() const; + private: double min_; double max_; double num_; double sum_; double sum_squares_; + std::vector buckets_; - enum { kNumBuckets = 154 }; - static const double kBucketLimit[kNumBuckets]; - double buckets_[kNumBuckets]; - - double Median() const; - double Percentile(double p) const; - double Average() const; - double StandardDeviation() const; }; } // namespace leveldb diff --git a/util/histogram_test.cc b/util/histogram_test.cc new file mode 100644 index 0000000000..2a7aae4caf --- /dev/null +++ b/util/histogram_test.cc @@ -0,0 +1,57 @@ +#include "util/histogram.h" + +#include "util/testharness.h" + +namespace leveldb { + +class HistogramTest { }; + +TEST(HistogramTest, BasicOperation) { + + Histogram histogram; + for (uint64_t i = 1; i <= 100; i++) { + histogram.Add(i); + } + + { + double median = histogram.Median(); + // ASSERT_LE(median, 50); + ASSERT_GT(median, 0); + } + + { + double percentile100 = histogram.Percentile(100.0); + ASSERT_LE(percentile100, 100.0); + ASSERT_GT(percentile100, 0.0); + double percentile99 = histogram.Percentile(99.0); + double percentile85 = histogram.Percentile(85.0); + ASSERT_LE(percentile99, 99.0); + 
ASSERT_TRUE(percentile99 >= percentile85);
+  }
+
+  ASSERT_EQ(histogram.Average(), 50.5); // avg is accurately calculated.
+}
+
+TEST(HistogramTest, EmptyHistogram) {
+  Histogram histogram;
+  ASSERT_EQ(histogram.Median(), 0.0);
+  ASSERT_EQ(histogram.Percentile(85.0), 0.0);
+  ASSERT_EQ(histogram.Average(), 0.0);
+}
+
+TEST(HistogramTest, ClearHistogram) {
+  Histogram histogram;
+  for (uint64_t i = 1; i <= 100; i++) {
+    histogram.Add(i);
+  }
+  histogram.Clear();
+  ASSERT_EQ(histogram.Median(), 0);
+  ASSERT_EQ(histogram.Percentile(85.0), 0);
+  ASSERT_EQ(histogram.Average(), 0);
+}
+
+} // namespace leveldb
+
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}