readahead backwards from sst end

Summary:
Prefetch the last 512 KB of data from the end of each SST file opened for compaction to reduce IO.
Closes https://github.com/facebook/rocksdb/pull/2149

Differential Revision: D4880576

Pulled By: lightmark

fbshipit-source-id: aa767cd1afc84c541837fbf1ad6c0d45b34d3932
Authored by Aaron Gao on 2017-04-14 18:43:32 -07:00; committed by Facebook Github Bot.
Commit 6e8d6f429d (parent ca96654d85).

4 changed files with 60 additions and 33 deletions


@@ -472,6 +472,7 @@ class SequentialFile {
 // A file abstraction for randomly reading the contents of a file.
 class RandomAccessFile {
  public:
   RandomAccessFile() { }
   virtual ~RandomAccessFile();
@@ -488,6 +489,11 @@ class RandomAccessFile {
   virtual Status Read(uint64_t offset, size_t n, Slice* result,
                       char* scratch) const = 0;
 
+  // Readahead the file starting from offset by n bytes for caching.
+  virtual Status Prefetch(uint64_t offset, size_t n) {
+    return Status::OK();
+  }
+
   // Used by the file_reader_writer to decide if the ReadAhead wrapper
   // should simply forward the call and do not enact buffering or locking.
   virtual bool ShouldForwardRawRequest() const {
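The hunks above (include/rocksdb/env.h) add Prefetch() to the RandomAccessFile interface with a no-op default that returns Status::OK(), so every existing Env backend keeps compiling and behaving as before; a backend overrides it only if it can do something useful with the hint. As an illustration only (this class is not part of the commit, and it is not RocksDB's real PosixRandomAccessFile), a POSIX-style file could forward the hint to the kernel with posix_fadvise:

// Illustrative only: a hypothetical POSIX-backed RandomAccessFile showing how
// an Env implementation might honor the new Prefetch() hint.
#include <fcntl.h>
#include <unistd.h>

#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"

namespace rocksdb {

class FadviseRandomAccessFile : public RandomAccessFile {
 public:
  explicit FadviseRandomAccessFile(int fd) : fd_(fd) {}
  virtual ~FadviseRandomAccessFile() { close(fd_); }

  virtual Status Read(uint64_t offset, size_t n, Slice* result,
                      char* scratch) const override {
    ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
    if (r < 0) {
      return Status::IOError("pread failed");
    }
    *result = Slice(scratch, static_cast<size_t>(r));
    return Status::OK();
  }

  // Ask the kernel to start readahead for [offset, offset + n). The hint is
  // advisory; a caller can ignore a failure and fall back to plain Read().
  virtual Status Prefetch(uint64_t offset, size_t n) override {
    int r = posix_fadvise(fd_, static_cast<off_t>(offset),
                          static_cast<off_t>(n), POSIX_FADV_WILLNEED);
    return r == 0 ? Status::OK() : Status::IOError("posix_fadvise failed");
  }

 private:
  int fd_;
};

}  // namespace rocksdb

Because Prefetch() is advisory, callers are expected to tolerate failures and simply fall back to ordinary Read() calls.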


@@ -530,7 +530,12 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
   table_reader->reset();
 
   Footer footer;
-  auto s = ReadFooterFromFile(file.get(), file_size, &footer,
+
+  // Before read footer, readahead backwards to prefetch data
+  Status s =
+      file->Prefetch((file_size < 512 * 1024 ? 0 : file_size - 512 * 1024),
+                     512 * 1024 /* 512 KB prefetching */);
+  s = ReadFooterFromFile(file.get(), file_size, &footer,
                          kBlockBasedTableMagicNumber);
   if (!s.ok()) {
     return s;
@@ -541,8 +546,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
                    "version of RocksDB?");
   }
 
-  // We've successfully read the footer and the index block: we're
-  // ready to serve requests.
+  // We've successfully read the footer. We are ready to serve requests.
   // Better not mutate rep_ after the creation. eg. internal_prefix_transform
   // raw pointer will be used to create HashIndexReader, whose reset may
   // access a dangling pointer.
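In table/block_based_table_reader.cc, BlockBasedTable::Open() now prefetches the tail of the file before parsing the footer. The block-based SST format stores data blocks first and the meta blocks, index block, and footer at the very end, so the small reads issued while opening a table cluster near the end of the file; warming the last 512 KB ("readahead backwards") turns them into buffered hits. Note also that the Status returned by Prefetch() is immediately overwritten by the footer read, so a failed prefetch does not fail Open(). A minimal sketch of the same clamp arithmetic, with hypothetical names (kTailBytes, TailPrefetchRange):

#include <cstdint>
#include <utility>

// Sketch of the clamp used above (names are illustrative): prefetch the last
// kTailBytes of the file, starting at offset 0 when the file is shorter.
constexpr uint64_t kTailBytes = 512 * 1024;  // 512 KB, as in the diff

// Returns {offset, length} for the tail prefetch request.
std::pair<uint64_t, uint64_t> TailPrefetchRange(uint64_t file_size) {
  uint64_t offset = (file_size < kTailBytes) ? 0 : file_size - kTailBytes;
  // The length stays at kTailBytes even for small files; the request may run
  // past EOF, and the underlying Read() then just returns fewer bytes.
  return {offset, kTailBytes};
}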


@@ -472,7 +472,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
     // complitely or partially in the buffer
     // If it's completely cached, including end of file case when offset + n is
     // greater than EOF, return
-    if (TryReadFromCache_(offset, n, &cached_len, scratch) &&
+    if (TryReadFromCache(offset, n, &cached_len, scratch) &&
         (cached_len == n ||
          // End of file
          buffer_len_ < readahead_size_)) {
@@ -484,35 +484,35 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
     // chunk_offset equals to advanced_offset
     size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
     Slice readahead_result;
-    Status s = file_->Read(chunk_offset, readahead_size_, &readahead_result,
-                           buffer_.BufferStart());
-    if (!s.ok()) {
-      return s;
-    }
-    // In the case of cache miss, i.e. when cached_len equals 0, an offset can
-    // exceed the file end position, so the following check is required
-    if (advanced_offset < chunk_offset + readahead_result.size()) {
-      // In the case of cache miss, the first chunk_padding bytes in buffer_ are
-      // stored for alignment only and must be skipped
-      size_t chunk_padding = advanced_offset - chunk_offset;
-      auto remaining_len =
-          std::min(readahead_result.size() - chunk_padding, n - cached_len);
-      memcpy(scratch + cached_len, readahead_result.data() + chunk_padding,
-             remaining_len);
-      *result = Slice(scratch, cached_len + remaining_len);
-    } else {
-      *result = Slice(scratch, cached_len);
-    }
-
-    if (readahead_result.data() == buffer_.BufferStart()) {
-      buffer_offset_ = chunk_offset;
-      buffer_len_ = readahead_result.size();
-    } else {
-      buffer_len_ = 0;
+
+    Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
+    if (s.ok()) {
+      // In the case of cache miss, i.e. when cached_len equals 0, an offset can
+      // exceed the file end position, so the following check is required
+      if (advanced_offset < chunk_offset + buffer_len_) {
+        // In the case of cache miss, the first chunk_padding bytes in buffer_
+        // are
+        // stored for alignment only and must be skipped
+        size_t chunk_padding = advanced_offset - chunk_offset;
+        auto remaining_len =
+            std::min(buffer_len_ - chunk_padding, n - cached_len);
+        memcpy(scratch + cached_len, buffer_.BufferStart() + chunk_padding,
+               remaining_len);
+        *result = Slice(scratch, cached_len + remaining_len);
+      } else {
+        *result = Slice(scratch, cached_len);
+      }
     }
+    return s;
+  }
 
-    return Status::OK();
+  virtual Status Prefetch(uint64_t offset, size_t n) override {
+    size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset);
+    if (prefetch_offset == buffer_offset_) {
+      return Status::OK();
+    }
+    return ReadIntoBuffer(prefetch_offset, offset - prefetch_offset + n);
   }
 
   virtual size_t GetUniqueId(char* id, size_t max_size) const override {
     return file_->GetUniqueId(id, max_size);
@@ -529,7 +529,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
   }
 
  private:
-  bool TryReadFromCache_(uint64_t offset, size_t n, size_t* cached_len,
+  bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
                          char* scratch) const {
     if (offset < buffer_offset_ || offset >= buffer_offset_ + buffer_len_) {
       *cached_len = 0;
@@ -542,6 +542,19 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
     return true;
   }
 
+  Status ReadIntoBuffer(uint64_t offset, size_t n) const {
+    if (n > buffer_.Capacity()) {
+      n = buffer_.Capacity();
+    }
+    Slice result;
+    Status s = file_->Read(offset, n, &result, buffer_.BufferStart());
+    if (s.ok()) {
+      buffer_offset_ = offset;
+      buffer_len_ = result.size();
+    }
+    return s;
+  }
+
   std::unique_ptr<RandomAccessFile> file_;
   const size_t alignment_;
   size_t readahead_size_;
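In util/file_reader_writer.cc, the buffered readahead wrapper gains Prefetch() and a shared ReadIntoBuffer() helper: the requested offset is truncated down to an alignment boundary, the internal buffer is filled from there (capped at buffer_.Capacity()), and later Read() calls are answered from the cache via the renamed TryReadFromCache(). Below is a tiny self-contained sketch of the alignment arithmetic; TruncateToPageBoundaryDemo is a stand-in written under the assumption that the real TruncateToPageBoundary rounds an offset down to a multiple of the alignment, which is what its use in the diff implies:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Assumed behavior of TruncateToPageBoundary: round `offset` down to the
// nearest multiple of `page_size`, so buffered reads start on an aligned
// boundary (required for direct IO and friendly to the OS page cache).
inline uint64_t TruncateToPageBoundaryDemo(size_t page_size, uint64_t offset) {
  return offset - (offset % page_size);
}

int main() {
  const size_t alignment = 4096;
  const uint64_t offset = 10000;   // caller wants data starting here
  const size_t n = 3000;           // and this many bytes

  uint64_t prefetch_offset = TruncateToPageBoundaryDemo(alignment, offset);
  // Read enough to cover [offset, offset + n) starting from the aligned spot,
  // mirroring ReadIntoBuffer(prefetch_offset, offset - prefetch_offset + n).
  uint64_t len = offset - prefetch_offset + n;

  assert(prefetch_offset == 8192);
  assert(len == 1808 + 3000);
  return 0;
}

One design consequence visible in the diff: because Prefetch() returns early when prefetch_offset already equals buffer_offset_, repeated hints for the same aligned chunk cost nothing.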


@@ -92,6 +92,10 @@ class RandomAccessFileReader {
   Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
 
+  Status Prefetch(uint64_t offset, size_t n) const {
+    return file_->Prefetch(offset, n);
+  }
+
   RandomAccessFile* file() { return file_.get(); }
 
   bool use_direct_io() const { return file_->use_direct_io(); }
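Finally, util/file_reader_writer.h exposes the hint on RandomAccessFileReader by forwarding to the wrapped file, which is the path BlockBasedTable::Open() calls through. A hedged usage sketch (the helper name and structure are hypothetical, not part of the commit) of the resulting pattern, hinting a region once and then issuing several small reads against it:

#include <algorithm>

#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/file_reader_writer.h"

namespace rocksdb {

// Hypothetical helper: hint a whole region once, then issue several small
// reads that are now likely to hit the readahead buffer or the OS page
// cache. `scratch` must hold at least block_len bytes.
Status ReadManySmallBlocks(RandomAccessFileReader* reader, uint64_t region_off,
                           size_t region_len, size_t block_len, char* scratch) {
  Status s = reader->Prefetch(region_off, region_len);  // best-effort hint
  for (size_t done = 0; done < region_len; done += block_len) {
    Slice block;
    s = reader->Read(region_off + done, std::min(block_len, region_len - done),
                     &block, scratch);
    if (!s.ok()) {
      return s;
    }
    // ... consume `block` ...
  }
  return Status::OK();
}

}  // namespace rocksdb

Whether the hint materializes as a filled ReadaheadRandomAccessFile buffer or as OS-level readahead depends entirely on the RandomAccessFile implementation behind the reader.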