mirror of https://github.com/facebook/rocksdb.git
readahead backwards from sst end
Summary: prefetch some data from the end of the file for each compaction to reduce IO. Closes https://github.com/facebook/rocksdb/pull/2149 Differential Revision: D4880576 Pulled By: lightmark fbshipit-source-id: aa767cd1afc84c541837fbf1ad6c0d45b34d3932
This commit is contained in:
parent
ca96654d85
commit
6e8d6f429d
|
@ -472,6 +472,7 @@ class SequentialFile {
|
||||||
// A file abstraction for randomly reading the contents of a file.
|
// A file abstraction for randomly reading the contents of a file.
|
||||||
class RandomAccessFile {
|
class RandomAccessFile {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RandomAccessFile() { }
|
RandomAccessFile() { }
|
||||||
virtual ~RandomAccessFile();
|
virtual ~RandomAccessFile();
|
||||||
|
|
||||||
|
@ -488,6 +489,11 @@ class RandomAccessFile {
|
||||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||||
char* scratch) const = 0;
|
char* scratch) const = 0;
|
||||||
|
|
||||||
|
// Read ahead n bytes of the file, starting at offset, for caching.
|
||||||
|
virtual Status Prefetch(uint64_t offset, size_t n) {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
// Used by the file_reader_writer to decide if the ReadAhead wrapper
|
// Used by the file_reader_writer to decide if the ReadAhead wrapper
|
||||||
// should simply forward the call and not enact buffering or locking.
|
// should simply forward the call and not enact buffering or locking.
|
||||||
virtual bool ShouldForwardRawRequest() const {
|
virtual bool ShouldForwardRawRequest() const {
|
||||||
|
|
|
@ -530,7 +530,12 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||||
table_reader->reset();
|
table_reader->reset();
|
||||||
|
|
||||||
Footer footer;
|
Footer footer;
|
||||||
auto s = ReadFooterFromFile(file.get(), file_size, &footer,
|
|
||||||
|
// Before reading the footer, read ahead backwards from the end of the file to prefetch data
|
||||||
|
Status s =
|
||||||
|
file->Prefetch((file_size < 512 * 1024 ? 0 : file_size - 512 * 1024),
|
||||||
|
512 * 1024 /* 512 KB prefetching */);
|
||||||
|
s = ReadFooterFromFile(file.get(), file_size, &footer,
|
||||||
kBlockBasedTableMagicNumber);
|
kBlockBasedTableMagicNumber);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return s;
|
return s;
|
||||||
|
@ -541,8 +546,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||||
"version of RocksDB?");
|
"version of RocksDB?");
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've successfully read the footer and the index block: we're
|
// We've successfully read the footer. We are ready to serve requests.
|
||||||
// ready to serve requests.
|
|
||||||
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
|
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
|
||||||
// raw pointer will be used to create HashIndexReader, whose reset may
|
// raw pointer will be used to create HashIndexReader, whose reset may
|
||||||
// access a dangling pointer.
|
// access a dangling pointer.
|
||||||
|
|
|
@ -472,7 +472,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||||
// completely or partially in the buffer
|
// completely or partially in the buffer
|
||||||
// If it's completely cached, including end of file case when offset + n is
|
// If it's completely cached, including end of file case when offset + n is
|
||||||
// greater than EOF, return
|
// greater than EOF, return
|
||||||
if (TryReadFromCache_(offset, n, &cached_len, scratch) &&
|
if (TryReadFromCache(offset, n, &cached_len, scratch) &&
|
||||||
(cached_len == n ||
|
(cached_len == n ||
|
||||||
// End of file
|
// End of file
|
||||||
buffer_len_ < readahead_size_)) {
|
buffer_len_ < readahead_size_)) {
|
||||||
|
@ -484,34 +484,34 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||||
// chunk_offset equals to advanced_offset
|
// chunk_offset equals to advanced_offset
|
||||||
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
||||||
Slice readahead_result;
|
Slice readahead_result;
|
||||||
Status s = file_->Read(chunk_offset, readahead_size_, &readahead_result,
|
|
||||||
buffer_.BufferStart());
|
|
||||||
if (!s.ok()) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
|
||||||
// exceed the file end position, so the following check is required
|
|
||||||
if (advanced_offset < chunk_offset + readahead_result.size()) {
|
|
||||||
// In the case of cache miss, the first chunk_padding bytes in buffer_ are
|
|
||||||
// stored for alignment only and must be skipped
|
|
||||||
size_t chunk_padding = advanced_offset - chunk_offset;
|
|
||||||
auto remaining_len =
|
|
||||||
std::min(readahead_result.size() - chunk_padding, n - cached_len);
|
|
||||||
memcpy(scratch + cached_len, readahead_result.data() + chunk_padding,
|
|
||||||
remaining_len);
|
|
||||||
*result = Slice(scratch, cached_len + remaining_len);
|
|
||||||
} else {
|
|
||||||
*result = Slice(scratch, cached_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (readahead_result.data() == buffer_.BufferStart()) {
|
Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
|
||||||
buffer_offset_ = chunk_offset;
|
if (s.ok()) {
|
||||||
buffer_len_ = readahead_result.size();
|
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
||||||
} else {
|
// exceed the file end position, so the following check is required
|
||||||
buffer_len_ = 0;
|
if (advanced_offset < chunk_offset + buffer_len_) {
|
||||||
|
// In the case of a cache miss, the first chunk_padding bytes in
|
||||||
|
// buffer_ are stored for alignment only and must be skipped
|
||||||
|
// when copying the remainder into scratch
|
||||||
|
size_t chunk_padding = advanced_offset - chunk_offset;
|
||||||
|
auto remaining_len =
|
||||||
|
std::min(buffer_len_ - chunk_padding, n - cached_len);
|
||||||
|
memcpy(scratch + cached_len, buffer_.BufferStart() + chunk_padding,
|
||||||
|
remaining_len);
|
||||||
|
*result = Slice(scratch, cached_len + remaining_len);
|
||||||
|
} else {
|
||||||
|
*result = Slice(scratch, cached_len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
return Status::OK();
|
virtual Status Prefetch(uint64_t offset, size_t n) override {
|
||||||
|
size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset);
|
||||||
|
if (prefetch_offset == buffer_offset_) {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
return ReadIntoBuffer(prefetch_offset, offset - prefetch_offset + n);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override {
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override {
|
||||||
|
@ -529,7 +529,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool TryReadFromCache_(uint64_t offset, size_t n, size_t* cached_len,
|
bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
|
||||||
char* scratch) const {
|
char* scratch) const {
|
||||||
if (offset < buffer_offset_ || offset >= buffer_offset_ + buffer_len_) {
|
if (offset < buffer_offset_ || offset >= buffer_offset_ + buffer_len_) {
|
||||||
*cached_len = 0;
|
*cached_len = 0;
|
||||||
|
@ -542,15 +542,28 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status ReadIntoBuffer(uint64_t offset, size_t n) const {
|
||||||
|
if (n > buffer_.Capacity()) {
|
||||||
|
n = buffer_.Capacity();
|
||||||
|
}
|
||||||
|
Slice result;
|
||||||
|
Status s = file_->Read(offset, n, &result, buffer_.BufferStart());
|
||||||
|
if (s.ok()) {
|
||||||
|
buffer_offset_ = offset;
|
||||||
|
buffer_len_ = result.size();
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<RandomAccessFile> file_;
|
std::unique_ptr<RandomAccessFile> file_;
|
||||||
const size_t alignment_;
|
const size_t alignment_;
|
||||||
size_t readahead_size_;
|
size_t readahead_size_;
|
||||||
const bool forward_calls_;
|
const bool forward_calls_;
|
||||||
|
|
||||||
mutable std::mutex lock_;
|
mutable std::mutex lock_;
|
||||||
mutable AlignedBuffer buffer_;
|
mutable AlignedBuffer buffer_;
|
||||||
mutable uint64_t buffer_offset_;
|
mutable uint64_t buffer_offset_;
|
||||||
mutable size_t buffer_len_;
|
mutable size_t buffer_len_;
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,10 @@ class RandomAccessFileReader {
|
||||||
|
|
||||||
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
|
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
|
||||||
|
|
||||||
|
Status Prefetch(uint64_t offset, size_t n) const {
|
||||||
|
return file_->Prefetch(offset, n);
|
||||||
|
}
|
||||||
|
|
||||||
RandomAccessFile* file() { return file_.get(); }
|
RandomAccessFile* file() { return file_.get(); }
|
||||||
|
|
||||||
bool use_direct_io() const { return file_->use_direct_io(); }
|
bool use_direct_io() const { return file_->use_direct_io(); }
|
||||||
|
|
Loading…
Reference in New Issue