mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 09:36:17 +00:00
e628f59e87
Summary: This PR does the following: -> Creates a WinFileSystem class. This class is the Windows equivalent of the PosixFileSystem and will be used on Windows systems. -> Introduces a CustomEnv class. A CustomEnv is an Env that takes a FileSystem as constructor argument. I believe there will only ever be two implementations of this class (PosixEnv and WinEnv). There is still a CustomEnvWrapper class that takes an Env and a FileSystem and wraps the Env calls with the input Env but uses the FileSystem for the FileSystem calls -> Eliminates the public uses of the LegacyFileSystemWrapper. With this change in place, there are effectively the following patterns of Env: - "Base Env classes" (PosixEnv, WinEnv). These classes implement the core Env functions (e.g. Threads) and have a hard-coded input FileSystem. These classes inherit from CompositeEnv, implement the core Env functions (threads) and delegate the FileSystem-like calls to the input file system. - Wrapped Composite Env classes (MemEnv). These classes take in an Env and a FileSystem. The core env functions are re-directed to the wrapped env. The file system calls are redirected to the input file system - Legacy Wrapped Env classes. These classes take in an Env input (but no FileSystem). The core env functions are re-directed to the wrapped env. A "Legacy File System" is created using this env and the file system calls directed to the env itself. With these changes in place, the PosixEnv becomes a singleton -- there is only ever one created. Any other use of the PosixEnv is via another wrapped env. This cleans up some of the issues with the env construction and destruction. Additionally, there were places in the code that required had an Env when they required a FileSystem. Many of these places would wrap the Env with a LegacyFileSystemWrapper instead of using the env->GetFileSystem(). These places were changed, thereby removing layers of additional redirection (LegacyFileSystem --> Env --> Env::FileSystem). Pull Request resolved: https://github.com/facebook/rocksdb/pull/7703 Reviewed By: zhichao-cao Differential Revision: D25762190 Pulled By: anand1976 fbshipit-source-id: 1a088e97fc916f28ac69c149cd1dcad0ab31704b
495 lines
17 KiB
C++
495 lines
17 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
#pragma once
|
|
|
|
#include <stdint.h>
|
|
#include <windows.h>
|
|
|
|
#include <mutex>
|
|
#include <string>
|
|
|
|
#include "rocksdb/file_system.h"
|
|
#include "rocksdb/status.h"
|
|
#include "util/aligned_buffer.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
namespace port {
|
|
|
|
std::string GetWindowsErrSz(DWORD err);
|
|
|
|
inline IOStatus IOErrorFromWindowsError(const std::string& context, DWORD err) {
|
|
return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
|
|
? IOStatus::NoSpace(context, GetWindowsErrSz(err))
|
|
: ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
|
|
? IOStatus::PathNotFound(context, GetWindowsErrSz(err))
|
|
: IOStatus::IOError(context, GetWindowsErrSz(err));
|
|
}
|
|
|
|
inline IOStatus IOErrorFromLastWindowsError(const std::string& context) {
|
|
return IOErrorFromWindowsError(context, GetLastError());
|
|
}
|
|
|
|
inline IOStatus IOError(const std::string& context, int err_number) {
|
|
return (err_number == ENOSPC)
|
|
? IOStatus::NoSpace(context, strerror(err_number))
|
|
: (err_number == ENOENT)
|
|
? IOStatus::PathNotFound(context, strerror(err_number))
|
|
: IOStatus::IOError(context, strerror(err_number));
|
|
}
|
|
|
|
class WinFileData;
|
|
|
|
IOStatus pwrite(const WinFileData* file_data, const Slice& data,
|
|
uint64_t offset, size_t& bytes_written);
|
|
|
|
IOStatus pread(const WinFileData* file_data, char* src, size_t num_bytes,
|
|
uint64_t offset, size_t& bytes_read);
|
|
|
|
IOStatus fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
|
|
|
|
IOStatus ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
|
|
|
|
size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
|
|
|
|
class WinFileData {
|
|
protected:
|
|
const std::string filename_;
|
|
HANDLE hFile_;
|
|
// If true, the I/O issued would be direct I/O which the buffer
|
|
// will need to be aligned (not sure there is a guarantee that the buffer
|
|
// passed in is aligned).
|
|
const bool use_direct_io_;
|
|
|
|
public:
|
|
// We want this class be usable both for inheritance (prive
|
|
// or protected) and for containment so __ctor and __dtor public
|
|
WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
|
|
: filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
|
|
|
|
virtual ~WinFileData() { this->CloseFile(); }
|
|
|
|
bool CloseFile() {
|
|
bool result = true;
|
|
|
|
if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
|
|
result = ::CloseHandle(hFile_);
|
|
assert(result);
|
|
hFile_ = NULL;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
const std::string& GetName() const { return filename_; }
|
|
|
|
HANDLE GetFileHandle() const { return hFile_; }
|
|
|
|
bool use_direct_io() const { return use_direct_io_; }
|
|
|
|
WinFileData(const WinFileData&) = delete;
|
|
WinFileData& operator=(const WinFileData&) = delete;
|
|
};
|
|
|
|
class WinSequentialFile : protected WinFileData, public FSSequentialFile {
|
|
// Override for behavior change when creating a custom env
|
|
virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
|
|
uint64_t offset,
|
|
size_t& bytes_read) const;
|
|
|
|
public:
|
|
WinSequentialFile(const std::string& fname, HANDLE f,
|
|
const FileOptions& options);
|
|
|
|
~WinSequentialFile();
|
|
|
|
WinSequentialFile(const WinSequentialFile&) = delete;
|
|
WinSequentialFile& operator=(const WinSequentialFile&) = delete;
|
|
|
|
IOStatus Read(size_t n, const IOOptions& options, Slice* result,
|
|
char* scratch, IODebugContext* dbg) override;
|
|
IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
|
|
Slice* result, char* scratch,
|
|
IODebugContext* dbg) override;
|
|
|
|
IOStatus Skip(uint64_t n) override;
|
|
|
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
|
|
virtual bool use_direct_io() const override {
|
|
return WinFileData::use_direct_io();
|
|
}
|
|
};
|
|
|
|
// mmap() based random-access
|
|
class WinMmapReadableFile : private WinFileData, public FSRandomAccessFile {
|
|
HANDLE hMap_;
|
|
|
|
const void* mapped_region_;
|
|
const size_t length_;
|
|
|
|
public:
|
|
// mapped_region_[0,length-1] contains the mmapped contents of the file.
|
|
WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
|
|
const void* mapped_region, size_t length);
|
|
|
|
~WinMmapReadableFile();
|
|
|
|
WinMmapReadableFile(const WinMmapReadableFile&) = delete;
|
|
WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
|
|
|
|
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
|
|
Slice* result, char* scratch,
|
|
IODebugContext* dbg) const override;
|
|
|
|
virtual IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
};
|
|
|
|
// We preallocate and use memcpy to append new
|
|
// data to the file. This is safe since we either properly close the
|
|
// file before reading from it, or for log files, the reading code
|
|
// knows enough to skip zero suffixes.
|
|
class WinMmapFile : private WinFileData, public FSWritableFile {
|
|
private:
|
|
HANDLE hMap_;
|
|
|
|
const size_t page_size_; // We flush the mapping view in page_size
|
|
// increments. We may decide if this is a memory
|
|
// page size or SSD page size
|
|
const size_t
|
|
allocation_granularity_; // View must start at such a granularity
|
|
|
|
size_t reserved_size_; // Preallocated size
|
|
|
|
size_t mapping_size_; // The max size of the mapping object
|
|
// we want to guess the final file size to minimize the remapping
|
|
size_t view_size_; // How much memory to map into a view at a time
|
|
|
|
char* mapped_begin_; // Must begin at the file offset that is aligned with
|
|
// allocation_granularity_
|
|
char* mapped_end_;
|
|
char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
|
|
char* last_sync_; // Where have we synced up to
|
|
|
|
uint64_t file_offset_; // Offset of mapped_begin_ in file
|
|
|
|
// Do we have unsynced writes?
|
|
bool pending_sync_;
|
|
|
|
// Can only truncate or reserve to a sector size aligned if
|
|
// used on files that are opened with Unbuffered I/O
|
|
IOStatus TruncateFile(uint64_t toSize);
|
|
|
|
IOStatus UnmapCurrentRegion();
|
|
|
|
IOStatus MapNewRegion(const IOOptions& options, IODebugContext* dbg);
|
|
|
|
virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
|
|
|
|
public:
|
|
WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
|
|
size_t allocation_granularity, const FileOptions& options);
|
|
|
|
~WinMmapFile();
|
|
|
|
WinMmapFile(const WinMmapFile&) = delete;
|
|
WinMmapFile& operator=(const WinMmapFile&) = delete;
|
|
|
|
IOStatus Append(const Slice& data, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
IOStatus Append(const Slice& data, const IOOptions& opts,
|
|
const DataVerificationInfo& /* verification_info */,
|
|
IODebugContext* dbg) override {
|
|
return Append(data, opts, dbg);
|
|
}
|
|
|
|
// Means Close() will properly take care of truncate
|
|
// and it does not need any additional information
|
|
IOStatus Truncate(uint64_t size, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
|
|
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
// Flush only data
|
|
IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
/**
|
|
* Flush data as well as metadata to stable storage.
|
|
*/
|
|
IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
/**
|
|
* Get the size of valid data in the file. This will not match the
|
|
* size that is returned from the filesystem because we use mmap
|
|
* to extend file by map_size every time.
|
|
*/
|
|
uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
|
|
IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
};
|
|
|
|
class WinRandomAccessImpl {
|
|
protected:
|
|
WinFileData* file_base_;
|
|
size_t alignment_;
|
|
|
|
// Override for behavior change when creating a custom env
|
|
virtual IOStatus PositionedReadInternal(char* src, size_t numBytes,
|
|
uint64_t offset,
|
|
size_t& bytes_read) const;
|
|
|
|
WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
|
|
const FileOptions& options);
|
|
|
|
virtual ~WinRandomAccessImpl() {}
|
|
|
|
IOStatus ReadImpl(uint64_t offset, size_t n, Slice* result,
|
|
char* scratch) const;
|
|
|
|
size_t GetAlignment() const { return alignment_; }
|
|
|
|
public:
|
|
WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
|
|
WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
|
|
};
|
|
|
|
// pread() based random-access
|
|
class WinRandomAccessFile
|
|
: private WinFileData,
|
|
protected WinRandomAccessImpl, // Want to be able to override
|
|
// PositionedReadInternal
|
|
public FSRandomAccessFile {
|
|
public:
|
|
WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
|
const FileOptions& options);
|
|
|
|
~WinRandomAccessFile();
|
|
|
|
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
|
|
Slice* result, char* scratch,
|
|
IODebugContext* dbg) const override;
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
|
|
virtual bool use_direct_io() const override {
|
|
return WinFileData::use_direct_io();
|
|
}
|
|
|
|
IOStatus InvalidateCache(size_t offset, size_t length) override;
|
|
|
|
virtual size_t GetRequiredBufferAlignment() const override;
|
|
};
|
|
|
|
// This is a sequential write class. It has been mimicked (as others) after
|
|
// the original Posix class. We add support for unbuffered I/O on windows as
|
|
// well
|
|
// we utilize the original buffer as an alignment buffer to write directly to
|
|
// file with no buffering.
|
|
// No buffering requires that the provided buffer is aligned to the physical
|
|
// sector size (SSD page size) and
|
|
// that all SetFilePointer() operations to occur with such an alignment.
|
|
// We thus always write in sector/page size increments to the drive and leave
|
|
// the tail for the next write OR for Close() at which point we pad with zeros.
|
|
// No padding is required for
|
|
// buffered access.
|
|
class WinWritableImpl {
|
|
protected:
|
|
WinFileData* file_data_;
|
|
const uint64_t alignment_;
|
|
uint64_t
|
|
next_write_offset_; // Needed because Windows does not support O_APPEND
|
|
uint64_t reservedsize_; // how far we have reserved space
|
|
|
|
virtual IOStatus PreallocateInternal(uint64_t spaceToReserve);
|
|
|
|
WinWritableImpl(WinFileData* file_data, size_t alignment);
|
|
|
|
~WinWritableImpl() {}
|
|
|
|
uint64_t GetAlignement() const { return alignment_; }
|
|
|
|
IOStatus AppendImpl(const Slice& data);
|
|
|
|
// Requires that the data is aligned as specified by
|
|
// GetRequiredBufferAlignment()
|
|
IOStatus PositionedAppendImpl(const Slice& data, uint64_t offset);
|
|
|
|
IOStatus TruncateImpl(uint64_t size);
|
|
|
|
IOStatus CloseImpl();
|
|
|
|
IOStatus SyncImpl(const IOOptions& options, IODebugContext* dbg);
|
|
|
|
uint64_t GetFileNextWriteOffset() {
|
|
// Double accounting now here with WritableFileWriter
|
|
// and this size will be wrong when unbuffered access is used
|
|
// but tests implement their own writable files and do not use
|
|
// WritableFileWrapper
|
|
// so we need to squeeze a square peg through
|
|
// a round hole here.
|
|
return next_write_offset_;
|
|
}
|
|
|
|
IOStatus AllocateImpl(uint64_t offset, uint64_t len);
|
|
|
|
public:
|
|
WinWritableImpl(const WinWritableImpl&) = delete;
|
|
WinWritableImpl& operator=(const WinWritableImpl&) = delete;
|
|
};
|
|
|
|
class WinWritableFile : private WinFileData,
|
|
protected WinWritableImpl,
|
|
public FSWritableFile {
|
|
public:
|
|
WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
|
size_t capacity, const FileOptions& options);
|
|
|
|
~WinWritableFile();
|
|
|
|
IOStatus Append(const Slice& data, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
IOStatus Append(const Slice& data, const IOOptions& opts,
|
|
const DataVerificationInfo& /* verification_info */,
|
|
IODebugContext* dbg) override {
|
|
return Append(data, opts, dbg);
|
|
}
|
|
|
|
// Requires that the data is aligned as specified by
|
|
// GetRequiredBufferAlignment()
|
|
IOStatus PositionedAppend(const Slice& data, uint64_t offset,
|
|
const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
IOStatus PositionedAppend(const Slice& data, uint64_t offset,
|
|
const IOOptions& opts,
|
|
const DataVerificationInfo& /* verification_info */,
|
|
IODebugContext* dbg) override {
|
|
return PositionedAppend(data, offset, opts, dbg);
|
|
}
|
|
|
|
// Need to implement this so the file is truncated correctly
|
|
// when buffered and unbuffered mode
|
|
IOStatus Truncate(uint64_t size, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
|
|
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
// write out the cached data to the OS cache
|
|
// This is now taken care of the WritableFileWriter
|
|
IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
virtual bool IsSyncThreadSafe() const override;
|
|
|
|
// Indicates if the class makes use of direct I/O
|
|
// Use PositionedAppend
|
|
virtual bool use_direct_io() const override;
|
|
|
|
virtual size_t GetRequiredBufferAlignment() const override;
|
|
|
|
uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
};
|
|
|
|
class WinRandomRWFile : private WinFileData,
|
|
protected WinRandomAccessImpl,
|
|
protected WinWritableImpl,
|
|
public FSRandomRWFile {
|
|
public:
|
|
WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
|
|
const FileOptions& options);
|
|
|
|
~WinRandomRWFile() {}
|
|
|
|
// Indicates if the class makes use of direct I/O
|
|
// If false you must pass aligned buffer to Write()
|
|
virtual bool use_direct_io() const override;
|
|
|
|
// Use the returned alignment value to allocate aligned
|
|
// buffer for Write() when use_direct_io() returns true
|
|
virtual size_t GetRequiredBufferAlignment() const override;
|
|
|
|
// Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
|
|
// Pass aligned buffer when use_direct_io() returns true.
|
|
IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options,
|
|
IODebugContext* dbg) override;
|
|
|
|
// Read up to `n` bytes starting from offset `offset` and store them in
|
|
// result, provided `scratch` size should be at least `n`.
|
|
// Returns Status::OK() on success.
|
|
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
|
|
Slice* result, char* scratch,
|
|
IODebugContext* dbg) const override;
|
|
|
|
IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override {
|
|
return Sync(options, dbg);
|
|
}
|
|
|
|
IOStatus Close(const IOOptions& options, IODebugContext* dbg) override;
|
|
};
|
|
|
|
class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
|
|
private:
|
|
HANDLE file_handle_;
|
|
HANDLE map_handle_;
|
|
|
|
public:
|
|
WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base,
|
|
size_t size)
|
|
: MemoryMappedFileBuffer(base, size),
|
|
file_handle_(file_handle),
|
|
map_handle_(map_handle) {}
|
|
~WinMemoryMappedBuffer() override;
|
|
};
|
|
|
|
class WinDirectory : public FSDirectory {
|
|
HANDLE handle_;
|
|
|
|
public:
|
|
explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
|
|
assert(handle_ != INVALID_HANDLE_VALUE);
|
|
}
|
|
~WinDirectory() { ::CloseHandle(handle_); }
|
|
IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override;
|
|
|
|
size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
};
|
|
|
|
class WinFileLock : public FileLock {
|
|
public:
|
|
explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
|
|
assert(hFile != NULL);
|
|
assert(hFile != INVALID_HANDLE_VALUE);
|
|
}
|
|
|
|
~WinFileLock();
|
|
|
|
private:
|
|
HANDLE hFile_;
|
|
};
|
|
} // namespace port
|
|
} // namespace ROCKSDB_NAMESPACE
|