b126b45bcc
This patch adds support for "asyncForce" which forces all pages to disk that were dirtied before the *previous* call to asyncForce. This method only applies to dirty pages in the Linux file cache (not in the Stasis buffer pool), and is used by new writeback logic that bounds the amount of such backlog that exists at any given time. This patch also changes writeback behavior; the dirty page table now sorts pages by pageid instead of LSN when it is invoked by writeback. When it is invoked by log truncation, it still sorts by LSN.
343 lines
13 KiB
C
343 lines
13 KiB
C
#ifndef IO_HANDLE_H
|
|
#define IO_HANDLE_H
|
|
#include <stasis/common.h>
|
|
|
|
/**
|
|
stasis_handle() is a macro that prepends a unique prefix to its
|
|
argument's function name. It's used to cope with namespace
|
|
collisions
|
|
|
|
@todo Do away with macros like this one.
|
|
*/
|
|
/* Example: stasis_handle(open_file) expands to stasis_handle_open_file. */
#define stasis_handle(name) stasis_handle_##name
|
|
|
|
/**
|
|
|
|
@file
|
|
|
|
Interface for I/O handle implementations.
|
|
|
|
This interface is designed to provide some extra features needed by
|
|
the buffer manager and the log, and to hide the operating system's
|
|
I/O interface from the rest of Stasis.
|
|
|
|
Handles are composable, and some features, such as log truncation and
|
|
non-blocking writes are not implemented by all file handle
|
|
implementations, and are instead supported by wrapping a file
|
|
handle that performs raw I/O with one that adds extra
|
|
functionality.
|
|
|
|
This file describes the minimum concurrency guarantees provided by
|
|
handle implementations. See the handle implementations'
|
|
documentation for more information about concurrency.
|
|
|
|
Each handle defines two sets of methods that read, write and append
|
|
to the file. The first set (read(), write() and append()) take a
|
|
buffer that is allocated by the caller. The second set
|
|
(read_buffer(), write_buffer() and append_buffer()) use memory that
|
|
is managed by the handle. Callers must explicitly release these
|
|
buffers by calling release_read_buffer() or release_write_buffer().
|
|
|
|
Finally, handles support truncation from the <i>beginning</i> of
|
|
the file, which is needed by the log manager. The off parameters
|
|
passed into functions are relative to the original start of the
|
|
file. Negative file offsets are reserved for
|
|
implementation-specific purposes.
|
|
|
|
|
|
|
|
Error handling:
|
|
|
|
read, write, append, open, release_read_buffer and
|
|
release_write_buffer return 0 on success, and an error code
|
|
otherwise. read_buffer() and write_buffer() return error codes via
|
|
the error field of the handles they produce.
|
|
|
|
An error that occurs while writing to the handle leaves the region
|
|
that was being written in an undefined state.
|
|
|
|
Errors in num_copies, num_copies_buffer, start_position, and end_position
|
|
are always unrecoverable, and return -1.
|
|
|
|
close returns 0 on success, or an error code otherwise. close
|
|
always frees the handle that was passed into it, regardless of
|
|
whether an error occurred.
|
|
|
|
Here are the meanings of the various error codes:
|
|
|
|
EDOM off is less than the beginning of the file (possibly due to
|
|
truncation).
|
|
|
|
EBADF an unrecoverable error occurred; the handle is no longer valid. The
|
|
error that caused this one is stored in the handle's error field.
|
|
|
|
Handle implementations may return other errors as appropriate.
|
|
|
|
|
|
@todo rename *_buffer() functions to get_*_buffer()
|
|
|
|
*/
|
|
|
|
/**
|
|
This struct contains the function pointers that define handle
|
|
implementations. Implementations of the handle interface should
|
|
instantiate this struct, and set each function pointer accordingly.
|
|
The contents of the "impl" pointer is implementation defined.
|
|
*/
|
|
typedef struct stasis_handle_t {
|
|
/** Some handle implementations maintain their own internal buffers,
|
|
and must use memcpy in order to read or write from their
|
|
caller's buffers. The num_copies* functions provide perfomance
|
|
hints to the caller so that the more efficient set of methods
|
|
can be used.
|
|
|
|
@return the number of in-memory copies made when the caller
|
|
provides the buffer, or some other proxy for performance (higher
|
|
values are slower)
|
|
|
|
*/
|
|
int (*num_copies)(struct stasis_handle_t * h);
|
|
|
|
/** @see num_copies() */
|
|
int (*num_copies_buffer)(struct stasis_handle_t * h);
|
|
|
|
/** Close this handle, and release any associated resources. */
|
|
int (*close)(struct stasis_handle_t *);
|
|
|
|
/** Duplicate this handle. This is useful for performance hinting;
|
|
* sending sequential disk operations to different handles than random
|
|
* operations allows the kernel's prefetch algorithm to kick in.
|
|
*
|
|
* XXX calling dup on a handle, then calling truncate or append on the resulting handles has undefined semantics.
|
|
*
|
|
* @return a stasis_handle_t that should eventually have close() called on it.
|
|
*/
|
|
struct stasis_handle_t * (*dup)(struct stasis_handle_t *h);
|
|
/**
|
|
* Optimize the handle for sequential reads and writes.
|
|
*/
|
|
void (*enable_sequential_optimizations)(struct stasis_handle_t *h);
|
|
|
|
/** The offset of the byte after the end of the handle's data. */
|
|
lsn_t (*end_position)(struct stasis_handle_t * h);
|
|
|
|
/** Obtain a write buffer.
|
|
|
|
The behavior of calls that attempt to access this region before
|
|
release_write_buffer() returns is undefined.
|
|
|
|
@param h The handle
|
|
@param off The offset of the first byte in the write buffer.
|
|
@param len The length, in bytes, of the write buffer.
|
|
*/
|
|
struct stasis_write_buffer_t * (*write_buffer)(struct stasis_handle_t * h,
|
|
lsn_t off, lsn_t len);
|
|
/**
|
|
Release a write buffer and associated resources.
|
|
*/
|
|
int (*release_write_buffer)(struct stasis_write_buffer_t * w);
|
|
/**
|
|
Read a region of the file. Attempts to modify the region that is
|
|
being read will have undefined behavior until release_read_buffer
|
|
returns.
|
|
|
|
The behavior of calls that attempt to write to this region before
|
|
release_read_buffer() returns is undefined.
|
|
|
|
@param h The handle
|
|
@param off The offset of the first byte in the read buffer.
|
|
@param len The length, in bytes, of the read buffer.
|
|
|
|
*/
|
|
struct stasis_read_buffer_t * (*read_buffer)(struct stasis_handle_t * h,
|
|
lsn_t offset, lsn_t length);
|
|
/**
|
|
Release a read buffer and associated resources.
|
|
*/
|
|
int (*release_read_buffer)(struct stasis_read_buffer_t * r);
|
|
/**
|
|
Write data to the handle from memory managed by the caller. Once
|
|
write returns, the handle will reflect the update.
|
|
|
|
@param h The handle
|
|
@param off The position of the first byte to be written
|
|
@param dat A buffer containin the data to be written
|
|
@param len The number of bytes to be written
|
|
*/
|
|
int (*write)(struct stasis_handle_t * h, lsn_t off,
|
|
const byte * dat, lsn_t len);
|
|
/**
|
|
Read data from the file. The region may be safely written to
|
|
once read returns.
|
|
|
|
@param h The handle
|
|
@param off The position of the first byte to be written
|
|
@param dat A buffer containin the data to be written
|
|
@param len The number of bytes to be written
|
|
*/
|
|
int (*read)(struct stasis_handle_t * h,
|
|
lsn_t off, byte * buf, lsn_t len);
|
|
/**
|
|
Force any outstanding writes to disk. In progress writes (those
|
|
whose calls to write() or release_write_buffer() have not yet
|
|
returned) may or may not be forced to disk.
|
|
*/
|
|
int (*force)(struct stasis_handle_t * h);
|
|
/**
|
|
* Force any writes that were outstanding the last time this was called to
|
|
* disk. This does not force things like block allocation information, or
|
|
* issue a hardware write barrier, making it essentially useless for
|
|
* durability. However, it is useful as a performance hint, as it allows us
|
|
* to bound the number of data pages that are outstanding in Linux's I/O
|
|
* request queue, which means that we can use it to bound the latency of
|
|
* future force() and force_range() operations.
|
|
*
|
|
* This call is based on sync_file_range(2).
|
|
*/
|
|
int (*async_force)(struct stasis_handle_t * h);
|
|
int (*force_range)(struct stasis_handle_t * h, lsn_t start, lsn_t stop);
|
|
int (*fallocate)(struct stasis_handle_t * h, lsn_t off, lsn_t len);
|
|
/**
|
|
The handle's error flag; this passes errors to the caller when
|
|
they can't be returned directly.
|
|
*/
|
|
int error;
|
|
/**
|
|
Reserved for implementation specific data.
|
|
*/
|
|
void * impl;
|
|
|
|
} stasis_handle_t;
|
|
|
|
typedef struct stasis_write_buffer_t {
|
|
stasis_handle_t * h;
|
|
lsn_t off;
|
|
byte * buf;
|
|
lsn_t len;
|
|
void * impl;
|
|
int error;
|
|
} stasis_write_buffer_t;
|
|
|
|
typedef struct stasis_read_buffer_t {
|
|
stasis_handle_t * h;
|
|
lsn_t off;
|
|
const byte * buf;
|
|
lsn_t len;
|
|
void * impl;
|
|
int error;
|
|
} stasis_read_buffer_t;
|
|
|
|
/**
|
|
Open a handle that is backed by RAM
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_memory)(void);
|
|
/**
|
|
Open a handle that is backed by a file. This handle uses the unix
|
|
read(),write() I/O interfaces. Due to limitations in read() and
|
|
write(), it must hold a mutex during system calls, and therefore
|
|
cannot perform concurrent I/O.
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to have undefined behavior, but non-overlapping regions can be
|
|
concurrently accessed.
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
@param path The name of the file to be opened.
|
|
@param flags Flags to be passed to open() (eg O_CREAT)
|
|
@param perm The file permissions to be passed to open()
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_file)
|
|
(const char * path, int flags, int perm);
|
|
/**
|
|
Open a handle that is backed by a file. This handle uses pread()
|
|
and pwrite(). It never holds a mutex while perfoming I/O.
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to have undefined behavior, but non-overlapping regions can be
|
|
concurrently accessed.
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
@param path The name of the file to be opened.
|
|
@param flags Flags to be passed to open(). (eg O_CREAT)
|
|
@param perm The file permissions to be passed to open()
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_pfile)
|
|
(const char * path, int flags, int perm);
|
|
/**
|
|
Given a factory for creating "fast" and "slow" handles, provide a
|
|
handle that never makes callers wait for write requests to
|
|
complete. ("Never" is a strong word; callers will begin to block
|
|
if the supply of write buffers is depleted.)
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to are undefined, but non-overlapping regions can be concurrently
|
|
accessed.
|
|
|
|
@param slow_factory A callback function that returns a handle with
|
|
offset zero. These handles will be accessed
|
|
concurrently, but do not need to support
|
|
concurrent writes, or reads from regions that
|
|
are being written to. For performance reasons,
|
|
handles that cannot exploit concurrency should
|
|
probably be allocated from a pool (@see
|
|
open_file), while a single truely concurrent
|
|
handle (@see open_pfile) should suffice.
|
|
|
|
@param slow_factory_close A callback that will be called when this
|
|
Stasis handle is closed.
|
|
|
|
@param slow_factory_arg A pointer to data that will be passed into
|
|
slow_factory.
|
|
|
|
@param slow_force_once If zero, call force on each slow handle when
|
|
force is called. When 1, only call force on
|
|
one of the slow handles (this is useful when
|
|
slow_factory returns a singleton handle...).
|
|
|
|
@param fast_factory A callback function that returns a handle with
|
|
a given offest and length. The handle need not
|
|
support persistant storage, and is used as
|
|
write buffer space. Typically, fast handles
|
|
will be allocated out of a pool.
|
|
|
|
@param fast_factory_arg A pointer to data that will be passed into
|
|
fast_factory.
|
|
|
|
@param worker_thread_count This many workers will be spawned in
|
|
order to service this handle
|
|
|
|
@param buffer_size The maximum number of outstanding bytes to
|
|
buffer before blocking.
|
|
|
|
@param max_writes The maximum number of outstanding writes to allow
|
|
before blocking.
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_non_blocking)
|
|
(stasis_handle_t * (*slow_factory)(void * arg),
|
|
int (*slow_factory_close)(void * arg),
|
|
void * slow_factory_arg,
|
|
int slow_force_once,
|
|
stasis_handle_t * (*fast_factory)(lsn_t off, lsn_t len, void * arg),
|
|
void * fast_factory_arg, int worker_thread_count, lsn_t buffer_size,
|
|
int max_writes);
|
|
/**
|
|
@todo implement open_verifying in handle.h
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_verifying)(stasis_handle_t * h);
|
|
/**
|
|
Writes each action performed (and return values) to standard out.
|
|
|
|
@param h All handle operations will be forwarded to h.
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_debug)(stasis_handle_t * h);
|
|
/**
   NOTE(review): undocumented in the original header.  Judging by the
   name, this presumably combines handles a and b into a single
   mirrored (RAID 1 style) handle -- confirm against the
   implementation.
*/
stasis_handle_t *stasis_handle(open_raid1)(stasis_handle_t *a,
                                           stasis_handle_t *b);
|
|
/**
   NOTE(review): undocumented in the original header.  Presumably a
   factory that opens a raid1 handle using default arguments --
   confirm against the implementation.

   Declared with (void) so that this is a real prototype; an empty
   parameter list in C leaves the arguments unchecked.
*/
stasis_handle_t *stasis_handle_raid1_factory(void);
|
|
|
|
/**
|
|
* Open a Stasis file handle using default arguments.
|
|
*/
|
|
stasis_handle_t * stasis_handle_default_factory();
|
|
|
|
#endif
|