3a559a3b3c
------------------------------------------------------------------------ r1601 | sears.russell@gmail.com | 2012-03-20 18:43:00 -0400 (Tue, 20 Mar 2012) | 1 line commit bLSM bloom filter to stasis/util, which is where it really belongs ------------------------------------------------------------------------ r1600 | sears.russell@gmail.com | 2012-03-04 01:58:38 -0500 (Sun, 04 Mar 2012) | 1 line fix memory leak in skiplist unit test (now it is valgrind clean) ------------------------------------------------------------------------ r1599 | sears.russell@gmail.com | 2012-03-04 01:58:05 -0500 (Sun, 04 Mar 2012) | 1 line fix typo in finalize type ------------------------------------------------------------------------ r1598 | sears.russell@gmail.com | 2012-03-04 00:59:59 -0500 (Sun, 04 Mar 2012) | 1 line add comparator and finalizer parameters to skiplist constructor ------------------------------------------------------------------------ r1597 | sears.russell@gmail.com | 2012-03-03 18:23:16 -0500 (Sat, 03 Mar 2012) | 1 line bugfixes for skiplist ------------------------------------------------------------------------ r1596 | sears.russell@gmail.com | 2012-03-02 15:05:07 -0500 (Fri, 02 Mar 2012) | 1 line updated concurrentSkipList. 
Seeing strange crashes ------------------------------------------------------------------------ r1595 | sears.russell@gmail.com | 2012-03-01 16:51:59 -0500 (Thu, 01 Mar 2012) | 1 line add progress reports ------------------------------------------------------------------------ r1594 | sears.russell@gmail.com | 2012-02-28 13:17:05 -0500 (Tue, 28 Feb 2012) | 1 line experimental support for automatic logfile preallocation ------------------------------------------------------------------------ r1593 | sears.russell@gmail.com | 2012-02-28 12:10:01 -0500 (Tue, 28 Feb 2012) | 1 line add histogram reporting to rawIOPS benchmark ------------------------------------------------------------------------ r1592 | sears.russell@gmail.com | 2012-02-24 16:31:36 -0500 (Fri, 24 Feb 2012) | 1 line userspace raid 0 implementation ------------------------------------------------------------------------ r1591 | sears.russell@gmail.com | 2012-02-12 01:47:25 -0500 (Sun, 12 Feb 2012) | 1 line add skiplist unit test, fix compile warnings ------------------------------------------------------------------------ r1590 | sears.russell@gmail.com | 2012-02-12 00:52:52 -0500 (Sun, 12 Feb 2012) | 1 line fix compile error ------------------------------------------------------------------------ r1589 | sears.russell@gmail.com | 2012-02-12 00:50:21 -0500 (Sun, 12 Feb 2012) | 1 line fix some bugs in hazard.h surrounding thread list management and overruns of R under high contention ------------------------------------------------------------------------ r1588 | sears.russell@gmail.com | 2012-02-11 14:23:10 -0500 (Sat, 11 Feb 2012) | 1 line add hazard pointer for get_lock. 
It was implicitly blowing away the hazard pointer protecting y in the caller ------------------------------------------------------------------------ r1587 | sears.russell@gmail.com | 2012-02-10 18:51:25 -0500 (Fri, 10 Feb 2012) | 1 line fix null pointer bug ------------------------------------------------------------------------ r1586 | sears.russell@gmail.com | 2012-02-10 18:03:39 -0500 (Fri, 10 Feb 2012) | 1 line add simple refcounting scheme to concurrentSkipList. This solves the problem where a deleted node points to another deleted node, and we only have a hazard pointer for the first node. ------------------------------------------------------------------------ r1585 | sears.russell@gmail.com | 2012-02-10 14:19:14 -0500 (Fri, 10 Feb 2012) | 1 line add hazard pointers for update using the smallest free slot first. The old method left a race condition, since hazard_scan stops at the first null pointer. ------------------------------------------------------------------------ r1584 | sears.russell@gmail.com | 2012-02-10 02:45:30 -0500 (Fri, 10 Feb 2012) | 1 line add hazard pointers for update array ------------------------------------------------------------------------ r1583 | sears.russell@gmail.com | 2012-02-10 00:04:50 -0500 (Fri, 10 Feb 2012) | 1 line skiplist update: concurrent, but broken ------------------------------------------------------------------------ r1582 | sears.russell@gmail.com | 2012-02-09 17:44:27 -0500 (Thu, 09 Feb 2012) | 1 line skip list implementation. Not concurrent yet. 
------------------------------------------------------------------------ r1581 | sears.russell@gmail.com | 2012-02-08 13:33:29 -0500 (Wed, 08 Feb 2012) | 1 line Commit of a bunch of new, unused code: KISS random number generator, Hazard Pointers, SUX latches (untested) and bit twiddling for concurrent b-tree ------------------------------------------------------------------------ r1580 | sears.russell@gmail.com | 2012-01-17 19:17:37 -0500 (Tue, 17 Jan 2012) | 1 line fix typo ------------------------------------------------------------------------ r1579 | sears.russell@gmail.com | 2012-01-11 18:33:31 -0500 (Wed, 11 Jan 2012) | 1 line static build fixes for linux. hopefully these do not break macos... ------------------------------------------------------------------------ r1578 | sears.russell@gmail.com | 2012-01-09 19:13:34 -0500 (Mon, 09 Jan 2012) | 1 line fix cmake under linux ------------------------------------------------------------------------ r1577 | sears.russell@gmail.com | 2012-01-09 18:37:15 -0500 (Mon, 09 Jan 2012) | 1 line fix linux static binary compilation bugs ------------------------------------------------------------------------ r1576 | sears.russell | 2012-01-09 18:00:08 -0500 (Mon, 09 Jan 2012) | 1 line port to macos x ------------------------------------------------------------------------ r1575 | sears.russell | 2012-01-09 17:39:43 -0500 (Mon, 09 Jan 2012) | 1 line add missing _ from sync call name ------------------------------------------------------------------------ r1574 | sears.russell@gmail.com | 2012-01-09 14:26:31 -0500 (Mon, 09 Jan 2012) | 1 line add -rt flag to static builds ------------------------------------------------------------------------ r1573 | sears.russell@gmail.com | 2011-12-20 23:38:29 -0500 (Tue, 20 Dec 2011) | 1 line Simple makefile geared toward building libstasis.so and libstasis.a (and nothing else) ------------------------------------------------------------------------ r1572 | sears.russell@gmail.com | 
2011-12-20 22:37:54 -0500 (Tue, 20 Dec 2011) | 1 line add some missing #include<config.h> lines
357 lines
13 KiB
C
357 lines
13 KiB
C
#ifndef IO_HANDLE_H
|
|
#define IO_HANDLE_H
|
|
#include <stasis/common.h>
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/**
   stasis_handle() is a macro that prepends a unique prefix
   ("stasis_handle_") to its argument, yielding the name of a handle
   function.  It's used to cope with namespace collisions.

   For example, stasis_handle(open_file) expands to
   stasis_handle_open_file.

   @todo Do away with macros like this one.
*/
#define stasis_handle(x) stasis_handle_##x
|
|
|
|
/**
|
|
|
|
@file
|
|
|
|
Interface for I/O handle implementations.
|
|
|
|
This interface is designed to provide some extra features needed by
|
|
the buffer manager and the log, and to hide the operating system's
|
|
I/O interface from the rest of Stasis.
|
|
|
|
Handles are composable.  Some features, such as log truncation and
non-blocking writes, are not implemented by all file handle
implementations; they are instead supported by wrapping a file
handle that performs raw I/O with one that adds extra
functionality.
|
|
|
|
This file describes the minimum concurrency guarantees provided by
|
|
handle implementations. See the handle implementations'
|
|
documentation for more information about concurrency.
|
|
|
|
Each handle defines two sets of methods that read, write and append
|
|
to the file. The first set (read(), write() and append()) take a
|
|
buffer that is allocated by the caller. The second set
|
|
(read_buffer(), write_buffer() and append_buffer()) use memory that
|
|
is managed by the handle. Callers must explicitly release these
|
|
buffers by calling release_read_buffer() or release_write_buffer().
|
|
|
|
Finally, handles support truncation from the <i>beginning</i> of
|
|
the file, which is needed by the log manager. The off parameters
|
|
passed into functions are relative to the original start of the
|
|
file. Negative file offsets are reserved for
|
|
implementation-specific purposes.
|
|
|
|
|
|
|
|
Error handling:
|
|
|
|
read, write, append, open, release_read_buffer and
release_write_buffer return 0 on success, and an error code
otherwise.  read_buffer() and write_buffer() return error codes via
the error field of the buffers they produce.
|
|
|
|
An error that occurs while writing to the handle leaves the region
|
|
that was being written in an undefined state.
|
|
|
|
Errors in num_copies, num_copies_buffer, start_position, and end_position
|
|
are always unrecoverable, and return -1.
|
|
|
|
close returns 0 on success, or an error code otherwise. close
|
|
always frees the handle that was passed into it, regardless of
|
|
whether an error occurred.
|
|
|
|
Here are the meanings of the various error codes:
|
|
|
|
EDOM off is less than the beginning of the file (possibly due to
|
|
truncation).
|
|
|
|
EBADF an unrecoverable error occurred; the handle is no longer valid.  The
error that caused this one is stored in the handle's error field.
|
|
|
|
Handle implementations may return other errors as appropriate.
|
|
|
|
|
|
@todo rename *_buffer() functions to get_*_buffer()
|
|
|
|
*/
|
|
|
|
/**
|
|
This struct contains the function pointers that define handle
|
|
implementations. Implementations of the handle interface should
|
|
instantiate this struct, and set each function pointer accordingly.
|
|
The contents of the "impl" pointer is implementation defined.
|
|
*/
|
|
typedef struct stasis_handle_t {
|
|
/** Some handle implementations maintain their own internal buffers,
|
|
and must use memcpy in order to read or write from their
|
|
caller's buffers. The num_copies* functions provide perfomance
|
|
hints to the caller so that the more efficient set of methods
|
|
can be used.
|
|
|
|
@return the number of in-memory copies made when the caller
|
|
provides the buffer, or some other proxy for performance (higher
|
|
values are slower)
|
|
|
|
*/
|
|
int (*num_copies)(struct stasis_handle_t * h);
|
|
|
|
/** @see num_copies() */
|
|
int (*num_copies_buffer)(struct stasis_handle_t * h);
|
|
|
|
/** Close this handle, and release any associated resources. */
|
|
int (*close)(struct stasis_handle_t *);
|
|
|
|
/** Duplicate this handle. This is useful for performance hinting;
|
|
* sending sequential disk operations to different handles than random
|
|
* operations allows the kernel's prefetch algorithm to kick in.
|
|
*
|
|
* XXX calling dup on a handle, then calling truncate or append on the resulting handles has undefined semantics.
|
|
*
|
|
* @return a stasis_handle_t that should eventually have close() called on it.
|
|
*/
|
|
struct stasis_handle_t * (*dup)(struct stasis_handle_t *h);
|
|
/**
|
|
* Optimize the handle for sequential reads and writes.
|
|
*/
|
|
void (*enable_sequential_optimizations)(struct stasis_handle_t *h);
|
|
|
|
/** The offset of the byte after the end of the handle's data. */
|
|
lsn_t (*end_position)(struct stasis_handle_t * h);
|
|
|
|
/** Obtain a write buffer.
|
|
|
|
The behavior of calls that attempt to access this region before
|
|
release_write_buffer() returns is undefined.
|
|
|
|
@param h The handle
|
|
@param off The offset of the first byte in the write buffer.
|
|
@param len The length, in bytes, of the write buffer.
|
|
*/
|
|
struct stasis_write_buffer_t * (*write_buffer)(struct stasis_handle_t * h,
|
|
lsn_t off, lsn_t len);
|
|
/**
|
|
Release a write buffer and associated resources.
|
|
*/
|
|
int (*release_write_buffer)(struct stasis_write_buffer_t * w);
|
|
/**
|
|
Read a region of the file. Attempts to modify the region that is
|
|
being read will have undefined behavior until release_read_buffer
|
|
returns.
|
|
|
|
The behavior of calls that attempt to write to this region before
|
|
release_read_buffer() returns is undefined.
|
|
|
|
@param h The handle
|
|
@param off The offset of the first byte in the read buffer.
|
|
@param len The length, in bytes, of the read buffer.
|
|
|
|
*/
|
|
struct stasis_read_buffer_t * (*read_buffer)(struct stasis_handle_t * h,
|
|
lsn_t offset, lsn_t length);
|
|
/**
|
|
Release a read buffer and associated resources.
|
|
*/
|
|
int (*release_read_buffer)(struct stasis_read_buffer_t * r);
|
|
/**
|
|
Write data to the handle from memory managed by the caller. Once
|
|
write returns, the handle will reflect the update.
|
|
|
|
@param h The handle
|
|
@param off The position of the first byte to be written
|
|
@param dat A buffer containin the data to be written
|
|
@param len The number of bytes to be written
|
|
*/
|
|
int (*write)(struct stasis_handle_t * h, lsn_t off,
|
|
const byte * dat, lsn_t len);
|
|
/**
|
|
Read data from the file. The region may be safely written to
|
|
once read returns.
|
|
|
|
@param h The handle
|
|
@param off The position of the first byte to be written
|
|
@param dat A buffer containin the data to be written
|
|
@param len The number of bytes to be written
|
|
*/
|
|
int (*read)(struct stasis_handle_t * h,
|
|
lsn_t off, byte * buf, lsn_t len);
|
|
/**
|
|
Force any outstanding writes to disk. In progress writes (those
|
|
whose calls to write() or release_write_buffer() have not yet
|
|
returned) may or may not be forced to disk.
|
|
*/
|
|
int (*force)(struct stasis_handle_t * h);
|
|
/**
|
|
* Force any writes that were outstanding the last time this was called to
|
|
* disk. This does not force things like block allocation information, or
|
|
* issue a hardware write barrier, making it essentially useless for
|
|
* durability. However, it is useful as a performance hint, as it allows us
|
|
* to bound the number of data pages that are outstanding in Linux's I/O
|
|
* request queue, which means that we can use it to bound the latency of
|
|
* future force() and force_range() operations.
|
|
*
|
|
* This call is based on sync_file_range(2).
|
|
*/
|
|
int (*async_force)(struct stasis_handle_t * h);
|
|
int (*force_range)(struct stasis_handle_t * h, lsn_t start, lsn_t stop);
|
|
int (*fallocate)(struct stasis_handle_t * h, lsn_t off, lsn_t len);
|
|
/**
|
|
The handle's error flag; this passes errors to the caller when
|
|
they can't be returned directly.
|
|
*/
|
|
int error;
|
|
/**
|
|
Reserved for implementation specific data.
|
|
*/
|
|
void * impl;
|
|
|
|
} stasis_handle_t;
|
|
|
|
typedef struct stasis_write_buffer_t {
|
|
stasis_handle_t * h;
|
|
lsn_t off;
|
|
byte * buf;
|
|
lsn_t len;
|
|
void * impl;
|
|
int error;
|
|
} stasis_write_buffer_t;
|
|
|
|
typedef struct stasis_read_buffer_t {
|
|
stasis_handle_t * h;
|
|
lsn_t off;
|
|
const byte * buf;
|
|
lsn_t len;
|
|
void * impl;
|
|
int error;
|
|
} stasis_read_buffer_t;
|
|
|
|
/**
|
|
Open a handle that is backed by RAM
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_memory)(void);
|
|
/**
|
|
Open a handle that is backed by a file. This handle uses the unix
|
|
read(),write() I/O interfaces. Due to limitations in read() and
|
|
write(), it must hold a mutex during system calls, and therefore
|
|
cannot perform concurrent I/O.
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to have undefined behavior, but non-overlapping regions can be
|
|
concurrently accessed.
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
@param path The name of the file to be opened.
|
|
@param flags Flags to be passed to open() (eg O_CREAT)
|
|
@param perm The file permissions to be passed to open()
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_file)
|
|
(const char * path, int flags, int perm);
|
|
/**
|
|
Open a handle that is backed by a file. This handle uses pread()
|
|
and pwrite(). It never holds a mutex while perfoming I/O.
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to have undefined behavior, but non-overlapping regions can be
|
|
concurrently accessed.
|
|
|
|
@param start_offset The logical offset of the first byte in the handle
|
|
@param path The name of the file to be opened.
|
|
@param flags Flags to be passed to open(). (eg O_CREAT)
|
|
@param perm The file permissions to be passed to open()
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_pfile)
|
|
(const char * path, int flags, int perm);
|
|
/**
|
|
Given a factory for creating "fast" and "slow" handles, provide a
|
|
handle that never makes callers wait for write requests to
|
|
complete. ("Never" is a strong word; callers will begin to block
|
|
if the supply of write buffers is depleted.)
|
|
|
|
Attempts to read or write to a region that is already being written
|
|
to are undefined, but non-overlapping regions can be concurrently
|
|
accessed.
|
|
|
|
@param slow_factory A callback function that returns a handle with
|
|
offset zero. These handles will be accessed
|
|
concurrently, but do not need to support
|
|
concurrent writes, or reads from regions that
|
|
are being written to. For performance reasons,
|
|
handles that cannot exploit concurrency should
|
|
probably be allocated from a pool (@see
|
|
open_file), while a single truely concurrent
|
|
handle (@see open_pfile) should suffice.
|
|
|
|
@param slow_factory_close A callback that will be called when this
|
|
Stasis handle is closed.
|
|
|
|
@param slow_factory_arg A pointer to data that will be passed into
|
|
slow_factory.
|
|
|
|
@param slow_force_once If zero, call force on each slow handle when
|
|
force is called. When 1, only call force on
|
|
one of the slow handles (this is useful when
|
|
slow_factory returns a singleton handle...).
|
|
|
|
@param fast_factory A callback function that returns a handle with
|
|
a given offest and length. The handle need not
|
|
support persistant storage, and is used as
|
|
write buffer space. Typically, fast handles
|
|
will be allocated out of a pool.
|
|
|
|
@param fast_factory_arg A pointer to data that will be passed into
|
|
fast_factory.
|
|
|
|
@param worker_thread_count This many workers will be spawned in
|
|
order to service this handle
|
|
|
|
@param buffer_size The maximum number of outstanding bytes to
|
|
buffer before blocking.
|
|
|
|
@param max_writes The maximum number of outstanding writes to allow
|
|
before blocking.
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_non_blocking)
|
|
(stasis_handle_t * (*slow_factory)(void * arg),
|
|
int (*slow_factory_close)(void * arg),
|
|
void * slow_factory_arg,
|
|
int slow_force_once,
|
|
stasis_handle_t * (*fast_factory)(lsn_t off, lsn_t len, void * arg),
|
|
void * fast_factory_arg, int worker_thread_count, lsn_t buffer_size,
|
|
int max_writes);
|
|
/**
|
|
@todo implement open_verifying in handle.h
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_verifying)(stasis_handle_t * h);
|
|
/**
|
|
Writes each action performed (and return values) to standard out.
|
|
|
|
@param h All handle operations will be forwarded to h.
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_debug)(stasis_handle_t * h);
|
|
/**
 * Open a raid1 handle layered over two existing handles.
 *
 * NOTE(review): undocumented in the original; presumably operations are
 * mirrored across a and b (RAID 1) -- confirm against the implementation.
 *
 * @param a The first underlying handle.
 * @param b The second underlying handle.
 */
stasis_handle_t * stasis_handle(open_raid1)(stasis_handle_t *a, stasis_handle_t *b);
|
|
/**
|
|
* Open a raid0 handle
|
|
*
|
|
* @param handle_count The number of underlying file handles.
|
|
* @param h An array of pointers to the handles. The caller manages the memory that backs the array.
|
|
* @param stripe_size The raid 0 stripe size. Must be a multiple of PAGE_SIZE.
|
|
*/
|
|
stasis_handle_t * stasis_handle(open_raid0)(int handle_count, stasis_handle_t **h, uint32_t stripe_size);
|
|
/**
 * Factory for raid1 handles.  Takes no arguments; declared with (void) so
 * that C compilers see a full prototype and reject calls that pass arguments.
 *
 * NOTE(review): where the underlying handles and their configuration come
 * from is not visible in this header -- see the implementation.
 */
stasis_handle_t * stasis_handle_raid1_factory(void);
|
|
/**
 * Factory for raid0 handles.  Takes no arguments; declared with (void) so
 * that C compilers see a full prototype and reject calls that pass arguments.
 *
 * NOTE(review): where the underlying handles and the stripe size come from
 * is not visible in this header -- see the implementation.
 */
stasis_handle_t * stasis_handle_raid0_factory(void);
|
|
|
|
/**
|
|
* Open a Stasis file handle using default arguments.
|
|
*/
|
|
stasis_handle_t * stasis_handle_default_factory();
|
|
|
|
|
|
END_C_DECLS
|
|
|
|
#endif
|