new figures
This commit is contained in:
parent
67a0295a6b
commit
ca2c373829
7 changed files with 232 additions and 58 deletions
|
@ -28,7 +28,7 @@
|
|||
|
||||
@Article{excel,
|
||||
author = {B Zeeberg and J Riss and D Kane and K Bussey and E Uchio and W Linehan and J Barrett and J Weinstein},
|
||||
title = {Mistaken identifiers: gene name errors can be introduced inadvertently when using {E}xcel in bioinformatics},
|
||||
title = {Mistaken identifiers: Gene name errors can be introduced inadvertently when using {E}xcel in bioinformatics},
|
||||
journal = {BMC Bioinformatics},
|
||||
year = {2004},
|
||||
OPTkey = {},
|
||||
|
@ -40,3 +40,177 @@
|
|||
OPTannote = {}
|
||||
}
|
||||
|
||||
|
||||
@Article{batoryPhysical,
|
||||
author = {D. S. Batory and C. C. Gotlieb},
|
||||
title = {A Unifying Model of Physical Databases},
|
||||
journal = {ACM Transactions on Database Systems},
|
||||
year = {1982},
|
||||
OPTkey = {},
|
||||
volume = {7},
|
||||
number = {4},
|
||||
pages = {509-539},
|
||||
OPTmonth = {},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@InProceedings{batoryConceptual,
|
||||
author = {D. S. Batory},
|
||||
title = {Conceptual-to-internal mappings in commercial database systems},
|
||||
OPTcrossref = {},
|
||||
OPTkey = {},
|
||||
booktitle = {Proceedings of the 3rd SIGACT-SIGMOD symposium on Principles of database systems},
|
||||
pages = {70-78},
|
||||
year = {1984},
|
||||
OPTeditor = {},
|
||||
OPTvolume = {},
|
||||
OPTnumber = {},
|
||||
OPTseries = {},
|
||||
OPTaddress = {},
|
||||
OPTmonth = {},
|
||||
OPTorganization = {},
|
||||
OPTpublisher = {},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Misc{hibernate,
|
||||
OPTkey = {},
|
||||
OPTauthor = {},
|
||||
title = {Hibernate: Relational Persistence for {J}ava and {.NET}},
|
||||
OPThowpublished = {},
|
||||
OPTmonth = {},
|
||||
OPTyear = {},
|
||||
note = {http://www.hibernate.org/},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
|
||||
@Article{lrvm,
|
||||
author = {M. Satyanarayanan and Henry H. Mashburn and Puneet Kumar and David C. Steere and James J. Kistler},
|
||||
title = {Lightweight recoverable virtual memory},
|
||||
journal = {ACM Transactions on Computer Systems},
|
||||
year = {1994},
|
||||
OPTkey = {},
|
||||
volume = {12},
|
||||
number = {1},
|
||||
pages = {33-57},
|
||||
month = {February},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{genesis,
|
||||
author = {D. S. Batory and J. R. Barnett and J. F. Garza and K. P. Smith and K. Tsukuda and B. C. Twichell and T. E. Wise},
|
||||
title = {{GENESIS}: An Extensible Database Management System},
|
||||
journal = {IEEE Transactions on Software Engineering},
|
||||
year = {1988},
|
||||
OPTkey = {},
|
||||
volume = {14},
|
||||
number = {11},
|
||||
pages = {1711-1729},
|
||||
month = {November},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@InProceedings{exodus,
|
||||
author = {Michael J Carey and David J. DeWitt and Daniel Frank and Goetz Graefe and M. Muralikrishna and Joel Richardson and Eugene J. Shekita},
|
||||
title = {The Architecture of the {EXODUS} Extensible {DBMS}},
|
||||
OPTcrossref = {},
|
||||
OPTkey = {},
|
||||
booktitle = {Proceedings on the 1986 international workshop on Object-oriented database systems},
|
||||
pages = {52-65},
|
||||
year = {1986},
|
||||
OPTeditor = {},
|
||||
OPTvolume = {},
|
||||
OPTnumber = {},
|
||||
OPTseries = {},
|
||||
OPTaddress = {},
|
||||
OPTmonth = {},
|
||||
OPTorganization = {},
|
||||
OPTpublisher = {},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{codd,
|
||||
author = {E. F. Codd},
|
||||
title = {A relational model of data for large shared data banks},
|
||||
journal = {Communications of the ACM},
|
||||
year = {1970},
|
||||
OPTkey = {},
|
||||
volume = {13},
|
||||
number = {6},
|
||||
pages = {377-387},
|
||||
month = {June},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{starburst,
|
||||
author = {Guy M. Lohman and Bruce Lindsay and Hamid Pirahesh and K. Bernhard Schiefer},
|
||||
title = {Extensions to {S}tarburst: Objects, types, functions, and rules},
|
||||
journal = {Communications of the ACM},
|
||||
year = {1991},
|
||||
OPTkey = {},
|
||||
volume = {34},
|
||||
number = {10},
|
||||
pages = {95-109},
|
||||
month = {October},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{postgres,
|
||||
author = {M. Stonebraker and Greg Kemnitz},
|
||||
title = {The {POSTGRES} Next-Generation Database Management System},
|
||||
journal = {Communications of the ACM},
|
||||
year = {1991},
|
||||
OPTkey = {},
|
||||
volume = {34},
|
||||
number = {10},
|
||||
pages = {79-92},
|
||||
month = {October},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{aries,
|
||||
author = { C. Mohan and D. Haderle and B. Lindsay and H. Pirahesh and P Schwarz },
|
||||
title = {{ARIES}, A Transaction Recovery Method Supporting Fine-Granularity Locking and Partial Rollbacks Using Write-Ahead Logging},
|
||||
OPTcrossref = {},
|
||||
OPTkey = {},
|
||||
journal = {ACM Transactions on Database Systems},
|
||||
pages = {94-162},
|
||||
year = {1992},
|
||||
OPTeditor = {},
|
||||
volume = {17},
|
||||
number = {1},
|
||||
OPTseries = {},
|
||||
OPTaddress = {},
|
||||
OPTmonth = {},
|
||||
OPTorganization = {},
|
||||
OPTpublisher = {},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Book{ariesIM,
|
||||
author = {C Mohan and F Levine},
|
||||
ALTeditor = {},
|
||||
title = {{ARIES/IM}: an efficient and high concurrency index management method using write-ahead logging},
|
||||
publisher = {ACM Press},
|
||||
year = {1992},
|
||||
OPTkey = {},
|
||||
OPTvolume = {},
|
||||
OPTnumber = {},
|
||||
OPTseries = {},
|
||||
OPTaddress = {},
|
||||
OPTedition = {},
|
||||
OPTmonth = {},
|
||||
OPTnote = {},
|
||||
OPTannote = {}
|
||||
}
|
||||
|
||||
|
|
|
@ -63,15 +63,14 @@ UC Berkeley
|
|||
|
||||
%\subsection*{Abstract}
|
||||
|
||||
{\em There is an increasing need to manage data well in a wide variety of
|
||||
systems, including robust support for atomic durable concurrent
|
||||
{\em An increasing range of applications require robust support for atomic, durable and concurrent
|
||||
transactions. Databases provide the default solution, but force
|
||||
applications to interact via SQL and to forfeit control over data
|
||||
layout and access mechanisms. We argue there is a gap between DBMSs and file systems that limits designers of data-oriented applications.
|
||||
|
||||
\yad is a storage framework that incorporates ideas from traditional
|
||||
write-ahead-logging storage algorithms and file systems,
|
||||
while providing applications with flexible control over data structures, layout, and performance vs. robustness tradeoffs.
|
||||
write-ahead-logging storage algorithms and file systems.
|
||||
It provides applications with flexible control over data structures and layout, and transactional performance and robustness properties.
|
||||
\yad enables the development of
|
||||
unforeseen variants on transactional storage by generalizing
|
||||
write-ahead-logging algorithms. Our partial implementation of these
|
||||
|
@ -81,9 +80,9 @@ We evaluate the performance of a traditional transactional storage
|
|||
system based on \yad, and show that it performs favorably relative to existing
|
||||
systems. We present examples that make use of custom access methods, modified
|
||||
buffer manager semantics, direct log file manipulation, and LSN-free
|
||||
pages that facilitate zero-copy optimizations, and discuss the
|
||||
composability of these extensions. Many of these optimizations are
|
||||
easy to implement and more than double performance.
|
||||
pages. These examples facilitate sophisticated performance
|
||||
optimizations such as zero-copy I/O. These extensions are composable,
|
||||
easy to implement and frequently more than double performance.
|
||||
|
||||
}
|
||||
%We argue that our ability to support such a diverse range of
|
||||
|
@ -109,7 +108,7 @@ easy to implement and more than double performance.
|
|||
|
||||
\section{Introduction}
|
||||
|
||||
As our reliance on computing infrastructure has increased, a wider range of
|
||||
As our reliance on computing infrastructure increases, a wider range of
|
||||
applications require robust data management. Traditionally, data management
|
||||
has been the province of database management systems (DBMSs), which are
|
||||
well-suited to enterprise applications, but lead to poor support for
|
||||
|
@ -185,7 +184,7 @@ storage at a level of abstraction as close to the hardware as
|
|||
possible. The library can support special purpose, transactional
|
||||
storage interfaces in addition to ACID database-style interfaces to
|
||||
abstract data models. \yad incorporates techniques from databases
|
||||
(e.g. write-ahead logging) and systems (e.g. zero-copy techniques).
|
||||
(e.g. write-ahead-logging) and systems (e.g. zero-copy techniques).
|
||||
Our goal is to combine the flexibility and layering of low-level
|
||||
abstractions typical for systems work, with the complete semantics
|
||||
that exemplify the database field.
|
||||
|
@ -209,8 +208,9 @@ delivers these properties as reusable building blocks for systems
|
|||
that implement complete transactions.
|
||||
|
||||
Through examples and their good performance, we show how \yad{}
|
||||
supports a wide range of uses that in the database gap, including
|
||||
persistent objects, graph or XML apps, and recoverable
|
||||
supports a wide range of uses that fall in the gap between
|
||||
database and filesystem technologies, including
|
||||
persistent objects, graph or XML based applications, and recoverable
|
||||
virtual memory~\cite{lrvm}.
|
||||
|
||||
For example, on an object serialization workload, we provide up to
|
||||
|
@ -222,21 +222,20 @@ We implemented this extension in 150 lines of C, including comments and boilerpl
|
|||
in mind when we wrote \yad. In fact, the idea came from a potential
|
||||
user that is not familiar with \yad.
|
||||
|
||||
An (early) open-source implementation of
|
||||
the ideas presented here is available.
|
||||
|
||||
\eab{others? CVS, windows registry, berk DB, Grid FS?}
|
||||
\rcs{maybe in related work?}
|
||||
|
||||
This paper begins by contrasting \yad's approach with that of
|
||||
conventional database and transactional storage systems. It proceeds
|
||||
to discuss write ahead logging, and describe ways in which \yad can be
|
||||
customized to implement many existing (and some new) write ahead
|
||||
logging variants. Implementations of some of these variants are
|
||||
to discuss write-ahead-logging, and describe ways in which \yad can be
|
||||
customized to implement many existing (and some new) write-ahead-logging variants. Implementations of some of these variants are
|
||||
presented, and benchmarked against popular real-world systems. We
|
||||
conclude with a survey of the technologies the \yad implementation is
|
||||
based upon.
|
||||
|
||||
An (early) open-source implementation of
|
||||
the ideas presented here is available.
|
||||
|
||||
\section{\yad is not a Database}
|
||||
\label{sec:notDB}
|
||||
Database research has a long history, including the development of
|
||||
|
@ -354,7 +353,7 @@ applications presented in Section~\ref{sec:extensions} are efficiently
|
|||
supported by Berkeley DB. This is a result of Berkeley DB's
|
||||
assumptions regarding workloads and decisions regarding low level data
|
||||
representation. Thus, although Berkeley DB could be built on top of \yad,
|
||||
Berkeley DB's data model, and write ahead logging system are both too specialized to support \yad.
|
||||
Berkeley DB's data model, and write-ahead-logging system are too specialized to support \yad.
|
||||
|
||||
\eab{for BDB, should we say that it still has a data model?} \rcs{ Does the last sentence above fix it?}
|
||||
|
||||
|
@ -429,7 +428,7 @@ to build a system that enables a wider range of data management options.
|
|||
Section~\ref{sec:notDB} described the ways in which a top-down data model
|
||||
limits the generality and flexibility of databases. In this section,
|
||||
we cover the basic bottom-up approach of \yad: {\em transactional
|
||||
pages}. Although similar to the underlying write-ahead logging
|
||||
pages}. Although similar to the underlying write-ahead-logging
|
||||
approaches of databases, particularly ARIES~\cite{aries}, \yads
|
||||
bottom-up approach yields unexpected flexibility.
|
||||
|
||||
|
@ -475,7 +474,7 @@ property.
|
|||
However, \yad takes customization of transactional semantics one step
|
||||
further, allowing applications to add support for transactional
|
||||
semantics that we have not anticipated. We do not believe that
|
||||
we can anticipate every possible variation of write ahead logging.
|
||||
we can anticipate every possible variation of write-ahead-logging.
|
||||
However, we
|
||||
have observed that most changes that we are interested in making
|
||||
involve a few common underlying primitives.
|
||||
|
@ -484,7 +483,7 @@ As we have
|
|||
implemented new extensions, we have located portions of the system
|
||||
that are prone to change, and have extended the API accordingly. Our
|
||||
goal is to allow applications to implement their own modules to
|
||||
replace our implementations of each of the major write ahead logging
|
||||
replace our implementations of each of the major write-ahead-logging
|
||||
components.
|
||||
}
|
||||
|
||||
|
@ -492,10 +491,13 @@ components.
|
|||
\subsection{Single-page Transactions}
|
||||
|
||||
In this section we show how to implement single-page transactions.
|
||||
This is not at all novel, and is in fact based on ARIES~\cite{aries}, but it forms
|
||||
important background. We also gloss over many important and
|
||||
well-known optimizations that \yad exploits, such as group
|
||||
commit~\cite{group-commit}.
|
||||
This is not at all novel, and is in fact based on ARIES~\cite{aries},
|
||||
but it forms important background. We also gloss over many important
|
||||
and well-known optimizations that \yad exploits, such as group
|
||||
commit~\cite{group-commit}. These aspects of recovery algorithms are
|
||||
described in the literature, and in any good textbook that describes
|
||||
database implementations. They are not particularly important to the
|
||||
discussion here, so we do not cover them.
|
||||
|
||||
The trivial way to achieve single-page transactions is simply to apply
|
||||
all the updates to the page and then write it out on commit. The page
|
||||
|
@ -511,8 +513,8 @@ transactions: we write (sequential) ``redo'' information to the log on commit, a
|
|||
then can write the pages later. If we crash, we can use the log to
|
||||
redo the lost updates during recovery.
|
||||
|
||||
For this to work, we need to be able to tell which updates to
|
||||
re-apply, which is solved by using a per-page sequence number called a
|
||||
For this to work, recovery must be able to decide which updates to
|
||||
re-apply. This is solved by using a per-page sequence number called a
|
||||
{\em log sequence number}. Each log entry contains the sequence
|
||||
number, and each page contains the sequence number of the last applied
|
||||
update. Thus on recovery, we load a page, look at its sequence
|
||||
|
@ -524,7 +526,7 @@ We also need to make sure that only the results of committed
|
|||
transactions still exist after recovery. This is best done by writing
|
||||
a commit record to the log during the commit. If the system pins uncommitted
|
||||
dirty pages in memory, recovery does not need to worry about undoing
|
||||
any updates, and simply plays back the redo records from
|
||||
any updates. Therefore recovery simply plays back unapplied redo records from
|
||||
transactions that have commit records.
|
||||
|
||||
However, pinning the pages of active transactions in memory is problematic.
|
||||
|
@ -550,7 +552,7 @@ redo log entry (with its LSN and argument) reaches the disk before
|
|||
commit. Similarly, an undo log entry, with its LSN and argument,
|
||||
always reaches the disk before a page is stolen. ARIES works
|
||||
essentially the same way, but hard-codes recommended page
|
||||
formats and index structures.~\cite{ariesIM}
|
||||
formats and index structures~\cite{ariesIM}.
|
||||
|
||||
To manually abort a transaction, \yad could either reload the page
|
||||
from disk and roll it forward to reflect committed transactions (this would imply ``no steal''), or it
|
||||
|
@ -559,7 +561,7 @@ order. (It currently does the latter.)
|
|||
|
||||
|
||||
\eat{
|
||||
Write ahead logging algorithms are quite simple if each operation
|
||||
Write-ahead-logging algorithms are quite simple if each operation
|
||||
applied to the page file can be applied atomically. This section will
|
||||
describe a write ahead logging scheme that can transactionally update
|
||||
a single page of storage that is guaranteed to be written to disk
|
||||
|
@ -580,7 +582,7 @@ each other. Normally, only calls to abort and recovery will invoke undo, so
|
|||
we will assume that transactions consist of repeated applications of
|
||||
the redo function.
|
||||
|
||||
Following the lead of ARIES (the write ahead logging system \yad
|
||||
Following the lead of ARIES (the write-ahead-logging system \yad
|
||||
originally set out to implement), assume that the function is also
|
||||
passed a distinct, monotonically increasing number each time it is
|
||||
invoked, and that it records that number in an LSN (log sequence number)
|
||||
|
@ -608,9 +610,7 @@ is also written to the log.
|
|||
This section very briefly described how a simplified
|
||||
write-ahead-logging algorithm might work, and glossed over many
|
||||
details. Like ARIES, \yad actually implements recovery in three
|
||||
phases: Analysis, Redo and Undo. Because recovery algorithms are
|
||||
desribed in the literature, and in an good database textbook, we
|
||||
will not desribe them in further detail.
|
||||
phases: Analysis, Redo and Undo.
|
||||
|
||||
%Recovery is handled by playing the log forward, and only applying log
|
||||
%entries that are newer than the version of the page on disk. Once the
|
||||
|
@ -638,8 +638,8 @@ is relatively easy.
|
|||
|
||||
First, we need to ensure that all log entries have a transaction ID
|
||||
(XID) so that we can tell that updates to different pages are part of
|
||||
the same transaction (we need this for multiple updates within a
|
||||
single page too). Given single-page recovery, we can just apply it to
|
||||
the same transaction (we need this in the single page case as well).
|
||||
Given single-page recovery, we can just apply it to
|
||||
all of the pages touched by a transaction to recover a multi-page
|
||||
transaction. This works because steal and no-force already imply
|
||||
that pages can be written back early or late (respectively), so there
|
||||
|
@ -648,12 +648,12 @@ need only ensure that redo entries for all pages reach the disk before
|
|||
the commit record (and before commit returns).
|
||||
|
||||
\eat{
|
||||
\subsection{Write ahead logging invariants}
|
||||
\subsection{Write-ahead-logging invariants}
|
||||
|
||||
In order to support recovery, a write-ahead-logging algorithm must
|
||||
identify pages that {\em may} be written back to disk, and those that
|
||||
{\em must} be written back to disk. \yad provides full support for
|
||||
Steal/no-Force write ahead logging, due to its generally favorable
|
||||
Steal/no-Force write-ahead-logging, due to its generally favorable
|
||||
performance properties. ``Steal'' refers to the fact that pages may
|
||||
be written back to disk before a transaction completes. ``No-Force''
|
||||
means that a transaction may commit before the pages it modified are
|
||||
|
@ -694,8 +694,8 @@ structure, and then A aborted. When A rolls back, its UNDO entries
|
|||
will undo the rearrangement that it made to the data structure, without
|
||||
regard to B's modifications. This is likely to cause corruption.
|
||||
|
||||
Two common solutions to this problem are ``total isolation'' and
|
||||
``nested top actions.'' Total isolation simply prevents any
|
||||
Two common solutions to this problem are {\em total isolation} and
|
||||
{\em nested top actions}. Total isolation simply prevents any
|
||||
transaction from accessing a data structure that has been modified by
|
||||
another in-progress transaction. An application can achieve this
|
||||
using its own concurrency control mechanisms, or by holding a lock on
|
||||
|
@ -715,7 +715,7 @@ aborts.
|
|||
The key idea is to distinguish between the logical operations of a
|
||||
data structure, such as inserting a key, and the physical operations
|
||||
such as splitting tree nodes or rebalancing a tree. The physical
|
||||
operations do not need to undone if the containing logical operation
|
||||
operations do not need to be undone if the containing logical operation
|
||||
(insert) aborts.
|
||||
|
||||
Because nested top actions are easy to use and do not lead to
|
||||
|
@ -751,7 +751,7 @@ As described above, and in all database implementations of which we
|
|||
are aware, transactional pages use LSNs on each page. This makes it
|
||||
difficult to map large objects onto multiple pages, as the LSNs break
|
||||
up the object. It is tempting to try to move the LSNs elsewhere, but
|
||||
then they will not be written atomically with their page, which
|
||||
then they would not be written atomically with their page, which
|
||||
defeats their purpose.
|
||||
|
||||
LSNs were introduced to prevent recovery from applying updates more
|
||||
|
@ -760,6 +760,7 @@ entries,\endnote{Idempotency does not guarantee that $f(g(x)) =
|
|||
f(g(f(g(x))))$. Therefore, idempotency does not guarantee that it is safe
|
||||
to assume that a page is older than it is.}
|
||||
\yad can eliminate the LSN on each page.
|
||||
|
||||
Consider purely physical logging operations that overwrite a fixed
|
||||
byte range on the page regardless of the page's initial state.
|
||||
We say that such operations perform ``blind writes.''
|
||||
|
@ -784,7 +785,7 @@ update some subset of the bits on the page. If the log entries do not
|
|||
update a bit, then its value was correct before recovery began, so it
|
||||
must be correct after recovery. Otherwise, we know that recovery will
|
||||
update the bit. Furthermore, after all redos, the bit's value will be the
|
||||
value it contained at crash, so we know that undo will behave
|
||||
last value it contained before the crash, so we know that undo will behave
|
||||
properly.
|
||||
|
||||
We call such pages ``LSN-free'' pages. Although this technique is
|
||||
|
@ -792,9 +793,9 @@ novel for databases, it resembles the mechanism used by
|
|||
RVM~\cite{rvm}; \yad generalizes the concept and allows it to
|
||||
co-exist with traditional pages. Furthermore, efficient recovery and
|
||||
log truncation require only minor modifications to our recovery
|
||||
algorithm. In practice, this is implemented by providing a callback
|
||||
for LSN free pages that allows the buffer manager to compute a
|
||||
conservative estimate of the page's LSN whenever it is read from disk.
|
||||
algorithm. In practice, this is implemented by providing a buffer manager callback
|
||||
for LSN-free pages. The callback computes a
|
||||
conservative estimate of the page's LSN whenever the page is read from disk.
|
||||
For a less conservative estimate, it suffices to write a page's LSN to
|
||||
the log shortly after the page itself is written out; on recovery the
|
||||
log entry is thus a conservative but close estimate.
|
||||
|
@ -880,7 +881,7 @@ These issues are beyond the scope of this discussion. Section~\ref{logReorderin
|
|||
\subsection{Summary of Transactional Pages}
|
||||
|
||||
This section provided an extremely brief overview of transactional
|
||||
pages and write-ahead logging. Transactional pages are a valuable
|
||||
pages and write-ahead-logging. Transactional pages are a valuable
|
||||
building block for a wide variety of data management systems, as we
|
||||
show in the next section. Nested top actions and LSN-free pages
|
||||
enable important optimizations. In particular, \yad allows general
|
||||
|
@ -940,7 +941,7 @@ Optimizations to Berkeley DB that we performed included disabling the
|
|||
lock manager, though we still use ``Free Threaded'' handles for all
|
||||
tests. This yielded a significant increase in performance because it
|
||||
removed the possibility of transaction deadlock, abort, and
|
||||
repetition. However, disabling the lock manager, caused highly
|
||||
repetition. However, disabling the lock manager caused highly
|
||||
concurrent Berkeley DB benchmarks to become unstable, suggesting either a
|
||||
bug or misuse of the feature.
|
||||
|
||||
|
@ -1034,7 +1035,7 @@ straightforward. We then compare our simple, straightforward
|
|||
implementation to our hand-tuned version and Berkeley DB's implementation.
|
||||
|
||||
The simple hash table uses nested top actions to atomically update its
|
||||
internal structure. It is based on a {\em linear} hash function~\cite{lht}, allowing
|
||||
internal structure. It uses a {\em linear} hash function~\cite{lht}, allowing
|
||||
it to incrementally grow its buffer list. It is based on a number of
|
||||
modular subcomponents. Notably, its bucket list is a growable array
|
||||
of fixed length entries (a linkset, in the terms of the physical
|
||||
|
@ -1048,8 +1049,8 @@ hashtable is a popular, commonly deployed implementation, and serves
|
|||
as a baseline for our experiments.
|
||||
|
||||
Both of our hashtables outperform Berkeley DB on a workload that
|
||||
bulk loads the tables by repeatedly inserting (key, value) pairs,
|
||||
although we do not wish to imply this is always the case.
|
||||
bulk loads the tables by repeatedly inserting (key, value) pairs.
|
||||
However, we do not wish to imply this is always the case.
|
||||
%We do not claim that our partial implementation of \yad
|
||||
%generally outperforms, or is a robust alternative
|
||||
%to Berkeley DB. Instead, this test shows that \yad is comparable to
|
||||
|
@ -1071,7 +1072,7 @@ optimize key primitives.
|
|||
%forced to redesign an application to avoid sub-optimal properties of
|
||||
%the transactional data structure implementation.
|
||||
|
||||
Figure~\ref{fig:TPS} describes performance of the two systems under
|
||||
Figure~\ref{fig:TPS} describes the performance of the two systems under
|
||||
highly concurrent workloads. For this test, we used the simple
|
||||
(unoptimized) hash table, since we are interested in the performance of a
|
||||
clean, modular data structure that a typical system implementor would
|
||||
|
@ -1108,7 +1109,7 @@ The effect of \yad object serialization optimizations under low and high memory
|
|||
\subsection{Object persistence}
|
||||
\label{sec:oasys}
|
||||
Numerous schemes are used for object serialization. Support for two
|
||||
different styles of object serialization have been eimplemented in
|
||||
different styles of object serialization has been implemented in
|
||||
\yad. We could have just as easily implemented a persistence
|
||||
mechanism for a statically typed functional programming language, a
|
||||
dynamically typed scripting language, or a particular application,
|
||||
|
@ -1157,7 +1158,7 @@ entries, and wrote them all before committing.
|
|||
page file, increasing the working set of the program, and increasing
|
||||
disk activity.
|
||||
|
||||
Furthermore, because objects may be written to disk in an
|
||||
Furthermore, objects may be written to disk in an
|
||||
order that differs from the order in which they were updated,
|
||||
violating one of the write-ahead-logging invariants. One way to
|
||||
deal with this is to maintain multiple LSN's per page. This means we would need to register a
|
||||
|
@ -1166,7 +1167,6 @@ callback will be needed in Section~\ref{sec:zeroCopy}), and
|
|||
extend \yads page format to contain per-record LSN's.
|
||||
Also, we must prevent \yads storage allocation routine from overwriting the per-object
|
||||
LSN's of deleted objects that may still be addressed during abort or recovery.
|
||||
\yad can support this approach.
|
||||
|
||||
Alternatively, we could arrange for the object pool to cooperate
|
||||
further with the buffer pool by atomically updating the buffer
|
||||
|
@ -1174,7 +1174,7 @@ manager's copy of all objects that share a given page, removing the
|
|||
need for multiple LSN's per page, and simplifying storage allocation.
|
||||
|
||||
However, the simplest solution, and the one we take here, is based on the observation that
|
||||
updates (not allocations or deletions) to fixed length objects are blind writes.
|
||||
updates (not allocations or deletions) of fixed length objects are blind writes.
|
||||
This allows us to do away with per-object LSN's entirely. Allocation and deletion can then be handled
|
||||
as updates to normal LSN containing pages. At recovery time, object
|
||||
updates are executed based on the existence of the object on the page
|
||||
|
@ -1486,7 +1486,7 @@ is a common pattern in system software design, and manages
|
|||
dependencies and ordering constraints between sets of components.
|
||||
Over time, we hope to shrink \yads core to the point where it is
|
||||
simply a resource manager and a set of implementations of a few unavoidable
|
||||
algorithms related to write-ahead logging. For instance,
|
||||
algorithms related to write-ahead-logging. For instance,
|
||||
we suspect that support for appropriate callbacks will
|
||||
allow us to hardcode a generic recovery algorithm into the
|
||||
system. Similarly, any code that manages book-keeping information, such as
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue