A bunch of new references; The "Transactional Programming Models"
section is now complete, but missing a few references, and way too long.
This commit is contained in:
parent
2788116412
commit
e56a7bf58f
2 changed files with 449 additions and 101 deletions
|
@ -29,6 +29,122 @@
|
|||
OPTannote = {}
|
||||
}
|
||||
|
||||
@Article{argus,
  author  = {Barbara Liskov},
  title   = {Distributed Programming in {Argus}},
  journal = {Communications of the ACM},
  year    = {1988},
  volume  = {31},
  number  = {3},
  pages   = {300--312},
  month   = mar
}
|
||||
|
||||
@inproceedings{ejbCritique,
  author       = {Raul Silaghi and Alfred Strohmeier},
  title        = {Critical Evaluation of the {EJB} Transaction Model},
  booktitle    = {Proceedings of FIDJI},
  year         = {2002},
  pages        = {15--28},
  OPTee        = {http://link.springer.de/link/service/series/0558/bibs/2604/26040015.htm},
  OPTcrossref  = {DBLP:conf/fidji/2002},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de}
}
|
||||
|
||||
@inproceedings{omtt,
  author       = {J{\"o}rg Kienzle and
                  Alfred Strohmeier and
                  Alexander B. Romanovsky},
  title        = {Open Multithreaded Transactions: Keeping Threads and Exceptions
                  under Control},
  booktitle    = {Proceedings of WORDS},
  year         = {2001},
  pages        = {197--205},
  OPTee        = {http://doi.ieeecomputersociety.org/10.1109/WORDS.2001.945131},
  OPTcrossref  = {DBLP:conf/words/2001},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de}
}
|
||||
|
||||
|
||||
|
||||
@Book{nestedTransactionBook,
  author    = {J. E. B. Moss},
  title     = {Nested Transactions: An Approach to Reliable Distributed Computing},
  publisher = {MIT Press},
  year      = {1985}
}
|
||||
|
||||
|
||||
|
||||
@InProceedings{nestedTransactionPoster,
  author    = {J. E. B. Moss},
  title     = {Open Nested Transactions: Semantics and Support},
  booktitle = {Proceedings of WMPI},
  year      = {2006}
}
|
||||
|
||||
|
||||
|
||||
@InProceedings{mapReduce,
  author    = {Jeffrey Dean and Sanjay Ghemawat},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  booktitle = {Proceedings of OSDI},
  year      = {2004}
}
|
||||
|
||||
@Article{argusImplementation,
  author  = {William Weihl and Barbara Liskov},
  title   = {Implementation of Resilient, Atomic Data Types},
  journal = {ACM Transactions on Programming Languages and Systems},
  year    = {1985},
  volume  = {7},
  number  = {2},
  pages   = {244--269},
  month   = apr
}
|
||||
|
||||
@Article{perl,
|
||||
author = {Lincoln Stein},
|
||||
title = {How {P}erl Saved the {H}uman {G}enome {P}roject},
|
||||
|
@ -475,4 +591,65 @@
|
|||
OPTdoi = {http://doi.acm.org/10.1145/356989.357000},
|
||||
OPTpublisher = {ACM Press},
|
||||
OPTaddress = {New York, NY, USA},
|
||||
}
|
||||
}
|
||||
|
||||
@inproceedings{mcrt,
  author       = {Richard L. Hudson and
                  Bratin Saha and
                  Ali-Reza Adl-Tabatabai and
                  Ben Hertzberg},
  title        = {{McRT-Malloc}: A Scalable Transactional Memory Allocator},
  booktitle    = {Proceedings of ISMM},
  year         = {2006},
  pages        = {74--83},
  OPTee        = {http://doi.acm.org/10.1145/1133956.1133967},
  OPTcrossref  = {DBLP:conf/iwmm/2006},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de}
}
|
||||
|
||||
|
||||
|
||||
@Article{orion,
  author  = {Won Kim and Jorge F. Garza and Nathaniel Ballou and Darrell Woelk},
  title   = {Architecture of the {ORION} Next-Generation Database System},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {1990},
  volume  = {2},
  number  = {1},
  pages   = {109--124}
}
|
||||
|
||||
|
||||
@inproceedings{cricket,
  author       = {Eugene J. Shekita and
                  Michael J. Zwilling},
  title        = {Cricket: A Mapped, Persistent Object Store},
  booktitle    = {Proceedings of POS},
  year         = {1990},
  pages        = {89--102},
  OPTee        = {db/conf/pos/ShekitaZ90.html},
  OPTcrossref  = {DBLP:conf/pos/90},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de}
}
|
||||
|
||||
@inproceedings{storageReorganization,
  author       = {Voon-Fee Yong and
                  Jeffrey F. Naughton and
                  Jie-Bing Yu},
  title        = {Storage Reclamation and Reorganization in Client-Server Persistent
                  Object Stores},
  booktitle    = {Proceedings of the Tenth International Conference on Data Engineering},
  OPTfoo       = {February 14-18, 1994, Houston, Texas, USA},
  publisher    = {IEEE Computer Society},
  year         = {1994},
  OPTisbn      = {0-8186-5400-7},
  pages        = {120--131},
  OPTee        = {db/conf/icde/YongNY94.html},
  OPTcrossref  = {DBLP:conf/icde/94},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de}
}
|
||||
|
||||
|
|
|
@ -304,8 +304,8 @@ use of a structured physical model and abstract conceptual mappings.
|
|||
|
||||
The systems community has also worked on this mismatch for 20 years,
|
||||
which has led to many interesting projects. Examples include
|
||||
alternative durability models such as Quicksilver or LRVM, persistent
|
||||
objects systems such as Argus, and cluster hash tables [add cites].
|
||||
alternative durability models such as Quicksilver or RVM, persistent
|
||||
objects systems such as Argus~\cite{argus}, and cluster hash tables [add cites].
|
||||
We expect that \yad would simplify the implementation of most if not
|
||||
all of these systems. We look at these in more detail in
|
||||
Section~\ref{related=work}.
|
||||
|
@ -689,7 +689,7 @@ This section explains how we can avoid storing LSNs on pages in \yad
|
|||
without giving up durable transactional updates. The techniques here
|
||||
are similar to those used by RVM~\cite{lrvm}, a system that supports
|
||||
transactional updates to virtual memory. However, \yad generalizes
|
||||
the concept, allowing it to co-exist with traditional pages and fully
|
||||
the concept, allowing it to co-exist with traditional pages and more easily
|
||||
support concurrent transactions.
|
||||
|
||||
In the process of removing LSNs from pages, we
|
||||
|
@ -703,11 +703,8 @@ described in this section. However, \yad avoids hard-coding most of
|
|||
the relevant subsystems. LSN-free pages are essentially an alternative
|
||||
protocol for atomically and durably applying updates to the page file.
|
||||
This will require the addition of a new page type (\yad currently has
|
||||
3 such types, not including a few minor variants). The new page type
|
||||
will need to communicate with the logger and recovery modules in order
|
||||
to estimate page LSNs, which will need to make use of callbacks in
|
||||
those modules. Of course, upon providing support for LSN free pages,
|
||||
we will want to add operations to \yad that make use of them. We plan
|
||||
3 such types, not including a few minor variants) that will estimate
|
||||
LSNs by communicating with the logger and recovery modules. We plan
|
||||
to eventually support the coexistence of LSN-free pages, traditional
|
||||
pages, and similar third-party modules within the same page file, log,
|
||||
transactions, and even logical operations.
|
||||
|
@ -725,16 +722,15 @@ systems, but are often not idempotent, and rely upon the consistency
|
|||
of the page they modify. The recovery scheme described in this
|
||||
section does not guarantee that such operations will be applied
|
||||
exactly once, or even that they will be presented with a consistent
|
||||
version of a page. Therefore, it is incompatible with physiological
|
||||
operations.
|
||||
version of a page.
|
||||
|
||||
Therefore, in this section we eliminate such operations and instead
|
||||
make use of deterministic REDO operations that do not examine page
|
||||
state. We call such operations ``blind writes.'' For concreteness,
|
||||
state. We call such operations ``blind writes.'' Note that we still
|
||||
allow code that invokes operations to examine the page file. For concreteness,
|
||||
assume that all physical operations produce log entries that contain a
|
||||
set of byte ranges, and the pre- and post-value of each byte in the
|
||||
range. \diff{Note that we still allow code that invokes operations to
|
||||
examine the page file.}
|
||||
range.
|
||||
|
||||
Recovery works the same way as it does above, except that it computes
|
||||
a lower bound of each page LSN instead of reading the LSN from the
|
||||
|
@ -803,32 +799,34 @@ to a total of three, mostly sequential disk operations. (Two
|
|||
writes and one read.) However, in the best case, the blob would only be written once.
|
||||
In contrast, conventional blob implementations generally write the blob twice.
|
||||
|
||||
Alternatively, we could use DMA to overwrite the blob in the page file
|
||||
in a non-atomic fashion, providing file system style semantics.
|
||||
(Existing database servers often provide this mode based on the
|
||||
observation that many blobs are static data that does not really need
|
||||
to be updated transactionally.\rcs{SQL Server doesn't do this.... Remove this parenthetical statement?}~\cite{sqlserver}) Of course, \yad could
|
||||
also support other approaches to blob storage, such as B-Tree layouts
|
||||
that allow arbitrary insertions and deletions in the middle of
|
||||
objects~\cite{esm}.
|
||||
Of course, \yad could also support other approaches to blob storage,
|
||||
such as using DMA and update in place to provide file system style
|
||||
semantics, or by using B-Tree layouts that allow arbitrary insertions
|
||||
and deletions in the middle of objects~\cite{esm}.
|
||||
|
||||
\subsection{Concurrent recoverable virtual memory}
|
||||
|
||||
Our LSN-free pages are somewhat similar to the recovery scheme used by
|
||||
RVM, recoverable virtual memory. \rcs{, and camelot, argus(?)} That system used purely physical
|
||||
logging and LSN-free pages so that it could use mmap() to map portions
|
||||
of the page file into application memory~\cite{lrvm}. However, without
|
||||
support for logical log entries and nested top actions, it would be
|
||||
difficult to implement a concurrent, durable data structure using RVM.
|
||||
RVM, recoverable virtual memory, and Camelot~\cite{camelot}. RVM
|
||||
used purely physical logging and LSN-free pages so that it
|
||||
could use mmap() to map portions of the page file into application
|
||||
memory~\cite{lrvm}. However, without support for logical log entries
|
||||
and nested top actions, it would be extremely difficult to implement a
|
||||
concurrent, durable data structure using RVM or Camelot. (The description of
|
||||
Argus in Section~\ref{sec:transactionalProgramming} sketches the
|
||||
general approach.)
|
||||
|
||||
In contrast, LSN-free pages allow for logical undo, allowing for the
|
||||
use of nested top actions and concurrent transactions.
|
||||
In contrast, LSN-free pages allow for logical
|
||||
undo, allowing for the use of nested top actions and concurrent
|
||||
transactions; the concurrent data structure needs only provide \yad
|
||||
with an appropriate inverse each time its logical state changes.
|
||||
|
||||
We plan to add RVM style transactional memory to \yad in a way that is
|
||||
compatible with fully concurrent in-memory data structures such as
|
||||
hash tables and trees. Of course, since \yad will support coexistance
|
||||
of conventional and LSN-free pages, applications will be free to use
|
||||
the \yad data structure implementations as well.
|
||||
|
||||
We plan to add RVM-style transactional memory to \yad in a way that is
|
||||
compatible with fully concurrent collections such as hash tables and
|
||||
tree structures. Of course, since \yad will support coexistence of
|
||||
conventional and LSN-free pages, applications would be free to use the
|
||||
\yad data structure implementations as well.
|
||||
|
||||
\subsection{Page-independent transactions}
|
||||
\label{sec:torn-page}
|
||||
|
@ -1434,8 +1432,9 @@ implement (in theory) any of these abstract models and their extensions.
|
|||
|
||||
\subsubsection{Extensible databases}
|
||||
|
||||
Genesis~\cite{genesis}, an early database toolkit, was built in terms
|
||||
of a physical data model and the conceptual mappings described above. \rcs{I think they say this is an explicit design choice.}
|
||||
Genesis~\cite{genesis}, an early database toolkit, was explicitly
|
||||
structured in terms of the physical data models and conceptual
|
||||
mappings described above.
|
||||
It is designed to allow database implementors to easily swap out
|
||||
implementations of the various components defined by its framework.
|
||||
Like subsequent systems (including \yad), it allows its users to
|
||||
|
@ -1461,9 +1460,9 @@ a database toolkit, new types are defined when the database server is
|
|||
compiled. In today's object-relational database systems, new types
|
||||
are defined at runtime. Each approach has its advantages. However,
|
||||
both types of systems aim to extend a high-level data model with new
|
||||
abstract data types, and thus are quite limited in the range of new
|
||||
abstract data types, and are quite limited in the range of new
|
||||
applications they support, essentially queries over sets of a wider
|
||||
range of elements.
|
||||
range of elements.~\rcs{fix wording}
|
||||
|
||||
\subsubsection{Modular databases}
|
||||
|
||||
|
@ -1476,7 +1475,7 @@ implemented (or understood) as a monolithic entity.
|
|||
|
||||
It supports this argument with real-world evidence that suggests
|
||||
database servers are too unpredictable and unmanageable to
|
||||
scale up the size of today's systems. Similarly, they are a poor fit
|
||||
scale up to the size of today's systems. Similarly, they are a poor fit
|
||||
for small devices. SQL's declarative interface only complicates the
|
||||
situation.
|
||||
|
||||
|
@ -1514,34 +1513,61 @@ explore those applications that are a weaker fit for DMBSs.
|
|||
|
||||
\subsection{Transactional Programming Models}
|
||||
|
||||
\label{sec:transactionalProgramming}
|
||||
|
||||
\rcs{\ref{sec:transactionalProgramming} is too long.}
|
||||
|
||||
Special-purpose languages for transaction processing allow programmers
|
||||
to express transactional operations naturally. However, programs
|
||||
written in these languages are generally limited to a particular
|
||||
concurrency model and transactional storage system. Therefore, these
|
||||
systems address a different problem than \yad; each provides one
|
||||
high-level interface that implements a particular programming model
|
||||
and storage infrastructure. In contrast, \yad provides low-level
|
||||
primitives that make it easier to implement and support new types of
|
||||
high-level transactional interfaces.
|
||||
systems are complementary to \yad; they provide a specialized
|
||||
high-level interface that hard-codes a particular programming model
|
||||
and specialized storage infrastructure. In contrast, \yad is a
|
||||
general-purpose storage infrastructure that avoids hardcoding
|
||||
programming model assumptions. \yad provides a substrate that makes
|
||||
it easier to implement transactional programming models.
|
||||
|
||||
\subsubsection{Nested Transactions}
|
||||
|
||||
{\em Nested transactions} form trees of transactions, where children
|
||||
were spawned by their parents. They can be used to increase
|
||||
concurrency, provide partial rollback, and improve fault tolerance.
|
||||
{\em Linear} nesting occurs when transactions are nested to arbitrary
|
||||
depths, but have at most one child. In {\em closed} nesting, child
|
||||
transactions are rolled back when the parent
|
||||
aborts~\cite{nestedTransactionBook}. With {\em open} nesting, child
|
||||
transactions are not rolled back if the parent aborts.
|
||||
|
||||
\eab{add Argus and Camelot; also we are getting pretty technical here -- maybe move some of this later???}
|
||||
Closed nesting aids in intra-transaction concurrency and fault
|
||||
tolerance. Increased fault tolerance is achieved by isolating each
|
||||
child transaction from the others, and automatically retrying failed
|
||||
transactions. This technique is similar to the one used by MapReduce,
|
||||
which isolates subtasks by restricting the data that each unit of work
|
||||
may read and write, and which provides atomicity by ensuring
|
||||
exactly-once execution of each unit of work~\cite{mapReduce}.
|
||||
|
||||
\rcs{ I think Argus makes use of shadow copies for durability, and for
|
||||
in-memory transactions. A tree of shadow copies exists, and is handled as
|
||||
follows (I think): All transaction locks are commit duration, per
|
||||
object. There are read locks and write locks, and it uses strict 2PL.
|
||||
Each transaction is a tree of ``subactions'' that can get R/W locks
|
||||
according to the 2PL rules. Two subactions in the same action cannot
|
||||
get a write lock on the same object because each one gets its own copy
|
||||
of the object to write to. If a subaction or transaction abort their
|
||||
local copy is simply discarded. At commit, the local copy replaces
|
||||
the global copy.}
|
||||
\yads nested top actions, and support for custom lock managers also
|
||||
allow for inter-transaction concurrency. In some respects, nested top
|
||||
actions implement a form of open, linear nesting. Actions performed
|
||||
inside the nested top are not rolled back because a parent aborts.
|
||||
However, the logical undo gives the programmer the option to
|
||||
compensate for the nested top action in aborted transactions. We are
|
||||
interested in determining whether nested transactions
|
||||
could be implemented as a layer on top of \yad.
|
||||
|
||||
\rcs{Still need to mention CORBA / EJB + ORDBMS here. Also, missing a high-level point: Most research systems were backed with
|
||||
non-concurrent transactional storage; current commercial systems (eg:
|
||||
EJB) tend to make use of object relational mappings. Bill's stuff would be a good fit for that section, along with work describing how to let multiple threads / machines handle locking in an easy to reason about fashion.}
|
||||
\subsubsection{Distributed Programming Models}
|
||||
|
||||
%\rcs{ I think Argus makes use of shadow copies for durability, and for
|
||||
%in-memory transactions~\cite{argusImplementation}. A tree of shadow
|
||||
%copies exists, and is handled as follows (I think): All transaction
|
||||
%locks are commit duration, per object. There are read locks and write
|
||||
%locks, and it uses strict 2PL. Each transaction is a tree of
|
||||
%``subactions'' that can get R/W locks according to the 2PL rules. Two
|
||||
%subactions in the same action cannot get a write lock on the same
|
||||
%object because each one gets its own copy of the object to write to.
|
||||
%If a subaction or transaction abort their local copy is simply
|
||||
%discarded. At commit, the local copy replaces the global copy.}
|
||||
|
||||
|
||||
%System R was one of the first relational database implementations, and
|
||||
|
@ -1550,43 +1576,171 @@ EJB) tend to make use of object relational mappings. Bill's stuff would be a go
|
|||
%the storage subsystem, which remains the architecture for modern
|
||||
%databases.
|
||||
|
||||
Camelot was a distributed transaction processing system. It provides
|
||||
two physical logging modes; redo only (no-Steal, no-Force), and
|
||||
redo-undo (Steal, no-Force), but does not contain provisions for
|
||||
logical logging or compensations. Therefore, commit duration locks
|
||||
are required to protect data structures from concurrent
|
||||
transactions,
|
||||
\rcs{This sentence is problematic for two reasons: (1)
|
||||
Camelot allowed hybrid atomicity and other schemes in addition to 2PL.
|
||||
(2) According to \cite{camelot}, pg 433 ``Logical locks, implemented
|
||||
within servers, and support for hybrid atomicity provide the
|
||||
possibilty of high concurrency.'' I think this is a mistake in their
|
||||
paper; logical locking isn't very helpful when ``This [Camelot's
|
||||
Nested Transaction] model states that if one transaction modifies a
|
||||
region, the region cannot be modified by another transacion unless
|
||||
that transaction is an active descendant of original transaction or
|
||||
the original transaction compeletes... If comodification does occur,
|
||||
no guarantees concerning data integrity are given'' (Camelot + Avalon
|
||||
book, pg 117)'' I think the same mistake is repeated in the RVM
|
||||
paper, when they discuss multi-threaded code.}
|
||||
limiting the applicability of Camelot to high-concurrency applications
|
||||
or its scalability to multi-processor systems.
|
||||
Transactions provide a number of properties that are attractive to
|
||||
distributed systems; they provide isolation between nodes, protecting
|
||||
live systems when other nodes crash. Atomicity and durability
|
||||
simplify recovery after a node crashes. Finally, nested transactions
|
||||
allow for concurrency within a single transaction, allow partial
|
||||
rollback, and isolate working subtransactions from those that must be
|
||||
rolled back and retried due to node failure.
|
||||
|
||||
However, Camelot introduced a nested transaction model that allows
|
||||
concurrency within a single transaction. In Camelot, nested
|
||||
transactions can run in parallel and make use of locks acquired by the
|
||||
transaction that spawned them. Parent transactions are suspended
|
||||
until children transactions complete, and children are protected from
|
||||
each other using locks, or other similar methods. We beleive that
|
||||
\yads support for logical undo would allow it to support such
|
||||
transactions with more concurrency than Camelot allowed. Camelot is
|
||||
an early example of a C library that provides transactional semantics
|
||||
over custom data types. Also, it introduced a number of features,
|
||||
such as distributed logging and commit semantics, and transactional
|
||||
RPC that we plan to integrate into \yad as we add support for
|
||||
multi-node transactions. Avalon, which was built on top of Camelot is
|
||||
a persistent version of C++ that introduced the idea of persistent
|
||||
programming language types.
|
||||
Argus is a language for reliable distributed applications. An Argus
|
||||
program consists of guardians, which are essentially objects that
|
||||
encapsulate persistent and atomic data. Persistent data allows
|
||||
concurrent operations to be implemented, while accesses to atomic data
|
||||
are serializable~\cite{argus}. Typically, the data structure that is being
|
||||
implemented is stored in persistent storage, but is augmented with
|
||||
extra information in atomic storage. This extra data tracks the
|
||||
status of each item stored in the structure. Conceptually, in a hash
|
||||
table, atomic storage would contain the values ``Not present'',
|
||||
``Committed'' or ``Aborted; Old Value = x'' for each key in (or
|
||||
missing from) the hash. Before accessing the hash, the operation
|
||||
implementation would consult the appropriate piece of atomic data, and
|
||||
update the persistent storage if necessary. Because the atomic data is
|
||||
protected by a lock manager, attempts to update the hashtable are serializable.
|
||||
Therefore, clever use of atomic storage can be used to provide logical locking.~\rcs{Double check this}
|
||||
|
||||
Note that implementation of efficient data structures using this
|
||||
method forces each operation implementation to track a great deal of
|
||||
extra state (they suggest implementing a log structure to support a
|
||||
concurrent hash table), and to set policies regarding the granularity
|
||||
with which the data structures should be written to
|
||||
disk~\cite{argusImplementation}. \yad avoids these problems by
|
||||
forcing operation implementors to provide logical undos, and by
|
||||
leaving lock management to higher-level code. We argue that logical
|
||||
undos are easily provided in most circumstances, while higher-level
|
||||
lock management decouples data structure implementations from
|
||||
application concurrency models.
|
||||
|
||||
%The Argus designers assumed that only a few core concurrent
|
||||
%transactional data structures would be implemented, and that higher
|
||||
%level code would make use of these structures. Also, Argus assumed
|
||||
%that transactions should be serializable.
|
||||
|
||||
Camelot, a successor to Argus, made a number of important
|
||||
contributions, both in system design, and in algorithms for
|
||||
distributed transactions~\cite{camelot}. It left locking to application level code,
|
||||
and updated data in place. (Argus used shadow copies to provide
|
||||
atomic updates.) Camelot provided two logging modes: Redo only
|
||||
(no-Steal,no-Force) and Undo/Redo (Steal, no-Force). It was
|
||||
implemented using Mach, and provided recoverable virtual memory. It
|
||||
was decoupled from Avalon, which used Camelot to provide a
|
||||
higher-level (C++) programming model. Camelot provided a lower-level
|
||||
C interface that allowed other programming models to be
|
||||
implemented. It provided a limited form of closed nested transactions
|
||||
where parents are suspended while children are active. Camelot also
|
||||
provided mechanisms for distributed transactions and transactional
|
||||
RPC. However, concurrent operations in Camelot were similar to those
|
||||
in Argus since Camelot did not provide logical undo. Camelot's focus
|
||||
was upon support for distributed transactions, therefore, it hardcoded
|
||||
assumptions regarding the structure of nested transactions, consensus
|
||||
algorithms, communication mechanisms, and so on. In contrast, \yads
|
||||
goal is to efficiently support a wide range of such mechanisms.
|
||||
|
||||
More recent transactional programming schemes allow multiple
|
||||
transaction implementations to cooperate as part of the same
|
||||
distributed transaction. For example, X/Open DTP provides a standard
|
||||
networking protocol that allows multiple transactional systems to be
|
||||
controlled by a single transaction manager~\cite{something}.
|
||||
Enterprise Java Beans is a standard for developing transactional
|
||||
middleware that may make use of heterogeneous storage. Its
|
||||
transactions may not be nested~\cite{something}. This simplifies its
|
||||
semantics somewhat, and leads to many, short transactions, which
|
||||
improves concurrency. However, it is somewhat rigid, and may lead to
|
||||
situations where committed transactions have to be manually rolled
|
||||
back by other transactions after the fact~\cite{ejbCritique}. Open
|
||||
Multithreaded Transactions provide a model for nested transactions
|
||||
that incorporates exception handling, and allows parents to execute
|
||||
concurrently with their children.
|
||||
|
||||
%Argus transactions use shadow copies to provide atomic updates.
|
||||
%Instead of making use of logical undo, concurrent guardians make use
|
||||
%of two types of persistant state. One type behaves transactionally,
|
||||
%and will be rolled back at abort, while the other type can be
|
||||
%atomically written to disk, but is not automatically modified at
|
||||
%commit or abort. The transactional portions of the state can be
|
||||
%provided by built-in atomic types, or by another guardian.
|
||||
|
||||
%A transactional Argus hashtable could consist of a simple,
|
||||
%non-transactional, hashtable that is written back to disk atomically
|
||||
%each time it is updated and a set of transactional flags that are
|
||||
%automatically updated each time a transaction accesses the table,
|
||||
%commits or aborts. During a lookup, the hashtable would consult these
|
||||
%flags to determine the status of the key in question. To minimize the
|
||||
%amount of data written to disk, one could use a log to emulate
|
||||
%explicit per-key flags, and partition the hashtable and logfile into
|
||||
%multiple atomically updated regions~\cite{argusImplementation}.
|
||||
|
||||
%While this approach does allow the layout and implementation of the
|
||||
%data structure to be completely independent from the mechanisms used
|
||||
%for transactional updates, it forces the operation implementor to
|
||||
%provide a module that explicitly tracks the relationship between
|
||||
%object states and transactions. Some of this information is required
|
||||
%for locking, making it easier to provide a logical lock mananger.
|
||||
%However, taking that approach couples the data structure
|
||||
%implementation to the application's concurrency model.
|
||||
|
||||
%The Argus also work provides high-level models for atomicity,
|
||||
%reconfiguration, and other issues faced by developers of transactional
|
||||
%systems. These models do not depend on the low-level Argus
|
||||
%implementation, and may be useful to applications built on top of
|
||||
%\yad.~\rcs{citations here?}
|
||||
|
||||
%Camelot is a distributed transaction processing system. It provides
|
||||
%two physical logging modes; redo only (no-Steal, no-Force), and
|
||||
%redo-undo (Steal, no-Force), but does not contain provisions for
|
||||
%logical logging or compensations. It supports nested transactions,
|
||||
%which makes it possible to implement concurrent data structures in a
|
||||
%style similar to concurrent guardians in Argus.
|
||||
|
||||
%Therefore, commit duration locks are required to protect data
|
||||
%structures from concurrent transactions, \rcs{This sentence is
|
||||
%problematic for two reasons: (1) Camelot allowed hybrid atomicity and
|
||||
%other schemes in addition to 2PL. (2) According to \cite{camelot}, pg
|
||||
%433 ``Logical locks, implemented within servers, and support for
|
||||
%hybrid atomicity provide the possibilty of high concurrency.'' I
|
||||
%think this is a mistake in their paper; logical locking isn't very
|
||||
%helpful when ``This [Camelot's Nested Transaction] model states that
|
||||
%if one transaction modifies a region, the region cannot be modified by
|
||||
%another transacion unless that transaction is an active descendant of
|
||||
%original transaction or the original transaction compeletes... If
|
||||
%comodification does occur, no guarantees concerning data integrity are
|
||||
%given'' (Camelot + Avalon book, pg 117)'' I think the same mistake is
|
||||
%repeated in the RVM paper, when they discuss multi-threaded code.
|
||||
%Also, see the discussion on Argus; you could do concurrency that way
|
||||
%on Camelot...} limiting the applicability of Camelot to
|
||||
%high-concurrency applications or its scalability to multi-processor
|
||||
%systems.
|
||||
|
||||
%Camelot makes use of a nested transaction model that allows
|
||||
%concurrency within a single transaction. In Camelot, nested
|
||||
%transactions can run in parallel and make use of locks acquired by the
|
||||
%transaction that spawned them. Parent transactions are suspended
|
||||
%until children transactions complete, and children are protected from
|
||||
%each other using locks, or other similar methods. We beleive that
|
||||
%\yads support for logical undo would allow it to support such
|
||||
%transactions with more concurrency than Camelot allowed. Camelot is
|
||||
%an early example of a C library that provides transactional semantics
|
||||
%over custom data types. Also, it introduced a number of features,
|
||||
%such as distributed logging and commit semantics, and transactional
|
||||
%RPC that we plan to integrate into \yad as we add support for
|
||||
%multi-node transactions. Avalon, which was built on top of Camelot is
|
||||
%a persistent version of C++ that introduced the idea of persistent
|
||||
%programming language types.
|
||||
|
||||
%Both Argus and Camelot make use of {\em closed} nested transactions.
|
||||
%In this context, ``closed'' means that subtransactions must abort if
|
||||
%their parents abort. In contrast, \yads nested transactions provide a
|
||||
%limited form of {\em open} nested transactions, in that they are able
|
||||
%to commit even if their parents abort. Currently, \yad limits each
|
||||
%transaction (or nested top action) to have a single child (although
|
||||
%these may be nested to arbitrary depths). This limitation is sometimes
|
||||
%called {\em linear nesting}. Schemes to naturally integrate linear
|
||||
%and open nesting of transactions with modern languages such as Java
|
||||
%have recently been been proposed~\cite{nestedTransactionPoster}.
|
||||
|
||||
%\rcs{More information on nested transcations is available in this book
|
||||
%(which I haven't looked at yet)\cite{nestedTransactionBook}.}
|
||||
|
||||
\subsection{Berkeley DB}
|
||||
|
||||
|
@ -1650,8 +1804,8 @@ incorporate into \yad.
|
|||
%goals similar to our own is in Section~\ref{sec:otherDBs}.
|
||||
|
||||
Different large object storage systems provide different APIs.
|
||||
Some allow arbitrary insertion and deletion of bytes~\cite{esm} or
|
||||
pages~\cite{sqlserver} within the object, while typical file systems
|
||||
Some allow arbitrary insertion and deletion of bytes~\cite{esm}
|
||||
within the object, while typical file systems
|
||||
provide append-only storage allocation~\cite{ffs}.
|
||||
Record-oriented file systems are an older, but still-used~\cite{gfs}
|
||||
alternative. Each of these APIs addresses
|
||||
|
@ -1664,11 +1818,22 @@ objects exist as well. Relational databases allow users to specify the order
|
|||
in which tuples will be laid out, and often leave portions of pages
|
||||
unallocated to reduce fragmentation as new records are allocated.
|
||||
|
||||
\rcs{The new allocator is written + working, so this should be reworded. We have one that is based on hoard; support for other possibilities would be nice.}
|
||||
Memory allocation routines also address this problem. For example, the Hoard memory
|
||||
allocator is a highly concurrent version of malloc that
|
||||
makes use of thread context to allocate memory in a way that favors
|
||||
cache locality~\cite{hoard}.
|
||||
Memory allocation routines address this problem, although with limited
|
||||
information. For example, the Hoard memory allocator is a highly
|
||||
concurrent version of malloc that makes use of thread context to
|
||||
allocate memory in a way that favors cache locality~\cite{hoard}.
|
||||
%Essentially, each thread allocates memory from its own pool of
|
||||
%freespace, and consecutive memory allocations are a good predictor of
|
||||
%clustered access patterns and deallocations.
|
||||
McRT-malloc is non-blocking and extends the ideas
|
||||
presented in Hoard for software transactional memory~\cite{mcrt}.
|
||||
|
||||
Allocation of records that must fit within pages and be persisted to
|
||||
disk raises concerns regarding locality and page layouts. Depending
|
||||
on the application, data may be arranged based upon
|
||||
hints~\cite{cricket}, pointer values and write order~\cite{starburst},
|
||||
data type~\cite{orion}, or reorganization based on access
|
||||
patterns~\cite{storageReorganization}.
|
||||
|
||||
%Other work makes use of the caller's stack to infer
|
||||
%information about memory management.~\cite{xxx} \rcs{Eric, do you have
|
||||
|
@ -1684,6 +1849,12 @@ minimum, this is particularly attractive on a single disk system. We
|
|||
plan to use ideas from LFS~\cite{lfs} and POSTGRES~\cite{postgres}
|
||||
to implement this.
|
||||
|
||||
\yads record allocation currently implements a policy that is similar
|
||||
to Hoard and McRT, although it has not been as heavily optimized for
|
||||
CPU utilization. The record allocator obtains pages from a region
|
||||
allocator that provides contiguous regions of space to other
|
||||
allocators.
|
||||
|
||||
Starburst~\cite{starburst} provides a flexible approach to index
|
||||
management and database trigger support, as well as hints for small
|
||||
object layout.
|
||||
|
|
Loading…
Reference in a new issue