2004-07-06 01:22:18 +00:00
|
|
|
#include <config.h>
|
|
|
|
#include <lladd/common.h>
|
|
|
|
|
2005-01-20 21:19:47 +00:00
|
|
|
#include <lladd/operations.h>
|
2004-07-06 01:22:18 +00:00
|
|
|
#include <lladd/transactional.h>
|
2004-06-24 21:10:31 +00:00
|
|
|
#include <lladd/bufferManager.h>
|
2004-06-30 01:09:57 +00:00
|
|
|
#include "../blobManager.h"
|
2004-07-23 20:21:44 +00:00
|
|
|
#include "../page.h"
|
2004-08-17 01:46:17 +00:00
|
|
|
#include "../page/slotted.h"
|
|
|
|
|
|
|
|
#include <assert.h>
|
2005-02-24 21:12:36 +00:00
|
|
|
//try{
|
2004-06-24 21:10:31 +00:00
|
|
|
/**
|
2004-07-20 03:40:57 +00:00
|
|
|
@file
|
|
|
|
|
2004-06-24 21:10:31 +00:00
|
|
|
Implementation of Talloc() as an operation
|
|
|
|
|
|
|
|
This is a bit strange compared to other operations, as it happens
|
|
|
|
in two phases. The buffer manager reserves space for a record
|
|
|
|
before the log entry is allocated. Then, the recordid of this
|
|
|
|
space is written to the log. Finally, alloc tells bufferManager
|
|
|
|
that it will use the space.
|
|
|
|
|
|
|
|
@todo Currently, if the system crashes during an alloc, (before the
|
|
|
|
log is flushed, but after bufferManager returns a rid), then the
|
|
|
|
space allocated during the crash is leaked. This doesn't seem to be
|
2004-06-28 22:48:02 +00:00
|
|
|
too big of a deal, but it should be fixed someday. A more serious
|
|
|
|
problem results from crashes during blob allocation.
|
2004-07-20 03:40:57 +00:00
|
|
|
|
2005-02-10 21:55:35 +00:00
|
|
|
@todo The entire allocation system needs to be redone.
|
|
|
|
|
|
|
|
Here are some requirements for the next version of alloc:
|
|
|
|
|
|
|
|
Space Reuse: There are many ways to implement this. One method
|
|
|
|
(that I'm not particularly attached to) is to maintain separate
|
|
|
|
linked lists for each type of page, separated by an estimate of the
|
|
|
|
amount of space free (actually 'un-reserved'; see below) on the
|
|
|
|
page. Allocation would move pages between linked lists, and search
|
|
|
|
in the appropriate linked list before expanding the page file.
|
|
|
|
|
|
|
|
Treserve: Hashtables, linked lists, and other graph-like structures
|
|
|
|
can be optimized by exploiting physical locality. A call such as
|
|
|
|
this allows page-level locality to be established / maintained:
|
|
|
|
|
|
|
|
int page = Treserve(int xid, int size)
|
|
|
|
|
|
|
|
This would tell Talloc to treat the page as though 'size' bytes had
|
2005-02-24 21:12:36 +00:00
|
|
|
already been reserved. The 'free space' that Talloc () reasons
|
2005-02-10 21:55:35 +00:00
|
|
|
about would be: max(reservedSpace, usedSpace). A separate call,
|
2005-02-24 21:12:36 +00:00
|
|
|
TallocFromPage (xid, page, size) already exists, and should ignore
|
2005-02-10 21:55:35 +00:00
|
|
|
the presence of the 'reserved space' field.
|
|
|
|
|
|
|
|
Track level locality is another problem that Talloc should address,
|
|
|
|
especially for the blob implementation.
|
|
|
|
|
|
|
|
Better support for locking. Consider this sequence of events:
|
|
|
|
|
2005-02-24 21:12:36 +00:00
|
|
|
recordid rid1 = Talloc (xid1, 1);
|
|
|
|
recordid rid2 = Talloc (xid2, 1); // May deadlock if page level
|
2005-02-10 21:55:35 +00:00
|
|
|
// locking is used.
|
|
|
|
|
|
|
|
The lock manager needs a 'try lock' operation that allows
|
|
|
|
transactions to attempt to read multiple pages. When the current
|
|
|
|
lock manager returns "LLADD_DEADLOCK", it pretends the lock request
|
|
|
|
never happened (i.e., its externally visible state is left unchanged
|
|
|
|
by the call), effectively providing 'try lock' by default. Talloc
|
|
|
|
should make use of this by trying to alloc from a different page
|
|
|
|
whenever deadlock is encountered. Better, the system should
|
|
|
|
provide a list of 'cold' pages that are in memory, but haven't been
|
|
|
|
accessed recently. This requires integration with the page reuse
|
|
|
|
policy.
|
|
|
|
|
2004-07-20 03:40:57 +00:00
|
|
|
@ingroup OPERATIONS
|
|
|
|
|
|
|
|
$Id$
|
2004-06-24 21:10:31 +00:00
|
|
|
|
|
|
|
*/
|
2005-02-24 21:12:36 +00:00
|
|
|
//}end
|
2004-07-23 20:21:44 +00:00
|
|
|
/**
   Do/redo handler for OPERATION_ALLOC: informs the slotted page
   implementation that the slot described by rid is now in use on
   page p.

   @return 0 (success; operation handlers in this file always return 0)
*/
static int operate(int xid, Page * p, lsn_t lsn, recordid rid, const void * dat) {
  slottedPostRalloc(xid, p, lsn, rid);
  return 0;
}
|
|
|
|
|
2004-10-06 06:08:09 +00:00
|
|
|
/** @todo Currently, we leak empty pages on dealloc. */
|
2004-07-23 20:21:44 +00:00
|
|
|
/**
   Do/redo handler for OPERATION_DEALLOC: frees the slot that rid
   occupies on page p.  The record's preimage travels in the log
   entry (see getDealloc()'s SIZEOF_RECORD), so an aborted dealloc
   can be undone by reoperate().

   @return 0 (success)
*/
static int deoperate(int xid, Page * p, lsn_t lsn, recordid rid, const void * dat) {
  assert(rid.page == p->id); // rid must refer to the page we were handed
  slottedDeRalloc(xid, p, lsn, rid);
  return 0;
}
|
|
|
|
|
|
|
|
/**
   Undo handler for OPERATION_DEALLOC (registered as
   OPERATION_REALLOC; see getRealloc()): re-allocates the slot and
   restores the record's contents from the preimage stored in the
   log entry (dat).

   @return 0 (success)
*/
static int reoperate(int xid, Page *p, lsn_t lsn, recordid rid, const void * dat) {

  // if(rid.size >= BLOB_THRESHOLD_SIZE) { // && rid.size != BLOB_SLOT) {
  //  rid.size = BLOB_REC_SIZE; /* Don't reuse blob space yet... */
  //  rid.size = BLOB_SLOT; //sizeof(blob_record_t);
  // }

  slottedPostRalloc(xid, p, lsn, rid);
  /** @todo dat should be the pointer to the space in the blob store. */
  writeRecord(xid, p, lsn, rid, dat);

  return 0;
}
|
|
|
|
|
2004-10-06 06:08:09 +00:00
|
|
|
/** Serializes Talloc()'s page-selection logic; initialized in getAlloc(). */
static pthread_mutex_t talloc_mutex;

/**
   Builds the Operation table entry for Talloc.

   Also initializes talloc_mutex as a side effect, so this must run
   before the first call to Talloc().
*/
Operation getAlloc() {
  pthread_mutex_init(&talloc_mutex, NULL);
  Operation o = {
    OPERATION_ALLOC, /* ID */
    0,                 /* no payload in the log entry */
    OPERATION_DEALLOC, /* OPERATION_NOOP, */ /* undo: free the slot */
    &operate
  };
  return o;
}
|
|
|
|
|
2004-06-28 21:10:10 +00:00
|
|
|
|
2004-10-02 07:29:34 +00:00
|
|
|
/**
   Builds the Operation table entry for Tdealloc.  The log entry
   carries the record's preimage (hence SIZEOF_RECORD) so that an
   aborted dealloc can be undone by OPERATION_REALLOC.
*/
Operation getDealloc() {
  Operation o = {
    OPERATION_DEALLOC,
    SIZEOF_RECORD,     /* log the record preimage for undo */
    OPERATION_REALLOC, /* OPERATION_NOOP, */
    &deoperate
  };
  return o;
}
|
|
|
|
|
|
|
|
/*This is only used to undo deallocs... */
Operation getRealloc() {
  Operation o = {
    OPERATION_REALLOC,
    0,
    OPERATION_NOOP, /* realloc is itself an undo; nothing undoes it */
    &reoperate
  };
  return o;
}
|
|
|
|
|
2006-06-15 05:31:20 +00:00
|
|
|
/** Page id of the page Talloc() last allocated from, or UINT64_MAX
    if no page has been chosen yet.  Protected by talloc_mutex. */
static uint64_t lastFreepage;

/** Resets the free-page hint.  Call once at startup, before the
    first Talloc(). */
void TallocInit() {
  lastFreepage = UINT64_MAX;
}
|
|
|
|
|
2006-06-17 00:25:09 +00:00
|
|
|
static compensated_function recordid TallocFromPageInternal(int xid, Page * p, unsigned long size);
|
|
|
|
|
|
|
|
compensated_function recordid Talloc(int xid, unsigned long size) {
|
|
|
|
short type;
|
|
|
|
if(size >= BLOB_THRESHOLD_SIZE) {
|
|
|
|
type = BLOB_SLOT;
|
|
|
|
} else {
|
|
|
|
type = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
recordid rid;
|
|
|
|
|
|
|
|
begin_action_ret(pthread_mutex_unlock, &talloc_mutex, NULLRID) {
|
|
|
|
pthread_mutex_lock(&talloc_mutex);
|
|
|
|
Page * p;
|
|
|
|
if(lastFreepage == UINT64_MAX) {
|
|
|
|
try_ret(NULLRID) {
|
|
|
|
lastFreepage = TpageAlloc(xid);
|
|
|
|
} end_ret(NULLRID);
|
|
|
|
try_ret(NULLRID) {
|
|
|
|
p = loadPage(xid, lastFreepage);
|
|
|
|
} end_ret(NULLRID);
|
|
|
|
assert(*page_type_ptr(p) == UNINITIALIZED_PAGE);
|
|
|
|
slottedPageInitialize(p);
|
|
|
|
} else {
|
|
|
|
try_ret(NULLRID) {
|
|
|
|
p = loadPage(xid, lastFreepage);
|
|
|
|
} end_ret(NULLRID);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if(slottedFreespace(p) < physical_slot_length(type) ) {
|
|
|
|
// XXX compact page?!?
|
|
|
|
releasePage(p);
|
|
|
|
try_ret(NULLRID) {
|
|
|
|
lastFreepage = TpageAlloc(xid);
|
|
|
|
} end_ret(NULLRID);
|
|
|
|
try_ret(NULLRID) {
|
|
|
|
p = loadPage(xid, lastFreepage);
|
|
|
|
} end_ret(NULLRID);
|
|
|
|
slottedPageInitialize(p);
|
|
|
|
}
|
|
|
|
rid = TallocFromPageInternal(xid, p, size);
|
|
|
|
} compensate_ret(NULLRID);
|
|
|
|
return rid;
|
|
|
|
}
|
|
|
|
|
2006-06-13 23:58:04 +00:00
|
|
|
/**
   Allocates a record of the given size on a caller-chosen page.
   Unlike Talloc(), no free-space search is performed; the request is
   simply delegated to TallocFromPageInternal() for that page.
*/
compensated_function recordid TallocFromPage(int xid, long page, unsigned long size) {
  Page * p = loadPage(xid, page);
  recordid ret = TallocFromPageInternal(xid, p, size);
  releasePage(p);
  return ret;
}
|
|
|
|
|
|
|
|
/**
   Shared back end for Talloc() and TallocFromPage(): allocates a slot
   of the appropriate physical size on page p and logs the allocation
   with OPERATION_ALLOC.

   Blob-sized requests (size >= BLOB_THRESHOLD_SIZE) are allocated as
   BLOB_SLOT slots, with allocBlob() reserving the backing store; the
   returned rid carries the logical blob size, not the slot type.

   @return a recordid for the new record, or NULLRID if p lacks
           space even after compaction.
*/
static compensated_function recordid TallocFromPageInternal(int xid, Page * p, unsigned long size) {
  recordid rid;

  // Does TallocFromPage need to understand blobs? This function
  // seems to be too complex; all it does it delegate the allocation
  // request to the page type's implementation. (Does it really need
  // to check for freespace?)

  short type;
  if(size >= BLOB_THRESHOLD_SIZE) {
    type = BLOB_SLOT;
  } else {
    type = size;
  }

  unsigned long slotSize = INVALID_SLOT;

  slotSize = physical_slot_length(type);

  assert(slotSize < PAGE_SIZE && slotSize > 0);

  if(slottedFreespace(p) < slotSize) {
    slottedCompact(p); // try to reclaim fragmented space before giving up
  }
  if(slottedFreespace(p) < slotSize) {
    rid = NULLRID;
  } else {
    rid = slottedRawRalloc(p, type);
    assert(rid.size == type);
    rid.size = size; // log the logical size, not the slot type
    Tupdate(xid, rid, NULL, OPERATION_ALLOC);

    if(type == BLOB_SLOT) {
      // Reserve backing store for the blob's contents.
      allocBlob(xid, rid);
    }

    rid.size = type; // temporarily restore the slot type for the check below
  }

  if(rid.size == type && // otherwise TallocFromPage failed
     type == BLOB_SLOT // only special case blobs (for now)
     ) {
    rid.size = size; // report the blob's logical size to the caller
  }
  return rid;
}
|
|
|
|
|
2005-02-24 21:12:36 +00:00
|
|
|
/**
   Deallocates a record.  Reads the record's preimage into a scratch
   buffer and logs it with OPERATION_DEALLOC so that an abort can
   restore the record via OPERATION_REALLOC.

   @todo this needs to garbage collect empty pages / storage regions.
*/
compensated_function void Tdealloc(int xid, recordid rid) {
  void * preimage = malloc(rid.size);
  // BUG FIX: malloc's result was previously used without any check;
  // assert matches this file's existing failure-handling style.
  assert(preimage);
  Page * p;
  try {
    p = loadPage(xid, rid.page);
  } end;
  // NOTE(review): if loadPage() fails, the `end` macro appears to
  // return early, leaking preimage -- TODO confirm end's control flow.
  begin_action(releasePage, p) {
    readRecord(xid, p, rid, preimage);
    /** @todo race in Tdealloc; do we care, or is this something that the log manager should cope with? */
    Tupdate(xid, rid, preimage, OPERATION_DEALLOC);
  } compensate;
  free(preimage);
}
|
2004-12-01 01:26:25 +00:00
|
|
|
|
2005-02-24 21:12:36 +00:00
|
|
|
/**
   Returns the type of the record rid points at, as reported by
   getRecordType().

   @return the record type, or compensation_error() if the page
           could not be loaded.
*/
compensated_function int TrecordType(int xid, recordid rid) {
  Page * p;
  try_ret(compensation_error()) {
    p = loadPage(xid, rid.page);
  } end_ret(compensation_error());
  int ret;
  ret = getRecordType(xid, p, rid);
  releasePage(p);
  return ret;
}
|
2005-01-28 03:32:17 +00:00
|
|
|
|
2005-02-24 21:12:36 +00:00
|
|
|
/**
   Returns the size in bytes of the record rid points at, as
   reported by getRecordSize().

   @return the record size, or compensation_error() if the page
           could not be loaded.
*/
compensated_function int TrecordSize(int xid, recordid rid) {
  int ret;
  Page * p;
  try_ret(compensation_error()) {
    p = loadPage(xid, rid.page);
  } end_ret(compensation_error());
  ret = getRecordSize(xid, p, rid);
  releasePage(p);
  return ret;
}
|
|
|
|
|
2005-02-24 21:12:36 +00:00
|
|
|
/**
   Returns the slot count of a page (the raw numslots field;
   presumably this includes freed slots -- TODO confirm).
   Holds the page's read latch while reading the count.

   @return the slot count, or compensation_error() if the page
           could not be loaded.
*/
compensated_function int TrecordsInPage(int xid, int pageid) {
  Page * p;
  try_ret(compensation_error()) {
    p = loadPage(xid, pageid);
  } end_ret(compensation_error());
  readlock(p->rwlatch, 187);
  int ret = *numslots_ptr(p);
  unlock(p->rwlatch);
  releasePage(p);
  return ret;
}
|