Implement blob deallocation; add test case

Sears Russell 2008-11-13 00:13:11 +00:00
parent e2f604175b
commit 46fbfa3c35
4 changed files with 203 additions and 290 deletions
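For orientation, the sequence below sketches the behavior this commit adds, mirroring the new recoverBlob__allocation test. It is a minimal sketch, not code from the commit: the header path is an assumption, and error handling is elided.

#include <assert.h>
#include <stasis/transactional.h> // assumed location of the public Stasis API

int main(int argc, char ** argv) {
  Tinit();

  int xid = Tbegin();
  recordid rid = Talloc(xid, BLOB_THRESHOLD_SIZE); // >= threshold => blob record
  assert(TrecordType(xid, rid) == BLOB_SLOT);
  Tabort(xid); // rolling back the alloc reclaims the slot

  xid = Tbegin();
  assert(TrecordType(xid, rid) == INVALID_SLOT); // the allocation was undone
  rid = Talloc(xid, BLOB_THRESHOLD_SIZE);
  Tdealloc(xid, rid); // new in this commit: logs a preimage, then frees the blob
  Tcommit(xid);

  Tdeinit();
  return 0;
}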

View file

@@ -18,6 +18,10 @@ void allocBlob(int xid, recordid rid) {
// printf("rid = {%d %d %d}\n", rid.page, rid.slot, rid.size);
}
void deallocBlob(int xid, recordid rid) {
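// rid.page identifies the region that backs the blob, so a single region
// dealloc is enough to free all of the blob's pages.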
TregionDealloc(xid, rid.page);
}
void readBlob(int xid, Page * p2, recordid rid, byte * buf) {
pageid_t chunk;
recordid rawRid = rid;

View file

@@ -92,7 +92,7 @@ static int operate_helper(int xid, Page * p, recordid rid) {
typedef struct {
slotid_t slot;
int64_t size;
int64_t type;
} alloc_arg;
static int op_alloc(const LogEntry* e, Page* p) {
@@ -102,14 +102,18 @@ static int op_alloc(const LogEntry* e, Page* p) {
recordid rid = {
p->id,
arg->slot,
arg->size
arg->type
};
int ret = operate_helper(e->xid,p,rid);
if(e->update.arg_size == sizeof(alloc_arg) + arg->size) {
// if we're aborting a dealloc, we'd better have a sane preimage to apply
int64_t size = stasis_record_length_read(e->xid,p,rid);
if(e->update.arg_size == sizeof(alloc_arg) + size) {
// if we're aborting a dealloc we better have a sane preimage to apply
rid.size = size;
stasis_record_write(e->xid,p,e->LSN,rid,(const byte*)(arg+1));
rid.size = arg->type;
} else {
// otherwise, no preimage
assert(e->update.arg_size == sizeof(alloc_arg));
@@ -124,10 +128,12 @@ static int op_dealloc(const LogEntry* e, Page* p) {
recordid rid = {
p->id,
arg->slot,
arg->size
arg->type
};
// assert that we've got a sane preimage or we're aborting a talloc (no preimage)
assert(e->update.arg_size == sizeof(alloc_arg) + arg->size || e->update.arg_size == sizeof(alloc_arg));
int64_t size = stasis_record_length_read(e->xid,p,rid);
assert(e->update.arg_size == sizeof(alloc_arg) + size ||
e->update.arg_size == sizeof(alloc_arg));
stasis_record_free(e->xid, p, rid);
assert(stasis_record_type_read(e->xid, p, rid) == INVALID_SLOT);
@@ -141,17 +147,20 @@ static int op_realloc(const LogEntry* e, Page* p) {
recordid rid = {
p->id,
arg->slot,
arg->size
arg->type
};
assert(stasis_record_type_read(e->xid, p, rid) == INVALID_SLOT);
int ret = operate_helper(e->xid, p, rid);
assert(e->update.arg_size == sizeof(alloc_arg)
+ stasis_record_length_read(e->xid,p,rid));
int64_t size = stasis_record_length_read(e->xid,p,rid);
assert(e->update.arg_size == sizeof(alloc_arg)
+ size);
rid.size = size;
byte * buf = stasis_record_write_begin(e->xid,p,rid);
memcpy(buf, arg+1, stasis_record_length_read(e->xid,p,rid));
memcpy(buf, arg+1, size);
stasis_record_write_done(e->xid,p,rid,buf);
rid.size = arg->type;
return ret;
}
@@ -271,6 +280,7 @@ compensated_function recordid Talloc(int xid, unsigned long size) {
if(size >= BLOB_THRESHOLD_SIZE) {
type = BLOB_SLOT;
} else {
assert(size >= 0);
type = size;
}
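A note on the arg->size to arg->type rename that runs through this file: for records below the blob threshold, the slot type doubles as the record length, as the branch above shows. A sketch of the encoding (hypothetical helper name; the commit inlines this logic):

// Hypothetical helper restating the type encoding used above.
static int64_t record_type_for(unsigned long size) {
  if(size >= BLOB_THRESHOLD_SIZE) {
    return BLOB_SLOT; // blobs get a distinguished tag
  } else {
    return (int64_t)size; // short records: the type is the length itself
  }
}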
@@ -280,11 +290,14 @@ compensated_function recordid Talloc(int xid, unsigned long size) {
pthread_mutex_lock(&talloc_mutex);
Page * p;
availablePage * ap = allocationPolicyFindPage(allocPolicy, xid, stasis_record_type_to_size(type));
availablePage * ap =
allocationPolicyFindPage(allocPolicy, xid,
stasis_record_type_to_size(type));
if(!ap) {
reserveNewRegion(xid);
ap = allocationPolicyFindPage(allocPolicy, xid, stasis_record_type_to_size(type));
ap = allocationPolicyFindPage(allocPolicy, xid,
stasis_record_type_to_size(type));
}
lastFreepage = ap->pageid;
@@ -300,17 +313,21 @@ compensated_function recordid Talloc(int xid, unsigned long size) {
unlock(p->rwlatch);
if(!ap->lockCount) {
allocationPolicyUpdateFreespaceUnlockedPage(allocPolicy, ap, newFreespace);
allocationPolicyUpdateFreespaceUnlockedPage(allocPolicy, ap,
newFreespace);
} else {
allocationPolicyUpdateFreespaceLockedPage(allocPolicy, xid, ap, newFreespace);
allocationPolicyUpdateFreespaceLockedPage(allocPolicy, xid, ap,
newFreespace);
}
releasePage(p);
ap = allocationPolicyFindPage(allocPolicy, xid, stasis_record_type_to_size(type));
ap = allocationPolicyFindPage(allocPolicy, xid,
stasis_record_type_to_size(type));
if(!ap) {
reserveNewRegion(xid);
ap = allocationPolicyFindPage(allocPolicy, xid, stasis_record_type_to_size(type));
ap = allocationPolicyFindPage(allocPolicy, xid,
stasis_record_type_to_size(type));
}
lastFreepage = ap->pageid;
@@ -329,7 +346,7 @@ compensated_function recordid Talloc(int xid, unsigned long size) {
allocationPolicyUpdateFreespaceLockedPage(allocPolicy, xid, ap, newFreespace);
unlock(p->rwlatch);
alloc_arg a = { rid.slot, rid.size };
alloc_arg a = { rid.slot, type };
Tupdate(xid, rid.page, &a, sizeof(a), OPERATION_ALLOC);
@@ -378,7 +395,7 @@ compensated_function recordid TallocFromPage(int xid, pageid_t page, unsigned lo
allocationPolicyAllocedFromPage(allocPolicy, xid, page);
unlock(p->rwlatch);
alloc_arg a = { rid.slot, rid.size };
alloc_arg a = { rid.slot, type };
Tupdate(xid, rid.page, &a, sizeof(a), OPERATION_ALLOC);
@@ -399,14 +416,11 @@ compensated_function recordid TallocFromPage(int xid, pageid_t page, unsigned lo
}
compensated_function void Tdealloc(int xid, recordid rid) {
// @todo this needs to garbage collect empty storage regions.
Page * p;
pthread_mutex_lock(&talloc_mutex);
try {
p = loadPage(xid, rid.page);
} end;
Page * p = loadPage(xid, rid.page);
readlock(p->rwlatch,0);
@@ -414,19 +428,46 @@ compensated_function void Tdealloc(int xid, recordid rid) {
allocationPolicyLockPage(allocPolicy, xid, newrid.page);
int64_t size = stasis_record_length_read(xid,p,rid);
int64_t type = stasis_record_type_read(xid,p,rid);
byte * preimage = malloc(sizeof(alloc_arg)+rid.size);
if(type == NORMAL_SLOT) { type = size; }
byte * preimage = malloc(sizeof(alloc_arg)+size);
((alloc_arg*)preimage)->slot = rid.slot;
((alloc_arg*)preimage)->size = size;
((alloc_arg*)preimage)->type = type;
begin_action(releasePage, p) {
stasis_record_read(xid, p, rid, preimage+sizeof(alloc_arg));
unlock(p->rwlatch);
// stasis_record_read() wants rid to have its raw size to prevent
// code that doesn't know about record types from introducing memory
// bugs.
rid.size = size;
stasis_record_read(xid, p, rid, preimage+sizeof(alloc_arg));
// restore rid to valid state.
rid.size = type;
// Ok to release latch; page is still pinned (so no WAL problems).
// allocationPolicy protects us from running out of space due to concurrent
// xacts.
// Also, there can be no reordering of allocations / deallocations,
// since we're holding talloc_mutex. However, we might reorder a Tset()
// and a Tdealloc() or Talloc() on the same page. If this happens,
// it's an unsafe race in the application, and not technically our problem.
// @todo Tupdate forces allocation to release a latch, leading to potentially nasty application bugs. Perhaps this is the wrong API!
// @todo application-level allocation races can lead to unrecoverable logs.
unlock(p->rwlatch);
Tupdate(xid, rid.page, preimage,
sizeof(alloc_arg)+size, OPERATION_DEALLOC);
releasePage(p);
if(type==BLOB_SLOT) {
deallocBlob(xid,rid);
}
/** @todo race in Tdealloc; do we care, or is this something that the log manager should cope with? */
Tupdate(xid, rid.page, preimage, sizeof(alloc_arg)+rid.size, OPERATION_DEALLOC);
} compensate;
pthread_mutex_unlock(&talloc_mutex);
free(preimage);
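To restate the layout Tdealloc logs above: the OPERATION_DEALLOC payload is an alloc_arg header followed by the record's raw bytes, which op_alloc re-applies as a preimage if the dealloc is rolled back. A sketch under that reading (hypothetical helper; the commit builds the buffer inline):

// Hypothetical helper: builds [ alloc_arg { slot, type } ][ size preimage bytes ].
static byte * build_dealloc_arg(int xid, Page * p, recordid rid,
                                int64_t size, int64_t type) {
  byte * preimage = malloc(sizeof(alloc_arg) + size);
  ((alloc_arg*)preimage)->slot = rid.slot;
  ((alloc_arg*)preimage)->type = type; // caller maps NORMAL_SLOT to size first
  rid.size = size; // stasis_record_read() wants the record's raw size
  stasis_record_read(xid, p, rid, preimage + sizeof(alloc_arg));
  return preimage; // pass to Tupdate(..., sizeof(alloc_arg) + size,
                   //                 OPERATION_DEALLOC), then free it
}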
@@ -473,7 +514,10 @@ static int op_initialize_page(const LogEntry* e, Page* p) {
stasis_slotted_initialize_page(p);
break;
case FIXED_PAGE:
stasis_fixed_initialize_page(p, arg->size, stasis_fixed_records_per_page(arg->size));
stasis_fixed_initialize_page(p, arg->type,
stasis_fixed_records_per_page(
stasis_record_type_to_size(arg->type)));
break;
default:
abort();

View file

@@ -68,6 +68,7 @@ compensated_function recordid preAllocBlobFromPage(int xid, long page, long blob
*/
void allocBlob(int xid, recordid rid);
void deallocBlob(int xid, recordid rid);
page_impl blobImpl();

View file

@@ -63,8 +63,11 @@ static void arraySet(int * a, int mul) {
}
}
static int arryCmp(int * a, int * b) {
return memcmp(a,b,ARRAY_SIZE*sizeof(int));
}
/**
/**
@test
Simple test: Insert some stuff. Commit. Call Tdeinit(). Call
Tinit() (Which initiates recovery), and see if the stuff we
@@ -122,67 +125,20 @@ START_TEST (recoverBlob__idempotent) {
}
END_TEST
/**
/*
@test
Simple test: Alloc a record, commit. Call Tincrement on it, and
Simple test: Alloc a blob, commit. Call Tincrement on it, and
remember its value and commit. Then, call Tdeinit() and Tinit()
(Which initiates recovery), and see if the value changes.
@todo: Until we have a non-idempotent operation on blobs, this test can't be written.
*/
/* START_TEST (recoverBlob__exactlyOnce) {
int xid;
int j;
int k;
recordid rid;
/ * if(1) {
return;
} * /
fail_unless(0, "Need to write this test...");
Tinit();
xid = Tbegin();
rid = Talloc(xid, sizeof(int));
Tincrement(xid, rid);
Tread(xid, rid, &j);
Tcommit(xid);
xid = Tbegin();
Tread(xid, rid, &k);
fail_unless(j == k, "Get/Set broken?");
Tcommit(xid);
Tdeinit();
Tinit(); / * Runs recovery.. * /
k = 12312;
xid = Tbegin();
Tread(xid, rid, &k);
fail_unless(j == k, "Recovery messed something up!");
Tcommit(xid);
Tdeinit();
}
END_TEST
*/
/**
/**
@test
Makes sure that aborted idempotent operations are correctly undone.
*/
@@ -260,136 +216,31 @@ START_TEST (recoverBlob__idempotentAbort) {
END_TEST
/**
/**
@test Makes sure that aborted non-idempotent operations are
correctly undone. Currently, we don't support such operations on
blobs, so this test is not implemented.
@todo logical operations on blobs.
@todo need non-idempotent blob operation to implement this test.
*/
/* START_TEST (recoverBlob__exactlyOnceAbort) {
int xid;
int j;
int k;
recordid rid;
/ * if(1)
return ;
* /
fail_unless(0, "Need to write this test...");
Tinit();
xid = Tbegin();
rid = Talloc(xid, sizeof(int));
j = 1;
Tincrement(xid, rid);
Tread(xid, rid, &j);
Tcommit(xid);
xid = Tbegin();
Tincrement(xid, rid);
Tread(xid, rid, &k);
fail_unless(j == k-1, NULL);
Tabort(xid);
xid = Tbegin();
Tread(xid, rid, &k);
fail_unless(j == k, "didn't abort?");
Tcommit(xid);
Tdeinit();
Tinit();
xid = Tbegin();
Tread(xid, rid, &k);
fail_unless(j == k, "Recovery didn't abort correctly");
Tcommit(xid);
Tdeinit();
}
END_TEST
*/
/**
@test
Check the CLR mechanism with an aborted logical operation, and multipl Tinit()/Tdeinit() cycles.
@test Check the CLR mechanism with an aborted logical operation, and multiple Tinit()/Tdeinit() cycles.
@todo Devise a way of implementing this for blobs.
@todo need blob operation w/ logical undo to implement this.
*/
/*START_TEST(recoverBlob__clr) {
recordid rid;
int xid;
int j;
int k;
/ * if(1) return; * /
fail_unless(0, "Need to write this test...");
DEBUG("\n\nStart CLR test\n\n");
Tinit();
xid = Tbegin();
rid = Talloc(xid, sizeof(int));
Tread(xid, rid, &j);
Tincrement(xid, rid);
Tabort(xid);
xid = Tbegin();
Tread(xid, rid, &k);
Tcommit(xid);
fail_unless(j == k, NULL);
Tdeinit();
Tinit();
Tdeinit();
Tinit();
xid = Tbegin();
Tread(xid, rid, &k);
Tcommit(xid);
fail_unless(j == k, NULL);
Tdeinit();
Tinit();
xid = Tbegin();
Tread(xid, rid, &k);
Tcommit(xid);
fail_unless(j == k, NULL);
Tdeinit();
} END_TEST
*/
extern int numActiveXactions;
/**
/**
@test
Tests the undo phase of recovery by simulating a crash, and calling Tinit().
Tests the undo phase of recovery by simulating a crash, and calling Tinit().
@todo Really should check logical operations, if they are ever supported for blobs.
@todo logical operations, if they are ever supported for blobs.
*/
START_TEST(recoverBlob__crash) {
@@ -441,7 +292,8 @@ START_TEST(recoverBlob__crash) {
arraySet(k, 9);
fail_unless(!memcmp(j,k,ARRAY_SIZE * sizeof(int)), "Recovery didn't roll back in-progress xact!");
fail_unless(!memcmp(j,k,ARRAY_SIZE * sizeof(int)),
"Recovery didn't roll back in-progress xact!");
Tdeinit();
@@ -452,94 +304,105 @@ START_TEST(recoverBlob__crash) {
assert(!memcmp(j,k,ARRAY_SIZE * sizeof(int)));
fail_unless(!memcmp(j,k,ARRAY_SIZE * sizeof(int)), "Recovery failed on second re-open.");
fail_unless(!memcmp(j,k,ARRAY_SIZE * sizeof(int)),
"Recovery failed on second re-open.");
Tdeinit();
} END_TEST
/**
@test Tests blob allocation and deallocation, and recovery
*/
START_TEST(recoverBlob__allocation) {
Tinit();
int xid = Tbegin();
int arry1[ARRAY_SIZE];
int arry2[ARRAY_SIZE];
int arry3[ARRAY_SIZE];
int arry4[ARRAY_SIZE];
int scratch[ARRAY_SIZE];
arraySet(arry1, 1);
arraySet(arry2, 2);
arraySet(arry3, 3);
arraySet(arry4, 4);
recordid rid1, rid2, rid3, rid4;
// Abort w/ allocation (no set)
rid1 = Talloc(xid, ARRAY_SIZE * sizeof(int));
assert(TrecordType(xid,rid1)==BLOB_SLOT);
Tabort(xid);
xid = Tbegin();
assert(TrecordType(xid,rid1)==INVALID_SLOT);
// Abort w/ allocation (set)
rid2 = Talloc(xid, ARRAY_SIZE * sizeof(int));
assert((!memcmp(&rid1,&rid2,sizeof(rid1)))||
TrecordType(xid,rid1)==INVALID_SLOT);
assert(TrecordType(xid,rid2)==BLOB_SLOT);
Tset(xid,rid1,arry1);
Tabort(xid);
xid = Tbegin();
assert(TrecordType(xid,rid1)==INVALID_SLOT);
assert(TrecordType(xid,rid2)==INVALID_SLOT);
// Abort w/ committed alloc (no set)
rid2 = Talloc(xid, ARRAY_SIZE * sizeof(int));
Tset(xid, rid2, arry2);
Tcommit(xid);
// Abort alloc of rid A + dealloc, alloc + overwrite rid B
xid = Tbegin();
rid3 = Talloc(xid, ARRAY_SIZE * sizeof(int));
Tread(xid, rid2, scratch); assert(!arryCmp(arry2,scratch));
Tset(xid, rid3, arry3);
Tread(xid, rid2, scratch); assert(!arryCmp(arry2,scratch));
Tread(xid, rid3, scratch); assert(!arryCmp(arry3,scratch));
Tdealloc(xid,rid2);
rid4 = Talloc(xid, ARRAY_SIZE * sizeof(int));
Tset(xid, rid4, arry4);
Tabort(xid);
xid = Tbegin();
Tread(xid, rid2, scratch); assert(!arryCmp(arry2,scratch));
assert((!memcmp(&rid2,&rid4,sizeof(rid2))) ||
TrecordType(xid,rid4) == INVALID_SLOT);
Tcommit(xid);
Tdeinit();
// make sure downing + upping stasis doesn't change state.
Tinit();
xid = Tbegin();
Tread(xid, rid2, scratch); assert(!arryCmp(arry2,scratch));
assert((!memcmp(&rid2,&rid4,sizeof(rid2))) ||
TrecordType(xid,rid4) == INVALID_SLOT);
Tabort(xid);
Tdeinit();
Tinit();
xid = Tbegin();
Tread(xid, rid2, scratch); assert(!arryCmp(arry2,scratch));
assert((!memcmp(&rid2,&rid4,sizeof(rid2))) ||
TrecordType(xid,rid4) == INVALID_SLOT);
Tcommit(xid);
Tdeinit();
} END_TEST
/**
@test Tests recovery when more than one transaction is in progress
at the time of the crash. This test is interesting because blob
operations from multiple transactions could hit the same page.
@todo implement this sometime...
@todo implement this once transactions may write subsets of blob pages
*/
START_TEST (recoverBlob__multiple_xacts) {
int xid1, xid2, xid3, xid4;
recordid rid1, rid2, rid3, rid4;
int j1, j2, j3, j4, k;
Tinit();
j1 = 1;
j2 = 2;
j3 = 4;
j4 = 3;
xid1 = Tbegin();
rid1 = Talloc(xid1, sizeof(int));
xid2 = Tbegin();
xid3 = Tbegin();
Tset(xid1, rid1, &j1);
rid2 = Talloc(xid2, sizeof(int));
rid3 = Talloc(xid3, sizeof(int));
Tread(xid3, rid3, &k);
Tset(xid3, rid3, &j3);
Tcommit(xid3);
xid3 = Tbegin();
Tincrement(xid3, rid3);
Tset(xid2, rid2, &j2);
Tcommit(xid1);
xid4 = Tbegin();
Tcommit(xid2);
rid4 = Talloc(xid4, sizeof(int));
Tset(xid4, rid4, &j4);
Tincrement(xid4, rid4);
Tcommit(xid4);
xid1 = Tbegin();
k = 100000;
Tset(xid1, rid1,&k);
xid2 = Tbegin();
Tdecrement(xid2, rid2);
Tdecrement(xid2, rid2);
Tdecrement(xid2, rid2);
Tdecrement(xid2, rid2);
Tdecrement(xid2, rid2);
Tincrement(xid1, rid1);
Tset(xid1, rid1,&k);
TuncleanShutdown();
Tinit();
Tdeinit();
Tinit();
xid1 = Tbegin();
xid2 = Tbegin();
xid3 = Tbegin();
xid4 = Tbegin();
Tread(xid1, rid1, &j1);
Tread(xid2, rid2, &j2);
Tread(xid3, rid3, &j3);
Tread(xid4, rid4, &j4);
fail_unless(j1 == 1, NULL);
fail_unless(j2 == 2, NULL);
fail_unless(j3 == 4, NULL);
fail_unless(j4 == 4, NULL);
Tdeinit();
} END_TEST
/*START_TEST (recoverBlob__multiple_xacts) {
} END_TEST*/
/**
Add suite declarations here
@@ -549,25 +412,26 @@ Suite * check_suite(void) {
/* Begin a new test */
TCase *tc = tcase_create("recovery");
tcase_set_timeout(tc, 0); // disable timeouts
if(LOG_TO_MEMORY != loggerType) {
/* void * foobar; */ /* used to suppress warnings. */
/* Sub tests are added, one per line, here */
tcase_add_test(tc, recoverBlob__idempotent);
/* tcase_add_test(tc, recoverBlob__exactlyOnce);
foobar = (void*)&recoverBlob__exactlyOnce; */
tcase_set_timeout(tc, 0); // disable timeouts
if(LOG_TO_MEMORY != loggerType) {
/* void * foobar; */ /* used to suppress warnings. */
/* Sub tests are added, one per line, here */
tcase_add_test(tc, recoverBlob__idempotent);
tcase_add_test(tc, recoverBlob__idempotentAbort);
tcase_add_test(tc, recoverBlob__idempotentAbort);
/* tcase_add_test(tc, recoverBlob__exactlyOnceAbort);
foobar = (void*)&recoverBlob__exactlyOnceAbort;
tcase_add_test(tc, recoverBlob__allocation);
tcase_add_test(tc, recoverBlob__clr);
foobar = (void*)&recoverBlob__clr; */
tcase_add_test(tc, recoverBlob__crash);
tcase_add_test(tc, recoverBlob__crash);
tcase_add_test(tc, recoverBlob__multiple_xacts);
/*foobar = (void*)&recoverBlob__multiple_xacts; */
}
// The following tests are analogous to those in check_recovery,
// but would test functionality that hasn't been implemented for blobs.
//tcase_add_test(tc, recoverBlob__exactlyOnce);
//tcase_add_test(tc, recoverBlob__exactlyOnceAbort);
//tcase_add_test(tc, recoverBlob__clr);
//tcase_add_test(tc, recoverBlob__multiple_xacts);
}
/* --------------------------------------------- */
tcase_add_checked_fixture(tc, setup, teardown);
suite_add_tcase(s, tc);