use RegionAllocator for InternalNodes and DataPages

git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@685 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
This commit is contained in:
sears 2010-03-09 23:17:03 +00:00
parent 8d7199676c
commit 5f0d5c4f97
7 changed files with 40 additions and 203 deletions

View file

@ -23,8 +23,6 @@
// LOGTREE implementation
/////////////////////////////////////////////////////////////////
// Initial contents of a freshly created RegionAllocConf_t, in field order:
//   regionList  = {0,0,-1}  (size == -1 is what alloc_region() tests to decide
//                            that the region list has not been allocated yet)
//   regionCount = 0
//   nextPage    = -1
//   endOfRegion = -1        (nextPage == endOfRegion makes the first
//                            alloc_region() call allocate a region)
//   regionSize  = 1000
const diskTreeComponent::internalNodes::RegionAllocConf_t diskTreeComponent::internalNodes::REGION_ALLOC_STATIC_INITIALIZER = { {0,0,-1}, 0, -1, -1, 1000 };
//LSM_ROOT_PAGE
const int64_t diskTreeComponent::internalNodes::DEPTH = 0; // In the root page, this is the slot number where the depth of the tree is stored.
@ -58,122 +56,15 @@ void diskTreeComponent::internalNodes::init_stasis() {
}
// Forwards to Tdeinit().
void diskTreeComponent::internalNodes::deinit_stasis()
{
  Tdeinit();
}
void diskTreeComponent::internalNodes::free_region_rid(int xid, recordid tree,
diskTreeComponent_page_deallocator_t dealloc, void *allocator_state) {
dealloc(xid,allocator_state);
// XXX fishy shouldn't caller do this?
Tdealloc(xid, *(recordid*)allocator_state);
}
void diskTreeComponent::internalNodes::dealloc_region_rid(int xid, recordid rid) {
RegionAllocConf_t a;
Tread(xid,rid,&a);
DEBUG("{%lld <- dealloc region arraylist}\n", a.regionList.page);
for(int i = 0; i < a.regionCount; i++) {
a.regionList.slot = i;
pageid_t pid;
Tread(xid,a.regionList,&pid);
TregionDealloc(xid,pid);
}
a.regionList.slot = 0;
TarrayListDealloc(xid, a.regionList);
}
void diskTreeComponent::internalNodes::force_region_rid(int xid, recordid rid) {
RegionAllocConf_t a;
Tread(xid,rid,&a);
for(int i = 0; i < a.regionCount; i++)
{
a.regionList.slot = i;
pageid_t pid;
Tread(xid,a.regionList,&pid);
stasis_dirty_page_table_flush_range(
(stasis_dirty_page_table_t*)stasis_runtime_dirty_page_table(),
pid, pid+a.regionSize);
stasis_buffer_manager_t *bm =
(stasis_buffer_manager_t*)stasis_runtime_buffer_manager();
bm->forcePageRange(bm, pid, pid+a.regionSize);
}
}
// Hands out the next page id from the allocator state in `conf` (which is
// interpreted as a RegionAllocConf_t*), updating that in-memory state.
//
// When the current region is used up (nextPage == endOfRegion) a new region
// of regionSize pages is obtained from TregionAlloc, and its first page id is
// appended to regionList.  The region list itself is created on first use,
// which is detected by regionList.size == -1.
//
// Returns the allocated page id.  The caller is responsible for persisting
// the modified state; this function only writes the region list entry.
pageid_t diskTreeComponent::internalNodes::alloc_region(int xid, void *conf) {
  RegionAllocConf_t *state = (RegionAllocConf_t*)conf;

  const bool region_exhausted = (state->nextPage == state->endOfRegion);
  if(region_exhausted) {
    const bool list_missing = (state->regionList.size == -1);
    if(list_missing) {
      //DEBUG("nextPage: %lld\n", state->nextPage);
      state->regionList = TarrayListAlloc(xid, 1, 4, sizeof(pageid_t));
      DEBUG("regionList.page: %lld\n", state->regionList.page);
      DEBUG("regionList.slot: %d\n", state->regionList.slot);
      DEBUG("regionList.size: %lld\n", state->regionList.size);
      state->regionCount = 0;
    }
    DEBUG("{%lld <- alloc region arraylist}\n", state->regionList.page);

    // Grow the list by one entry and point regionList at the new slot.
    TarrayListExtend(xid, state->regionList, 1);
    state->regionList.slot = state->regionCount++;
    DEBUG("region lst slot %d\n", state->regionList.slot);
    DEBUG("region count %lld\n", state->regionCount);

    // Grab a fresh region and record where it starts.
    state->nextPage = TregionAlloc(xid, state->regionSize, 12);
    DEBUG("next page %lld\n", state->nextPage);
    state->endOfRegion = state->nextPage + state->regionSize;
    Tset(xid, state->regionList, &state->nextPage);
    DEBUG("next page %lld\n", state->nextPage);
  }

  DEBUG("%lld ?= %lld\n", state->nextPage, state->endOfRegion);
  const pageid_t allocated = state->nextPage;
  state->nextPage = allocated + 1;
  DEBUG("tree %lld-%lld\n", (long long)allocated, state->endOfRegion);
  return allocated;
}
// Allocates one page using the allocator header stored in the record that
// `ridp` points to (interpreted as a recordid*).
//
// The header is read into a local RegionAllocConf_t, alloc_region() is run
// against that copy, and the updated copy is written back with Tset.
// Returns the page id produced by alloc_region().
pageid_t diskTreeComponent::internalNodes::alloc_region_rid(int xid, void * ridp) {
  const recordid header = *(recordid*)ridp;

  RegionAllocConf_t state;
  Tread(xid, header, &state);

  const pageid_t page = alloc_region(xid, &state);

  // XXX get rid of Tset by storing next page in memory, and losing it
  // on crash.
  Tset(xid, header, &state);

  return page;
}
// Returns the first page id of every region tracked by the allocator header
// stored in the record that `ridp` points to (interpreted as a recordid*).
//
// Out-parameters:
//   *region_len   receives the header's regionSize.
//   *region_count receives the header's regionCount, which is also the number
//                 of entries in the returned array.
//
// The returned array is obtained from malloc(); the result of malloc() is not
// checked here.
pageid_t * diskTreeComponent::internalNodes::list_region_rid(int xid, void *ridp, pageid_t * region_len, pageid_t * region_count) {
  RegionAllocConf_t state;
  Tread(xid, *(recordid*)ridp, &state);

  *region_len = state.regionSize;
  *region_count = state.regionCount;
  const pageid_t total = state.regionCount;

  pageid_t *pages = (pageid_t*) malloc(sizeof(pageid_t) * total);

  recordid cursor = state.regionList;
  pageid_t idx = 0;
  while(idx < total) {
    cursor.slot = idx;
    Tread(xid, cursor, &pages[idx]);
    ++idx;
  }
  return pages;
}
recordid diskTreeComponent::internalNodes::create(int xid) {
internal_node_alloc = Talloc(xid,sizeof(RegionAllocConf_t));
Tset(xid,internal_node_alloc, &REGION_ALLOC_STATIC_INITIALIZER);
pageid_t root = alloc_region_rid(xid, &internal_node_alloc);
pageid_t root = internal_node_alloc->alloc_extent(xid, 1);
DEBUG("Root = %lld\n", root);
recordid ret = { root, 0, 0 };
Page *p = loadPage(xid, ret.page);
writelock(p->rwlatch,0);
lastLeaf = -1;
//initialize root node
stasis_page_slotted_initialize_page(p);
recordid tmp = stasis_record_alloc_begin(xid, p, root_rec_size);
@ -229,11 +120,6 @@ void diskTreeComponent::internalNodes::initializeNodePage(int xid, Page *p) {
recordid diskTreeComponent::internalNodes::appendPage(int xid,
const byte *key, size_t keySize, pageid_t val_page) {
recordid tree = root_rec;
lsm_page_allocator_t allocator = alloc_region;
diskTreeComponent::internalNodes::RegionAllocConf_t allocator_state;
//insert the record key and id of the first page of the datapage to the diskTreeComponent
Tread(xid,get_tree_state(), &allocator_state);
Page *p = loadPage(xid, tree.page);
writelock(p->rwlatch, 0);
@ -275,13 +161,12 @@ recordid diskTreeComponent::internalNodes::appendPage(int xid,
assert(tree.page == p->id);
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
lastLeaf == tree.page ? -1 : lastLeaf,
allocator, &allocator_state);
lastLeaf == tree.page ? -1 : lastLeaf);
if(ret.size == INVALID_SLOT) {
DEBUG("Need to split root; depth = %d\n", depth);
pageid_t child = allocator(xid, &allocator_state);
pageid_t child = internal_node_alloc->alloc_extent(xid, 1);
Page *lc = loadPage(xid, child);
writelock(lc->rwlatch,0);
@ -352,8 +237,7 @@ recordid diskTreeComponent::internalNodes::appendPage(int xid,
assert(tree.page == p->id);
ret = appendInternalNode(xid, p, depth, key, keySize, val_page,
lastLeaf == tree.page ? -1 : lastLeaf,
allocator, &allocator_state);
lastLeaf == tree.page ? -1 : lastLeaf);
assert(ret.size != INVALID_SLOT);
@ -386,9 +270,6 @@ recordid diskTreeComponent::internalNodes::appendPage(int xid,
unlock(p->rwlatch);
releasePage(p);
// XXX don't call tset on each page append!
Tset(xid,get_tree_state(),&allocator_state);
return ret;
}
@ -409,9 +290,7 @@ recordid diskTreeComponent::internalNodes::appendPage(int xid,
recordid diskTreeComponent::internalNodes::appendInternalNode(int xid, Page *p,
int64_t depth,
const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
diskTreeComponent_page_allocator_t allocator,
void *allocator_state) {
pageid_t val_page, pageid_t lastLeaf) {
assert(p->pageType == SLOTTED_PAGE);
@ -439,7 +318,7 @@ recordid diskTreeComponent::internalNodes::appendInternalNode(int xid, Page *p,
Page *child_page = loadPage(xid, child_id);
writelock(child_page->rwlatch,0);
ret = appendInternalNode(xid, child_page, depth-1, key, key_len,
val_page, lastLeaf, allocator, allocator_state);
val_page, lastLeaf);
unlock(child_page->rwlatch);
releasePage(child_page);
@ -456,7 +335,7 @@ recordid diskTreeComponent::internalNodes::appendInternalNode(int xid, Page *p,
if(ret.size != INVALID_SLOT) {
stasis_record_alloc_done(xid, p, ret);
ret = buildPathToLeaf(xid, ret, p, depth, key, key_len, val_page,
lastLeaf, allocator, allocator_state);
lastLeaf);
DEBUG("split tree rooted at %lld, wrote value to {%d %d %lld}\n",
p->id, ret.page, ret.slot, ret.size);
@ -473,16 +352,14 @@ recordid diskTreeComponent::internalNodes::appendInternalNode(int xid, Page *p,
recordid diskTreeComponent::internalNodes::buildPathToLeaf(int xid, recordid root, Page *root_p,
int64_t depth, const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
diskTreeComponent_page_allocator_t allocator,
void *allocator_state) {
pageid_t val_page, pageid_t lastLeaf) {
// root is the recordid on the root page that should point to the
// new subtree.
assert(depth);
DEBUG("buildPathToLeaf(depth=%lld) (lastleaf=%lld) called\n",depth, lastLeaf);
pageid_t child = allocator(xid,allocator_state);
pageid_t child = internal_node_alloc->alloc_extent(xid, 1);
DEBUG("new child = %lld internal? %lld\n", child, depth-1);
Page *child_p = loadPage(xid, child);
@ -498,7 +375,7 @@ recordid diskTreeComponent::internalNodes::buildPathToLeaf(int xid, recordid roo
stasis_record_alloc_done(xid, child_p, child_rec);
ret = buildPathToLeaf(xid, child_rec, child_p, depth-1, key, key_len,
val_page,lastLeaf, allocator, allocator_state);
val_page,lastLeaf);
unlock(child_p->rwlatch);
releasePage(child_p);

View file

@ -31,24 +31,18 @@ public:
pageid_t ptr;
};
// Bookkeeping record for a region-based page allocator.  Field order must not
// change: the static initializer for this type is written positionally.
struct RegionAllocConf_t
{
  // Array list with one pageid_t per region, holding that region's first
  // page id; size == -1 means the list has not been allocated yet.
  recordid regionList;
  // Number of regions recorded in regionList.
  pageid_t regionCount;
  // Next page id to hand out from the current region.
  pageid_t nextPage;
  // First page id past the current region (its start plus regionSize).
  pageid_t endOfRegion;
  // Number of pages requested for each region.
  pageid_t regionSize;
};
// Constructs a new tree component within transaction `xid`.
// Starts with no last leaf (lastLeaf == -1), creates one RegionAllocator for
// internal nodes (region size 1000) and one for data pages (region size
// 10000), then calls create(xid).  The recordid returned by create() is not
// used here.
internalNodes(int xid)
: lastLeaf(-1),
internal_node_alloc(new RegionAllocator(xid, 1000)),
datapage_alloc(new RegionAllocator(xid, 10000))
{ create(xid); } // XXX shouldn't hardcode region size.
typedef pageid_t(*diskTreeComponent_page_allocator_t)(int, void *);
typedef void(*diskTreeComponent_page_deallocator_t)(int, void *);
internalNodes(int xid): datapage_alloc(new RegionAllocator(xid, 10000)) {create(xid);} // XXX shouldn't hardcode region size.
internalNodes(int xid, recordid root, recordid internal_node_state, recordid datapage_state)
: internal_node_alloc(internal_node_state),
: lastLeaf(-1),
root_rec(root),
datapage_alloc(new RegionAllocator(xid, datapage_state)) { lastLeaf = -1; }
internal_node_alloc(new RegionAllocator(xid, internal_node_state)),
datapage_alloc(new RegionAllocator(xid, datapage_state))
{ }
private:
recordid create(int xid);
public:
@ -57,26 +51,20 @@ public:
static void init_stasis();
static void deinit_stasis();
private:
static pageid_t alloc_region(int xid, void *conf);
static void writeNodeRecord(int xid, Page *p, recordid &rid,
const byte *key, size_t keylen, pageid_t ptr);
//reads the given record and returns the page id stored in it
static pageid_t lookupLeafPageFromRid(int xid, recordid rid);
static recordid appendInternalNode(int xid, Page *p,
recordid appendInternalNode(int xid, Page *p,
int64_t depth,
const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
diskTreeComponent_page_allocator_t allocator,
void *allocator_state);
pageid_t val_page, pageid_t lastLeaf);
static recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
recordid buildPathToLeaf(int xid, recordid root, Page *root_p,
int64_t depth, const byte *key, size_t key_len,
pageid_t val_page, pageid_t lastLeaf,
diskTreeComponent_page_allocator_t allocator,
void *allocator_state);
pageid_t val_page, pageid_t lastLeaf);
/**
Initialize a page for use as an internal node of the tree.
@ -95,28 +83,17 @@ public:
size_t keySize);
public:
static pageid_t alloc_region_rid(int xid, void * ridp);
static void force_region_rid(int xid, recordid rid);
static pageid_t*list_region_rid(int xid, void * ridp,
pageid_t * region_len, pageid_t * region_count);
static void dealloc_region_rid(int xid, recordid rid);
static void free_region_rid(int xid, recordid tree,
diskTreeComponent_page_deallocator_t dealloc,
void *allocator_state);
//returns the id of the data page that could contain the given key
pageid_t findPage(int xid, const byte *key, size_t keySize);
//appends a leaf page, val_page is the id of the leaf page
//rmLeafID --> rightmost leaf id
recordid appendPage(int xid,// recordid tree,
const byte *key,size_t keySize, pageid_t val_page);
recordid appendPage(int xid, const byte *key,size_t keySize, pageid_t val_page);
inline RegionAllocator* get_datapage_alloc() { return datapage_alloc; }
recordid &get_tree_state(){return internal_node_alloc;}
recordid &get_root_rec(){return root_rec;}
inline RegionAllocator* get_internal_node_alloc() { return internal_node_alloc; }
const recordid &get_root_rec(){return root_rec;}
private:
const static int64_t DEPTH;
@ -125,16 +102,12 @@ public:
const static size_t root_rec_size;
const static int64_t PREV_LEAF;
const static int64_t NEXT_LEAF;
public:
const static RegionAllocConf_t REGION_ALLOC_STATIC_INITIALIZER;
private:
pageid_t lastLeaf;
void print_tree(int xid, pageid_t pid, int64_t depth);
recordid internal_node_alloc;
recordid root_rec;
RegionAllocator* internal_node_alloc;
RegionAllocator* datapage_alloc;
public:
@ -145,7 +118,6 @@ public:
int next();
void close();
inline size_t key (byte **key) {
*key = (byte*)(t+1);
return current.size - sizeof(indexnode_rec);

View file

@ -526,17 +526,14 @@ int op_stat_space_usage(pthread_data* data) {
}
pageid_t * datapage_c2_regions = data->ltable->get_tree_c2()->get_datapage_alloc()->list_regions(xid, &datapage_c2_region_length, &datapage_c2_region_count);
recordid tree_c1_region_header = data->ltable->get_tree_c1()->get_tree_state();
pageid_t * tree_c1_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c1_region_header, &tree_c1_region_length, &tree_c1_region_count);
pageid_t * tree_c1_regions = data->ltable->get_tree_c1()->get_internal_node_alloc()->list_regions(xid, &tree_c1_region_length, &tree_c1_region_count);
pageid_t * tree_c1_mergeable_regions = NULL;
if(data->ltable->get_tree_c1_mergeable()) {
recordid tree_c1_mergeable_region_header = data->ltable->get_tree_c1_mergeable()->get_tree_state();
tree_c1_mergeable_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c1_mergeable_region_header, &tree_c1_mergeable_region_length, &tree_c1_mergeable_region_count);
tree_c1_mergeable_regions = data->ltable->get_tree_c1_mergeable()->get_internal_node_alloc()->list_regions(xid, &tree_c1_mergeable_region_length, &tree_c1_mergeable_region_count);
}
recordid tree_c2_region_header = data->ltable->get_tree_c2()->get_tree_state();
pageid_t * tree_c2_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c2_region_header, &tree_c2_region_length, &tree_c2_region_count);
pageid_t * tree_c2_regions = data->ltable->get_tree_c2()->get_internal_node_alloc()->list_regions(xid, &tree_c2_region_length, &tree_c2_region_count);
free(datapage_c1_regions);
free(datapage_c1_mergeable_regions);
@ -662,16 +659,13 @@ int op_dbg_blockmap(pthread_data* data) {
pageid_t tree_c1_region_length, tree_c1_mergeable_region_length = 0, tree_c2_region_length;
pageid_t tree_c1_region_count, tree_c1_mergeable_region_count = 0, tree_c2_region_count;
recordid tree_c1_region_header = data->ltable->get_tree_c1()->get_tree_state();
recordid tree_c2_region_header = data->ltable->get_tree_c2()->get_tree_state();
pageid_t * tree_c1_regions = data->ltable->get_tree_c1()->get_internal_node_alloc()->list_regions(xid, &tree_c1_region_length, &tree_c1_region_count);
pageid_t * tree_c1_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c1_region_header, &tree_c1_region_length, &tree_c1_region_count);
pageid_t * tree_c1_mergeable_regions = NULL;
if(data->ltable->get_tree_c1_mergeable()) {
recordid tree_c1_mergeable_region_header = data->ltable->get_tree_c1_mergeable()->get_tree_state();
tree_c1_mergeable_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c1_mergeable_region_header, &tree_c1_mergeable_region_length, &tree_c1_mergeable_region_count);
tree_c1_mergeable_regions = data->ltable->get_tree_c1_mergeable()->get_internal_node_alloc()->list_regions(xid, &tree_c1_mergeable_region_length, &tree_c1_mergeable_region_count);
}
pageid_t * tree_c2_regions = diskTreeComponent::internalNodes::list_region_rid(xid, &tree_c2_region_header, &tree_c2_region_length, &tree_c2_region_count);
pageid_t * tree_c2_regions = data->ltable->get_tree_c2()->get_internal_node_alloc()->list_regions(xid, &tree_c2_region_length, &tree_c2_region_count);
unlock(data->ltable->header_lock);
Tcommit(xid);

View file

@ -90,10 +90,10 @@ void logtable::update_persistent_header(int xid) {
tbl_header.c2_root = tree_c2->get_root_rec();
tbl_header.c2_dp_state = tree_c2->get_datapage_alloc()->header_rid();
tbl_header.c2_state = tree_c2->get_tree_state();
tbl_header.c2_state = tree_c2->get_internal_node_alloc()->header_rid();
tbl_header.c1_root = tree_c1->get_root_rec();
tbl_header.c1_dp_state = tree_c1->get_datapage_alloc()->header_rid();
tbl_header.c1_state = tree_c1->get_tree_state();
tbl_header.c1_state = tree_c1->get_internal_node_alloc()->header_rid();
Tset(xid, table_rec, &tbl_header);
}

View file

@ -107,8 +107,6 @@ public:
recordid c1_dp_state;
};
const static diskTreeComponent::internalNodes::RegionAllocConf_t DATAPAGE_REGION_ALLOC_STATIC_INITIALIZER;
logtable_mergedata * mergedata;
rwl * header_lock;

View file

@ -356,7 +356,7 @@ void* memMergeThread(void*arg)
// 5: force c1'
//force write the new region to disk
diskTreeComponent::internalNodes::force_region_rid(xid, c1_prime->get_tree_state());
c1_prime->get_internal_node_alloc()->force_regions(xid);
//force write the new datapages
c1_prime->get_datapage_alloc()->force_regions(xid);
@ -391,7 +391,7 @@ void* memMergeThread(void*arg)
}
// 12: delete old c1
diskTreeComponent::internalNodes::dealloc_region_rid(xid, ltable->get_tree_c1()->get_tree_state());
ltable->get_tree_c1()->get_internal_node_alloc()->dealloc_regions(xid);
ltable->get_tree_c1()->get_datapage_alloc()->dealloc_regions(xid);
delete ltable->get_tree_c1();
@ -510,18 +510,18 @@ void *diskMergeThread(void*arg)
delete itrB;
//5: force write the new region to disk
diskTreeComponent::internalNodes::force_region_rid(xid, c2_prime->get_tree_state());
c2_prime->get_internal_node_alloc()->force_regions(xid);
c2_prime->get_datapage_alloc()->force_regions(xid);
// (skip 6, 7, 8, 8.5, 9))
writelock(ltable->header_lock,0);
//12
diskTreeComponent::internalNodes::dealloc_region_rid(xid, ltable->get_tree_c2()->get_tree_state());
ltable->get_tree_c2()->get_internal_node_alloc()->dealloc_regions(xid);
ltable->get_tree_c2()->get_datapage_alloc()->dealloc_regions(xid);
delete ltable->get_tree_c2();
//11.5
diskTreeComponent::internalNodes::dealloc_region_rid(xid, ltable->get_tree_c1_mergeable()->get_tree_state());
ltable->get_tree_c1_mergeable()->get_internal_node_alloc()->dealloc_regions(xid);
ltable->get_tree_c1_mergeable()->get_datapage_alloc()->dealloc_regions(xid);
//11
delete ltable->get_tree_c1_mergeable();

View file

@ -50,10 +50,6 @@ void insertProbeIter(size_t NUM_ENTRIES)
RegionAllocator * alloc = new RegionAllocator(xid, 10000); // ~ 10 datapages per region.
recordid alloc_state = Talloc(xid,sizeof(diskTreeComponent::internalNodes::RegionAllocConf_t));
Tset(xid,alloc_state, &diskTreeComponent::internalNodes::REGION_ALLOC_STATIC_INITIALIZER);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
int pcount = 1000;