Initial check-in of Bloom filter logic for LSM-trees

git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@1233 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
This commit is contained in:
sears 2010-10-03 23:05:45 +00:00
parent 3619e86271
commit f9a1f411e1
3 changed files with 37 additions and 5 deletions

View file

@ -64,6 +64,9 @@ void diskTreeComponent::writes_done() {
int diskTreeComponent::insertTuple(int xid, datatuple *t)
{
if(bloom_filter) {
bloom_filter_insert(bloom_filter, (const char*)t->key(), t->keylen());
}
int ret = 0; // no error.
if(dp==0) {
dp = insertDataPage(xid, t);
@ -117,6 +120,12 @@ datatuple * diskTreeComponent::findTuple(int xid, datatuple::key_t key, size_t k
{
datatuple * tup=0;
if(bloom_filter) {
if(!bloom_filter_lookup(bloom_filter, (const char*)key, keySize)) {
return NULL;
}
}
//find the datapage
pageid_t pid = ltree->findPage(xid, (byte*)key, keySize);

View file

@ -11,17 +11,34 @@
#include "datapage.h"
#include "datatuple.h"
#include "mergeStats.h"
#include "bloomFilter.h"
#include <stasis/crc32.h>
extern "C" {
// First of the two hash callbacks that the diskTreeComponent constructor
// hands to bloom_filter_create(). Calls stasis_crc32(a, len, 0xcafebabe) and
// returns the result converted to uint64_t.
// NOTE(review): the third argument is presumably the initial CRC value, so
// the constant is what distinguishes this hash from its sibling -- confirm
// against stasis/crc32.h.
static uint64_t diskTreeComponent_hash_func_a(const char* a, int len) {
  const uint64_t digest = stasis_crc32(a, len, 0xcafebabe);
  return digest;
}
// Second of the two hash callbacks that the diskTreeComponent constructor
// hands to bloom_filter_create(). Calls stasis_crc32(a, len, 0xdeadbeef) and
// returns the result converted to uint64_t.
// NOTE(review): the third argument is presumably the initial CRC value, so
// the constant is what distinguishes this hash from its sibling -- confirm
// against stasis/crc32.h.
static uint64_t diskTreeComponent_hash_func_b(const char* a, int len) {
  const uint64_t digest = stasis_crc32(a, len, 0xdeadbeef);
  return digest;
}
}
class diskTreeComponent {
public:
class internalNodes;
class iterator;
diskTreeComponent(int xid, pageid_t internal_region_size, pageid_t datapage_region_size, pageid_t datapage_size,
mergeStats* stats) :
mergeStats* stats, uint64_t bloom_filter_size = 0) :
ltree(new diskTreeComponent::internalNodes(xid, internal_region_size, datapage_region_size, datapage_size)),
dp(0),
datapage_size(datapage_size),
stats(stats) {}
stats(stats),
bloom_filter(bloom_filter_size == 0 ? 0 :
bloom_filter_create(diskTreeComponent_hash_func_a, diskTreeComponent_hash_func_b, bloom_filter_size, 0.01)
) {
if(bloom_filter) bloom_filter_print_stats(bloom_filter);
}
diskTreeComponent(int xid, recordid root, recordid internal_node_state, recordid datapage_state,
@ -29,9 +46,11 @@ class diskTreeComponent {
ltree(new diskTreeComponent::internalNodes(xid, root, internal_node_state, datapage_state)),
dp(0),
datapage_size(-1),
stats(stats) {}
stats(stats),
bloom_filter(0) {}
~diskTreeComponent() {
if(bloom_filter) bloom_filter_destroy(bloom_filter);
delete dp;
delete ltree;
}
@ -175,6 +194,9 @@ class diskTreeComponent {
};
};
bloom_filter_t * bloom_filter;
class iterator
{
@ -207,6 +229,7 @@ class diskTreeComponent {
DataPage<datatuple> *curr_page; //current page
typedef DataPage<datatuple>::iterator DPITR_T;
DPITR_T *dp_itr;
};
};
#endif /* DISKTREECOMPONENT_H_ */

View file

@ -172,9 +172,9 @@ void* memMergeThread(void*arg)
// memTreeComponent<datatuple>::batchedRevalidatingIterator *itrB =
// new memTreeComponent<datatuple>::batchedRevalidatingIterator(ltable->get_tree_c0(), &ltable->tree_bytes, ltable->max_c0_size, &ltable->flushing, 100, &ltable->rb_mut);
#endif
const int64_t min_bloom_target = 1000000000;
//create a new tree
diskTreeComponent * c1_prime = new diskTreeComponent(xid, ltable->internal_region_size, ltable->datapage_region_size, ltable->datapage_size, stats);
diskTreeComponent * c1_prime = new diskTreeComponent(xid, ltable->internal_region_size, ltable->datapage_region_size, ltable->datapage_size, stats, (stats->target_size < min_bloom_target ? min_bloom_target : stats->target_size) / 1000);
ltable->set_tree_c1_prime(c1_prime);