stasis-bLSM/test/check_datapage.cpp
sears b820aaa3fb commit cmakelists file, move tests into subdirectory
git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@524 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
2010-01-26 00:50:01 +00:00

321 lines
8.2 KiB
C++

#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <logstore.h>
#include <datapage.cpp> // XXX
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#undef begin
#undef end
template class DataPage<datatuple>;
bool mycmp(const std::string & k1,const std::string & k2)
{
//for char* ending with \0
return strcmp(k1.c_str(),k2.c_str()) < 0;
//for int32_t
//printf("%d\t%d\n",(*((int32_t*)k1)) ,(*((int32_t*)k2)));
//return (*((int32_t*)k1)) <= (*((int32_t*)k2));
}
//must be given a sorted array
void removeduplicates(std::vector<std::string> &arr)
{
for(int i=arr.size()-1; i>0; i--)
{
if(! (mycmp(arr[i], arr[i-1]) || mycmp(arr[i-1], arr[i])))
arr.erase(arr.begin()+i);
}
}
void preprandstr(int count, std::vector<std::string> &arr, int avg_len=50, bool duplicates_allowed=false)
{
for ( int j=0; j<count; j++)
{
int str_len = (rand()%(avg_len*2)) + 3;
char *rc = (char*)malloc(str_len);
for(int i=0; i<str_len-1; i++)
rc[i] = rand()%10+48;
rc[str_len-1]='\0';
std::string str(rc);
//make sure there is no duplicate key
if(!duplicates_allowed)
{
bool dup = false;
for(int i=0; i<j; i++)
if(! (mycmp(arr[i], str) || mycmp(str, arr[i])))
{
dup=true;
break;
}
if(dup)
{
j--;
continue;
}
}
//printf("keylen-%d\t%d\t%s\n", str_len, str.length(),rc);
free(rc);
arr.push_back(str);
}
}
/**
* REGION ALLOCATION
**/
pageid_t alloc_region(int xid, void *conf)
{
RegionAllocConf_t* a = (RegionAllocConf_t*)conf;
if(a->nextPage == a->endOfRegion) {
if(a->regionList.size == -1) {
//DEBUG("nextPage: %lld\n", a->nextPage);
a->regionList = TarrayListAlloc(xid, 1, 4, sizeof(pageid_t));
DEBUG("regionList.page: %lld\n", a->regionList.page);
DEBUG("regionList.slot: %d\n", a->regionList.slot);
DEBUG("regionList.size: %lld\n", a->regionList.size);
a->regionCount = 0;
}
DEBUG("{%lld <- alloc region arraylist}\n", a->regionList.page);
TarrayListExtend(xid,a->regionList,1);
a->regionList.slot = a->regionCount;
DEBUG("region lst slot %d\n",a->regionList.slot);
a->regionCount++;
DEBUG("region count %lld\n",a->regionCount);
a->nextPage = TregionAlloc(xid, a->regionSize,12);
DEBUG("next page %lld\n",a->nextPage);
a->endOfRegion = a->nextPage + a->regionSize;
Tset(xid,a->regionList,&a->nextPage);
DEBUG("next page %lld\n",a->nextPage);
}
DEBUG("%lld ?= %lld\n", a->nextPage,a->endOfRegion);
pageid_t ret = a->nextPage;
// Ensure the page is in buffer cache without accessing disk (this
// sets it to clean and all zeros if the page is not in cache).
// Hopefully, future reads will get a cache hit, and avoid going to
// disk.
Page * p = loadUninitializedPage(xid, ret);
releasePage(p);
DEBUG("ret %lld\n",ret);
(a->nextPage)++;
return ret;
}
pageid_t alloc_region_rid(int xid, void * ridp) {
recordid rid = *(recordid*)ridp;
RegionAllocConf_t conf;
Tread(xid,rid,&conf);
pageid_t ret = alloc_region(xid,&conf);
DEBUG("{%lld <- alloc region extend}\n", conf.regionList.page);
Tset(xid,rid,&conf);
return ret;
}
void insertProbeIter(int NUM_ENTRIES)
{
srand(1000);
unlink("storefile.txt");
unlink("logfile.txt");
sync();
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
Tinit();
int xid = Tbegin();
std::vector<std::string> data_arr;
std::vector<std::string> key_arr;
preprandstr(NUM_ENTRIES, data_arr, 5*4096, true);
preprandstr(NUM_ENTRIES+200, key_arr, 50, true);//well i can handle upto 200
std::sort(key_arr.begin(), key_arr.end(), &mycmp);
removeduplicates(key_arr);
if(key_arr.size() > NUM_ENTRIES)
key_arr.erase(key_arr.begin()+NUM_ENTRIES, key_arr.end());
NUM_ENTRIES=key_arr.size();
if(data_arr.size() > NUM_ENTRIES)
data_arr.erase(data_arr.begin()+NUM_ENTRIES, data_arr.end());
//for(int i = 0; i < NUM_ENTRIES; i++)
//{
// printf("%s\t", arr[i].c_str());
// int keylen = arr[i].length()+1;
// printf("%d\n", keylen);
//}
recordid alloc_state = Talloc(xid,sizeof(RegionAllocConf_t));
Tset(xid,alloc_state, &logtree::REGION_ALLOC_STATIC_INITIALIZER);
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
int pcount = 10;
int dpages = 0;
DataPage<datatuple> *dp=0;
int64_t datasize = 0;
std::vector<pageid_t> dsp;
for(int i = 0; i < NUM_ENTRIES; i++)
{
//prepare the key
datatuple newtuple;
uint32_t keylen = key_arr[i].length()+1;
newtuple.keylen = &keylen;
newtuple.key = (datatuple::key_t) malloc(keylen);
for(int j=0; j<keylen-1; j++)
newtuple.key[j] = key_arr[i][j];
newtuple.key[keylen-1]='\0';
//prepare the data
uint32_t datalen = data_arr[i].length()+1;
newtuple.datalen = &datalen;
newtuple.data = (datatuple::data_t) malloc(datalen);
for(int j=0; j<datalen-1; j++)
newtuple.data[j] = data_arr[i][j];
newtuple.data[datalen-1]='\0';
/*
printf("key: \t, keylen: %u\ndata: datalen: %u\n",
//newtuple.key,
*newtuple.keylen,
//newtuple.data,
*newtuple.datalen);
*/
datasize += newtuple.byte_length();
if(dp==NULL || !dp->append(xid, newtuple))
{
dpages++;
if(dp)
delete dp;
dp = new DataPage<datatuple>(xid, pcount, &DataPage<datatuple>::dp_alloc_region_rid, &alloc_state );
if(!dp->append(xid, newtuple))
{
delete dp;
dp = new DataPage<datatuple>(xid, pcount, &DataPage<datatuple>::dp_alloc_region_rid, &alloc_state );
assert(dp->append(xid, newtuple));
}
dsp.push_back(dp->get_start_pid());
}
}
printf("Total data set length: %d\n", datasize);
printf("Storage utilization: %.2f\n", (datasize+.0) / (PAGE_SIZE * pcount * dpages));
printf("Number of datapages: %d\n", dpages);
printf("Writes complete.\n");
Tcommit(xid);
xid = Tbegin();
printf("Stage 2: Reading %d tuples\n", NUM_ENTRIES);
int tuplenum = 0;
for(int i = 0; i < dpages ; i++)
{
DataPage<datatuple> dp(xid, dsp[i]);
DataPage<datatuple>::RecordIterator itr = dp.begin();
datatuple *dt=0;
while( (dt=itr.getnext(xid)) != NULL)
{
assert(*(dt->keylen) == key_arr[tuplenum].length()+1);
assert(*(dt->datalen) == data_arr[tuplenum].length()+1);
tuplenum++;
free(dt->keylen);
free(dt);
dt = 0;
}
}
printf("Reads completed.\n");
/*
int64_t count = 0;
lladdIterator_t * it = logtreeIterator::open(xid, tree);
while(logtreeIterator::next(xid, it)) {
byte * key;
byte **key_ptr = &key;
int keysize = logtreeIterator::key(xid, it, (byte**)key_ptr);
pageid_t *value;
pageid_t **value_ptr = &value;
int valsize = lsmTreeIterator_value(xid, it, (byte**)value_ptr);
//printf("keylen %d key %s\n", keysize, (char*)(key)) ;
assert(valsize == sizeof(pageid_t));
assert(!mycmp(std::string((char*)key), arr[count]) && !mycmp(arr[count],std::string((char*)key)));
assert(keysize == arr[count].length()+1);
count++;
}
assert(count == NUM_ENTRIES);
logtreeIterator::close(xid, it);
*/
Tcommit(xid);
Tdeinit();
}
/** @test
*/
int main()
{
insertProbeIter(10000);
return 0;
}