940a6da6fe
The tests now pass, except that check_merge never terminates (it takes too long) and check_mergelarge still is not passing. For better luck running this version of the code, turn off stasis' concurrent buffer manager. We're doing something bad that leads to deadlocks with the concurrent buffer manager. Another (the same?) bug less-frequently leads to page corruption with the old stasis buffer manager. git-svn-id: svn+ssh://svn.corp.yahoo.com/yahoo/yrl/labs/pnuts/code/logstore@556 8dad8b1f-cf64-0410-95b6-bcf113ffbcfe
288 lines
7.6 KiB
C++
288 lines
7.6 KiB
C++
#include <string>
|
|
#include <vector>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include "logstore.h"
|
|
#include "datapage.h"
|
|
#include "logiterators.h"
|
|
#include "merger.h"
|
|
#include <assert.h>
|
|
#include <limits.h>
|
|
#include <math.h>
|
|
#include <pthread.h>
|
|
#include <sys/time.h>
|
|
#include <time.h>
|
|
|
|
#undef begin
|
|
#undef end
|
|
|
|
#include "check_util.h"
|
|
|
|
void insertProbeIter(size_t NUM_ENTRIES)
|
|
{
|
|
srand(1000);
|
|
unlink("storefile.txt");
|
|
unlink("logfile.txt");
|
|
|
|
sync();
|
|
double delete_freq = .05;
|
|
double update_freq = .15;
|
|
|
|
//data generation
|
|
typedef std::vector<std::string> key_v_t;
|
|
const static size_t max_partition_size = 100000;
|
|
int KEY_LEN = 100;
|
|
std::vector<key_v_t*> *key_v_list = new std::vector<key_v_t*>;
|
|
int list_size = NUM_ENTRIES / max_partition_size + 1;
|
|
for(int i =0; i<list_size; i++)
|
|
{
|
|
key_v_t * key_arr = new key_v_t;
|
|
if(NUM_ENTRIES < max_partition_size*(i+1))
|
|
preprandstr(NUM_ENTRIES-max_partition_size*i, key_arr, KEY_LEN, true);
|
|
else
|
|
preprandstr(max_partition_size, key_arr, KEY_LEN, true);
|
|
|
|
std::sort(key_arr->begin(), key_arr->end(), &mycmp);
|
|
key_v_list->push_back(key_arr);
|
|
printf("size partition %d is %d\n", i+1, key_arr->size());
|
|
}
|
|
|
|
|
|
|
|
key_v_t * key_arr = new key_v_t;
|
|
|
|
std::vector<key_v_t::iterator*> iters;
|
|
for(int i=0; i<list_size; i++)
|
|
{
|
|
iters.push_back(new key_v_t::iterator((*key_v_list)[i]->begin()));
|
|
}
|
|
|
|
int lc = 0;
|
|
while(true)
|
|
{
|
|
int list_index = -1;
|
|
for(int i=0; i<list_size; i++)
|
|
{
|
|
if(*iters[i] == (*key_v_list)[i]->end())
|
|
continue;
|
|
|
|
if(list_index == -1 || mycmp(**iters[i], **iters[list_index]))
|
|
list_index = i;
|
|
}
|
|
|
|
if(list_index == -1)
|
|
break;
|
|
|
|
if(key_arr->size() == 0 || mycmp(key_arr->back(), **iters[list_index]))
|
|
key_arr->push_back(**iters[list_index]);
|
|
|
|
(*iters[list_index])++;
|
|
lc++;
|
|
if(lc % max_partition_size == 0)
|
|
printf("%d/%d completed.\n", lc, NUM_ENTRIES);
|
|
}
|
|
|
|
for(int i=0; i<list_size; i++)
|
|
{
|
|
(*key_v_list)[i]->clear();
|
|
delete (*key_v_list)[i];
|
|
delete iters[i];
|
|
}
|
|
key_v_list->clear();
|
|
delete key_v_list;
|
|
|
|
printf("key arr size: %d\n", key_arr->size());
|
|
|
|
if(key_arr->size() > NUM_ENTRIES)
|
|
key_arr->erase(key_arr->begin()+NUM_ENTRIES, key_arr->end());
|
|
|
|
NUM_ENTRIES=key_arr->size();
|
|
|
|
bufferManagerNonBlockingSlowHandleType = IO_HANDLE_PFILE;
|
|
|
|
Tinit();
|
|
|
|
int xid = Tbegin();
|
|
|
|
merge_scheduler mscheduler;
|
|
logtable ltable;
|
|
|
|
int pcount = 40;
|
|
ltable.set_fixed_page_count(pcount);
|
|
|
|
recordid table_root = ltable.allocTable(xid);
|
|
|
|
Tcommit(xid);
|
|
|
|
xid = Tbegin();
|
|
|
|
int lindex = mscheduler.addlogtable(<able);
|
|
ltable.setMergeData(mscheduler.getMergeData(lindex));
|
|
|
|
mscheduler.startlogtable(lindex);
|
|
|
|
printf("Stage 1: Writing %d keys\n", NUM_ENTRIES);
|
|
|
|
struct timeval start_tv, stop_tv, ti_st, ti_end;
|
|
double insert_time = 0;
|
|
int dpages = 0;
|
|
int npages = 0;
|
|
int delcount = 0, upcount = 0;
|
|
DataPage<datatuple> *dp=0;
|
|
int64_t datasize = 0;
|
|
std::vector<pageid_t> dsp;
|
|
std::vector<int> del_list;
|
|
gettimeofday(&start_tv,0);
|
|
for(size_t i = 0; i < NUM_ENTRIES; i++)
|
|
{
|
|
//prepare the data
|
|
std::string ditem;
|
|
getnextdata(ditem, 8192);
|
|
|
|
//prepare the key
|
|
datatuple *newtuple = datatuple::create((*key_arr)[i].c_str(), (*key_arr)[i].length()+1, ditem.c_str(), ditem.length()+1);
|
|
|
|
datasize += newtuple->byte_length();
|
|
|
|
gettimeofday(&ti_st,0);
|
|
ltable.insertTuple(newtuple);
|
|
gettimeofday(&ti_end,0);
|
|
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
|
|
|
|
datatuple::freetuple(newtuple);
|
|
|
|
double rval = ((rand() % 100)+.0)/100;
|
|
if( rval < delete_freq) //delete a key
|
|
{
|
|
int del_index = i - (rand()%50); //delete one of the last inserted 50 elements
|
|
if(del_index >= 0 && std::find(del_list.begin(), del_list.end(), del_index) == del_list.end())
|
|
{
|
|
delcount++;
|
|
|
|
datatuple *deltuple = datatuple::create((*key_arr)[del_index].c_str(), (*key_arr)[del_index].length()+1);
|
|
|
|
gettimeofday(&ti_st,0);
|
|
ltable.insertTuple(deltuple);
|
|
gettimeofday(&ti_end,0);
|
|
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
|
|
|
|
datatuple::freetuple(deltuple);
|
|
|
|
del_list.push_back(del_index);
|
|
|
|
}
|
|
}
|
|
else if(rval < delete_freq + update_freq) //update a record
|
|
{
|
|
int up_index = i - (rand()%50); //update one of the last inserted 50 elements
|
|
if(up_index >= 0 && std::find(del_list.begin(), del_list.end(), up_index) == del_list.end())
|
|
{//only update non-deleted elements
|
|
getnextdata(ditem, 512);
|
|
|
|
upcount++;
|
|
datatuple *uptuple = datatuple::create((*key_arr)[up_index].c_str(), (*key_arr)[up_index].length()+1,
|
|
ditem.c_str(), ditem.length()+1);
|
|
gettimeofday(&ti_st,0);
|
|
ltable.insertTuple(uptuple);
|
|
gettimeofday(&ti_end,0);
|
|
insert_time += tv_to_double(ti_end) - tv_to_double(ti_st);
|
|
|
|
datatuple::freetuple(uptuple);
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
gettimeofday(&stop_tv,0);
|
|
printf("insert time: %6.1f\n", insert_time);
|
|
printf("insert time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
|
|
printf("#deletions: %d\n#updates: %d\n", delcount, upcount);
|
|
|
|
printf("\nTREE STRUCTURE\n");
|
|
printf("datasize: %lld\n", datasize);
|
|
|
|
Tcommit(xid);
|
|
xid = Tbegin();
|
|
|
|
|
|
|
|
printf("Stage 2: Looking up %d keys:\n", NUM_ENTRIES);
|
|
|
|
int found_tuples=0;
|
|
for(int i=NUM_ENTRIES-1; i>=0; i--)
|
|
{
|
|
int ri = i;
|
|
|
|
//get the key
|
|
uint32_t keylen = (*key_arr)[ri].length()+1;
|
|
datatuple::key_t rkey = (datatuple::key_t) malloc(keylen);
|
|
memcpy((byte*)rkey, (*key_arr)[ri].c_str(), keylen);
|
|
|
|
//find the key with the given tuple
|
|
datatuple *dt = ltable.findTuple(xid, rkey, keylen);
|
|
|
|
if(std::find(del_list.begin(), del_list.end(), i) == del_list.end())
|
|
{
|
|
assert(dt!=0);
|
|
assert(!dt->isDelete());
|
|
found_tuples++;
|
|
assert(dt->keylen() == (*key_arr)[ri].length()+1);
|
|
datatuple::freetuple(dt);
|
|
}
|
|
else
|
|
{
|
|
if(dt!=0)
|
|
{
|
|
assert(dt->keylen() == (*key_arr)[ri].length()+1);
|
|
assert(dt->isDelete());
|
|
datatuple::freetuple(dt);
|
|
}
|
|
}
|
|
dt = 0;
|
|
free(rkey);
|
|
}
|
|
printf("found %d\n", found_tuples);
|
|
|
|
|
|
|
|
|
|
|
|
key_arr->clear();
|
|
//data_arr->clear();
|
|
delete key_arr;
|
|
//delete data_arr;
|
|
|
|
mscheduler.shutdown();
|
|
printf("merge threads finished.\n");
|
|
gettimeofday(&stop_tv,0);
|
|
printf("run time: %6.1f\n", (tv_to_double(stop_tv) - tv_to_double(start_tv)));
|
|
|
|
|
|
|
|
Tcommit(xid);
|
|
Tdeinit();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @test
|
|
*/
|
|
int main()
|
|
{
|
|
// insertProbeIter(25000);
|
|
insertProbeIter(400000);
|
|
/*
|
|
insertProbeIter(5000);
|
|
insertProbeIter(2500);
|
|
insertProbeIter(1000);
|
|
insertProbeIter(500);
|
|
insertProbeIter(1000);
|
|
insertProbeIter(100);
|
|
insertProbeIter(10);
|
|
*/
|
|
|
|
return 0;
|
|
}
|
|
|