2007-10-15 17:46:44 +00:00
# ifndef _ROSE_COMPRESSION_RLE_IMPL_H__
# define _ROSE_COMPRESSION_RLE_IMPL_H__
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sears@google.com (Rusty Sears)
# include <assert.h>
2007-11-06 02:27:04 +00:00
# include <algorithm>
2007-10-15 17:46:44 +00:00
# include "rle.h"
namespace rose {
/**
Store a new value in run length encoding . If this value matches
the previous one , increment a counter . Otherwise , create a new
triple_t to hold the new value and its count . Most of the
complexity comes from dealing with integer overflow , and running
out of space .
*/
2008-10-27 23:35:51 +00:00
template < class TYPE , class COUNT_TYPE >
2007-10-15 17:46:44 +00:00
inline slot_index_t
2008-10-27 23:35:51 +00:00
Rle < TYPE , COUNT_TYPE > : : append ( int xid , const TYPE dat ,
2007-10-15 17:46:44 +00:00
byte_off_t * except , byte * exceptions , //char *exceptional,
int * free_bytes ) {
int64_t ret ;
2008-06-08 20:28:53 +00:00
DEBUG ( " \t rle got %lld \n " , ( long long ) dat ) ;
2007-10-15 17:46:44 +00:00
ret = last_block_ptr ( ) - > index + last_block_ptr ( ) - > copies ;
2008-05-27 07:32:37 +00:00
if ( ret = = MAX_INDEX ) {
// out of address space
* free_bytes = - 1 ;
ret = NOSPACE ;
} else if ( dat ! = last_block_ptr ( ) - > data | |
2007-10-15 17:46:44 +00:00
last_block_ptr ( ) - > copies = = MAX_COPY_COUNT ) {
// this key is not the same as the last one, or
// the block is full
2007-11-06 02:27:04 +00:00
( * free_bytes ) - = sizeof ( triple_t ) ;
2007-10-15 17:46:44 +00:00
// Write the changes in our overrun space
triple_t * n = new_block_ptr ( ) ;
n - > index = ret ;
n - > copies = 1 ;
n - > data = dat ;
// Finalize the changes unless we're out of space
( * block_count_ptr ( ) ) + = ( * free_bytes > = 0 ) ;
} else {
// success; bump number of copies of this item, and return.
last_block_ptr ( ) - > copies + + ;
}
return ( slot_index_t ) ret ;
}
2008-10-27 23:35:51 +00:00
template < class TYPE , class COUNT_TYPE >
2007-10-15 17:46:44 +00:00
inline TYPE *
2008-10-27 23:35:51 +00:00
Rle < TYPE , COUNT_TYPE > : : recordRead ( int xid , slot_index_t slot , byte * exceptions ,
2007-10-15 17:46:44 +00:00
TYPE * scratch ) {
block_index_t n = nth_block_ptr ( last_ ) - > index < = slot ? last_ : 0 ;
do {
triple_t * t = nth_block_ptr ( n ) ;
if ( t - > index < = slot & & t - > index + t - > copies > slot ) {
* scratch = t - > data ;
last_ = n ;
return scratch ;
}
n + + ;
} while ( n < * block_count_ptr ( ) ) ;
return 0 ;
}
2008-06-08 20:28:53 +00:00
# ifndef COMPRESSION_BINARY_FIND
2007-11-06 02:27:04 +00:00
template < class TYPE >
inline std : : pair < slot_index_t , slot_index_t > *
Rle < TYPE > : : recordFind ( int xid , slot_index_t start , slot_index_t stop ,
byte * exceptions , TYPE value ,
std : : pair < slot_index_t , slot_index_t > & scratch ) {
block_index_t n = 0 ;
std : : pair < slot_index_t , slot_index_t > * ret = 0 ;
do {
triple_t * t = nth_block_ptr ( n ) ;
if ( t - > data > = value ) {
2008-06-08 20:28:53 +00:00
if ( t - > data = = value ) {
scratch . first = t - > index ;
do {
scratch . second = t - > index + t - > copies ;
n + + ;
t = nth_block_ptr ( n ) ;
} while ( n < * block_count_ptr ( ) & & t - > data = = value ) ;
ret = & scratch ;
}
2007-11-06 02:27:04 +00:00
break ;
}
n + + ;
} while ( n < * block_count_ptr ( ) ) ;
2008-03-08 07:33:44 +00:00
if ( scratch . first > = stop ) {
return 0 ;
} else if ( scratch . second > stop ) {
scratch . second = stop ;
}
2007-11-06 02:27:04 +00:00
return ret ;
}
2008-06-08 20:28:53 +00:00
# else // COMPRESSION_BINARY_FIND
2008-10-27 23:35:51 +00:00
/**
Binary-search implementation of recordFind (compiled when
COMPRESSION_BINARY_FIND is defined).

The body runs three searches in sequence:
  1. a "greatest lower" search keyed on start, whose result (if any)
     becomes the first block to consider (low);
  2. a "greatest lower" search keyed on stop, whose result (if any) plus
     one becomes the first block NOT to consider (high);
  3. a search for value among blocks [low, high).
A hit is widened to every adjacent block in [low, high) holding the same
value.  scratch then receives the half-open slot range [first, second) of
those blocks, clamped to [start, stop], and &scratch is returned.  If the
third search yields -1 the function prints diagnostics and calls abort().

NOTE(review): rose_binary_search_greatest_lower and rose_binary_search are
not defined in this file.  bs_ret is never assigned directly below, so they
presumably read bs_low / bs_high / bs_value and store their answer in
bs_ret -- confirm against their definition before renaming any bs_* local.

NOTE(review): xid and exceptions are not referenced in the body.
*/
template < class TYPE , class COUNT_TYPE >
2008-06-08 20:28:53 +00:00
inline std : : pair < slot_index_t , slot_index_t > *
2008-10-27 23:35:51 +00:00
Rle < TYPE , COUNT_TYPE > : : recordFind ( int xid , slot_index_t start , slot_index_t stop ,
2008-06-08 20:28:53 +00:00
byte * exceptions , TYPE value ,
std : : pair < slot_index_t , slot_index_t > & scratch ) {
DEBUG ( " \n \n called with start = %lld stop = %lld, val = %lld \n " , ( long long ) start , ( long long ) stop , ( long long ) value ) ;
DEBUG ( " 1th data: %lld \n " , ( long long ) nth_block_ptr ( 1 ) - > data ) ;
// [low, high) is the range of blocks under consideration; it starts out as
// every block.
block_index_t low = 0 ;
block_index_t high = * block_count_ptr ( ) ;
// Inputs/outputs for the search macros (see the NOTE(review) in the header
// comment).
int64_t bs_ret ;
int64_t bs_low = low ;
int64_t bs_high = high ;
DEBUG ( " low: %d->%ld, high %d->%ld \n " , low , bs_low , high , bs_high ) ;
// Phase 1: search keyed on start.
// NOTE(review): bs_value has type TYPE but holds a slot index here and in
// phase 2; if TYPE is narrower than slot_index_t this would truncate -- confirm.
TYPE bs_value = start ;
rose_binary_search_greatest_lower ( nth_high_index ) ;
if ( bs_ret ! = - 1 ) {
// The block found must contain start.
assert ( nth_low_index ( bs_ret ) < = start ) ;
assert ( nth_high_bound ( bs_ret ) > start ) ;
// if(bs_ret < (*block_count_ptr()-1)) {
// assert(nth_low_index(bs_ret+1) > start);
// }
low = bs_ret ;
} // else start not found because page starts after it (leave at block zero)
DEBUG ( " real low is %d \n " , low ) ;
// Phase 2: search keyed on stop, restricted to blocks at or after low.
bs_low = low ;
bs_high = ( * block_count_ptr ( ) ) ;
bs_value = stop ;
DEBUG ( " bs_low = %lld bs_high = %lld bs_val = %lld \n " , ( long long ) bs_low , ( long long ) bs_high , ( long long ) bs_value ) ;
rose_binary_search_greatest_lower ( nth_high_bound ) ;
if ( bs_ret = = - 1 ) {
// No block found for stop: keep every block up to the end in play.
high = ( * block_count_ptr ( ) ) ; //-1;
} else {
DEBUG ( " bs_ret = %lld \n " , ( long long ) bs_ret ) ;
// bs_high might not contain the index we're looking for
// high = bs_ret + XXX; /// where XXX is 1 sometimes, and zero others.
// if(nth_high_index(bs_ret+1) >= stop) { high = bs_ret+1; } else { high = bs_ret + 2; }
high = bs_ret ;
// high contains the top of the range
assert ( nth_high_bound ( high ) > = stop ) ;
assert ( nth_low_index ( high ) < stop ) ;
// set high to the first block that we don't need to consider.
high + + ;
// if(high > 0) {
// assert(nth_low_index(high-1) <= stop);
// }
2007-10-15 17:46:44 +00:00
2008-06-08 20:28:53 +00:00
}
DEBUG ( " real high is %lld (last block slots: %lld - %lld) val = %lld \n " , ( long long int ) high , ( long long int ) nth_low_index ( high - 1 ) , ( long long int ) nth_high_index ( high - 1 ) , ( long long int ) bs_value ) ;
// now low is the smallest block we need consider; high the highest.
// Phase 3: search for value among blocks [low, high).
bs_low = low ;
bs_high = high ;
bs_value = value ;
DEBUG ( " B low: %lld->%lld, high %lld->%lld val = %lld low_idx = %lld low_end = %lld low val = %lld high_idx = %lld high_end = %lld high_val = %lld \n " , ( long long int ) low , ( long long int ) bs_low , ( long long int ) high , ( long long int ) bs_high , ( long long int ) value , ( long long int ) nth_low_index ( bs_low ) , ( long long int ) nth_high_index ( bs_low ) , ( long long int ) * nth_data_ptr ( bs_low ) , ( long long int ) nth_low_index ( bs_high ) , ( long long int ) nth_high_index ( bs_high ) , ( long long int ) * nth_data_ptr ( bs_high ) ) ;
DEBUG ( " bs val =%lld bs low = %lld bs high = %lld \n " , ( long long ) bs_value , ( long long ) bs_low , ( long long ) bs_high ) ;
rose_binary_search ( nth_data_ptr ) ;
DEBUG ( " bs ret = %lld \n " , ( long long int ) bs_ret ) ;
if ( bs_ret = = - 1 ) {
// Value not found: dump some state and terminate the process.
// NOTE(review): the first printf reads the data of blocks 0, 1 and 2 without
// checking the block count -- confirm that this is always readable memory.
// NOTE(review): the second printf passes start, stop, the block count and
// copies to %d; verify that matches their actual types.
printf ( " [ %lld %lld %lld ] \n " , ( long long int ) * ( TYPE * ) nth_data_ptr ( 0 ) , ( long long int ) * ( TYPE * ) nth_data_ptr ( 1 ) , ( long long int ) * ( TYPE * ) nth_data_ptr ( 2 ) ) ;
printf ( " not found by rle: start = %d stop = %d val = %lld count = %d zero copies %d low = %lld high = %lld \n " , start , stop , ( long long ) value , * block_count_ptr ( ) , nth_block_ptr ( 0 ) - > copies , ( long long ) low , ( long long ) high ) ;
abort ( ) ;
// Presumably unreachable, since abort() is not expected to return.
return 0 ;
} else {
// bs_ret is some block holding value; grow the match in both directions,
// staying within [low, high).
block_index_t firstBlock = bs_ret ;
block_index_t lastBlock = bs_ret ; // lastBlock is the offset of the last block in range
while ( firstBlock > low & & * nth_data_ptr ( firstBlock - 1 ) = = value ) { firstBlock - - ; }
// firstblock is >= 0 and it contains items of the desired value
while ( lastBlock < high & & * nth_data_ptr ( lastBlock ) = = value ) { lastBlock + + ; }
// lastblock == block_count_ptr or it contains items of the wrong value
lastBlock - - ;
// lastblock valid, and it contains items of the correct value
DEBUG ( " looking at blocks %d - %d \n " , firstBlock , lastBlock ) ;
scratch . first = nth_block_ptr ( firstBlock ) - > index ;
// set second to one past last valid index.
scratch . second = nth_block_ptr ( lastBlock ) - > index + nth_block_ptr ( lastBlock ) - > copies ;
// Clamp the reported range to the caller's [start, stop) window.
if ( scratch . first < start ) { scratch . first = start ; }
if ( scratch . second > stop ) { scratch . second = stop ; }
DEBUG ( " startstop = %d,%d scratch = %d,%d \n " , start , stop , scratch . first , scratch . second ) ;
return & scratch ;
}
}
# endif // COMPRESSION_BINARY_FIND
} // namespace rose
2007-10-15 17:46:44 +00:00
# endif // _ROSE_COMPRESSION_RLE_IMPL_H__