Drastic simplification of Min-heap code and resultant Delta speedup.

This commit is contained in:
Moinak Ghosh 2013-05-25 17:34:38 +05:30
parent 0a1e3b39ef
commit ddaa3b6b6d
5 changed files with 94 additions and 322 deletions

View file

@ -25,8 +25,8 @@
PROG= pcompress
MAINSRCS = main.c utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
utils/xxhash_base.c utils/heapq.c utils/cpuid.c
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heapq.h \
utils/xxhash_base.c utils/heap.c utils/cpuid.c
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heap.h \
utils/cpuid.h utils/xxhash.h
MAINOBJS = $(MAINSRCS:.c=.o)

View file

@ -73,7 +73,7 @@
#include <allocator.h>
#include <utils.h>
#include <pthread.h>
#include <heapq.h>
#include <heap.h>
#include <xxhash.h>
#include <qsort.h>
#include <lzma_crc.h>
@ -475,7 +475,7 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
uint64_t cur_roll_checksum, cur_pos_checksum;
uint32_t *ctx_heap;
rabin_blockentry_t **htab;
heap_t heap;
MinHeap heap;
DEBUG_STAT_EN(uint32_t max_count);
DEBUG_STAT_EN(max_count = 0);
DEBUG_STAT_EN(double strt, en_1, en);
@ -672,17 +672,15 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
* search engines to detect similar documents.
*/
if (ctx->delta_flag) {
memcpy(ctx_heap, buf1+last_offset, length);
length /= 8;
pc[1] = DELTA_NORMAL_PCT(length);
pc[2] = DELTA_EXTRA_PCT(length);
pc[3] = DELTA_EXTRA2_PCT(length);
reset_heap(&heap, pc[ctx->delta_flag]);
ksmallest((int64_t *)ctx_heap, length, &heap);
heap_nsmallest(&heap, (int64_t *)(buf1+last_offset),
(int64_t *)ctx_heap, pc[ctx->delta_flag], length);
ctx->blocks[blknum]->similarity_hash =
XXH32((const uchar_t *)ctx_heap, pc[ctx->delta_flag]*8, 0);
XXH32((const uchar_t *)ctx_heap, heap_size(&heap)*8, 0);
}
++blknum;
last_offset = i+1;
@ -713,16 +711,16 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
uint64_t pc[4];
if (length > ctx->rabin_poly_min_block_size) {
memcpy(ctx_heap, buf1+last_offset, length);
length /= 8;
pc[1] = DELTA_NORMAL_PCT(length);
pc[2] = DELTA_EXTRA_PCT(length);
pc[3] = DELTA_EXTRA2_PCT(length);
reset_heap(&heap, pc[ctx->delta_flag]);
ksmallest((int64_t *)ctx_heap, length, &heap);
heap_nsmallest(&heap, (int64_t *)(buf1+last_offset),
(int64_t *)ctx_heap, pc[ctx->delta_flag], length);
cur_sketch =
XXH32((const uchar_t *)ctx_heap, pc[ctx->delta_flag]*8, 0);
XXH32((const uchar_t *)ctx_heap, heap_size(&heap)*8, 0);
} else {
cur_sketch =
XXH32((const uchar_t *)(buf1+last_offset), length, 0);

75
utils/heap.c Normal file
View file

@ -0,0 +1,75 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
/*
* Functions for a rudimentary fast min-heap implementation.
* Adapted from "Algorithms with C", Kyle Loudon, O'Reilly.
*/
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <stdint.h>
#include <inttypes.h>
#include "heap.h"
#define heap_parent(npos) ((__TYPE)(((npos) - 1) / 2))
#define heap_left(npos) (((npos) * 2) + 1)
#define heap_right(npos) (((npos) * 2) + 2)
static void
heap_insert(MinHeap *heap, __TYPE data)
{
__TYPE temp;
__TYPE ipos, ppos;
heap->tree[heap_size(heap)] = data;
ipos = heap_size(heap);
ppos = heap_parent(ipos);
while (ipos > 0 && heap->tree[ppos] > heap->tree[ipos]) {
temp = heap->tree[ppos];
heap->tree[ppos] = heap->tree[ipos];
heap->tree[ipos] = temp;
ipos = ppos;
ppos = heap_parent(ipos);
}
if (heap->size < heap->totsize)
heap->size++;
}
void
heap_nsmallest(MinHeap *heap, __TYPE *data, __TYPE *heapbuf, __TYPE heapsize, __TYPE datasize)
{
__TYPE i;
heap->size = 1;
heap->totsize = heapsize;
heap->tree = heapbuf;
heap->tree[0] = data[0];
for (i = 1; i < datasize; i++)
heap_insert(heap, data[i]);
}

View file

@ -27,14 +27,14 @@
#define __HEAPQ_H_
#define __TYPE int64_t
typedef struct {
__TYPE *ary;
__TYPE len;
__TYPE tot;
} heap_t;
typedef struct Heap_ {
__TYPE size;
__TYPE totsize;
__TYPE *tree;
} MinHeap;
int ksmallest(__TYPE *ary, __TYPE len, heap_t *heap);
void reset_heap(heap_t *h, __TYPE tot);
void heapify(heap_t *h, __TYPE *ary);
#define heap_size(heap) ((heap)->size)
void heap_nsmallest(MinHeap *heap, __TYPE *data, __TYPE *heapbuf, __TYPE heapsize, __TYPE datasize);
#endif

View file

@ -1,301 +0,0 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
/*
* Functions for a rudimentary fast min-heap implementation.
* Derived from Python's _heapqmodule.c by way of drastic simplification
* and a few optimizations.
*/
/*
* Original Python _heapqmodule.c implementation was derived directly
* from heapq.py in Py2.3 which was written by Kevin O'Connor, augmented
* by Tim Peters, annotated by François Pinard, and converted to C by
* Raymond Hettinger.
*/
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <stdint.h>
#include <inttypes.h>
#include <heapq.h>
#ifndef NDEBUG
#define ERROR_CHK
#endif
void
reset_heap(heap_t *heap, __TYPE tot)
{
if (heap) {
heap->len = 0;
heap->tot = tot;
}
}
static int
_siftdownmax(heap_t *h, __TYPE startpos, __TYPE pos)
{
__TYPE newitem, parent;
__TYPE parentpos, *heap;
#ifdef ERROR_CHK
if (pos >= h->len) {
fprintf(stderr, "_siftdownmax: index out of range\n");
return -1;
}
#endif
heap = h->ary;
newitem = heap[pos];
/* Follow the path to the root, moving parents down until finding
a place newitem fits. */
while (pos > startpos){
parentpos = (pos - 1) >> 1;
parent = heap[parentpos];
if (parent < newitem)
break;
heap[pos] = parent;
pos = parentpos;
}
heap[pos] = newitem;
return 0;
}
static int
_siftupmax(heap_t *h, __TYPE spos, __TYPE epos)
{
__TYPE endpos, childpos, rightpos;
__TYPE newitem, *heap, pos;
endpos = h->len;
heap = h->ary;
#ifdef ERROR_CHK
if (spos >= endpos) {
fprintf(stderr, "_siftupmax: index out of range: %" PRId64 ", len: %" PRId64 "\n", spos, endpos);
return -1;
}
#endif
do {
pos = spos;
/* Bubble up the smaller child until hitting a leaf. */
newitem = heap[pos];
childpos = (pos << 1) + 1; /* leftmost child position */
while (childpos < endpos) {
/* Set childpos to index of smaller child. */
rightpos = childpos + 1;
if (rightpos < endpos) {
if (heap[rightpos] < heap[childpos])
childpos = rightpos;
}
/* Move the smaller child up. */
heap[pos] = heap[childpos];
pos = childpos;
childpos = (pos << 1) + 1;
}
/* The leaf at pos is empty now. Put newitem there, and and bubble
it up to its final resting place (by sifting its parents down). */
heap[pos] = newitem;
#ifdef ERROR_CHK
if (_siftdownmax(h, spos, pos) == -1)
return (-1);
#else
_siftdownmax(h, spos, pos);
#endif
spos--;
} while (spos >= epos);
return (0);
}
static int
_siftupmax_s(heap_t *h, __TYPE spos)
{
__TYPE endpos, childpos, rightpos;
__TYPE newitem, *heap, pos;
endpos = h->len;
heap = h->ary;
#ifdef ERROR_CHK
if (spos >= endpos) {
fprintf(stderr, "_siftupmax: index out of range: %" PRId64 ", len: %" PRId64 "\n", spos, endpos);
return -1;
}
#endif
pos = spos;
/* Bubble up the smaller child until hitting a leaf. */
newitem = heap[pos];
childpos = (pos << 1) + 1; /* leftmost child position */
while (childpos < endpos) {
/* Set childpos to index of smaller child. */
rightpos = childpos + 1;
if (rightpos < endpos) {
if (heap[rightpos] < heap[childpos])
childpos = rightpos;
}
/* Move the smaller child up. */
heap[pos] = heap[childpos];
pos = childpos;
childpos = (pos << 1) + 1;
}
/* The leaf at pos is empty now. Put newitem there, and and bubble
it up to its final resting place (by sifting its parents down). */
heap[pos] = newitem;
return (_siftdownmax(h, spos, pos));
}
int
ksmallest(__TYPE *ary, __TYPE len, heap_t *heap)
{
__TYPE elem, los;
__TYPE i, *hp, n;
__TYPE tmp;
n = heap->tot;
heap->ary = ary;
hp = ary;
heap->len = n;
#ifdef ERROR_CHK
if(_siftupmax(heap, n/2-1, 0) == -1)
return (-1);
#else
_siftupmax(heap, n/2-1, 0);
#endif
los = hp[0];
for (i = n; i < len; i++) {
elem = ary[i];
if (elem >= los) {
continue;
}
tmp = hp[0];
hp[0] = elem;
ary[i] = tmp;
#ifdef ERROR_CHK
if (_siftupmax_s(heap, 0) == -1)
return (-1);
#else
_siftupmax_s(heap, 0);
#endif
los = hp[0];
}
return 0;
}
static int
_siftdown(heap_t *h, __TYPE startpos, __TYPE pos)
{
__TYPE newitem, parent, *heap;
__TYPE parentpos;
heap = h->ary;
#ifdef ERROR_CHK
if (pos >= h->tot) {
fprintf(stderr, "_siftdown: index out of range: %" PRId64 ", len: %" PRId64 "\n", pos, h->len);
return -1;
}
#endif
/* Follow the path to the root, moving parents down until finding
a place newitem fits. */
newitem = heap[pos];
while (pos > startpos){
parentpos = (pos - 1) >> 1;
parent = heap[parentpos];
if (parent < newitem) {
break;
}
heap[pos] = parent;
pos = parentpos;
}
heap[pos] = newitem;
return (0);
}
static int
_siftup(heap_t *h, __TYPE pos)
{
__TYPE startpos, endpos, childpos, rightpos;
__TYPE newitem, *heap;
endpos = h->tot;
heap = h->ary;
startpos = pos;
#ifdef ERROR_CHK
if (pos >= endpos) {
fprintf(stderr, "_siftup: index out of range: %" PRId64 ", len: %" PRId64 "\n", pos, endpos);
return -1;
}
#endif
/* Bubble up the smaller child until hitting a leaf. */
newitem = heap[pos];
childpos = 2*pos + 1; /* leftmost child position */
while (childpos < endpos) {
/* Set childpos to index of smaller child. */
rightpos = childpos + 1;
if (rightpos < endpos) {
if (heap[rightpos] < heap[childpos])
childpos = rightpos;
}
/* Move the smaller child up. */
heap[pos] = heap[childpos];
pos = childpos;
childpos = 2*pos + 1;
}
/* The leaf at pos is empty now. Put newitem there, and and bubble
it up to its final resting place (by sifting its parents down). */
heap[pos] = newitem;
return _siftdown(h, startpos, pos);
}
void
heapify(heap_t *h, __TYPE *ary)
{
__TYPE i, n;
n = h->tot;
h->ary = ary;
/* Transform bottom-up. The largest index there's any point to
looking at is the largest with a child index in-range, so must
have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
(2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
and that's again n//2-1.
*/
for (i=n/2-1 ; i>=0 ; i--)
if(_siftup(h, i) == -1)
break;
}