Update to latest XXHash version.

This commit is contained in:
Moinak Ghosh 2012-12-31 11:53:47 +05:30
parent 8bfa49fc66
commit 13d9378acd
3 changed files with 336 additions and 167 deletions

View file

@ -328,7 +328,7 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
ctx->blocks[i]->index = i; // Need to store for sorting
ctx->blocks[i]->length = length;
ctx->blocks[i]->similar = 0;
ctx->blocks[i]->hash = XXH_fast32(buf1+last_offset, length, 0);
ctx->blocks[i]->hash = XXH32(buf1+last_offset, length, 0);
ctx->blocks[i]->similarity_hash = ctx->blocks[i]->hash;
last_offset += length;
}
@ -448,7 +448,7 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
reset_heap(&heap, pc[ctx->delta_flag]);
ksmallest((int32_t *)fplist, j, &heap);
ctx->blocks[blknum]->similarity_hash =
XXH_fast32((const uchar_t *)fplist, pc[ctx->delta_flag]*4, 0);
XXH32((const uchar_t *)fplist, pc[ctx->delta_flag]*4, 0);
memset(fplist, 0, ary_sz);
}
blknum++;
@ -478,11 +478,11 @@ dedupe_compress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size, uint64_t of
reset_heap(&heap, pc[ctx->delta_flag]);
ksmallest((int32_t *)fplist, j, &heap);
cur_sketch =
XXH_fast32((const uchar_t *)fplist, pc[ctx->delta_flag]*4, 0);
XXH32((const uchar_t *)fplist, pc[ctx->delta_flag]*4, 0);
} else {
if (j == 0) j = 1;
cur_sketch =
XXH_fast32((const uchar_t *)fplist, (j*4)/2, 0);
XXH32((const uchar_t *)fplist, (j*4)/2, 0);
}
ctx->blocks[blknum]->similarity_hash = cur_sketch;
}
@ -516,12 +516,12 @@ process_blocks:
*/
if (ctx->delta_flag) {
for (i=0; i<blknum; i++) {
ctx->blocks[i]->hash = XXH_fast32(buf1+ctx->blocks[i]->offset,
ctx->blocks[i]->hash = XXH32(buf1+ctx->blocks[i]->offset,
ctx->blocks[i]->length, 0);
}
} else {
for (i=0; i<blknum; i++) {
ctx->blocks[i]->hash = XXH_fast32(buf1+ctx->blocks[i]->offset,
ctx->blocks[i]->hash = XXH32(buf1+ctx->blocks[i]->offset,
ctx->blocks[i]->length, 0);
ctx->blocks[i]->similarity_hash = ctx->blocks[i]->hash;
}
@ -618,6 +618,9 @@ process_blocks:
dedupe_index_sz = (uint64_t)blknum * RABIN_ENTRY_SIZE;
if (matchlen < dedupe_index_sz) {
DEBUG_STAT_EN(en = get_wtime_millis());
DEBUG_STAT_EN(fprintf(stderr, "Chunking speed %.3f MB/s, Overall Dedupe speed %.3f MB/s\n",
get_mb_s(*size, strt, en_1), get_mb_s(*size, strt, en)));
DEBUG_STAT_EN(fprintf(stderr, "No Dedupe possible.\n"));
ctx->valid = 0;
return (0);

View file

@ -1,26 +1,3 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
* This program includes partly-modified public domain source
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
*/
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012, Yann Collet.
@ -54,23 +31,82 @@
*/
//**************************************
// Tuning parameters
//**************************************
// FORCE_NATIVE_FORMAT :
// By default, the xxHash library provides endian-independent hash values.
// Results are therefore identical for big-endian and little-endian CPUs.
// This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate little-endian format.
// Should endian-independence be of no importance to your application, you may uncomment the #define below.
// It will improve speed for big-endian CPUs.
// This option has no impact on little-endian CPUs.
//#define FORCE_NATIVE_FORMAT 1
//**************************************
// Includes
//**************************************
#include <stdlib.h> // for malloc(), free()
#include <string.h> // for memcpy()
#include "xxhash.h"
//**************************************
// Compiler Options
// CPU Feature Detection
//**************************************
#ifdef _MSC_VER // Visual Studio
#define inline __forceinline // Visual is not C99, but supports some kind of inline
// Little Endian or Big Endian ?
// You can override the #define below if you know your architecture's endianness
#if defined(FORCE_NATIVE_FORMAT) && (FORCE_NATIVE_FORMAT==1)
// Force native format. The result will be endian-dependent.
# define XXH_BIG_ENDIAN 0
#elif defined (__GLIBC__)
# include <endian.h>
# if (__BYTE_ORDER == __BIG_ENDIAN)
# define XXH_BIG_ENDIAN 1
# endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
# define XXH_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
|| defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
|| defined(__hpux) || defined(__hppa) \
|| defined(_MIPSEB) || defined(__s390__)
# define XXH_BIG_ENDIAN 1
#endif
// GCC does not support _rotl outside of Windows
#if !defined(_WIN32)
#define _rotl(x,r) ((x << r) | (x >> (32 - r)))
#if !defined(XXH_BIG_ENDIAN)
// Little-endian assumed. PDP-endian and other very rare endian formats are unsupported.
# define XXH_BIG_ENDIAN 0
#endif
//**************************************
// Compiler-specific Options & Functions
//**************************************
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// Note : under GCC, it may sometimes be faster to enable the (2nd) macro definition, instead of using win32 intrinsic
// 32-bit left rotation of x by r bits.
// Both arguments are fully parenthesised in the portable form so that compound
// expressions (e.g. XXH_rotl32(a | b, n + 1)) rotate as a single value.
// The portable form evaluates x twice, so pass expressions without side effects,
// and r must stay in the range 1..31 (a shift by 32 would be undefined).
// NOTE(review): the portable form assumes unsigned int is 32 bits wide.
#if defined(_WIN32)
#  define XXH_rotl32(x,r) _rotl(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#endif
#if defined(_MSC_VER) // Visual Studio
# define XXH_swap32 _byteswap_ulong
#elif GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
#else
// Portable byte-swap fallback: reverses the order of the four low bytes of x.
// Each source byte is isolated, moved to its mirrored position, and the four
// pieces are OR-ed back together.
static inline unsigned int XXH_swap32 (unsigned int x) {
    const unsigned int from_byte0 = (x & 0x000000ffU) << 24;  // lowest byte -> highest
    const unsigned int from_byte1 = (x & 0x0000ff00U) << 8;
    const unsigned int from_byte2 = (x >> 8) & 0x0000ff00U;
    const unsigned int from_byte3 = (x >> 24) & 0x000000ffU;  // highest byte -> lowest
    return from_byte0 | from_byte1 | from_byte2 | from_byte3;
}
#endif
@ -78,147 +114,229 @@
//**************************************
// Constants
//**************************************
#define PRIME1 2654435761U
#define PRIME2 2246822519U
#define PRIME3 3266489917U
#define PRIME4 668265263U
#define PRIME5 0x165667b1
#define PRIME32_1 2654435761U
#define PRIME32_2 2246822519U
#define PRIME32_3 3266489917U
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U
//**************************************
// Macros
//**************************************
// Fetch one unsigned int from an arbitrarily aligned address.
// Going through memcpy() instead of dereferencing a casted pointer avoids
// misaligned loads (which trap on some CPUs) and does not cast away const.
static unsigned int XXH_read32_unaligned(const void* p)
{
    unsigned int value;
    memcpy(&value, p, sizeof value);
    return value;
}
// Read the word at p as a little-endian value: byte-swapped when
// XXH_BIG_ENDIAN is non-zero, taken as-is otherwise.
#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(XXH_read32_unaligned(p)) : XXH_read32_unaligned(p))
//****************************
// Private functions
// Simple Hash Functions
//****************************
// This version is for very small inputs (< 16 bytes)
inline unsigned int XXH_small(const void* key, int len, unsigned int seed)
unsigned int XXH32(const void* input, int len, unsigned int seed)
{
const unsigned char* p = (unsigned char*)key;
const unsigned char* const bEnd = p + len;
unsigned int idx = seed + PRIME1;
unsigned int crc = PRIME5;
const unsigned char* const limit = bEnd - 4;
#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
void* state = XXH32_init(seed);
XXH32_feed(state, input, len);
return XXH32_result(state);
#else
while (p<limit)
const unsigned char* p = (const unsigned char*)input;
const unsigned char* const bEnd = p + len;
unsigned int h32;
if (len>=16)
{
crc += ((*(unsigned int*)p) + idx++);
crc += _rotl(crc, 17) * PRIME4;
crc *= PRIME1;
const unsigned char* const limit = bEnd - 16;
unsigned int v1 = seed + PRIME32_1 + PRIME32_2;
unsigned int v2 = seed + PRIME32_2;
unsigned int v3 = seed + 0;
unsigned int v4 = seed - PRIME32_1;
do
{
v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
} while (p<=limit) ;
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
}
else
{
h32 = seed + PRIME32_5;
}
h32 += (unsigned int) len;
while (p<=bEnd-4)
{
h32 += XXH_LE32(p) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd)
{
crc += ((*p) + idx++);
crc *= PRIME1;
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
crc += len;
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
crc ^= crc >> 15;
crc *= PRIME2;
crc ^= crc >> 13;
crc *= PRIME3;
crc ^= crc >> 16;
return h32;
return crc;
#endif
}
//****************************
// Advanced Hash Functions
//****************************
//******************************
// Hash functions
//******************************
unsigned int XXH_fast32(const void* input, int len, unsigned int seed)
struct XXH_state32_t
{
// Special case, for small inputs
if (len < 16) return XXH_small(input, len, seed);
unsigned int seed;
unsigned int v1;
unsigned int v2;
unsigned int v3;
unsigned int v4;
unsigned long long total_len;
char memory[16];
int memsize;
};
{
// Allocate and initialise a streaming hash state for the given seed.
//   seed : value mixed into the four accumulators and kept for short inputs.
// Returns an opaque state pointer to pass to XXH32_feed(), or NULL when the
// allocation fails. The state is released by XXH32_result().
void* XXH32_init (unsigned int seed)
{
    struct XXH_state32_t * state = malloc(sizeof *state);
    if (state == NULL)
        return NULL;    // out of memory: report it instead of writing through NULL
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;   // no bytes consumed yet
    state->memsize = 0;     // carry-over buffer starts empty
    return (void*)state;
}
int XXH32_feed (void* state_in, const void* input, int len)
{
struct XXH_state32_t * state = state_in;
const unsigned char* p = (const unsigned char*)input;
const unsigned char* const bEnd = p + len;
unsigned int v1 = seed + PRIME1;
unsigned int v2 = v1 * PRIME2 + len;
unsigned int v3 = v2 * PRIME3;
unsigned int v4 = v3 * PRIME4;
const unsigned char* const limit = bEnd - 16;
unsigned int crc;
while (p<limit)
state->total_len += len;
if (state->memsize + len < 16) // fill in tmp buffer
{
v1 = _rotl(v1, 13) + (*(unsigned int*)p); p+=4;
v2 = _rotl(v2, 11) + (*(unsigned int*)p); p+=4;
v3 = _rotl(v3, 17) + (*(unsigned int*)p); p+=4;
v4 = _rotl(v4, 19) + (*(unsigned int*)p); p+=4;
memcpy(state->memory + state->memsize, input, len);
state->memsize += len;
return 0;
}
p = bEnd - 16;
v1 += _rotl(v1, 17); v2 += _rotl(v2, 19); v3 += _rotl(v3, 13); v4 += _rotl(v4, 11);
v1 *= PRIME1; v2 *= PRIME1; v3 *= PRIME1; v4 *= PRIME1;
v1 += *(unsigned int*)p; p+=4; v2 += *(unsigned int*)p; p+=4; v3 += *(unsigned int*)p; p+=4; v4 += *(unsigned int*)p; // p+=4;
v1 *= PRIME2; v2 *= PRIME2; v3 *= PRIME2; v4 *= PRIME2;
v1 += _rotl(v1, 11); v2 += _rotl(v2, 17); v3 += _rotl(v3, 19); v4 += _rotl(v4, 13);
v1 *= PRIME3; v2 *= PRIME3; v3 *= PRIME3; v4 *= PRIME3;
crc = v1 + _rotl(v2, 3) + _rotl(v3, 6) + _rotl(v4, 9);
crc ^= crc >> 11;
crc += (PRIME4+len) * PRIME1;
crc ^= crc >> 15;
crc *= PRIME2;
crc ^= crc >> 13;
return crc;
if (state->memsize) // some data left from previous feed
{
memcpy(state->memory + state->memsize, input, 16-state->memsize);
{
const unsigned int* p32 = (const unsigned int*)state->memory;
state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
}
p += 16-state->memsize;
state->memsize = 0;
}
{
const unsigned char* const limit = bEnd - 16;
unsigned int v1 = state->v1;
unsigned int v2 = state->v2;
unsigned int v3 = state->v3;
unsigned int v4 = state->v4;
while (p<=limit)
{
v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
}
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
if (p < bEnd)
{
memcpy(state->memory, p, bEnd-p);
state->memsize = bEnd-p;
}
return 0;
}
unsigned int XXH_strong32(const void* input, int len, unsigned int seed)
unsigned int XXH32_getIntermediateResult (void* state_in)
{
// Special case, for small inputs
if (len < 16) return XXH_small(input, len, seed);
struct XXH_state32_t * state = state_in;
unsigned char * p = (unsigned char*)state->memory;
unsigned char* bEnd = (unsigned char*)state->memory + state->memsize;
unsigned int h32;
{
const unsigned char* p = (const unsigned char*)input;
const unsigned char* const bEnd = p + len;
unsigned int v1 = seed + PRIME1;
unsigned int v2 = v1 * PRIME2 + len;
unsigned int v3 = v2 * PRIME3;
unsigned int v4 = v3 * PRIME4;
const unsigned char* const limit = bEnd - 16;
unsigned int crc;
while (p<limit)
if (state->total_len >= 16)
{
v1 += _rotl(v1, 13); v1 *= PRIME1; v1 += (*(unsigned int*)p); p+=4;
v2 += _rotl(v2, 11); v2 *= PRIME1; v2 += (*(unsigned int*)p); p+=4;
v3 += _rotl(v3, 17); v3 *= PRIME1; v3 += (*(unsigned int*)p); p+=4;
v4 += _rotl(v4, 19); v4 *= PRIME1; v4 += (*(unsigned int*)p); p+=4;
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
}
else
{
h32 = state->seed + PRIME32_5;
}
p = bEnd - 16;
v1 += _rotl(v1, 17); v2 += _rotl(v2, 19); v3 += _rotl(v3, 13); v4 += _rotl(v4, 11);
v1 *= PRIME1; v2 *= PRIME1; v3 *= PRIME1; v4 *= PRIME1;
v1 += *(unsigned int*)p; p+=4; v2 += *(unsigned int*)p; p+=4; v3 += *(unsigned int*)p; p+=4; v4 += *(unsigned int*)p; // p+=4;
v1 *= PRIME2; v2 *= PRIME2; v3 *= PRIME2; v4 *= PRIME2;
v1 += _rotl(v1, 11); v2 += _rotl(v2, 17); v3 += _rotl(v3, 19); v4 += _rotl(v4, 13);
v1 *= PRIME3; v2 *= PRIME3; v3 *= PRIME3; v4 *= PRIME3;
h32 += (unsigned int) state->total_len;
crc = v1 + _rotl(v2, 3) + _rotl(v3, 6) + _rotl(v4, 9);
crc ^= crc >> 11;
crc += (PRIME4+len) * PRIME1;
crc ^= crc >> 15;
crc *= PRIME2;
crc ^= crc >> 13;
return crc;
while (p<=bEnd-4)
{
h32 += XXH_LE32(p) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd)
{
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
// Finish a streaming hash: compute the digest from the current state,
// then release the state. state_in must not be used after this call.
unsigned int XXH32_result (void* state_in)
{
    const unsigned int digest = XXH32_getIntermediateResult(state_in);

    free(state_in);
    return digest;
}

View file

@ -1,23 +1,3 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
/*
xxHash - Fast Hash algorithm
Header File
@ -50,6 +30,33 @@
You can contact the author at :
- xxHash source repository : http://code.google.com/p/xxhash/
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
*/
#pragma once
#if defined (__cplusplus)
@ -58,19 +65,60 @@ extern "C" {
//****************************
// Hash Functions
// Simple Hash Functions
//****************************
unsigned int XXH_fast32 (const void* input, int len, unsigned int seed);
unsigned int XXH_strong32(const void* input, int len, unsigned int seed);
unsigned int XXH32 (const void* input, int len, unsigned int seed);
/*
XXH_fast32() :
XXH32() :
Calculate the 32-bits hash of "input", of length "len"
"seed" can be used to alter the result
This function successfully passes all SMHasher tests.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, use the advanced functions below.
*/
XXH_strong32() :
Same as XXH_fast(), but the resulting hash has stronger properties
//****************************
// Advanced Hash Functions
//****************************
void* XXH32_init (unsigned int seed);
int XXH32_feed (void* state, const void* input, int len);
unsigned int XXH32_result (void* state);
/*
These functions calculate the xxhash of an input provided in several small packets,
as opposed to an input provided as a single block.
You must start with :
void* XXH32_init()
The function returns a pointer which holds the state of calculation.
This pointer must be provided as "void* state" parameter for XXH32_feed().
XXH32_feed() can be called as many times as necessary.
The function returns an error code, with 0 meaning OK, and all other values meaning there is an error.
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, it is recommended
to chunk your data into blocks of size 2^30 (1GB) to avoid any "int" overflow issue.
Finally, you can end the calculation anytime, by using XXH32_result().
This function returns the final 32-bits hash.
You must provide the same "void* state" parameter created by XXH32_init().
Memory will be freed by XXH32_result().
*/
unsigned int XXH32_getIntermediateResult (void* state);
/*
This function does the same as XXH32_result(), generating a 32-bit hash,
but preserves the memory context.
This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_feed().
To free the memory context, use XXH32_result().
*/