2012-06-21 14:57:05 +00:00
|
|
|
/*
|
|
|
|
* rabin_polynomial_constants.h
|
|
|
|
*
|
|
|
|
* Created by Joel Lawrence Tucci on 09-May-2011.
|
|
|
|
*
|
|
|
|
* Copyright (c) 2011 Joel Lawrence Tucci
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* Neither the name of the project's author nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2012-06-21 15:10:43 +00:00
|
|
|
/*
|
|
|
|
* This file is a part of Pcompress, a chunked parallel multi-
|
|
|
|
* algorithm lossless compression and decompression program.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
|
|
|
* Use is subject to license terms.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
2012-07-07 16:48:29 +00:00
|
|
|
* version 3 of the License, or (at your option) any later version.
|
2012-06-21 15:10:43 +00:00
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2012-06-29 12:53:55 +00:00
|
|
|
#ifndef _RABIN_POLY_H_
|
|
|
|
#define _RABIN_POLY_H_
|
|
|
|
|
|
|
|
#include "utils.h"
|
|
|
|
|
2012-06-21 14:57:05 +00:00
|
|
|
/*
 * Constants, mostly constraints and defaults, for the parameters of the
 * Rabin fingerprinting algorithm.
 */

/*
 * Prime constant taken from Bulat Ziganshin's REP. It seems to work best
 * across a wide range of data.
 */
#define RAB_POLYNOMIAL_CONST 153191

/* Mask with the low 40 bits set. */
#define POLY_MASK (0xffffffffffULL)

/*
 * NOTE(review): presumably the default block-size selector and the minimum
 * block size expressed as a bit count -- confirm against the implementation.
 */
#define RAB_BLK_DEFAULT 1
#define RAB_BLK_MIN_BITS 11

/* 128 MiB. */
#define LZMA_WINDOW_MAX (128L * 1024L * 1024L)

/* Default, minimum and maximum sliding-window sizes. */
#define RAB_POLYNOMIAL_WIN_SIZE 16
#define RAB_POLYNOMIAL_MIN_WIN_SIZE 8
#define RAB_POLYNOMIAL_MAX_WIN_SIZE 64

/* 128 KiB. */
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024)

/* Minimum practical chunk size when doing dedup (1 MiB). */
#define RAB_MIN_CHUNK_SIZE (1048576L)
|
|
|
|
|
2012-06-29 12:53:55 +00:00
|
|
|
/*
 * An entry in the Rabin block array in the chunk.
 * It is either a length value <= RABIN_MAX_BLOCK_SIZE or an index value with
 * which this block is a duplicate/similar. The entries are variable sized.
 * Offset can be dynamically calculated.
 */
#define RABIN_ENTRY_SIZE (sizeof (unsigned int))

/*
 * Header for a chunk deduped using Rabin. One unsigned int followed by four
 * ssize_t values:
 * number of rabin blocks, size of original data chunk, size of compressed
 * index, size of deduped data, size of compressed data.
 */
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))

/* Maximum number of dedup blocks supported (2^30 - 1). */
#define RABIN_MAX_BLOCKS (0x3FFFFFFFUL)
|
2012-07-02 16:38:03 +00:00
|
|
|
|
|
|
|
/*
 * Maximum possible block size for a single rabin block. This is a hard limit
 * much larger than RAB_POLYNOMIAL_MAX_BLOCK_SIZE. Useful when merging
 * non-duplicate blocks.
 * It is defined as RABIN_MAX_BLOCKS, so it is also 2^30 - 1 (0x3FFFFFFF).
 */
#define RABIN_MAX_BLOCK_SIZE (RABIN_MAX_BLOCKS)

/*
 * Masks to determine whether a Rabin index entry is a length value, a
 * duplicate index value or a similar index value.
 *
 * MSB   = 1 : Index
 * MSB   = 0 : Length
 * MSB-1 = 1 : Similarity Index
 * MSB-1 = 0 : Exact Duplicate Index
 */
#define RABIN_INDEX_FLAG (0x80000000UL)		/* bit 31 */
#define SET_SIMILARITY_FLAG (0x40000000UL)	/* bit 30 */
#define GET_SIMILARITY_FLAG SET_SIMILARITY_FLAG
#define CLEAR_SIMILARITY_FLAG (0xBFFFFFFFUL)	/* low 32 bits except bit 30 */

/* Mask to extract the value (low 30 bits) from a rabin index entry. */
#define RABIN_INDEX_VALUE (0x3FFFFFFFUL)
|
|
|
|
|
2012-07-21 18:30:41 +00:00
|
|
|
/*
 * Types of block similarity.
 */
#define SIMILAR_EXACT 1
#define SIMILAR_PARTIAL 2
#define SIMILAR_REF 3
|
2012-07-19 16:11:07 +00:00
|
|
|
|
2012-09-24 16:50:27 +00:00
|
|
|
/*
 * Types of delta operations.
 * DELTA_NORMAL = Check for at least 60% similarity
 * DELTA_EXTRA = Check for at least 40% similarity
 */
#define DELTA_NORMAL 1
#define DELTA_EXTRA 2
|
|
|
|
|
2012-08-15 14:43:40 +00:00
|
|
|
/*
 * Irreducible polynomial for Rabin modulus. This value is from the
 * Low Bandwidth Filesystem.
 */
#define FP_POLY 0xbfe6b8a5bf378d83ULL
|
|
|
|
|
2012-09-23 09:27:09 +00:00
|
|
|
/*
 * Bookkeeping record for a single Rabin block.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; the code that populates this structure is not part of this header.
 * Confirm against the implementation before relying on them.
 */
typedef struct rab_blockentry {
	ssize_t offset;			/* presumably the block's offset in the chunk */
	uint32_t similarity_hash;	/* presumably the hash used for similarity matching */
	uint32_t hash;			/* presumably the hash used for exact matching */
	uint32_t index;
	uint32_t length;
	unsigned char similar;		/* presumably one of the SIMILAR_* values */
	struct rab_blockentry *other;	/* link to another entry -- TODO confirm role */
	struct rab_blockentry *next;	/* link to the next entry -- TODO confirm role */
} rabin_blockentry_t;
|
2012-07-02 16:38:03 +00:00
|
|
|
|
2012-06-21 14:57:05 +00:00
|
|
|
typedef struct {
|
|
|
|
unsigned char *current_window_data;
|
2012-08-17 05:33:02 +00:00
|
|
|
rabin_blockentry_t **blocks;
|
|
|
|
uint32_t blknum;
|
2012-06-29 12:53:55 +00:00
|
|
|
unsigned char *cbuf;
|
2012-06-21 14:57:05 +00:00
|
|
|
int window_pos;
|
2012-07-03 17:17:24 +00:00
|
|
|
uint32_t rabin_poly_max_block_size;
|
|
|
|
uint32_t rabin_poly_min_block_size;
|
|
|
|
uint32_t rabin_poly_avg_block_size;
|
|
|
|
uint32_t rabin_avg_block_mask;
|
2012-07-06 17:54:12 +00:00
|
|
|
uint32_t rabin_break_patt;
|
2012-07-05 18:46:02 +00:00
|
|
|
uint64_t real_chunksize;
|
2012-07-10 14:44:23 +00:00
|
|
|
short valid;
|
2012-07-01 16:14:02 +00:00
|
|
|
void *lzma_data;
|
2012-09-15 05:44:58 +00:00
|
|
|
int level, delta_flag, fixed_flag;
|
2012-09-16 05:42:58 +00:00
|
|
|
} dedupe_context_t;
|
2012-06-21 14:57:05 +00:00
|
|
|
|
2012-09-16 05:42:58 +00:00
|
|
|
extern dedupe_context_t *create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize,
|
2012-09-15 05:44:58 +00:00
|
|
|
int rab_blk_sz, const char *algo, int delta_flag, int fixed_flag);
|
2012-09-16 05:42:58 +00:00
|
|
|
extern void destroy_dedupe_context(dedupe_context_t *ctx);
|
|
|
|
extern unsigned int dedupe_compress(dedupe_context_t *ctx, unsigned char *buf,
|
2012-07-08 16:14:08 +00:00
|
|
|
ssize_t *size, ssize_t offset, ssize_t *rabin_pos);
|
2012-09-16 05:42:58 +00:00
|
|
|
extern void dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, ssize_t *size);
|
2012-09-27 16:59:08 +00:00
|
|
|
extern void parse_dedupe_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *dedupe_index_sz,
|
|
|
|
ssize_t *dedupe_data_sz, ssize_t *rabin_index_sz_cmp,
|
|
|
|
ssize_t *dedupe_data_sz_cmp, ssize_t *deduped_size);
|
|
|
|
extern void update_dedupe_hdr(uchar_t *buf, ssize_t dedupe_index_sz_cmp,
|
|
|
|
ssize_t dedupe_data_sz_cmp);
|
2012-09-16 05:42:58 +00:00
|
|
|
extern void reset_dedupe_context(dedupe_context_t *ctx);
|
|
|
|
extern uint32_t dedupe_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo,
|
2012-08-23 17:28:44 +00:00
|
|
|
int delta_flag);
|
2012-06-21 14:57:05 +00:00
|
|
|
|
2012-08-23 17:28:44 +00:00
|
|
|
#endif /* _RABIN_POLY_H_ */
|