pcompress/rabin/rabin_polynomial.h

153 lines
6 KiB
C
Raw Normal View History

/*
* rabin_polynomial_constants.h
*
* Created by Joel Lawrence Tucci on 09-May-2011.
*
* Copyright (c) 2011 Joel Lawrence Tucci
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* Neither the name of the project's author nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
2012-06-21 15:10:43 +00:00
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
2012-07-07 16:48:29 +00:00
* version 3 of the License, or (at your option) any later version.
2012-06-21 15:10:43 +00:00
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
*/
#ifndef _RABIN_POLY_H_
#define _RABIN_POLY_H_
#include "utils.h"
//List of constants, mostly constraints and defaults for various parameters
//to the Rabin Fingerprinting algorithm
#define RAB_POLYNOMIAL_CONST 2
// 1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT = Average Rabin Chunk Size
// So we are always looking at power of 2 chunk sizes to avoid doing a modulus
//
#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT 12
#define RAB_POLYNOMIAL_AVG_BLOCK_SIZE (1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT)
#define RAB_POLYNOMIAL_AVG_BLOCK_MASK (RAB_POLYNOMIAL_AVG_BLOCK_SIZE - 1)
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE RAB_POLYNOMIAL_AVG_BLOCK_SIZE
#define RAB_POLYNOMIAL_MAX_BLOCK_SIZE (128 * 1024)
#define RAB_POLYNOMIAL_AVG_BLOCK_SHIFT2 12
#define RAB_POLYNOMIAL_AVG_BLOCK_SIZE2 (1 << RAB_POLYNOMIAL_AVG_BLOCK_SHIFT)
#define RAB_POLYNOMIAL_AVG_BLOCK_MASK2 (RAB_POLYNOMIAL_AVG_BLOCK_SIZE - 1)
#define RAB_POLYNOMIAL_MIN_BLOCK_SIZE2 2048
#define LZMA_WINDOW_MAX (128L * 1024L * 1024L)
#define RAB_POLYNOMIAL_WIN_SIZE 16
#define RAB_POLYNOMIAL_MIN_WIN_SIZE 8
#define RAB_POLYNOMIAL_MAX_WIN_SIZE 64
typedef struct {
ssize_t offset;
uint64_t cksum_n_offset; // Dual purpose variable
unsigned int index;
unsigned int length;
unsigned short refcount;
} rabin_blockentry_t;
// An entry in the Rabin block array in the chunk.
// It is either a length value <= RAB_POLYNOMIAL_MAX_BLOCK_SIZE or
// if value > RAB_POLYNOMIAL_MAX_BLOCK_SIZE then
// value - RAB_POLYNOMIAL_MAX_BLOCK_SIZE is index of block with which
// this block is a duplicate.
// Offset can be dynamically calculated.
//
#define RABIN_ENTRY_SIZE (sizeof (unsigned int))
// Header for a chunk deduped using Rabin
// Number of rabin blocks, size of original data chunk, size of compressed index,
// size of deduped data, size of compressed data
#define RABIN_HDR_SIZE (sizeof (unsigned int) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t) + sizeof (ssize_t))
// Maximum number of dedup blocks supported (2^31 - 1)
#define RABIN_MAX_BLOCKS (0x7fffffff)
// Maximum possible block size for a single rabin block. This is a hard limit much
// larger than RAB_POLYNOMIAL_MAX_BLOCK_SIZE. Useful when merging non-duplicate blocks.
// This is also 2^31 - 1.
#define RABIN_MAX_BLOCK_SIZE (RABIN_MAX_BLOCKS)
// Mask to determine whether Rabin index entry is a length value or index value.
// MSB = 1 : Index
// MSB = 0 : Length
#define RABIN_INDEX_FLAG (0x80000000)
// Mask to extract value from a rabin index entry
#define RABIN_INDEX_VALUE (0x7fffffff)
typedef struct {
unsigned char *current_window_data;
rabin_blockentry_t *blocks;
unsigned char *cbuf;
int window_pos;
uint32_t rabin_poly_max_block_size;
uint32_t rabin_poly_min_block_size;
uint32_t rabin_poly_avg_block_size;
uint32_t rabin_avg_block_mask;
uint32_t rabin_break_patt;
uint64_t real_chunksize;
short valid;
void *lzma_data;
int level;
} rabin_context_t;
extern rabin_context_t *create_rabin_context(uint64_t chunksize, uint64_t real_chunksize,
const char *algo);
extern void destroy_rabin_context(rabin_context_t *ctx);
extern unsigned int rabin_dedup(rabin_context_t *ctx, unsigned char *buf,
ssize_t *size, ssize_t offset, ssize_t *rabin_pos);
extern void rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size);
extern void rabin_parse_hdr(uchar_t *buf, unsigned int *blknum, ssize_t *rabin_index_sz,
ssize_t *rabin_data_sz, ssize_t *rabin_index_sz_cmp,
ssize_t *rabin_data_sz_cmp, ssize_t *rabin_deduped_size);
extern void rabin_update_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp,
ssize_t rabin_data_sz_cmp);
extern void reset_rabin_context(rabin_context_t *ctx);
#endif /* _RABIN_POLY_H_ */