Starting changes for single-file global dedupe.

This commit is contained in:
Moinak Ghosh 2013-02-12 21:53:04 +05:30
parent 3e1737b4ab
commit 1eae57c8a2
5 changed files with 90 additions and 38 deletions

View file

@ -39,7 +39,7 @@
#define FOUR_MB (4194304ULL)
#define EIGHT_MB (8388608ULL)
static int
static compress_algo_t
get_compress_level(compress_algo_t algo)
{
switch (algo) {
@ -57,7 +57,7 @@ get_compress_level(compress_algo_t algo)
return (0);
}
static compress_algo_t
static int
get_compress_algo(char *algo_name)
{
if (strcmp(algo_name, "none") == 0) {
@ -105,7 +105,7 @@ get_compress_str(compress_algo_t algo)
return ("invalid");
}
static chunk_cksum_t
static cksum_t
get_cksum_type(char *cksum_name)
{
if (strcmp(cksum_name, "SHA256") == 0) {
@ -114,11 +114,17 @@ get_cksum_type(char *cksum_name)
} else if (cksum_name, "SHA512") == 0) {
return (CKSUM_SHA512);
} else if (cksum_name, "SKEIN256") == 0) {
return (CKSUM_SKEIN256);
} else if (cksum_name, "BLAKE256") == 0) {
return (CKSUM_BLAKE256);
} else if (cksum_name, "SKEIN512") == 0) {
return (CKSUM_SKEIN512);
} else if (cksum_name, "BLAKE512") == 0) {
return (CKSUM_BLAKE512);
} else if (cksum_name, "KECCAK256") == 0) {
return (CKSUM_KECCAK256);
} else if (cksum_name, "KECCAK512") == 0) {
return (CKSUM_KECCAK512);
}
return (CKSUM_INVALID);
}
@ -132,11 +138,17 @@ get_cksum_str(chunk_cksum_t ck)
} else if (ck == CKSUM_SHA512) {
return ("SHA512");
} else if (ck == CKSUM_SKEIN256) {
return ("SKEIN256");
} else if (ck == CKSUM_BLAKE256) {
return ("BLAKE256");
} else if (ck == CKSUM_SKEIN512) {
return ("SKEIN512");
} else if (ck == CKSUM_BLAKE512) {
return ("BLAKE512");
} else if (ck == CKSUM_KECCAK256) {
return ("KECCAK256");
} else if (ck == CKSUM_KECCAK512) {
return ("KECCAK512");
}
return ("INVALID");
}
@ -144,10 +156,10 @@ get_cksum_str(chunk_cksum_t ck)
static int
get_cksum_sz(chunk_cksum_t ck)
{
if (ck == CKSUM_SHA256 || ck == CKSUM_SKEIN256) {
if (ck == CKSUM_SHA256 || ck == CKSUM_BLAKE256 || ck == CKSUM_KECCAK256) {
return (32);
} else if (ck == CKSUM_SHA512 || ck == CKSUM_SKEIN512) {
} else if (ck == CKSUM_SHA512 || ck == CKSUM_BLAKE512 || ck == CKSUM_KECCAK512) {
return (64);
}
return (0);
@ -315,3 +327,24 @@ write_config(char *configfile, archive_config_t *cfg)
fprintf(fh, "\n");
fclose(fh);
}
/*
 * Fill in an archive configuration directly from caller-supplied values.
 *
 * cfg            - configuration to populate; must not be NULL.
 * algo           - stored in cfg->algo and passed to get_compress_level()
 *                  to obtain cfg->compress_level.
 * ck             - stored in cfg->chunk_cksum_type; get_cksum_sz() supplies
 *                  the matching cfg->chunk_cksum_sz.
 * chunksize      - stored in cfg->chunk_sz and converted into
 *                  cfg->chunk_sz_bytes through RAB_BLK_AVG_SZ().
 * file_sz        - stored in cfg->archive_sz.
 * chunks_per_seg - accepted but not consulted by this routine yet.
 *
 * Returns 0 on success, or -1 when cfg is NULL.
 */
int
set_simple_config(archive_config_t *cfg, compress_algo_t algo, cksum_t ck, uint32_t chunksize,
    size_t file_sz, uint32_t chunks_per_seg)
{
	if (cfg == NULL)
		return (-1);

	cfg->algo = algo;
	cfg->chunk_cksum_type = ck;
	cfg->compress_level = get_compress_level(cfg->algo);
	cfg->chunk_cksum_sz = get_cksum_sz(cfg->chunk_cksum_type);
	cfg->chunk_sz = chunksize;
	cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz);
	cfg->archive_sz = file_sz;

	/*
	 * Segment size: 4MB for archives below ONE_TB, 8MB otherwise.
	 * NOTE(review): segment_sz_bytes is not declared inside this function;
	 * presumably it is meant to be a member of cfg -- confirm against the
	 * archive_config_t definition.
	 */
	if (cfg->archive_sz < ONE_TB) {
		segment_sz_bytes = FOUR_MB;
	} else {
		segment_sz_bytes = EIGHT_MB;
	}

	/*
	 * The function is declared int and callers store its result, so an
	 * explicit status must be returned instead of falling off the end.
	 */
	return (0);
}

View file

@ -22,35 +22,18 @@
#define _C_ONFIG_H
#include <limits.h>
#include <utils.h>
#ifdef __cplusplus
extern "C" {
#endif
#define DEFAULT_SIMILARITY_INTERVAL 10
#define DEFAULT_CKSUM CKSUM_SHA256
#define CONTAINER_ITEMS 2048
#define DEFAULT_SIMILARITY_INTERVAL 5
#define DEFAULT_CKSUM CKSUM_BLAKE256
#define DEFAULT_COMPRESS COMPRESS_LZ4
#define MIN_CK 1
#define MAX_CK 5
/*
 * Compression algorithm identifiers, numbered sequentially from
 * COMPRESS_NONE = 0; COMPRESS_INVALID is the last enumerator.
 *
 * Declared with typedef so that compress_algo_t names a type. Without it,
 * "enum { ... } compress_algo_t;" defines an object of an anonymous enum
 * type in every file that includes this header, and the name cannot be
 * used in parameter or member declarations.
 */
typedef enum {
	COMPRESS_NONE = 0,
	COMPRESS_LZFX,
	COMPRESS_LZ4,
	COMPRESS_ZLIB,
	COMPRESS_BZIP2,
	COMPRESS_LZMA,
	COMPRESS_INVALID
} compress_algo_t;
/*
 * Chunk checksum identifiers, numbered sequentially from CKSUM_SHA256 = 0;
 * CKSUM_INVALID is the last enumerator.
 *
 * Declared with typedef so that chunk_cksum_t names a type. Without it,
 * "enum { ... } chunk_cksum_t;" defines an object of an anonymous enum
 * type in every file that includes this header, and the name cannot be
 * used in parameter or member declarations.
 */
typedef enum {
	CKSUM_SHA256,
	CKSUM_SHA512,
	CKSUM_SKEIN256,
	CKSUM_SKEIN512,
	CKSUM_INVALID
} chunk_cksum_t;
// 8GB
#define MIN_ARCHIVE_SZ (8589934592ULL)
@ -59,9 +42,9 @@ typedef struct {
uint32_t chunk_sz; // Numeric ID: 1 - 4k ... 5 - 64k
int64_t archive_sz; // Total size of archive in bytes.
int verify_chunks; // Whether to use memcmp() to compare chunks byte for byte.
compress_algo_t algo; // Which compression algo for segments.
int compress_level; // Default preset compression level per algo.
int chunk_cksum_type; // Which digest to use for hash based chunk lookup.
int algo; // Which compression algo for segments.
compress_algo_t compress_level; // Default preset compression level per algo.
cksum_t chunk_cksum_type; // Which digest to use for hash based chunk lookup.
int chunk_cksum_sz; // Size of cksum in bytes.
int similarity_interval; // Similarity based match intervals in %age.
// The items below are computed given the above
@ -77,6 +60,8 @@ typedef struct {
int read_config(char *configfile, archive_config_t *cfg);
int write_config(char *configfile, archive_config_t *cfg);
int set_simple_config(archive_config_t *cfg, compress_algo_t algo, cksum_t ck,
uint32_t chunksize, size_t file_sz, uint32_t chunks_per_seg);
#ifdef __cplusplus
}

View file

@ -31,6 +31,13 @@
#include <config.h>
#include "initdb.h"
#include "config.h"
#define ONE_PB (1125899906842624ULL)
#define ONE_TB (1099511627776ULL)
#define FOUR_MB (4194304ULL)
#define EIGHT_MB (8388608ULL)
archive_config_t *
init_global_db(char *configfile)
@ -47,4 +54,17 @@ init_global_db(char *configfile)
rv = read_config(configfile, cfg);
if (rv != 0)
return (NULL);
return (cfg);
}
/*
 * Build an archive configuration from explicit parameters.
 *
 * A zero-filled archive_config_t is allocated with calloc() and populated
 * by set_simple_config() from algo, ck, chunksize, file_sz and
 * chunks_per_seg. path and memlimit are accepted but not consulted by this
 * routine yet.
 *
 * Returns the newly allocated configuration, or NULL if the allocation
 * fails.
 */
archive_config_t *
init_global_db_simple(char *path, uint32_t chunksize, uint32_t chunks_per_seg,
    compress_algo_t algo, cksum_t ck, size_t file_sz, size_t memlimit)
{
	archive_config_t *cfg;

	cfg = calloc(1, sizeof (*cfg));
	if (cfg == NULL)
		return (NULL);

	/*
	 * With a valid cfg, set_simple_config() only fills in fields and
	 * cannot fail in this revision, so its result is not inspected.
	 */
	(void) set_simple_config(cfg, algo, ck, chunksize, file_sz, chunks_per_seg);

	return (cfg);
}

View file

@ -25,7 +25,9 @@
extern "C" {
#endif
int init_global_db(char *configfile);
archive_config_t *init_global_db(char *configfile);
/* The prototype must end with ';' so the following #ifdef block parses. */
archive_config_t *init_global_db_simple(char *path, uint32_t chunksize, uint32_t chunks_per_seg,
    compress_algo_t algo, cksum_t ck, size_t file_sz, size_t memlimit);
#ifdef __cplusplus
}

View file

@ -73,8 +73,10 @@ typedef int32_t bsize_t;
# endif
# if !defined(sun) && !defined (__sun)
# define LE64(x) __bswap_64(x)
# define LE32(x) __bswap_32(x)
# else
# define LE64(x) BSWAP_64(x)
# define LE32(x) BSWAP_32(x)
# endif
#else
# if !defined(sun) && !defined (__sun)
@ -86,6 +88,7 @@ typedef int32_t bsize_t;
# endif
# endif
# define LE64(x) (x)
# define LE32(x) (x)
#endif
@ -124,6 +127,15 @@ typedef int32_t bsize_t;
#define BYTES_TO_MB(x) ((x) / (1024 * 1024))
/*
 * Compression algorithm identifiers. Values are consecutive, running from
 * COMPRESS_NONE (0) through COMPRESS_LZMA (5).
 */
typedef enum {
	COMPRESS_NONE  = 0,
	COMPRESS_LZFX  = 1,
	COMPRESS_LZ4   = 2,
	COMPRESS_ZLIB  = 3,
	COMPRESS_BZIP2 = 4,
	COMPRESS_LZMA  = 5
} compress_algo_t;
typedef struct {
uint32_t buf_extra;
int compress_mt_capable;