Make global dedupe bits buildable and fix errors.

Rename the adaptive-mode compression type constants (COMPRESS_* -> ADAPT_COMPRESS_*) to avoid conflicts with the global compress_algo_t enum constants of the same names.
This commit is contained in:
Moinak Ghosh 2013-02-17 21:05:40 +05:30
parent 7386f82a4f
commit 6badbcaea7
8 changed files with 127 additions and 47 deletions

View file

@ -207,14 +207,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data);
if (rv < 0)
return (rv);
rv = COMPRESS_LZMA;
rv = ADAPT_COMPRESS_LZMA;
lzma_count++;
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, NULL);
if (rv < 0)
return (rv);
rv = COMPRESS_BZIP2;
rv = ADAPT_COMPRESS_BZIP2;
bzip2_count++;
} else {
@ -223,14 +223,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data);
if (rv < 0)
return (rv);
rv = COMPRESS_BSC;
rv = ADAPT_COMPRESS_BSC;
bsc_count++;
#endif
} else {
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data);
if (rv < 0)
return (rv);
rv = COMPRESS_PPMD;
rv = ADAPT_COMPRESS_PPMD;
ppmd_count++;
}
}
@ -247,16 +247,16 @@ adapt_decompress(void *src, uint64_t srclen, void *dst,
cmp_flags = (chdr>>4) & CHDR_ALGO_MASK;
if (cmp_flags == COMPRESS_LZMA) {
if (cmp_flags == ADAPT_COMPRESS_LZMA) {
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data));
} else if (cmp_flags == COMPRESS_BZIP2) {
} else if (cmp_flags == ADAPT_COMPRESS_BZIP2) {
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, NULL));
} else if (cmp_flags == COMPRESS_PPMD) {
} else if (cmp_flags == ADAPT_COMPRESS_PPMD) {
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data));
} else if (cmp_flags == COMPRESS_BSC) {
} else if (cmp_flags == ADAPT_COMPRESS_BSC) {
#ifdef ENABLE_PC_LIBBSC
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data));
#else

View file

@ -61,7 +61,8 @@ typedef enum {
* to decode archives created with 1.2. New archives do not use SKEIN.
*/
CKSUM_SKEIN256 = 0x800,
CKSUM_SKEIN512 = 0x900
CKSUM_SKEIN512 = 0x900,
CKSUM_INVALID = 0
} cksum_t;
typedef struct {

View file

@ -70,11 +70,11 @@ extern "C" {
* lower 3 bits in higher nibble indicate chunk compression algorithm
* in adaptive modes.
*/
#define COMPRESS_NONE 0
#define COMPRESS_LZMA 1
#define COMPRESS_BZIP2 2
#define COMPRESS_PPMD 3
#define COMPRESS_BSC 4
#define ADAPT_COMPRESS_NONE 0
#define ADAPT_COMPRESS_LZMA 1
#define ADAPT_COMPRESS_BZIP2 2
#define ADAPT_COMPRESS_PPMD 3
#define ADAPT_COMPRESS_BSC 4
#define CHDR_ALGO_MASK 7
extern uint32_t zlib_buf_extra(uint64_t buflen);

View file

@ -32,7 +32,6 @@
#include <pthread.h>
#include "db.h"
#include "config.h"
#define ONE_PB (1125899906842624ULL)
#define ONE_TB (1099511627776ULL)
@ -43,7 +42,7 @@
* Hashtable structures for in-memory index.
*/
typedef struct _hash_entry {
segment_entry_t *seg;
uint64_t seg_offset;
struct _hash_entry *next;
struct _hash_entry *lru_prev;
struct _hash_entry *lru_next;
@ -61,9 +60,15 @@ typedef struct {
hash_entry_t *lru_tail;
uint64_t memlimit;
uint64_t memused;
int hash_entry_size;
int hash_entry_size, intervals;
} htablst_t;
/*
 * Index handle that init_global_db_s() stores in cfg->dbdata for the
 * in-memory index: it bundles the hash table list with the file
 * descriptors opened on the temporary segment file.
 */
typedef struct {
htablst_t *hlist; /* hash table list built by init_global_db_s() */
int seg_fd_w; /* descriptor returned by mkstemp() for the temporary ".segXXXXXX" file */
int *tfd; /* array of nthreads descriptors, each open()ed O_RDONLY on that same file */
} seg_index_t;
archive_config_t *
init_global_db(char *configfile)
{
@ -83,17 +88,36 @@ init_global_db(char *configfile)
return (cfg);
}
/*
 * Release a hash table list and the memory it owns: every per-interval
 * bucket array, the array of tables, the mutex array and finally the
 * list structure itself.
 *
 * htablst   - list to tear down; NULL is accepted and ignored.
 * intervals - number of entries of htablst->list to walk.
 *
 * free(NULL) is a no-op, so members that were never allocated need no
 * individual guard.
 *
 * NOTE(review): only the storage of mlist is freed; no
 * pthread_mutex_destroy() is issued for the mutexes it holds. Confirm
 * whether callers are expected to destroy them first.
 */
static void
cleanup_htablst(htablst_t *htablst, int intervals)
{
	int i;

	if (htablst == NULL)
		return;

	if (htablst->list != NULL) {
		for (i = 0; i < intervals; i++)
			free(htablst->list[i].htab);
		free(htablst->list);
	}
	free(htablst->mlist);
	free(htablst);
}
archive_config_t *
init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
init_global_db_s(char *path, char *tmppath, uint32_t chunksize, int pct_interval,
compress_algo_t algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
size_t memlimit)
size_t memlimit, int nthreads)
{
archive_config_t *cfg;
int rv;
float diff;
cfg = calloc(1, sizeof (archive_config_t));
rv = set_config_s(cfg, algo, ck, ck_sim, chunksize, file_sz, chunks_per_seg, pct_interval);
rv = set_config_s(cfg, algo, ck, ck_sim, chunksize, file_sz, pct_interval);
if (path != NULL) {
printf("Disk based index not yet implemented.\n");
@ -104,6 +128,7 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
uint64_t memreqd;
htablst_t *htablst;
int hash_entry_size;
seg_index_t *indx;
// Compute total hashtable entries first
intervals = 100 / pct_interval - 1;
@ -129,18 +154,53 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
// Now create as many hash tables as there are similarity match intervals
// each having hash_slots / intervals slots.
htablst = calloc(1, sizeof (htablst_t));
if (!htablst) {
free(cfg);
return (NULL);
}
htablst->memlimit = memlimit;
htablst->list = (htab_t *)calloc(intervals, sizeof (htab_t));
htablst->mlist = (pthread_mutex_t *)malloc(intervals * sizeof (pthread_mutex_t));
htablst->hash_entry_size = hash_entry_size;
htablst->intervals = intervals;
for (i = 0; i < intervals; i++) {
htablst->list[i].htab = (hash_entry_t **)calloc(hash_slots / intervals,
sizeof (hash_entry_t *));
if (!(htablst->list[i].htab)) {
cleanup_htablst(htablst, intervals);
free(cfg);
return (NULL);
}
htablst->memused += ((hash_slots / intervals) * (sizeof (hash_entry_t *)));
pthread_mutex_init(&(htablst->mlist[i]), NULL);
}
cfg->dbdata = htablst;
indx = (seg_index_t *)calloc(1, sizeof (seg_index_t));
if (!indx) {
cleanup_htablst(htablst, intervals);
free(cfg);
return (NULL);
}
indx->hlist = htablst;
strcpy(cfg->rootdir, tmppath);
strcat(cfg->rootdir, "/.segXXXXXX");
indx->seg_fd_w = mkstemp(cfg->rootdir);
indx->tfd = (int *)malloc(sizeof (int) * nthreads);
if (indx->seg_fd_w == -1 || indx->tfd == NULL) {
cleanup_htablst(htablst, intervals);
free(cfg);
if (indx->tfd)
free(indx->tfd);
return (NULL);
}
for (i = 0; i < nthreads; i++) {
indx->tfd[i] = open(cfg->rootdir, O_RDONLY);
}
cfg->dbdata = indx;
slab_cache_add(hash_entry_size);
slab_cache_add(cfg->chunk_cksum_sz);
}
@ -148,6 +208,13 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
}
/*
 * Placeholder for inserting a segment entry into the global index.
 * The body does not use any of its arguments yet and always returns 0.
 */
int
db_insert_s(archive_config_t *cfg, uchar_t *cksum, int interval_num)
db_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, segment_entry_t *seg, int thr_id)
{
return (0);
}
/*
 * Placeholder for querying the global index; judging by the signature
 * it is meant to find a segment entry for a similarity checksum
 * (TODO confirm once implemented).
 *
 * No lookup is performed yet: every argument is ignored and the
 * function always returns NULL.
 */
segment_entry_t *
db_query_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, int thr_id)
{
	/* Pointer return type: use NULL rather than the integer literal 0. */
	return (NULL);
}

View file

@ -21,14 +21,19 @@
#ifndef _DB_H
#define _DB_H
#include <dedupe_config.h>
#ifdef __cplusplus
extern "C" {
#endif
archive_config_t *init_global_db(char *configfile);
archive_config_t *init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
compress_algo_t algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
size_t memlimit);
archive_config_t *init_global_db_s(char *path, char *tmppath, uint32_t chunksize,
int pct_interval, compress_algo_t algo, cksum_t ck,
cksum_t ck_sim, size_t file_sz, size_t memlimit, int nthreads);
int db_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
segment_entry_t *seg, int thr_id);
segment_entry_t *db_query_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, int thr_id);
#ifdef __cplusplus
}

View file

@ -31,8 +31,8 @@
#include <sys/stat.h>
#include <rabin_dedup.h>
#include "config.h"
#include "initdb.h"
#include "dedupe_config.h"
#include "db.h"
#define ONE_PB (1125899906842624ULL)
#define ONE_TB (1099511627776ULL)
@ -44,6 +44,7 @@ get_compress_level(compress_algo_t algo)
{
switch (algo) {
case COMPRESS_NONE:
case COMPRESS_INVALID:
return (0);
case COMPRESS_LZFX:
return (5);
@ -130,7 +131,7 @@ get_cksum_type(char *cksum_name)
}
static char *
get_cksum_str(chunk_cksum_t ck)
get_cksum_str(cksum_t ck)
{
if (ck == CKSUM_SHA256) {
return ("SHA256");
@ -154,7 +155,7 @@ get_cksum_str(chunk_cksum_t ck)
}
static int
get_cksum_sz(chunk_cksum_t ck)
get_cksum_sz(cksum_t ck)
{
if (ck == CKSUM_SHA256 || ck == CKSUM_BLAKE256 || ck == CKSUM_KECCAK256) {
return (32);
@ -185,7 +186,7 @@ read_config(char *configfile, archive_config_t *cfg)
return (1);
}
while (fgets(line, 255, fh) != NULL) {
int pos;
char *pos;
if (strlen(line) < 9 || line[0] == '#') {
continue;
@ -205,7 +206,7 @@ read_config(char *configfile, archive_config_t *cfg)
}
cfg->chunk_sz = ck;
} else if (strncmp(line, "ROOTDIR") == 0) {
} else if (strncmp(line, "ROOTDIR", 7) == 0) {
struct stat sb;
if (stat(pos, &sb) == -1) {
if (errno != ENOENT) {
@ -222,7 +223,7 @@ read_config(char *configfile, archive_config_t *cfg)
fclose(fh);
return (1);
}
} else if (strncmp(line, "ARCHIVESZ") == 0) {
} else if (strncmp(line, "ARCHIVESZ", 9) == 0) {
int ovr;
ssize_t arch_sz;
ovr = parse_numeric(&arch_sz, pos);
@ -238,7 +239,7 @@ read_config(char *configfile, archive_config_t *cfg)
}
cfg->archive_sz = arch_sz;
} else if (strncmp(line, "VERIFY") == 0) {
} else if (strncmp(line, "VERIFY", 6) == 0) {
if (strcmp(pos, "no") == 0) {
cfg->verify_chunks = 0;
@ -249,21 +250,21 @@ read_config(char *configfile, archive_config_t *cfg)
fclose(fh);
return (1);
}
} else if (strncmp(line, "COMPRESS") == 0) {
} else if (strncmp(line, "COMPRESS", 8) == 0) {
cfg->algo = get_compress_algo(pos);
if (cfg->algo == COMPRESS_INVALID) {
fprintf(stderr, "Invalid COMPRESS setting.\n");
fclose(fh);
return (1);
}
} else if (strncmp(line, "CHUNK_CKSUM") == 0) {
} else if (strncmp(line, "CHUNK_CKSUM", 11) == 0) {
cfg->chunk_cksum_type = get_cksum_type(pos);
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
fclose(fh);
return (1);
}
} else if (strncmp(line, "SIMILARITY_CKSUM") == 0) {
} else if (strncmp(line, "SIMILARITY_CKSUM", 16) == 0) {
cfg->chunk_cksum_type = get_cksum_type(pos);
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
@ -306,10 +307,10 @@ read_config(char *configfile, archive_config_t *cfg)
cfg->container_sz = CONTAINER_ITEMS;
container_sz_bytes = CONTAINER_ITEMS * segment_sz_bytes;
if (cfg->archive_sz / total_dirs < container_sz)
if (cfg->archive_sz / total_dirs < cfg->container_sz)
cfg->num_containers = 1;
else
cfg->num_containers = (cfg->archive_sz / total_dirs) / container_sz + 1;
cfg->num_containers = (cfg->archive_sz / total_dirs) / cfg->container_sz + 1;
return (0);
}
@ -317,6 +318,8 @@ read_config(char *configfile, archive_config_t *cfg)
int
write_config(char *configfile, archive_config_t *cfg)
{
FILE *fh;
fh = fopen(configfile, "w");
if (fh == NULL) {
perror(" ");
@ -325,7 +328,7 @@ write_config(char *configfile, archive_config_t *cfg)
fprintf(fh, "#\n# Autogenerated config file\n# !! DO NOT EDIT !!\n#\n\n");
fprintf(fh, "ROOTDIR = %s\n", cfg->rootdir);
fprintf(fh, "CHUNKSZ = %u\n", cfg->chunk_sz;
fprintf(fh, "CHUNKSZ = %u\n", cfg->chunk_sz);
fprintf(fh, "ARCHIVESZ = %" PRId64 "\n", cfg->archive_sz);
if (cfg->verify_chunks)
@ -336,6 +339,8 @@ write_config(char *configfile, archive_config_t *cfg)
fprintf(fh, "CHUNK_CKSUM = %s\n", get_cksum_str(cfg->chunk_cksum_type));
fprintf(fh, "\n");
fclose(fh);
return (0);
}
int
@ -354,14 +359,13 @@ set_config_s(archive_config_t *cfg, compress_algo_t algo, cksum_t ck, cksum_t ck
cfg->archive_sz = file_sz;
if (cfg->archive_sz < ONE_TB) {
segment_sz_bytes = FOUR_MB;
cfg->segment_sz_bytes = FOUR_MB;
} else {
segment_sz_bytes = EIGHT_MB;
cfg->segment_sz_bytes = EIGHT_MB;
}
cfg->segment_sz_bytes = segment_sz_bytes;
cfg->segment_sz = segment_sz_bytes / cfg->chunk_sz_bytes;
cfg->segment_sz = cfg->segment_sz_bytes / cfg->chunk_sz_bytes;
return (0);
}

View file

@ -23,6 +23,7 @@
#include <limits.h>
#include <utils.h>
#include <crypto_utils.h>
#ifdef __cplusplus
extern "C" {
@ -32,6 +33,7 @@ extern "C" {
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
#define DEFAULT_COMPRESS COMPRESS_LZ4
#define CONTAINER_ITEMS 2048
#define MIN_CK 1
#define MAX_CK 5
@ -64,9 +66,9 @@ typedef struct {
} archive_config_t;
typedef struct _segment_entry {
uint64_t offset;
uint32_t length;
uchar_t *cksum;
uint64_t chunk_offset;
uint32_t chunk_length;
uchar_t *chunk_cksum;
} segment_entry_t;
int read_config(char *configfile, archive_config_t *cfg);

View file

@ -133,7 +133,8 @@ typedef enum {
COMPRESS_LZ4,
COMPRESS_ZLIB,
COMPRESS_BZIP2,
COMPRESS_LZMA
COMPRESS_LZMA,
COMPRESS_INVALID
} compress_algo_t;
typedef struct {