Make the global dedupe code buildable and fix compile errors.
Rename the adaptive-mode compression type constants (COMPRESS_* to ADAPT_COMPRESS_*) to avoid conflicts with the global compress_algo_t constants.
This commit is contained in:
parent
7386f82a4f
commit
6badbcaea7
8 changed files with 127 additions and 47 deletions
|
@ -207,14 +207,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = COMPRESS_LZMA;
|
||||
rv = ADAPT_COMPRESS_LZMA;
|
||||
lzma_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
|
||||
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, NULL);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = COMPRESS_BZIP2;
|
||||
rv = ADAPT_COMPRESS_BZIP2;
|
||||
bzip2_count++;
|
||||
|
||||
} else {
|
||||
|
@ -223,14 +223,14 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = COMPRESS_BSC;
|
||||
rv = ADAPT_COMPRESS_BSC;
|
||||
bsc_count++;
|
||||
#endif
|
||||
} else {
|
||||
rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = COMPRESS_PPMD;
|
||||
rv = ADAPT_COMPRESS_PPMD;
|
||||
ppmd_count++;
|
||||
}
|
||||
}
|
||||
|
@ -247,16 +247,16 @@ adapt_decompress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
cmp_flags = (chdr>>4) & CHDR_ALGO_MASK;
|
||||
|
||||
if (cmp_flags == COMPRESS_LZMA) {
|
||||
if (cmp_flags == ADAPT_COMPRESS_LZMA) {
|
||||
return (lzma_decompress(src, srclen, dst, dstlen, level, chdr, adat->lzma_data));
|
||||
|
||||
} else if (cmp_flags == COMPRESS_BZIP2) {
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_BZIP2) {
|
||||
return (bzip2_decompress(src, srclen, dst, dstlen, level, chdr, NULL));
|
||||
|
||||
} else if (cmp_flags == COMPRESS_PPMD) {
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_PPMD) {
|
||||
return (ppmd_decompress(src, srclen, dst, dstlen, level, chdr, adat->ppmd_data));
|
||||
|
||||
} else if (cmp_flags == COMPRESS_BSC) {
|
||||
} else if (cmp_flags == ADAPT_COMPRESS_BSC) {
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
return (libbsc_decompress(src, srclen, dst, dstlen, level, chdr, adat->bsc_data));
|
||||
#else
|
||||
|
|
|
@ -61,7 +61,8 @@ typedef enum {
|
|||
* to decode archives created with 1.2. New archives do not use SKEIN.
|
||||
*/
|
||||
CKSUM_SKEIN256 = 0x800,
|
||||
CKSUM_SKEIN512 = 0x900
|
||||
CKSUM_SKEIN512 = 0x900,
|
||||
CKSUM_INVALID = 0
|
||||
} cksum_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
10
pcompress.h
10
pcompress.h
|
@ -70,11 +70,11 @@ extern "C" {
|
|||
* lower 3 bits in higher nibble indicate chunk compression algorithm
|
||||
* in adaptive modes.
|
||||
*/
|
||||
#define COMPRESS_NONE 0
|
||||
#define COMPRESS_LZMA 1
|
||||
#define COMPRESS_BZIP2 2
|
||||
#define COMPRESS_PPMD 3
|
||||
#define COMPRESS_BSC 4
|
||||
#define ADAPT_COMPRESS_NONE 0
|
||||
#define ADAPT_COMPRESS_LZMA 1
|
||||
#define ADAPT_COMPRESS_BZIP2 2
|
||||
#define ADAPT_COMPRESS_PPMD 3
|
||||
#define ADAPT_COMPRESS_BSC 4
|
||||
#define CHDR_ALGO_MASK 7
|
||||
|
||||
extern uint32_t zlib_buf_extra(uint64_t buflen);
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include <pthread.h>
|
||||
|
||||
#include "db.h"
|
||||
#include "config.h"
|
||||
|
||||
#define ONE_PB (1125899906842624ULL)
|
||||
#define ONE_TB (1099511627776ULL)
|
||||
|
@ -43,7 +42,7 @@
|
|||
* Hashtable structures for in-memory index.
|
||||
*/
|
||||
typedef struct _hash_entry {
|
||||
segment_entry_t *seg;
|
||||
uint64_t seg_offset;
|
||||
struct _hash_entry *next;
|
||||
struct _hash_entry *lru_prev;
|
||||
struct _hash_entry *lru_next;
|
||||
|
@ -61,9 +60,15 @@ typedef struct {
|
|||
hash_entry_t *lru_tail;
|
||||
uint64_t memlimit;
|
||||
uint64_t memused;
|
||||
int hash_entry_size;
|
||||
int hash_entry_size, intervals;
|
||||
} htablst_t;
|
||||
|
||||
typedef struct {
|
||||
htablst_t *hlist;
|
||||
int seg_fd_w;
|
||||
int *tfd;
|
||||
} seg_index_t;
|
||||
|
||||
archive_config_t *
|
||||
init_global_db(char *configfile)
|
||||
{
|
||||
|
@ -83,17 +88,36 @@ init_global_db(char *configfile)
|
|||
return (cfg);
|
||||
}
|
||||
|
||||
void
|
||||
static cleanup_htablst(htablst_t *htablst, int intervals)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (htablst) {
|
||||
if (htablst->list) {
|
||||
for (i = 0; i < intervals; i++) {
|
||||
if (htablst->list[i].htab)
|
||||
free(htablst->list[i].htab);
|
||||
}
|
||||
free(htablst->list);
|
||||
}
|
||||
if (htablst->mlist)
|
||||
free(htablst->mlist);
|
||||
free(htablst);
|
||||
}
|
||||
}
|
||||
|
||||
archive_config_t *
|
||||
init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
|
||||
init_global_db_s(char *path, char *tmppath, uint32_t chunksize, int pct_interval,
|
||||
compress_algo_t algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
|
||||
size_t memlimit)
|
||||
size_t memlimit, int nthreads)
|
||||
{
|
||||
archive_config_t *cfg;
|
||||
int rv;
|
||||
float diff;
|
||||
|
||||
cfg = calloc(1, sizeof (archive_config_t));
|
||||
rv = set_config_s(cfg, algo, ck, ck_sim, chunksize, file_sz, chunks_per_seg, pct_interval);
|
||||
rv = set_config_s(cfg, algo, ck, ck_sim, chunksize, file_sz, pct_interval);
|
||||
|
||||
if (path != NULL) {
|
||||
printf("Disk based index not yet implemented.\n");
|
||||
|
@ -104,6 +128,7 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
|
|||
uint64_t memreqd;
|
||||
htablst_t *htablst;
|
||||
int hash_entry_size;
|
||||
seg_index_t *indx;
|
||||
|
||||
// Compute total hashtable entries first
|
||||
intervals = 100 / pct_interval - 1;
|
||||
|
@ -129,18 +154,53 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
|
|||
// Now create as many hash tables as there are similarity match intervals
|
||||
// each having hash_slots / intervals slots.
|
||||
htablst = calloc(1, sizeof (htablst_t));
|
||||
if (!htablst) {
|
||||
free(cfg);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
htablst->memlimit = memlimit;
|
||||
htablst->list = (htab_t *)calloc(intervals, sizeof (htab_t));
|
||||
htablst->mlist = (pthread_mutex_t *)malloc(intervals * sizeof (pthread_mutex_t));
|
||||
htablst->hash_entry_size = hash_entry_size;
|
||||
htablst->intervals = intervals;
|
||||
|
||||
for (i = 0; i < intervals; i++) {
|
||||
htablst->list[i].htab = (hash_entry_t **)calloc(hash_slots / intervals,
|
||||
sizeof (hash_entry_t *));
|
||||
if (!(htablst->list[i].htab)) {
|
||||
cleanup_htablst(htablst, intervals);
|
||||
free(cfg);
|
||||
return (NULL);
|
||||
}
|
||||
htablst->memused += ((hash_slots / intervals) * (sizeof (hash_entry_t *)));
|
||||
pthread_mutex_init(&(htablst->mlist[i]), NULL);
|
||||
}
|
||||
cfg->dbdata = htablst;
|
||||
|
||||
indx = (seg_index_t *)calloc(1, sizeof (seg_index_t));
|
||||
if (!indx) {
|
||||
cleanup_htablst(htablst, intervals);
|
||||
free(cfg);
|
||||
return (NULL);
|
||||
}
|
||||
indx->hlist = htablst;
|
||||
|
||||
strcpy(cfg->rootdir, tmppath);
|
||||
strcat(cfg->rootdir, "/.segXXXXXX");
|
||||
indx->seg_fd_w = mkstemp(cfg->rootdir);
|
||||
indx->tfd = (int *)malloc(sizeof (int) * nthreads);
|
||||
if (indx->seg_fd_w == -1 || indx->tfd == NULL) {
|
||||
cleanup_htablst(htablst, intervals);
|
||||
free(cfg);
|
||||
if (indx->tfd)
|
||||
free(indx->tfd);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
for (i = 0; i < nthreads; i++) {
|
||||
indx->tfd[i] = open(cfg->rootdir, O_RDONLY);
|
||||
}
|
||||
cfg->dbdata = indx;
|
||||
slab_cache_add(hash_entry_size);
|
||||
slab_cache_add(cfg->chunk_cksum_sz);
|
||||
}
|
||||
|
@ -148,6 +208,13 @@ init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
|
|||
}
|
||||
|
||||
int
|
||||
db_insert_s(archive_config_t *cfg, uchar_t *cksum, int interval_num)
|
||||
db_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, segment_entry_t *seg, int thr_id)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
||||
segment_entry_t *
|
||||
db_query_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, int thr_id)
|
||||
{
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -21,14 +21,19 @@
|
|||
#ifndef _DB_H
|
||||
#define _DB_H
|
||||
|
||||
#include <dedupe_config.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
archive_config_t *init_global_db(char *configfile);
|
||||
archive_config_t *init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
|
||||
compress_algo_t algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
|
||||
size_t memlimit);
|
||||
archive_config_t *init_global_db_s(char *path, char *tmppath, uint32_t chunksize,
|
||||
int pct_interval, compress_algo_t algo, cksum_t ck,
|
||||
cksum_t ck_sim, size_t file_sz, size_t memlimit, int nthreads);
|
||||
int db_insert_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval,
|
||||
segment_entry_t *seg, int thr_id);
|
||||
segment_entry_t *db_query_s(archive_config_t *cfg, uchar_t *sim_cksum, int interval, int thr_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -31,8 +31,8 @@
|
|||
#include <sys/stat.h>
|
||||
#include <rabin_dedup.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "initdb.h"
|
||||
#include "dedupe_config.h"
|
||||
#include "db.h"
|
||||
|
||||
#define ONE_PB (1125899906842624ULL)
|
||||
#define ONE_TB (1099511627776ULL)
|
||||
|
@ -44,6 +44,7 @@ get_compress_level(compress_algo_t algo)
|
|||
{
|
||||
switch (algo) {
|
||||
case COMPRESS_NONE:
|
||||
case COMPRESS_INVALID:
|
||||
return (0);
|
||||
case COMPRESS_LZFX:
|
||||
return (5);
|
||||
|
@ -130,7 +131,7 @@ get_cksum_type(char *cksum_name)
|
|||
}
|
||||
|
||||
static char *
|
||||
get_cksum_str(chunk_cksum_t ck)
|
||||
get_cksum_str(cksum_t ck)
|
||||
{
|
||||
if (ck == CKSUM_SHA256) {
|
||||
return ("SHA256");
|
||||
|
@ -154,7 +155,7 @@ get_cksum_str(chunk_cksum_t ck)
|
|||
}
|
||||
|
||||
static int
|
||||
get_cksum_sz(chunk_cksum_t ck)
|
||||
get_cksum_sz(cksum_t ck)
|
||||
{
|
||||
if (ck == CKSUM_SHA256 || ck == CKSUM_BLAKE256 || ck == CKSUM_KECCAK256) {
|
||||
return (32);
|
||||
|
@ -185,7 +186,7 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
return (1);
|
||||
}
|
||||
while (fgets(line, 255, fh) != NULL) {
|
||||
int pos;
|
||||
char *pos;
|
||||
|
||||
if (strlen(line) < 9 || line[0] == '#') {
|
||||
continue;
|
||||
|
@ -205,7 +206,7 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
}
|
||||
cfg->chunk_sz = ck;
|
||||
|
||||
} else if (strncmp(line, "ROOTDIR") == 0) {
|
||||
} else if (strncmp(line, "ROOTDIR", 7) == 0) {
|
||||
struct stat sb;
|
||||
if (stat(pos, &sb) == -1) {
|
||||
if (errno != ENOENT) {
|
||||
|
@ -222,7 +223,7 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
} else if (strncmp(line, "ARCHIVESZ") == 0) {
|
||||
} else if (strncmp(line, "ARCHIVESZ", 9) == 0) {
|
||||
int ovr;
|
||||
ssize_t arch_sz;
|
||||
ovr = parse_numeric(&arch_sz, pos);
|
||||
|
@ -238,7 +239,7 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
}
|
||||
cfg->archive_sz = arch_sz;
|
||||
|
||||
} else if (strncmp(line, "VERIFY") == 0) {
|
||||
} else if (strncmp(line, "VERIFY", 6) == 0) {
|
||||
if (strcmp(pos, "no") == 0) {
|
||||
cfg->verify_chunks = 0;
|
||||
|
||||
|
@ -249,21 +250,21 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
} else if (strncmp(line, "COMPRESS") == 0) {
|
||||
} else if (strncmp(line, "COMPRESS", 8) == 0) {
|
||||
cfg->algo = get_compress_algo(pos);
|
||||
if (cfg->algo == COMPRESS_INVALID) {
|
||||
fprintf(stderr, "Invalid COMPRESS setting.\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
} else if (strncmp(line, "CHUNK_CKSUM") == 0) {
|
||||
} else if (strncmp(line, "CHUNK_CKSUM", 11) == 0) {
|
||||
cfg->chunk_cksum_type = get_cksum_type(pos);
|
||||
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
|
||||
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
} else if (strncmp(line, "SIMILARITY_CKSUM") == 0) {
|
||||
} else if (strncmp(line, "SIMILARITY_CKSUM", 16) == 0) {
|
||||
cfg->chunk_cksum_type = get_cksum_type(pos);
|
||||
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
|
||||
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
|
||||
|
@ -306,10 +307,10 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
cfg->container_sz = CONTAINER_ITEMS;
|
||||
container_sz_bytes = CONTAINER_ITEMS * segment_sz_bytes;
|
||||
|
||||
if (cfg->archive_sz / total_dirs < container_sz)
|
||||
if (cfg->archive_sz / total_dirs < cfg->container_sz)
|
||||
cfg->num_containers = 1;
|
||||
else
|
||||
cfg->num_containers = (cfg->archive_sz / total_dirs) / container_sz + 1;
|
||||
cfg->num_containers = (cfg->archive_sz / total_dirs) / cfg->container_sz + 1;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -317,6 +318,8 @@ read_config(char *configfile, archive_config_t *cfg)
|
|||
int
|
||||
write_config(char *configfile, archive_config_t *cfg)
|
||||
{
|
||||
FILE *fh;
|
||||
|
||||
fh = fopen(configfile, "w");
|
||||
if (fh == NULL) {
|
||||
perror(" ");
|
||||
|
@ -325,7 +328,7 @@ write_config(char *configfile, archive_config_t *cfg)
|
|||
|
||||
fprintf(fh, "#\n# Autogenerated config file\n# !! DO NOT EDIT !!\n#\n\n");
|
||||
fprintf(fh, "ROOTDIR = %s\n", cfg->rootdir);
|
||||
fprintf(fh, "CHUNKSZ = %u\n", cfg->chunk_sz;
|
||||
fprintf(fh, "CHUNKSZ = %u\n", cfg->chunk_sz);
|
||||
fprintf(fh, "ARCHIVESZ = %" PRId64 "\n", cfg->archive_sz);
|
||||
|
||||
if (cfg->verify_chunks)
|
||||
|
@ -336,6 +339,8 @@ write_config(char *configfile, archive_config_t *cfg)
|
|||
fprintf(fh, "CHUNK_CKSUM = %s\n", get_cksum_str(cfg->chunk_cksum_type));
|
||||
fprintf(fh, "\n");
|
||||
fclose(fh);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -354,14 +359,13 @@ set_config_s(archive_config_t *cfg, compress_algo_t algo, cksum_t ck, cksum_t ck
|
|||
|
||||
cfg->archive_sz = file_sz;
|
||||
if (cfg->archive_sz < ONE_TB) {
|
||||
segment_sz_bytes = FOUR_MB;
|
||||
cfg->segment_sz_bytes = FOUR_MB;
|
||||
|
||||
} else {
|
||||
segment_sz_bytes = EIGHT_MB;
|
||||
cfg->segment_sz_bytes = EIGHT_MB;
|
||||
}
|
||||
|
||||
cfg->segment_sz_bytes = segment_sz_bytes;
|
||||
cfg->segment_sz = segment_sz_bytes / cfg->chunk_sz_bytes;
|
||||
cfg->segment_sz = cfg->segment_sz_bytes / cfg->chunk_sz_bytes;
|
||||
|
||||
return (0);
|
||||
}
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include <limits.h>
|
||||
#include <utils.h>
|
||||
#include <crypto_utils.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -32,6 +33,7 @@ extern "C" {
|
|||
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
|
||||
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
|
||||
#define DEFAULT_COMPRESS COMPRESS_LZ4
|
||||
#define CONTAINER_ITEMS 2048
|
||||
#define MIN_CK 1
|
||||
#define MAX_CK 5
|
||||
|
||||
|
@ -64,9 +66,9 @@ typedef struct {
|
|||
} archive_config_t;
|
||||
|
||||
typedef struct _segment_entry {
|
||||
uint64_t offset;
|
||||
uint32_t length;
|
||||
uchar_t *cksum;
|
||||
uint64_t chunk_offset;
|
||||
uint32_t chunk_length;
|
||||
uchar_t *chunk_cksum;
|
||||
} segment_entry_t;
|
||||
|
||||
int read_config(char *configfile, archive_config_t *cfg);
|
|
@ -133,7 +133,8 @@ typedef enum {
|
|||
COMPRESS_LZ4,
|
||||
COMPRESS_ZLIB,
|
||||
COMPRESS_BZIP2,
|
||||
COMPRESS_LZMA
|
||||
COMPRESS_LZMA,
|
||||
COMPRESS_INVALID
|
||||
} compress_algo_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
Loading…
Reference in a new issue