Several fixes and optimizations.
This commit is contained in:
parent c0b4aa0116
commit 6b23f6a73a

4 changed files with 50 additions and 12 deletions
.gitignore (vendored) | 1

@@ -1,5 +1,6 @@
 files.lst
 *.pc*
+.seg*
 *.o
 *.so
 pcompress
@@ -37,7 +37,7 @@ extern "C" {
 #define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
 #define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
 #define DEFAULT_COMPRESS COMPRESS_LZ4
-#define DEFAULT_PCT_INTERVAL 8
+#define DEFAULT_PCT_INTERVAL 10
 #define CONTAINER_ITEMS 2048
 #define MIN_CK 1
 #define MAX_CK 5
@@ -55,6 +55,7 @@ typedef enum {
 struct seg_map_fd {
 	int fd;
 	void *mapping;
+	uint64_t cache_offset;
 	uint32_t len;
 };
@@ -326,6 +326,19 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
 	uint32_t len, adj;
 	uint64_t pos;
 
+	/*
+	 * If same mapping is re-attempted just return the pointer into the
+	 * existing mapping.
+	 */
+	adj = *offset % cfg->pagesize;
+	if (*offset == cfg->seg_fd_r[tid].cache_offset && cfg->seg_fd_r[tid].mapping) {
+		hdr = (uchar_t *)(cfg->seg_fd_r[tid].mapping) + adj;
+		*blknum = *((uint32_t *)(hdr));
+		*offset = *((uint64_t *)(hdr + 4));
+		*blocks = hdr + SEGCACHE_HDR_SZ;
+		return (0);
+	}
+
 	/*
 	 * Ensure previous mapping is removed.
 	 */
@@ -344,11 +357,11 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
 	if (pos - *offset < len)
 		len = pos - *offset;
 
-	adj = *offset % cfg->pagesize;
 	mapbuf = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, fd, *offset - adj);
 	if (mapbuf == MAP_FAILED)
 		return (-1);
 
+	cfg->seg_fd_r[tid].cache_offset = *offset;
 	hdr = mapbuf + adj;
 	*blknum = *((uint32_t *)(hdr));
 	*offset = *((uint64_t *)(hdr + 4));
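Note on the two db_segcache_map() hunks above: the new cache_offset field lets a thread detect that the offset it is about to map is the one it already has mapped, and return a pointer into the existing mapping instead of re-running munmap()/mmap(). Below is a minimal standalone sketch of that reuse pattern, assuming POSIX mmap(); the names (cached_map, map_at) are illustrative and not part of pcompress.

/* Hypothetical illustration of the mapping-reuse pattern; not pcompress code. */
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

struct cached_map {
	void     *mapping;   /* current mmap'd region, NULL if none */
	uint64_t  offset;    /* page-aligned file offset of the mapping */
	size_t    len;       /* length of the mapping */
};

/* Map 'len' bytes at 'offset', reusing the previous mapping when possible. */
static void *
map_at(struct cached_map *cm, int fd, uint64_t offset, size_t len, size_t pagesize)
{
	uint64_t adj = offset % pagesize;       /* offset into the first mapped page */
	uint64_t aligned = offset - adj;

	/* Fast path: same region requested again, just return a pointer into it. */
	if (cm->mapping && aligned == cm->offset && adj + len <= cm->len)
		return ((char *)cm->mapping + adj);

	/* Slow path: drop the old mapping and create a new one. */
	if (cm->mapping)
		munmap(cm->mapping, cm->len);
	cm->mapping = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, fd, aligned);
	if (cm->mapping == MAP_FAILED) {
		cm->mapping = NULL;
		return (NULL);
	}
	cm->offset = aligned;
	cm->len = len + adj;
	return ((char *)cm->mapping + adj);
}

pcompress keys its fast path on the exact per-thread offset (seg_fd_r[tid].cache_offset); the sketch generalizes slightly by accepting any request that falls inside the cached region.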
@@ -309,8 +309,9 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
 	}
 
 	if (arc && dedupe_flag == RABIN_DEDUPE_FILE_GLOBAL) {
-		ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL, arc->intervals + arc->sub_intervals,
-		    arc->similarity_cksum_sz);
+		ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL,
+		    arc->intervals + arc->sub_intervals,
+		    arc->similarity_cksum_sz);
 		if (!ctx->similarity_cksums) {
 			fprintf(stderr,
 			    "Could not allocate dedupe context, out of memory\n");
@@ -390,6 +391,26 @@ cmpint(const void *a, const void *b)
 		return (1);
 }
 
+static inline int
+ckcmp(uchar_t *a, uchar_t *b, int sz)
+{
+	size_t *v1 = (size_t *)a;
+	size_t *v2 = (size_t *)b;
+	int len;
+
+	len = 0;
+	do {
+		if (*v1 != *v2) {
+			return (1);
+		}
+		++v1;
+		++v2;
+		len += sizeof (size_t);
+	} while (len < sz);
+
+	return (0);
+}
+
 /**
  * Perform Deduplication.
  * Both Semi-Rabin fingerprinting based and Fixed Block Deduplication are supported.
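The ckcmp() helper added above replaces memcmp() at the chunk-checksum comparison sites later in this commit. It compares one size_t word at a time and only reports equal/not-equal (0 or 1), which is all those call sites need, and it assumes the checksum buffers are word-aligned and the checksum size is a multiple of sizeof (size_t). A small hypothetical harness showing it agrees with memcmp() as a boolean test (the names and the 32-byte digest size are illustrative only):

/* Hypothetical test harness; names and sizes are illustrative, not pcompress code. */
#include <stdio.h>
#include <string.h>

typedef unsigned char uchar_t;

/* Copy of ckcmp() from the hunk above: word-at-a-time inequality test. */
static inline int
ckcmp(uchar_t *a, uchar_t *b, int sz)
{
	size_t *v1 = (size_t *)a;
	size_t *v2 = (size_t *)b;
	int len = 0;

	do {
		if (*v1 != *v2)
			return (1);
		++v1;
		++v2;
		len += sizeof (size_t);
	} while (len < sz);
	return (0);
}

int
main(void)
{
	/* 32-byte digests kept in unions so the buffers are word-aligned. */
	union { uchar_t c[32]; size_t w[32 / sizeof (size_t)]; } a = { { 0 } }, b = { { 0 } };

	printf("equal:   ckcmp=%d memcmp!=0 -> %d\n", ckcmp(a.c, b.c, 32), memcmp(a.c, b.c, 32) != 0);
	b.c[31] = 1;
	printf("unequal: ckcmp=%d memcmp!=0 -> %d\n", ckcmp(a.c, b.c, 32), memcmp(a.c, b.c, 32) != 0);
	return (0);
}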
@@ -859,7 +880,7 @@ process_blocks:
 
 			increment = length / cfg->intervals;
 			for (j=0; j<cfg->intervals-1; j++) {
-				crc = lzma_crc64(tgt, increment, 0);
+				crc = lzma_crc64(tgt, increment/2, 0);
 				*((uint64_t *)sim_ck) = crc;
 				tgt += increment;
 				len -= increment;
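In the hunk above, each similarity fingerprint is now taken over only the first half of its interval (increment/2 bytes) while the scan still advances by a full increment. A rough standalone sketch of that sampling loop follows; fnv1a64() is a stand-in hash used only to keep the example self-contained, not the lzma_crc64() that pcompress calls, and the function names are illustrative.

/* Hypothetical sketch of half-interval similarity fingerprints; not pcompress code. */
#include <stddef.h>
#include <stdint.h>

/* Simple 64-bit FNV-1a as a stand-in for lzma_crc64(). */
static uint64_t
fnv1a64(const unsigned char *buf, size_t len)
{
	uint64_t h = 14695981039346656037ULL;
	size_t i;

	for (i = 0; i < len; i++) {
		h ^= buf[i];
		h *= 1099511628211ULL;
	}
	return (h);
}

/* Fingerprint 'intervals' slices of 'seg', hashing only the first half of each. */
static void
fingerprint_segment(const unsigned char *seg, size_t length, int intervals, uint64_t *out)
{
	size_t increment = length / intervals;
	const unsigned char *tgt = seg;
	int j;

	for (j = 0; j < intervals; j++) {
		out[j] = fnv1a64(tgt, increment / 2);  /* hash only half of each interval */
		tgt += increment;                      /* but advance a full interval */
	}
}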
@@ -880,8 +901,7 @@ process_blocks:
 			seg_offset = db_segcache_pos(cfg, ctx->id);
 			src = (uchar_t *)&(ctx->g_blocks[i]);
 			len = blks * sizeof (global_blockentry_t);
-			db_segcache_write(cfg, ctx->id, src, len, blks-i,
-			    ctx->file_offset);
+			db_segcache_write(cfg, ctx->id, src, len, blks-i, ctx->file_offset);
 
 			/*
 			 * Insert current segment blocks into local hashtable and do partial
@@ -903,7 +923,7 @@ process_blocks:
 			} else {
 				be = htab[hent];
 				do {
-					if (memcmp(ctx->g_blocks[k].cksum,
+					if (ckcmp(ctx->g_blocks[k].cksum,
 					    be->cksum, cfg->chunk_cksum_sz) == 0 &&
 					    ctx->g_blocks[k].length == be->length) {
 						global_blockentry_t *en;
@@ -938,7 +958,11 @@ process_blocks:
 				hash_entry_t *he;
 
 				he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
-				if (he) {
+
+				/*
+				 * If match found also check that match is not with self!
+				 */
+				if (he && he->item_offset != seg_offset) {
 					/*
 					 * Match found. Load segment metadata from disk and perform
 					 * identity deduplication with the segment chunks.
@@ -967,7 +991,7 @@ process_blocks:
 					do {
 						if (be->length & RABIN_INDEX_FLAG)
 							goto next_ent;
-						if (memcmp(seg_blocks[k].cksum,
+						if (ckcmp(seg_blocks[k].cksum,
 						    be->cksum, cfg->chunk_cksum_sz) == 0 &&
 						    seg_blocks[k].length == be->length) {
 							be->length = (be->length |
@@ -985,7 +1009,6 @@ next_ent:
 					} while(1);
 				}
 			}
-			break;
 		}
 		sim_ck -= cfg->similarity_cksum_sz;
 	}
@@ -1400,7 +1423,7 @@ dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size)
 		len = LE32(*((uint32_t *)g_dedupe_idx));
 		g_dedupe_idx += RABIN_ENTRY_SIZE;
 		++blk;
-		flag = len & GLOBAL_FLAG;
+		flag = len & RABIN_INDEX_FLAG;
 		len &= RABIN_INDEX_VALUE;
 
 		if (sz + len > data_sz) {