Several fixes and optimizations.
This commit is contained in:
parent
c0b4aa0116
commit
6b23f6a73a
4 changed files with 50 additions and 12 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
|||
files.lst
|
||||
*.pc*
|
||||
.seg*
|
||||
*.o
|
||||
*.so
|
||||
pcompress
|
||||
|
|
|
@ -37,7 +37,7 @@ extern "C" {
|
|||
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
|
||||
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
|
||||
#define DEFAULT_COMPRESS COMPRESS_LZ4
|
||||
#define DEFAULT_PCT_INTERVAL 8
|
||||
#define DEFAULT_PCT_INTERVAL 10
|
||||
#define CONTAINER_ITEMS 2048
|
||||
#define MIN_CK 1
|
||||
#define MAX_CK 5
|
||||
|
@ -55,6 +55,7 @@ typedef enum {
|
|||
/*
 * Per-thread segment cache mapping state. db_segcache_map() looks entries up
 * as cfg->seg_fd_r[tid] and uses them to recognise a repeated request for the
 * same offset so the existing mapping can be reused.
 */
struct seg_map_fd {
|
||||
int fd; /* NOTE(review): presumably the descriptor of the segment cache file - confirm */
|
||||
void *mapping; /* Base address of the current mapping; tested for non-NULL before reuse. */
|
||||
uint64_t cache_offset; /* Requested offset recorded after the last successful mmap(). */
|
||||
uint32_t len; /* NOTE(review): presumably the length of the current mapping - confirm */
|
||||
};
|
||||
|
||||
|
|
|
@ -326,6 +326,19 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
|
|||
uint32_t len, adj;
|
||||
uint64_t pos;
|
||||
|
||||
/*
|
||||
* If same mapping is re-attempted just return the pointer into the
|
||||
* existing mapping.
|
||||
*/
|
||||
adj = *offset % cfg->pagesize;
|
||||
if (*offset == cfg->seg_fd_r[tid].cache_offset && cfg->seg_fd_r[tid].mapping) {
|
||||
hdr = (uchar_t *)(cfg->seg_fd_r[tid].mapping) + adj;
|
||||
*blknum = *((uint32_t *)(hdr));
|
||||
*offset = *((uint64_t *)(hdr + 4));
|
||||
*blocks = hdr + SEGCACHE_HDR_SZ;
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure previous mapping is removed.
|
||||
*/
|
||||
|
@ -344,11 +357,11 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
|
|||
if (pos - *offset < len)
|
||||
len = pos - *offset;
|
||||
|
||||
adj = *offset % cfg->pagesize;
|
||||
mapbuf = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, fd, *offset - adj);
|
||||
if (mapbuf == MAP_FAILED)
|
||||
return (-1);
|
||||
|
||||
cfg->seg_fd_r[tid].cache_offset = *offset;
|
||||
hdr = mapbuf + adj;
|
||||
*blknum = *((uint32_t *)(hdr));
|
||||
*offset = *((uint64_t *)(hdr + 4));
|
||||
|
|
|
@ -309,7 +309,8 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
|
|||
}
|
||||
|
||||
if (arc && dedupe_flag == RABIN_DEDUPE_FILE_GLOBAL) {
|
||||
ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL, arc->intervals + arc->sub_intervals,
|
||||
ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL,
|
||||
arc->intervals + arc->sub_intervals,
|
||||
arc->similarity_cksum_sz);
|
||||
if (!ctx->similarity_cksums) {
|
||||
fprintf(stderr,
|
||||
|
@ -390,6 +391,26 @@ cmpint(const void *a, const void *b)
|
|||
return (1);
|
||||
}
|
||||
|
||||
static inline int
|
||||
ckcmp(uchar_t *a, uchar_t *b, int sz)
|
||||
{
|
||||
size_t *v1 = (size_t *)a;
|
||||
size_t *v2 = (size_t *)b;
|
||||
int len;
|
||||
|
||||
len = 0;
|
||||
do {
|
||||
if (*v1 != *v2) {
|
||||
return (1);
|
||||
}
|
||||
++v1;
|
||||
++v2;
|
||||
len += sizeof (size_t);
|
||||
} while (len < sz);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform Deduplication.
|
||||
* Both Semi-Rabin fingerprinting based and Fixed Block Deduplication are supported.
|
||||
|
@ -859,7 +880,7 @@ process_blocks:
|
|||
|
||||
increment = length / cfg->intervals;
|
||||
for (j=0; j<cfg->intervals-1; j++) {
|
||||
crc = lzma_crc64(tgt, increment, 0);
|
||||
crc = lzma_crc64(tgt, increment/2, 0);
|
||||
*((uint64_t *)sim_ck) = crc;
|
||||
tgt += increment;
|
||||
len -= increment;
|
||||
|
@ -880,8 +901,7 @@ process_blocks:
|
|||
seg_offset = db_segcache_pos(cfg, ctx->id);
|
||||
src = (uchar_t *)&(ctx->g_blocks[i]);
|
||||
len = blks * sizeof (global_blockentry_t);
|
||||
db_segcache_write(cfg, ctx->id, src, len, blks-i,
|
||||
ctx->file_offset);
|
||||
db_segcache_write(cfg, ctx->id, src, len, blks-i, ctx->file_offset);
|
||||
|
||||
/*
|
||||
* Insert current segment blocks into local hashtable and do partial
|
||||
|
@ -903,7 +923,7 @@ process_blocks:
|
|||
} else {
|
||||
be = htab[hent];
|
||||
do {
|
||||
if (memcmp(ctx->g_blocks[k].cksum,
|
||||
if (ckcmp(ctx->g_blocks[k].cksum,
|
||||
be->cksum, cfg->chunk_cksum_sz) == 0 &&
|
||||
ctx->g_blocks[k].length == be->length) {
|
||||
global_blockentry_t *en;
|
||||
|
@ -938,7 +958,11 @@ process_blocks:
|
|||
hash_entry_t *he;
|
||||
|
||||
he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
|
||||
if (he) {
|
||||
|
||||
/*
|
||||
* If match found also check that match is not with self!
|
||||
*/
|
||||
if (he && he->item_offset != seg_offset) {
|
||||
/*
|
||||
* Match found. Load segment metadata from disk and perform
|
||||
* identity deduplication with the segment chunks.
|
||||
|
@ -967,7 +991,7 @@ process_blocks:
|
|||
do {
|
||||
if (be->length & RABIN_INDEX_FLAG)
|
||||
goto next_ent;
|
||||
if (memcmp(seg_blocks[k].cksum,
|
||||
if (ckcmp(seg_blocks[k].cksum,
|
||||
be->cksum, cfg->chunk_cksum_sz) == 0 &&
|
||||
seg_blocks[k].length == be->length) {
|
||||
be->length = (be->length |
|
||||
|
@ -985,7 +1009,6 @@ next_ent:
|
|||
} while(1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
sim_ck -= cfg->similarity_cksum_sz;
|
||||
}
|
||||
|
@ -1400,7 +1423,7 @@ dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size)
|
|||
len = LE32(*((uint32_t *)g_dedupe_idx));
|
||||
g_dedupe_idx += RABIN_ENTRY_SIZE;
|
||||
++blk;
|
||||
flag = len & GLOBAL_FLAG;
|
||||
flag = len & RABIN_INDEX_FLAG;
|
||||
len &= RABIN_INDEX_VALUE;
|
||||
|
||||
if (sz + len > data_sz) {
|
||||
|
|
Loading…
Reference in a new issue