Several fixes and optimizations.

This commit is contained in:
Moinak Ghosh 2013-04-22 19:52:18 +05:30
parent c0b4aa0116
commit 6b23f6a73a
4 changed files with 50 additions and 12 deletions

1
.gitignore vendored
View file

@ -1,5 +1,6 @@
files.lst
*.pc*
.seg*
*.o
*.so
pcompress

View file

@ -37,7 +37,7 @@ extern "C" {
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
#define DEFAULT_COMPRESS COMPRESS_LZ4
#define DEFAULT_PCT_INTERVAL 8
#define DEFAULT_PCT_INTERVAL 10
#define CONTAINER_ITEMS 2048
#define MIN_CK 1
#define MAX_CK 5
@ -55,6 +55,7 @@ typedef enum {
/*
 * Bookkeeping for one memory-mapped window onto a segment cache file.
 * NOTE(review): appears to be the element type of cfg->seg_fd_r[], which
 * db_segcache_map() indexes by thread id - confirm against archive_config_t.
 */
struct seg_map_fd {
/* File descriptor; presumably the open segment cache file - TODO confirm. */
int fd;
/* Base address of the current mapping; tested for non-NULL before reuse. */
void *mapping;
/*
 * Segment offset for which the current mapping was created. A repeated
 * request for the same offset is served from the existing mapping.
 */
uint64_t cache_offset;
/* Presumably the length in bytes of the current mapping - TODO confirm. */
uint32_t len;
};

View file

@ -326,6 +326,19 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
uint32_t len, adj;
uint64_t pos;
/*
* If same mapping is re-attempted just return the pointer into the
* existing mapping.
*/
adj = *offset % cfg->pagesize;
if (*offset == cfg->seg_fd_r[tid].cache_offset && cfg->seg_fd_r[tid].mapping) {
hdr = (uchar_t *)(cfg->seg_fd_r[tid].mapping) + adj;
*blknum = *((uint32_t *)(hdr));
*offset = *((uint64_t *)(hdr + 4));
*blocks = hdr + SEGCACHE_HDR_SZ;
return (0);
}
/*
* Ensure previous mapping is removed.
*/
@ -344,11 +357,11 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs
if (pos - *offset < len)
len = pos - *offset;
adj = *offset % cfg->pagesize;
mapbuf = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, fd, *offset - adj);
if (mapbuf == MAP_FAILED)
return (-1);
cfg->seg_fd_r[tid].cache_offset = *offset;
hdr = mapbuf + adj;
*blknum = *((uint32_t *)(hdr));
*offset = *((uint64_t *)(hdr + 4));

View file

@ -309,7 +309,8 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s
}
if (arc && dedupe_flag == RABIN_DEDUPE_FILE_GLOBAL) {
ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL, arc->intervals + arc->sub_intervals,
ctx->similarity_cksums = (uchar_t *)slab_calloc(NULL,
arc->intervals + arc->sub_intervals,
arc->similarity_cksum_sz);
if (!ctx->similarity_cksums) {
fprintf(stderr,
@ -390,6 +391,26 @@ cmpint(const void *a, const void *b)
return (1);
}
/*
 * Compare two checksum buffers for equality, one machine word at a time.
 *
 * a, b - buffers to compare; each must hold at least sz readable bytes.
 * sz   - number of bytes to compare.
 *
 * Returns 0 if the first sz bytes are identical, 1 otherwise. Unlike memcmp()
 * no ordering is reported. A sz of zero or less compares nothing and yields 0.
 *
 * Words are loaded with memcpy() rather than by casting the byte pointers to
 * size_t *, so the buffers need not be word aligned and no aliasing rules are
 * broken; compilers turn these fixed-size copies into plain loads. Bytes left
 * over when sz is not a multiple of the word size are compared individually,
 * so nothing beyond sz bytes is ever read.
 */
static inline int
ckcmp(uchar_t *a, uchar_t *b, int sz)
{
	size_t v1, v2;
	int len;

	len = 0;

	/* Whole words first. */
	while (sz - len >= (int)sizeof (size_t)) {
		memcpy(&v1, a + len, sizeof (v1));
		memcpy(&v2, b + len, sizeof (v2));
		if (v1 != v2) {
			return (1);
		}
		len += sizeof (size_t);
	}

	/* Remaining tail bytes, if any. */
	for (; len < sz; len++) {
		if (a[len] != b[len]) {
			return (1);
		}
	}
	return (0);
}
/**
* Perform Deduplication.
* Both Semi-Rabin fingerprinting based and Fixed Block Deduplication are supported.
@ -859,7 +880,7 @@ process_blocks:
increment = length / cfg->intervals;
for (j=0; j<cfg->intervals-1; j++) {
crc = lzma_crc64(tgt, increment, 0);
crc = lzma_crc64(tgt, increment/2, 0);
*((uint64_t *)sim_ck) = crc;
tgt += increment;
len -= increment;
@ -880,8 +901,7 @@ process_blocks:
seg_offset = db_segcache_pos(cfg, ctx->id);
src = (uchar_t *)&(ctx->g_blocks[i]);
len = blks * sizeof (global_blockentry_t);
db_segcache_write(cfg, ctx->id, src, len, blks-i,
ctx->file_offset);
db_segcache_write(cfg, ctx->id, src, len, blks-i, ctx->file_offset);
/*
* Insert current segment blocks into local hashtable and do partial
@ -903,7 +923,7 @@ process_blocks:
} else {
be = htab[hent];
do {
if (memcmp(ctx->g_blocks[k].cksum,
if (ckcmp(ctx->g_blocks[k].cksum,
be->cksum, cfg->chunk_cksum_sz) == 0 &&
ctx->g_blocks[k].length == be->length) {
global_blockentry_t *en;
@ -938,7 +958,11 @@ process_blocks:
hash_entry_t *he;
he = db_lookup_insert_s(cfg, sim_ck, 0, seg_offset, 0, 1);
if (he) {
/*
* If match found also check that match is not with self!
*/
if (he && he->item_offset != seg_offset) {
/*
* Match found. Load segment metadata from disk and perform
* identity deduplication with the segment chunks.
@ -967,7 +991,7 @@ process_blocks:
do {
if (be->length & RABIN_INDEX_FLAG)
goto next_ent;
if (memcmp(seg_blocks[k].cksum,
if (ckcmp(seg_blocks[k].cksum,
be->cksum, cfg->chunk_cksum_sz) == 0 &&
seg_blocks[k].length == be->length) {
be->length = (be->length |
@ -985,7 +1009,6 @@ next_ent:
} while(1);
}
}
break;
}
sim_ck -= cfg->similarity_cksum_sz;
}
@ -1400,7 +1423,7 @@ dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size)
len = LE32(*((uint32_t *)g_dedupe_idx));
g_dedupe_idx += RABIN_ENTRY_SIZE;
++blk;
flag = len & GLOBAL_FLAG;
flag = len & RABIN_INDEX_FLAG;
len &= RABIN_INDEX_VALUE;
if (sz + len > data_sz) {