From 120877348c65677241b926b65c00e0e483db8c5d Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Thu, 2 May 2013 00:05:05 +0530 Subject: [PATCH] Use SHA256 for Global Dedupe chunk lookup hash by default. Allow changing Global Dedupe chunk lookup hash via env variable. --- README.md | 4 ++++ rabin/rabin_dedup.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e298011..deba7d8 100644 --- a/README.md +++ b/README.md @@ -223,6 +223,10 @@ can be a directory on a Solid State Drive to speed up Global Deduplication. The space used in this directory is proportional to the size of the dataset being processed and is slightly more than 8KB for every 1MB of data. +The default checksum used for chunk hashes during Global Deduplication is SHA256. +However, this can be changed by setting the PCOMPRESS_CHUNK_HASH_GLOBAL environment +variable to one of the other checksum names except CRC64. + Examples ======== diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index 8785a11..079e185 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -207,7 +207,8 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s */ if (dedupe_flag == RABIN_DEDUPE_FILE_GLOBAL && op == COMPRESS && rab_blk_sz > 0) { my_sysinfo msys_info; - int pct_interval; + int pct_interval, chunk_cksum, cksum_bytes, mac_bytes; + char *ck; /* * Get amount of memory to use. The freeram got here is adjusted amount. @@ -217,8 +218,23 @@ create_dedupe_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_s if (pipe_mode) pct_interval = DEFAULT_PCT_INTERVAL; + chunk_cksum = 0; + if ((ck = getenv("PCOMPRESS_CHUNK_HASH_GLOBAL")) != NULL) { + if (get_checksum_props(ck, &chunk_cksum, &cksum_bytes, &mac_bytes, 0) != 0 || + strcmp(ck, "CRC64") == 0) { + fprintf(stderr, "Invalid PCOMPRESS_CHUNK_HASH_GLOBAL. 
Reverting to default.\n"); + chunk_cksum = -1; + } + } + if (chunk_cksum == 0) { + chunk_cksum = DEFAULT_CHUNK_CKSUM; + if (get_checksum_props(NULL, &chunk_cksum, &cksum_bytes, &mac_bytes, 0) != 0) { + fprintf(stderr, "Invalid default chunk checksum: %d\n", DEFAULT_CHUNK_CKSUM); + return (NULL); + } + } arc = init_global_db_s(NULL, tmppath, rab_blk_sz, chunksize, pct_interval, - algo, props->cksum, GLOBAL_SIM_CKSUM, file_size, + algo, chunk_cksum, GLOBAL_SIM_CKSUM, file_size, msys_info.freeram, nthreads); if (arc == NULL) { pthread_mutex_unlock(&init_lock);