From 5d6ffd969d13d101d32cb9079dbadacc021b2ac8 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Wed, 24 Apr 2013 19:13:07 +0530 Subject: [PATCH] More tweaks to slightly improve segment dedupe efficiency. Use on average 8MB segments for all cases. Some minor cleanups. --- rabin/global/dedupe_config.c | 15 ++------------- rabin/global/dedupe_config.h | 4 +--- rabin/global/index.c | 2 +- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/rabin/global/dedupe_config.c b/rabin/global/dedupe_config.c index 707f3c5..69daac8 100644 --- a/rabin/global/dedupe_config.c +++ b/rabin/global/dedupe_config.c @@ -185,8 +185,8 @@ read_config(char *configfile, archive_config_t *cfg) cfg->verify_chunks = 0; cfg->algo = COMPRESS_LZ4; cfg->chunk_cksum_type = DEFAULT_CHUNK_CKSUM; - cfg->similarity_cksum = DEFAULT_SIMILARITY_CKSUM; - cfg->pct_interval = DEFAULT_SIMILARITY_INTERVAL; + cfg->similarity_cksum = GLOBAL_SIM_CKSUM; + cfg->pct_interval = DEFAULT_PCT_INTERVAL; fh = fopen(configfile, "r"); if (fh == NULL) { @@ -272,13 +272,6 @@ read_config(char *configfile, archive_config_t *cfg) fclose(fh); return (1); } - } else if (strncmp(line, "SIMILARITY_CKSUM", 16) == 0) { - cfg->chunk_cksum_type = get_cksum_type(pos); - if (cfg->chunk_cksum_type == CKSUM_INVALID) { - fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n"); - fclose(fh); - return (1); - } } } fclose(fh); @@ -373,10 +366,6 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim cfg->dedupe_mode = MODE_SIMPLE; cfg->segment_sz_bytes = user_chunk_sz; cfg->similarity_cksum_sz = cfg->chunk_cksum_sz; - - } else if (cfg->archive_sz < (ONE_TB * 100)) { - cfg->segment_sz_bytes = FOUR_MB; - } else { cfg->segment_sz_bytes = EIGHT_MB; } diff --git a/rabin/global/dedupe_config.h b/rabin/global/dedupe_config.h index afbed5f..3cd8e7a 100644 --- a/rabin/global/dedupe_config.h +++ b/rabin/global/dedupe_config.h @@ -33,11 +33,9 @@ extern "C" { #endif -#define DEFAULT_SIMILARITY_INTERVAL 5 #define 
DEFAULT_CHUNK_CKSUM CKSUM_SHA256 -#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256 #define DEFAULT_COMPRESS COMPRESS_LZ4 -#define DEFAULT_PCT_INTERVAL 5 +#define DEFAULT_PCT_INTERVAL 4 #define CONTAINER_ITEMS 2048 #define MIN_CK 1 #define MAX_CK 5 diff --git a/rabin/global/index.c b/rabin/global/index.c index c8705eb..6a032ff 100644 --- a/rabin/global/index.c +++ b/rabin/global/index.c @@ -167,7 +167,7 @@ set_cfg: *pct_interval = 0; } else { cfg->intervals = 100 / *pct_interval; - cfg->sub_intervals = (cfg->segment_sz / cfg->intervals) >> 1; + cfg->sub_intervals = cfg->intervals; *hash_slots = file_sz / cfg->segment_sz_bytes + 1; *hash_slots *= cfg->sub_intervals; }