More tweaks to slightly improve segment dedupe efficiency.

Use on average 8MB segments for all cases.
Some minor cleanups.
This commit is contained in:
Moinak Ghosh 2013-04-24 19:13:07 +05:30
parent eabd670790
commit 5d6ffd969d
3 changed files with 4 additions and 17 deletions

View file

@ -185,8 +185,8 @@ read_config(char *configfile, archive_config_t *cfg)
cfg->verify_chunks = 0; cfg->verify_chunks = 0;
cfg->algo = COMPRESS_LZ4; cfg->algo = COMPRESS_LZ4;
cfg->chunk_cksum_type = DEFAULT_CHUNK_CKSUM; cfg->chunk_cksum_type = DEFAULT_CHUNK_CKSUM;
cfg->similarity_cksum = DEFAULT_SIMILARITY_CKSUM; cfg->similarity_cksum = GLOBAL_SIM_CKSUM;
cfg->pct_interval = DEFAULT_SIMILARITY_INTERVAL; cfg->pct_interval = DEFAULT_PCT_INTERVAL;
fh = fopen(configfile, "r"); fh = fopen(configfile, "r");
if (fh == NULL) { if (fh == NULL) {
@ -272,13 +272,6 @@ read_config(char *configfile, archive_config_t *cfg)
fclose(fh); fclose(fh);
return (1); return (1);
} }
} else if (strncmp(line, "SIMILARITY_CKSUM", 16) == 0) {
cfg->chunk_cksum_type = get_cksum_type(pos);
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
fclose(fh);
return (1);
}
} }
} }
fclose(fh); fclose(fh);
@ -373,10 +366,6 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim
cfg->dedupe_mode = MODE_SIMPLE; cfg->dedupe_mode = MODE_SIMPLE;
cfg->segment_sz_bytes = user_chunk_sz; cfg->segment_sz_bytes = user_chunk_sz;
cfg->similarity_cksum_sz = cfg->chunk_cksum_sz; cfg->similarity_cksum_sz = cfg->chunk_cksum_sz;
} else if (cfg->archive_sz < (ONE_TB * 100)) {
cfg->segment_sz_bytes = FOUR_MB;
} else { } else {
cfg->segment_sz_bytes = EIGHT_MB; cfg->segment_sz_bytes = EIGHT_MB;
} }

View file

@ -33,11 +33,9 @@
extern "C" { extern "C" {
#endif #endif
#define DEFAULT_SIMILARITY_INTERVAL 5
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256 #define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
#define DEFAULT_COMPRESS COMPRESS_LZ4 #define DEFAULT_COMPRESS COMPRESS_LZ4
#define DEFAULT_PCT_INTERVAL 5 #define DEFAULT_PCT_INTERVAL 4
#define CONTAINER_ITEMS 2048 #define CONTAINER_ITEMS 2048
#define MIN_CK 1 #define MIN_CK 1
#define MAX_CK 5 #define MAX_CK 5

View file

@ -167,7 +167,7 @@ set_cfg:
*pct_interval = 0; *pct_interval = 0;
} else { } else {
cfg->intervals = 100 / *pct_interval; cfg->intervals = 100 / *pct_interval;
cfg->sub_intervals = (cfg->segment_sz / cfg->intervals) >> 1; cfg->sub_intervals = cfg->intervals;
*hash_slots = file_sz / cfg->segment_sz_bytes + 1; *hash_slots = file_sz / cfg->segment_sz_bytes + 1;
*hash_slots *= cfg->sub_intervals; *hash_slots *= cfg->sub_intervals;
} }