More tweaks to slightly improve segment dedupe efficiency.
Use on average 8MB segments for all cases. Some minor cleanups.
This commit is contained in:
parent
eabd670790
commit
5d6ffd969d
3 changed files with 4 additions and 17 deletions
|
@ -185,8 +185,8 @@ read_config(char *configfile, archive_config_t *cfg)
|
||||||
cfg->verify_chunks = 0;
|
cfg->verify_chunks = 0;
|
||||||
cfg->algo = COMPRESS_LZ4;
|
cfg->algo = COMPRESS_LZ4;
|
||||||
cfg->chunk_cksum_type = DEFAULT_CHUNK_CKSUM;
|
cfg->chunk_cksum_type = DEFAULT_CHUNK_CKSUM;
|
||||||
cfg->similarity_cksum = DEFAULT_SIMILARITY_CKSUM;
|
cfg->similarity_cksum = GLOBAL_SIM_CKSUM;
|
||||||
cfg->pct_interval = DEFAULT_SIMILARITY_INTERVAL;
|
cfg->pct_interval = DEFAULT_PCT_INTERVAL;
|
||||||
|
|
||||||
fh = fopen(configfile, "r");
|
fh = fopen(configfile, "r");
|
||||||
if (fh == NULL) {
|
if (fh == NULL) {
|
||||||
|
@ -272,13 +272,6 @@ read_config(char *configfile, archive_config_t *cfg)
|
||||||
fclose(fh);
|
fclose(fh);
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
} else if (strncmp(line, "SIMILARITY_CKSUM", 16) == 0) {
|
|
||||||
cfg->chunk_cksum_type = get_cksum_type(pos);
|
|
||||||
if (cfg->chunk_cksum_type == CKSUM_INVALID) {
|
|
||||||
fprintf(stderr, "Invalid CHUNK_CKSUM setting.\n");
|
|
||||||
fclose(fh);
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fclose(fh);
|
fclose(fh);
|
||||||
|
@ -373,10 +366,6 @@ set_config_s(archive_config_t *cfg, const char *algo, cksum_t ck, cksum_t ck_sim
|
||||||
cfg->dedupe_mode = MODE_SIMPLE;
|
cfg->dedupe_mode = MODE_SIMPLE;
|
||||||
cfg->segment_sz_bytes = user_chunk_sz;
|
cfg->segment_sz_bytes = user_chunk_sz;
|
||||||
cfg->similarity_cksum_sz = cfg->chunk_cksum_sz;
|
cfg->similarity_cksum_sz = cfg->chunk_cksum_sz;
|
||||||
|
|
||||||
} else if (cfg->archive_sz < (ONE_TB * 100)) {
|
|
||||||
cfg->segment_sz_bytes = FOUR_MB;
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
cfg->segment_sz_bytes = EIGHT_MB;
|
cfg->segment_sz_bytes = EIGHT_MB;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,11 +33,9 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define DEFAULT_SIMILARITY_INTERVAL 5
|
|
||||||
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
|
#define DEFAULT_CHUNK_CKSUM CKSUM_SHA256
|
||||||
#define DEFAULT_SIMILARITY_CKSUM CKSUM_BLAKE256
|
|
||||||
#define DEFAULT_COMPRESS COMPRESS_LZ4
|
#define DEFAULT_COMPRESS COMPRESS_LZ4
|
||||||
#define DEFAULT_PCT_INTERVAL 5
|
#define DEFAULT_PCT_INTERVAL 4
|
||||||
#define CONTAINER_ITEMS 2048
|
#define CONTAINER_ITEMS 2048
|
||||||
#define MIN_CK 1
|
#define MIN_CK 1
|
||||||
#define MAX_CK 5
|
#define MAX_CK 5
|
||||||
|
|
|
@ -167,7 +167,7 @@ set_cfg:
|
||||||
*pct_interval = 0;
|
*pct_interval = 0;
|
||||||
} else {
|
} else {
|
||||||
cfg->intervals = 100 / *pct_interval;
|
cfg->intervals = 100 / *pct_interval;
|
||||||
cfg->sub_intervals = (cfg->segment_sz / cfg->intervals) >> 1;
|
cfg->sub_intervals = cfg->intervals;
|
||||||
*hash_slots = file_sz / cfg->segment_sz_bytes + 1;
|
*hash_slots = file_sz / cfg->segment_sz_bytes + 1;
|
||||||
*hash_slots *= cfg->sub_intervals;
|
*hash_slots *= cfg->sub_intervals;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue