Update usage text and add minor tweaks.

Moinak Ghosh 2013-04-18 22:55:49 +05:30
parent 426c0d0bf2
commit 2f6ccca6e5
2 changed files with 25 additions and 10 deletions

main.c (21 changed lines)

@@ -150,11 +150,14 @@ usage(void)
"4) Attempt Rabin fingerprinting based deduplication on chunks:\n"
" %s -D ...\n"
" %s -D -r ... - Do NOT split chunks at a rabin boundary. Default is to split.\n\n"
"5) Perform Delta Encoding in addition to Identical Dedup:\n"
"5) Perform Deduplication across the entire dataset (Global Dedupe):\n"
" %s -G <-D|-F> - This option requires one of '-D' or '-F' to be specified\n"
" to identify the block splitting method.\n"
"6) Perform Delta Encoding in addition to Identical Dedupe:\n"
" %s -E ... - This also implies '-D'. This checks for at least 60%% similarity.\n"
" The flag can be repeated as in '-EE' to indicate at least 40%% similarity.\n\n"
"6) Number of threads can optionally be specified: -t <1 - 256 count>\n"
"7) Other flags:\n"
"7) Number of threads can optionally be specified: -t <1 - 256 count>\n"
"8) Other flags:\n"
" '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
" algorithms with some extra CPU and very low RAM overhead.\n"
" '-P' - Enable Adaptive Delta Encoding. It can improve compresion ratio for\n"
@@ -164,11 +167,12 @@ usage(void)
" datasets.\n"
" '-S' <cksum>\n"
" - Specify chunk checksum to use:\n\n",
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name,
exec_name);
list_checksums(stderr, " ");
fprintf(stderr, "\n"
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' in some cases\n"
" but with lower deduplication ratio.\n"
" '-F' - Perform Fixed-Block Deduplication. Faster than '-D' but with lower\n"
" deduplication ratio.\n"
" '-B' <1..5>\n"
" - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
" '-M' - Display memory allocator statistics\n"
@@ -2669,6 +2673,11 @@ main(int argc, char *argv[])
 if (cksum == 0)
 get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes, &mac_bytes, 0);
+if ((enable_rabin_scan || enable_fixed_scan) && cksum == CKSUM_CRC64) {
+fprintf(stderr, "CRC64 checksum is not suitable for Deduplication.\n");
+exit(1);
+}
 if (!encrypt_type) {
 /*
 * If not encrypting we compute a header CRC32.


@@ -169,16 +169,22 @@ set_cfg:
 *pct_interval = 0;
 } else {
 cfg->intervals = 100 / *pct_interval;
-cfg->sub_intervals = cfg->segment_sz / cfg->intervals;
+cfg->sub_intervals = (cfg->segment_sz + 1) / cfg->intervals;
 *hash_slots = file_sz / cfg->segment_sz_bytes + 1;
 *hash_slots *= (cfg->intervals + cfg->sub_intervals);
 }
-// Compute memory required to hold all hash entries assuming worst case 50%
-// occupancy.
+/*
+ * Compute memory required to hold all hash entries assuming worst case 50%
+ * occupancy.
+ */
 *memreqd = MEM_REQD(*hash_slots, *hash_entry_size);
-if (*memreqd > (memlimit + (memlimit >> 1)) && cfg->dedupe_mode == MODE_SIMPLE &&
+/*
+ * If memory required is more than twice the indicated memory limit then
+ * we switch to Segmented Cumulative Similarity based dedupe.
+ */
+if (*memreqd > (memlimit * 2) && cfg->dedupe_mode == MODE_SIMPLE &&
 *pct_interval == 0) {
 *pct_interval = DEFAULT_PCT_INTERVAL;
 set_user = 1;
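
The sizing fallback in the last hunk is easier to see with concrete numbers. Below is a minimal, standalone C sketch of just that decision, not the project's actual code: the MEM_REQD definition (slots * entry size * 2 for worst-case 50% occupancy), the 32-byte hash entry, the DEFAULT_PCT_INTERVAL value, and the example file size and memory limit are all assumptions for illustration; only the "more than twice the memory limit" test mirrors the patched logic.

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed definition: 50% occupancy means twice the slots actually filled. */
    #define MEM_REQD(slots, entry_sz)   ((slots) * (entry_sz) * 2)
    #define DEFAULT_PCT_INTERVAL        2   /* assumed value, for illustration only */

    int
    main(void)
    {
        uint64_t file_sz  = 100ULL * 1024 * 1024 * 1024; /* 100 GiB dataset (example) */
        uint64_t memlimit = 512ULL * 1024 * 1024;        /* 512 MiB index budget (example) */
        uint64_t block_sz = 4096;                        /* average dedupe block size (example) */
        uint64_t entry_sz = 32;                          /* assumed bytes per hash entry */
        int pct_interval  = 0;                           /* 0 => simple, whole-index dedupe */

        /* Roughly one hash slot per average-sized block (illustrative). */
        uint64_t hash_slots = file_sz / block_sz + 1;
        uint64_t memreqd = MEM_REQD(hash_slots, entry_sz);

        /* Same test as the patched code: needing more than twice the memory
         * limit triggers the switch to Segmented Cumulative Similarity dedupe. */
        if (memreqd > memlimit * 2 && pct_interval == 0) {
            pct_interval = DEFAULT_PCT_INTERVAL;
            printf("memreqd %llu > 2 * memlimit %llu, switching to segmented dedupe (pct_interval = %d)\n",
                (unsigned long long)memreqd, (unsigned long long)memlimit, pct_interval);
        } else {
            printf("memreqd %llu fits within 2 * memlimit %llu, staying in simple mode\n",
                (unsigned long long)memreqd, (unsigned long long)memlimit);
        }
        return (0);
    }

With these example numbers the full index would need about 1.6 GB against an allowance of 1 GB (twice the 512 MiB limit), so the sketch reports the switch to segmented mode; with the pre-patch 1.5x threshold the same inputs would also have switched, so the change only affects datasets that fall between 1.5x and 2x the limit.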