From b236638e724741ea7ca4af914096c65f6598a040 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 1 Sep 2013 15:02:28 +0530 Subject: [PATCH] Remove confusing option with little practical utility. Update test cases and documentation. --- README.md | 6 ++---- pcompress.c | 11 +++-------- test/t4.tst | 4 ++-- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index c3c14e3..6762e1f 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Usage between chunk processing threads and algorithm threads. bzip2 - Bzip2 Algorithm from libbzip2. ppmd - The PPMd algorithm excellent for textual data. PPMd requires - at least 64MB X CPUs more memory than the other modes. + at least 64MB X core-count more memory than the other modes. libbsc - A Block Sorting Compressor using the Burrows Wheeler Transform like Bzip2 but runs faster and gives better compression than @@ -106,10 +106,8 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library To operate as a full pipe, read from stdin and write to stdout: pcompress -p ... - Attempt Rabin fingerprinting based deduplication on chunks: + Attempt Rabin fingerprinting based deduplication on a per-chunk basis: pcompress -D ... - pcompress -D -r ... - Do NOT split chunks at a rabin boundary. Default - is to split. Perform Delta Encoding in addition to Identical Dedup: pcompress -E ... - This also implies '-D'. This performs Delta Compression diff --git a/pcompress.c b/pcompress.c index ad4175b..d24856e 100644 --- a/pcompress.c +++ b/pcompress.c @@ -96,7 +96,7 @@ usage(pc_ctx_t *pctx) " between chunk processing threads and algorithm threads.\n" " bzip2 - Bzip2 Algorithm from libbzip2.\n" " ppmd - The PPMd algorithm excellent for textual data. PPMd requires\n" - " at least 64MB X CPUs more memory than the other modes.\n" + " at least 64MB X core-count more memory than the other modes.\n" #ifdef ENABLE_PC_LIBBSC " libbsc - A Block Sorting Compressor using the Burrows Wheeler Transform\n" " like Bzip2 but runs faster and gives better compression than\n" @@ -119,9 +119,8 @@ usage(pc_ctx_t *pctx) " %s -d \n" "3) To operate as a pipe, read from stdin and write to stdout:\n" " %s -p ...\n" - "4) Attempt Rabin fingerprinting based deduplication on chunks:\n" + "4) Attempt Rabin fingerprinting based deduplication on a per-chunk basis:\n" " %s -D ...\n" - " %s -D -r ... - Do NOT split chunks at a rabin boundary. Default is to split.\n\n" "5) Perform Deduplication across the entire dataset (Global Dedupe):\n" " %s -G <-D|-F> - This option requires one of '-D' or '-F' to be specified\n" " to identify the block splitting method.\n" @@ -140,7 +139,7 @@ usage(pc_ctx_t *pctx) " '-S' \n" " - Specify chunk checksum to use:\n\n", UTILITY_VERSION, pctx->exec_name, pctx->exec_name, pctx->exec_name, pctx->exec_name, - pctx->exec_name, pctx->exec_name, pctx->exec_name); + pctx->exec_name, pctx->exec_name); list_checksums(stderr, " "); fprintf(stderr, "\n" " '-F' - Perform Fixed-Block Deduplication. Faster than '-D' but with lower\n" @@ -2719,10 +2718,6 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) pctx->enable_delta2_encode = 1; break; - case 'r': - pctx->enable_rabin_split = 0; - break; - case 'k': pctx->keylen = atoi(optarg); if ((pctx->keylen != 16 && pctx->keylen != 32) || pctx->keylen > MAX_KEYLEN) { diff --git a/test/t4.tst b/test/t4.tst index a3ed244..90b543b 100644 --- a/test/t4.tst +++ b/test/t4.tst @@ -10,10 +10,10 @@ do for tf in `cat files.lst` do rm -f ${tf}.* - for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2 -r" "-P" "-D -P" "-D -L -P" \ + for feat in "-D" "-D -B3 -L" "-D -B4 -E" "-D -B0 -EE" "-D -B5 -EE -L" "-D -B2" "-P" "-D -P" "-D -L -P" \ "-G -D" "-G -F" "-G -L -P" "-G -B2" do - for seg in 2m 100m + for seg in 2m 11m do cmd="../../pcompress -c ${algo} -l 3 -s ${seg} $feat ${tf}" echo "Running $cmd"