From 3ddaf6d45fb42ea50983005d5a6f4189a6749b7f Mon Sep 17 00:00:00 2001
From: Moinak Ghosh
Date: Sat, 4 Jan 2014 21:45:23 +0530
Subject: [PATCH] Bump version and update command help text.

---
 README.md   |   4 +-
 pcompress.c | 169 +++++++++++++++++++++++++---------------------------
 pcompress.h |   2 +-
 3 files changed, 85 insertions(+), 90 deletions(-)

diff --git a/README.md b/README.md
index 4e513de..1cfbaaf 100644
--- a/README.md
+++ b/README.md
@@ -124,7 +124,7 @@ Standard Usage
            [-t <number of threads>] [-S <chunk checksum>] [<target file>]
 
     Takes a single file as input and produces a compressed file. Archiving is not performed.
-    This can also work as compression pipeline.
+    This can also work in streaming mode.
 
     -c <algorithm>
            See above. Also see section "Compression Algorithms" for details.
@@ -138,7 +138,7 @@ Standard Usage
            chunks may not produce better compression. Smaller chunks can result in better
            data analysis here.
 
-    -p     Make Pcompress work in full pipeline mode. Data is ingested via stdin
+    -p     Make Pcompress work in streaming mode. Data is ingested via stdin
            compressed and output via stdout. No filenames are used.
 
diff --git a/pcompress.c b/pcompress.c
index 8e1f449..db6900e 100644
--- a/pcompress.c
+++ b/pcompress.c
@@ -81,94 +81,89 @@ extern uint32_t lzma_crc32(const uint8_t *buf, uint64_t size, uint32_t crc);
 void DLL_EXPORT
 usage(pc_ctx_t *pctx)
 {
 
+	fprintf(stderr,
-	    "\nPcompress Version %s\n\n"
-	    "Usage:\n"
-	    "1) To compress a file:\n"
-	    "   %s -c <algorithm> [-l <compress level>] [-s <chunk size>] <file> [<target file>]\n"
-	    "   Where <algorithm> can be the folowing:\n"
-	    "   lzfx   - Very fast and small algorithm based on LZF.\n"
-	    "   lz4    - Ultra fast, high-throughput algorithm reaching RAM B/W at level1.\n"
-	    "   zlib   - The base Zlib format compression (not Gzip).\n"
-	    "   lzma   - The LZMA (Lempel-Ziv Markov) algorithm from 7Zip.\n"
-	    "   lzmaMt - Multithreaded version of LZMA. This is a faster version but\n"
-	    "            uses more memory for the dictionary. Thread count is balanced\n"
-	    "            between chunk processing threads and algorithm threads.\n"
-	    "   bzip2  - Bzip2 Algorithm from libbzip2.\n"
-	    "   ppmd   - The PPMd algorithm excellent for textual data. PPMd requires\n"
-	    "            at least 64MB X core-count more memory than the other modes.\n"
-#ifdef ENABLE_PC_LIBBSC
-	    "   libbsc - A Block Sorting Compressor using the Burrows Wheeler Transform\n"
-	    "            like Bzip2 but runs faster and gives better compression than\n"
-	    "            Bzip2 (See: libbsc.com).\n"
-#endif
-	    "   adapt  - Adaptive mode where ppmd or bzip2 will be used per chunk,\n"
-	    "            depending on which one produces better compression. This mode\n"
-	    "            is obviously fairly slow and requires lots of memory.\n"
-	    "   adapt2 - Adaptive mode which includes ppmd and lzma. This requires\n"
-	    "            more memory than adapt mode, is slower and potentially gives\n"
-	    "            the best compression.\n"
-	    "   none   - No compression. This is only meaningful with -D and -E so Dedupe\n"
-	    "            can be done for post-processing with an external utility.\n"
-	    "   <chunk size> - This can be in bytes or can use the following suffixes:\n"
-	    "            g - Gigabyte, m - Megabyte, k - Kilobyte.\n"
-	    "            Larger chunks produce better compression at the cost of memory.\n"
-	    "   <compress level> - Can be a number from 0 meaning minimum and 14 meaning\n"
-	    "            maximum compression.\n\n"
-	    "   <target file> - Optional argument specifying the destination compressed\n"
-	    "            file. The '.pz' extension is appended. If this is '-' then\n"
-	    "            compressed output goes to stdout. If this argument is omitted then\n"
-	    "            source filename is used with the extension '.pz' appended.\n"
-	    "2) To decompress a file compressed using above command:\n"
-	    "   %s -d <compressed file> [<target file>]\n"
-	    "3) To operate as a pipe, read from stdin and write to stdout:\n"
-	    "   %s -p ...\n"
-	    "4) Attempt Rabin fingerprinting based deduplication on a per-chunk basis:\n"
-	    "   %s -D ...\n"
-	    "5) Perform Deduplication across the entire dataset (Global Dedupe):\n"
-	    "   %s -G <-D|-F> - This option requires one of '-D' or '-F' to be specified\n"
-	    "             to identify the block splitting method.\n"
-	    "6) Perform Delta Encoding in addition to Identical Dedupe:\n"
-	    "   %s -E ... - This also implies '-D'. This checks for at least 60%% similarity.\n"
-	    "   The flag can be repeated as in '-EE' to indicate at least 40%% similarity.\n\n"
-	    "7) Number of threads can optionally be specified: -t <1 - 256 count>\n"
-	    "8) Other flags:\n"
-	    "   '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
-	    "          algorithms with some extra CPU and very low RAM overhead.\n"
-	    "   '-P' - Enable Adaptive Delta Encoding. It can improve compresion ratio for\n"
-	    "          data containing tables of numerical values especially if those are in\n"
-	    "          an arithmetic series.\n"
-	    "   NOTE - Both -L and -P can be used together to give maximum benefit on most.\n"
-	    "          datasets.\n"
-	    "   '-S' <cksum>\n"
-	    "        - Specify chunk checksum to use:\n\n",
-	    UTILITY_VERSION, pctx->exec_name, pctx->exec_name, pctx->exec_name, pctx->exec_name,
-	    pctx->exec_name, pctx->exec_name);
-	list_checksums(stderr, " ");
-	fprintf(stderr, "\n"
-	    "   '-F' - Perform Fixed-Block Deduplication. Faster than '-D' but with lower\n"
-	    "          deduplication ratio.\n"
-	    "   '-B' <1..5>\n"
-	    "        - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
-	    "   '-B' 0\n"
-	    "        - Use ultra-small 2KB blocks for deduplication. See README for caveats.\n"
-	    "   '-M' - Display memory allocator statistics\n"
-	    "   '-C' - Display compression statistics\n\n");
-	fprintf(stderr, "\n"
-	    "8) Encryption flags:\n"
-	    "   '-e <ALGO>'\n"
-	    "        - Encrypt chunks with the given encrption algorithm. The ALGO parameter\n"
-	    "          can be one of AES or SALSA20. Both are used in CTR stream encryption\n"
-	    "          mode. The password can be prompted from the user or read from a file.\n"
-	    "          Unique keys are generated every time pcompress is run even when giving\n"
-	    "          the same password. Default key length is 256-bits (see -k below).\n"
-	    "   '-w <pathname>'\n"
-	    "        - Provide a file which contains the encryption password. This file must\n"
-	    "          be readable and writable since it is zeroed out after the password is\n"
-	    "          read.\n"
-	    "   '-k <key length>\n"
-	    "        - Specify key length. Can be 16 for 128 bit or 32 for 256 bit. Default\n"
-	    "          is 32 for 256 bit keys.\n\n");
+"\nPcompress Version %s\n\n"
+"See README.md for detailed usage.\n\n"
+"Standard Usage\n"
+"==============\n"
+"    Standard usage consists of a few common options to control basic behavior with auto-\n"
+"    setting of various other parameters.\n\n"
+"    Archiving\n"
+"    ---------\n"
+"    %s -a [-v] [-l <compress level>] [-s <chunk size>] [-c <algorithm>]\n"
+"       [<file1> <directory1> ...] [-t <number of threads>] [-S <chunk checksum>]\n"
+"       <archive filename>\n\n"
+"    Archives a given set of files and/or directories into a compressed PAX archive which\n"
+"    is then compressed.\n\n"
+"    -a       Enables the archive mode.\n"
+"    -l <compress level>\n"
+"             Select a compression level from 1 (fast) to 14 (slow). Default: 6\n\n"
+"    -s <chunk size>\n"
+"             Specifies the maximum chunk size to split the data for parallelism. Values\n"
+"             can be in bytes or with suffix(k - KB, m - MB, g - GB). Default: 8m\n"
+"             Larger chunks can produce better compression at the cost of memory.\n\n"
+"    -c <algorithm>\n"
+"             The compression algorithm. Default algorithm when archiving is adapt2.\n"
+"    -v       Enables verbose mode.\n\n"
+"    -t <number of threads>\n"
+"             Sets the number of compression threads. Default: core count.\n"
+"    -S <chunk checksum>\n"
+"             The chunk verification checksum. Default: BLAKE256. Others are: CRC64, SHA256,\n"
+"             SHA512, KECCAK256, KECCAK512, BLAKE256, BLAKE512.\n"
+"    <archive filename>\n"
+"             Pathname of the resulting archive. A '.pz' extension is automatically added\n"
+"             if not already present. This can be '-' to output to stdout.\n\n"
+"    Single File Compression\n"
+"    -----------------------\n"
+"    %s -c <algorithm> [-l <compress level>] [-s <chunk size>] [-p] [<file>]\n"
+"       [-t <number of threads>] [-S <chunk checksum>] [<target file>]\n\n"
+"    Takes a single file as input and produces a compressed file. Archiving is not performed.\n"
+"    This can also work in streaming mode.\n\n"
+"    -c <algorithm>\n"
+"             See above. Also see section 'Compression Algorithms' in README.md for details.\n"
+"    -l <compress level>\n"
+"    -s <chunk size>\n"
+"    -t <number of threads>\n"
+"    -S <chunk checksum>\n"
+"             See above.\n"
+"             Note: In single file compression mode with adapt2 or adapt algorithm, larger\n"
+"             chunks may not necessarily produce better compression.\n"
+"    -p       Make Pcompress work in streaming mode. Input is stdin, output is stdout.\n\n"
+"    <target file>\n"
+"             Pathname of the compressed file to be created or '-' for stdout.\n\n"
+"    Decompression and Archive extraction\n"
+"    ------------------------------------\n"
+"    %s -d <compressed file> [-m] [-K] [<target file or directory>]\n\n"
+"    -m       Enable restoring *all* permissions, ACLs, Extended Attributes etc.\n"
+"             Equivalent to the '-p' option in tar.\n"
+"    -K       Do not overwrite newer files.\n\n"
+"    -m and -K are only meaningful if the compressed file is an archive. For single file\n"
+"    compressed mode these options are ignored.\n\n"
+"    <compressed file>\n"
+"             Specifies the compressed file or archive. This can be '-' to indicate reading\n"
+"             from stdin while write goes to <target file>\n\n"
+"    <target file or directory>\n"
+"             If single file compression was used then this is the output file.\n"
+"             Default output name if omitted: <compressed filename>.out\n\n"
+"             If Archiving was done then this should be the name of a directory into which\n"
+"             extracted files are restored. Default if omitted: Current directory.\n\n",
+	UTILITY_VERSION, pctx->exec_name, pctx->exec_name, pctx->exec_name);
+	fprintf(stderr,
+"    Encryption\n"
+"    ----------\n"
+"    -e <ALGO>  Encrypt chunks with the given encryption algorithm. The ALGO parameter\n"
+"             can be one of AES or SALSA20. Both are used in CTR stream encryption\n"
+"             mode. The password can be prompted from the user or read from a file.\n"
+"             Unique keys are generated every time pcompress is run even when giving\n"
+"             the same password. Default key length is 256-bits (see -k below).\n"
+"    -w <password file>\n"
+"             Provide a file which contains the encryption password. This file must\n"
+"             be readable and writable since it is zeroed out after the password is\n"
+"             read.\n"
+"    -k <key length>\n"
+"             Specify key length. Can be 16 for 128 bit or 32 for 256 bit. Default\n"
+"             is 32 for 256 bit keys.\n\n");
 }
 
 static void
diff --git a/pcompress.h b/pcompress.h
index 9adf74b..bc8451f 100644
--- a/pcompress.h
+++ b/pcompress.h
@@ -45,7 +45,7 @@ extern "C" {
 #define FLAG_DEDUP_FIXED 2
 #define FLAG_SINGLE_CHUNK 4
 #define FLAG_ARCHIVE 2048
-#define UTILITY_VERSION "2.4"
+#define UTILITY_VERSION "3.0"
 #define MASK_CRYPTO_ALG 0x30
 #define MAX_LEVEL 14
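
Note: the -s <chunk size> option described in the new help text accepts either a plain byte
count or a k/m/g suffix (e.g. the default of 8m). The sketch below only illustrates that kind
of suffix parsing; parse_chunk_size is a hypothetical helper written for this note and is not
the routine pcompress itself uses.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Illustrative sketch only -- not the parser pcompress itself uses.
 * Accepts a size such as "64k", "8m" or "1g", or a plain byte count,
 * and returns the value in bytes, or 0 on malformed input.
 */
static uint64_t
parse_chunk_size(const char *arg)
{
	char *end;
	uint64_t val = strtoull(arg, &end, 10);

	if (end == arg)
		return (0);			/* no digits at all */

	switch (*end) {
	case 'g': case 'G': val <<= 30; end++; break;
	case 'm': case 'M': val <<= 20; end++; break;
	case 'k': case 'K': val <<= 10; end++; break;
	default: break;				/* plain byte count */
	}
	return (*end == '\0' ? val : 0);	/* reject trailing junk */
}

int
main(void)
{
	/* The default chunk size advertised in the help text above is 8m. */
	printf("%llu\n", (unsigned long long)parse_chunk_size("8m"));
	return (0);
}

Compiled standalone, this prints 8388608 (8 MB) for the "8m" default shown in the help text.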