Bump version and update command help text.

2014-01-04 21:45:23 +05:30 · 2014-01-04 21:45:23 +05:30 · 3ddaf6d45f
commit 3ddaf6d45f
parent 16da0b0339
3 changed files with 85 additions and 90 deletions
--- a/README.md
+++ b/README.md
@ -124,7 +124,7 @@ Standard Usage
                 [-t <number>] [-S <chunk checksum>] [<target file or '-'>]
       Takes a single file as input and produces a compressed file. Archiving is not performed.
-       This can also work as compression pipeline.
+       This can also work in streaming mode.
       -c <algorithm>
                See above. Also see section "Compression Algorithms" for details.
@ -138,7 +138,7 @@ Standard Usage
                      chunks may not produce better compression. Smaller chunks can result
                      in better data analysis here.
-       -p       Make Pcompress work in full pipeline mode. Data is ingested via stdin
+       -p       Make Pcompress work in streaming mode. Data is ingested via stdin
                compressed and output via stdout. No filenames are used.
       <target file>
--- a/pcompress.c
+++ b/pcompress.c
@ -81,94 +81,89 @@ extern uint32_t lzma_crc32(const uint8_t *buf, uint64_t size, uint32_t crc);
 void DLL_EXPORT
 usage(pc_ctx_t *pctx)
 {
 	fprintf(stderr,
-	    "\nPcompress Version %s\n\n"
+"\nPcompress Version %s\n\n"
-	    "Usage:\n"
+"See README.md for detailed usage.\n\n"
-	    "1) To compress a file:\n"
+"Standard Usage\n"
-	    "   %s -c <algorithm> [-l <compress level>] [-s <chunk size>] <file> [<target file>]\n"
+"==============\n"
-	    "   Where <algorithm> can be the folowing:\n"
+"    Standard usage consists of a few common options to control basic behavior with auto-\n"
-	    "   lzfx   - Very fast and small algorithm based on LZF.\n"
+"    setting of various other parameters.\n\n"
-	    "   lz4    - Ultra fast, high-throughput algorithm reaching RAM B/W at level1.\n"
+"    Archiving\n"
-	    "   zlib   - The base Zlib format compression (not Gzip).\n"
+"    ---------\n"
-	    "   lzma   - The LZMA (Lempel-Ziv Markov) algorithm from 7Zip.\n"
+"       %s -a [-v] [-l <compress level>] [-s <chunk size>] [-c <algorithm>]\n"
-	    "   lzmaMt - Multithreaded version of LZMA. This is a faster version but\n"
+"                    [<file1> <directory1> <file2> ...] [-t <number>] [-S <chunk checksum>]\n"
-	    "            uses more memory for the dictionary. Thread count is balanced\n"
+"                    <archive filename or '-'>\n\n"
-	    "            between chunk processing threads and algorithm threads.\n"
+"       Archives a given set of files and/or directories into a PAX archive which\n"
-	    "   bzip2  - Bzip2 Algorithm from libbzip2.\n"
+"       is then compressed.\n\n"
-	    "   ppmd   - The PPMd algorithm excellent for textual data. PPMd requires\n"
+"       -a       Enables the archive mode.\n"
-	    "            at least 64MB X core-count more memory than the other modes.\n"
+"       -l <compress level>\n"
-#ifdef ENABLE_PC_LIBBSC
+"                Select a compression level from 1 (fast) to 14 (slow). Default: 6\n\n"
-	    "   libbsc - A Block Sorting Compressor using the Burrows Wheeler Transform\n"
+"       -s <chunk size>\n"
-	    "            like Bzip2 but runs faster and gives better compression than\n"
+"                Specifies the maximum chunk size to split the data for parallelism. Values\n"
-	    "            Bzip2 (See: libbsc.com).\n"
+"                can be in bytes or with suffix(k - KB, m - MB, g - GB). Default: 8m\n"
-#endif
+"                Larger chunks can produce better compression at the cost of memory.\n\n"
-	    "   adapt  - Adaptive mode where ppmd or bzip2 will be used per chunk,\n"
+"       -c <algorithm>\n"
-	    "            depending on which one produces better compression. This mode\n"
+"                The compression algorithm. Default algorithm when archiving is adapt2.\n"
-	    "            is obviously fairly slow and requires lots of memory.\n"
+"       -v       Enables verbose mode.\n\n"
-	    "   adapt2 - Adaptive mode which includes ppmd and lzma. This requires\n"
+"       -t <number>\n"
-	    "            more memory than adapt mode, is slower and potentially gives\n"
+"                Sets the number of compression threads. Default: core count.\n"
-	    "            the best compression.\n"
+"       -S <chunk checksum>\n"
-	    "   none   - No compression. This is only meaningful with -D and -E so Dedupe\n"
+"                The chunk verification checksum. Default: BLAKE256. Others are: CRC64, SHA256,\n"
-	    "            can be done for post-processing with an external utility.\n"
+"                SHA512, KECCAK256, KECCAK512, BLAKE512.\n"
-	    "   <chunk_size> - This can be in bytes or can use the following suffixes:\n"
+"       <archive filename>\n"
-	    "            g - Gigabyte, m - Megabyte, k - Kilobyte.\n"
+"                Pathname of the resulting archive. A '.pz' extension is automatically added\n"
-	    "            Larger chunks produce better compression at the cost of memory.\n"
+"                if not already present. This can be '-' to output to stdout.\n\n"
-	    "   <compress_level> - Can be a number from 0 meaning minimum and 14 meaning\n"
+"    Single File Compression\n"
-	    "            maximum compression.\n\n"
+"    -----------------------\n"
-	    "   <target file>    - Optional argument specifying the destination compressed\n"
+"       %s -c <algorithm> [-l <compress level>] [-s <chunk size>] [-p] [<file>]\n"
-	    "            file. The '.pz' extension is appended. If this is '-' then\n"
+"                 [-t <number>] [-S <chunk checksum>] [<target file or '-'>]\n\n"
-	    "            compressed output goes to stdout. If this argument is omitted then\n"
+"       Takes a single file as input and produces a compressed file. Archiving is not performed.\n"
-	    "            source filename is used with the extension '.pz' appended.\n"
+"       This can also work in streaming mode.\n\n"
-	    "2) To decompress a file compressed using above command:\n"
+"       -c <algorithm>\n"
-	    "   %s -d <compressed file> <target file>\n"
+"                See above. Also see section 'Compression Algorithms' in README.md for details.\n"
-	    "3) To operate as a pipe, read from stdin and write to stdout:\n"
+"       -l <compress level>\n"
-	    "   %s -p ...\n"
+"       -s <chunk size>\n"
-	    "4) Attempt Rabin fingerprinting based deduplication on a per-chunk basis:\n"
+"       -t <number>\n"
-	    "   %s -D ...\n"
+"       -S <chunk checksum>\n"
-	    "5) Perform Deduplication across the entire dataset (Global Dedupe):\n"
+"                See above.\n"
-	    "   %s -G <-D|-F> - This option requires one of '-D' or '-F' to be specified\n"
+"                Note: In single file compression mode with adapt2 or adapt algorithm, larger\n"
-	    "             to identify the block splitting method.\n"
+"                      chunks may not necessarily produce better compression.\n"
-	    "6) Perform Delta Encoding in addition to Identical Dedupe:\n"
+"       -p       Make Pcompress work in streaming mode. Input is stdin, output is stdout.\n\n"
-	    "   %s -E ... - This also implies '-D'. This checks for at least 60%% similarity.\n"
+"       <target file>\n"
-	    "   The flag can be repeated as in '-EE' to indicate at least 40%% similarity.\n\n"
+"                Pathname of the compressed file to be created or '-' for stdout.\n\n"
-	    "7) Number of threads can optionally be specified: -t <1 - 256 count>\n"
+"    Decompression and Archive extraction\n"
-	    "8) Other flags:\n"
+"    ------------------------------------\n"
-	    "   '-L'    - Enable LZP pre-compression. This improves compression ratio of all\n"
+"       %s -d <compressed file or '-'> [-m] [-K] [<target file or directory>]\n\n"
-	    "             algorithms with some extra CPU and very low RAM overhead.\n"
+"       -m        Enable restoring *all* permissions, ACLs, Extended Attributes etc.\n"
-	    "   '-P'    - Enable Adaptive Delta Encoding. It can improve compresion ratio for\n"
+"                 Equivalent to the '-p' option in tar.\n"
-	    "             data containing tables of numerical values especially if those are in\n"
+"       -K        Do not overwrite newer files.\n\n"
-	    "             an arithmetic series.\n"
+"       -m and -K are only meaningful if the compressed file is an archive. For single file\n"
-	    "   NOTE    - Both -L and -P can be used together to give maximum benefit on most.\n"
+"       compressed mode these options are ignored.\n\n"
-	    "             datasets.\n"
+"       <compressed file>\n"
-	    "   '-S' <cksum>\n"
+"                 Specifies the compressed file or archive. This can be '-' to indicate reading\n"
-	    "           - Specify chunk checksum to use:\n\n",
+"                 from stdin while output is written to <target file>.\n\n"
-	    UTILITY_VERSION, pctx->exec_name, pctx->exec_name, pctx->exec_name, pctx->exec_name,
+"       <target file or directory>\n"
-	    pctx->exec_name, pctx->exec_name);
+"                 If single file compression was used then this is the output file.\n"
-	list_checksums(stderr, "             ");
+"                 Default output name if omitted: <input filename>.out\n\n"
-	fprintf(stderr, "\n"
+"                 If Archiving was done then this should be the name of a directory into which\n"
-	    "   '-F'    - Perform Fixed-Block Deduplication. Faster than '-D' but with lower\n"
+"                 extracted files are restored. Default if omitted: Current directory.\n\n",
-	    "             deduplication ratio.\n"
+	    UTILITY_VERSION, pctx->exec_name, pctx->exec_name, pctx->exec_name);
-	    "   '-B' <1..5>\n"
+	fprintf(stderr,
-	    "           - Specify an average Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
+"    Encryption\n"
-	    "   '-B' 0\n"
+"    ----------\n"
-	    "           - Use ultra-small 2KB blocks for deduplication. See README for caveats.\n"
+"       -e <ALGO> Encrypt chunks with the given encryption algorithm. The ALGO parameter\n"
-	    "   '-M'    - Display memory allocator statistics\n"
+"                 can be one of AES or SALSA20. Both are used in CTR stream encryption\n"
-	    "   '-C'    - Display compression statistics\n\n");
+"                 mode. The password can be prompted from the user or read from a file.\n"
-	fprintf(stderr, "\n"
+"                 Unique keys are generated every time pcompress is run even when giving\n"
-	    "8) Encryption flags:\n"
+"                 the same password. Default key length is 256-bits (see -k below).\n"
-	    "   '-e <ALGO>'\n"
+"       -w <pathname>\n"
-	    "           - Encrypt chunks with the given encrption algorithm. The ALGO parameter\n"
+"                 Provide a file which contains the encryption password. This file must\n"
-	    "             can be one of AES or SALSA20. Both are used in CTR stream encryption\n"
+"                 be readable and writable since it is zeroed out after the password is\n"
-	    "             mode. The password can be prompted from the user or read from a file.\n"
+"                 read.\n"
-	    "             Unique keys are generated every time pcompress is run even when giving\n"
+"       -k <key length>\n"
-	    "             the same password. Default key length is 256-bits (see -k below).\n"
+"                 Specify key length. Can be 16 for 128 bit or 32 for 256 bit. Default\n"
-	    "   '-w <pathname>'\n"
+"                 is 32 for 256 bit keys.\n\n");
 	    "           - Provide a file which contains the encryption password. This file must\n"
 	    "             be readable and writable since it is zeroed out after the password is\n"
 	    "             read.\n"
 	    "   '-k <key length>\n"
 	    "           - Specify key length. Can be 16 for 128 bit or 32 for 256 bit. Default\n"
 	    "             is 32 for 256 bit keys.\n\n");
 }
 static void
--- a/pcompress.h
+++ b/pcompress.h
@ -45,7 +45,7 @@ extern "C" {
 #define	FLAG_DEDUP_FIXED	2
 #define	FLAG_SINGLE_CHUNK	4
 #define	FLAG_ARCHIVE	2048
-#define	UTILITY_VERSION	"2.4"
+#define	UTILITY_VERSION	"3.0"
 #define	MASK_CRYPTO_ALG	0x30
 #define	MAX_LEVEL	14