From 2c516c009c929fbe911190b2690455d3bf6d086b Mon Sep 17 00:00:00 2001
From: Moinak Ghosh
Date: Tue, 31 Jul 2012 21:07:35 +0530
Subject: [PATCH] Fix crash when algo init function returns error. Fix LZFX
 error handling. More updates to README.

---
 Changelog       |  8 ++++++++
 README.md       | 23 ++++++++++++++++++++---
 lzfx_compress.c |  7 ++++---
 main.c          |  8 ++++++--
 4 files changed, 38 insertions(+), 8 deletions(-)
 create mode 100644 Changelog

diff --git a/Changelog b/Changelog
new file mode 100644
index 0000000..fbd0893
--- /dev/null
+++ b/Changelog
@@ -0,0 +1,8 @@
+== Changes since 0.5 Alpha release ==
+Further improve LZMA compression parameters to utilize all the 14 levels.
+Tweak some Rabin parameters for better reduction with Zlib and Bzip2.
+Increase the small-size slabs a bit.
+Fix slab sizing.
+Fix buffer size computation when allocating the Rabin block array.
+Reduce memory usage of the Rabin block array.
+Add an SSE optimization for bsdiff.
diff --git a/README.md b/README.md
index 1145bc8..3c0c196 100644
--- a/README.md
+++ b/README.md
@@ -99,18 +99,31 @@
 Zlib   - Fast, better compression.
          Levels: 1 - 9
 Bzip2  - Slow, much better compression than Zlib.
          Levels: 1 - 9
+
 LZMA   - Very slow. Extreme compression.
          Levels: 1 - 14
+         Up to level 9, standard LZMA parameters are used. Levels 10 - 12
+         use more memory and higher match iterations, so they are slower.
+         Levels 13 and 14 use larger dictionaries of up to 256MB and
+         really suck up RAM. Use these levels only if you have at least
+         4GB of RAM on your system.
+
 PPMD   - Slow. Extreme compression for Text, average compression for binary.
+         This also requires lots of RAM, similar to LZMA.
          Levels: 1 - 14.
 Adapt  - Very slow synthetic mode. Both Bzip2 and PPMD are tried per chunk and
          better result selected.
          Levels: 1 - 14
 Adapt2 - Ultra slow synthetic mode. Both LZMA and PPMD are tried per chunk and
-         better result selected. Can give best compression ration when splitting
+         better result selected. Can give best compression ratio when splitting
          file into multiple chunks.
          Levels: 1 - 14
+         Since both LZMA and PPMD are used together, memory requirements
+         are quite extensive, especially at the extreme levels above 10.
+         For example, with a 64MB chunk size, level 14 and 2 threads, it
+         uses up to 3.5GB of physical RAM with or without dedupe. So the
+         minimum requirement is 6GB of RAM *and* at least 4GB of physical swap.
 
 It is possible for a single chunk to span the entire file if enough RAM is
 available. However for adaptive modes to be effective for large files, especially
@@ -119,10 +132,14 @@
 algorithm can be selected for textual and binary portions.
 
 Caveats
 =======
-This utility can gobble up RAM depending on compression algorithm,
+This utility is not meant for resource-constrained environments. Minimum memory
+usage (RES/RSS) with barely meaningful settings is around 10MB. This occurs when
+using the minimal LZFX compression algorithm at level 2 with a 1MB chunk size
+and running 2 threads.
+Normally this utility requires lots of RAM depending on compression algorithm,
 compression level, and dedupe being enabled. Larger chunk sizes can give
 better compression ratio but at the same time use more RAM. In some cases
 for files less than a gigabyte. Using Delta Compression in addition to
 exact Dedupe can have a slight negative impact on LZMA compression ratio
-especially when using the large-window ultra compression levels above 12.
+especially when using the large-window ultra compression levels above 10.
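The per-algorithm level ranges documented in the README hunk above map naturally
onto a small validation table. The following is a minimal illustrative sketch,
not code from the pcompress sources: the algo_level_ok() helper and its table
are hypothetical, and the LZFX range is an assumption (the README text above
only shows LZFX being used at level 2).

    #include <stdio.h>
    #include <string.h>

    /*
     * Hypothetical helper, not from the pcompress sources: validates an
     * (algorithm, level) pair against the ranges documented in the README.
     */
    static const struct {
            const char *name;
            int min_level;
            int max_level;
    } algo_levels[] = {
            { "lzfx",   1,  5 },    /* range assumed, not in the README hunk */
            { "zlib",   1,  9 },
            { "bzip2",  1,  9 },
            { "lzma",   1, 14 },
            { "ppmd",   1, 14 },
            { "adapt",  1, 14 },
            { "adapt2", 1, 14 }
    };

    static int
    algo_level_ok(const char *algo, int level)
    {
            size_t i;

            for (i = 0; i < sizeof (algo_levels) / sizeof (algo_levels[0]); i++) {
                    if (strcmp(algo_levels[i].name, algo) == 0)
                            return (level >= algo_levels[i].min_level &&
                                level <= algo_levels[i].max_level);
            }
            return (0);     /* Unknown algorithm. */
    }

    int
    main(void)
    {
            printf("lzma 14: %d\n", algo_level_ok("lzma", 14));  /* prints 1 */
            printf("zlib 14: %d\n", algo_level_ok("zlib", 14));  /* prints 0 */
            return (0);
    }

Keeping the ranges in one table like this lets an option parser and a usage
message stay in sync with a single definition.
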
diff --git a/lzfx_compress.c b/lzfx_compress.c
index 9ddaf18..60e8bf5 100644
--- a/lzfx_compress.c
+++ b/lzfx_compress.c
@@ -103,8 +103,9 @@ lz_fx_compress(void *src, size_t srclen, void *dst, size_t *dstlen,
 	unsigned int _dstlen = *dstlen;
 
 	rv = lzfx_compress(src, _srclen, dst, &_dstlen, lzdat->htab_bits);
-	if (rv == -1) {
-		lz_fx_err(rv);
+	if (rv != 0) {
+		if (rv != LZFX_ESIZE)
+			lz_fx_err(rv);
 		return (-1);
 	}
 	*dstlen = _dstlen;
@@ -121,7 +122,7 @@ lz_fx_decompress(void *src, size_t srclen, void *dst, size_t *dstlen,
 	unsigned int _dstlen = *dstlen;
 
 	rv = lzfx_decompress(src, _srclen, dst, &_dstlen);
-	if (rv == -1) {
+	if (rv != 0) {
 		lz_fx_err(rv);
 		return (-1);
 	}
diff --git a/main.c b/main.c
index 24167eb..6974740 100644
--- a/main.c
+++ b/main.c
@@ -890,7 +890,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
 	if (nprocs > 1) fprintf(stderr, "s");
 	fprintf(stderr, "\n");
 
-	dary = (struct cmp_data **)slab_alloc(NULL, sizeof (struct cmp_data *) * nprocs);
+	dary = (struct cmp_data **)slab_calloc(NULL, nprocs, sizeof (struct cmp_data *));
 	if (enable_rabin_scan)
 		cread_buf = (uchar_t *)slab_alloc(NULL, compressed_chunksize + CHDR_SZ);
 	else
@@ -1101,7 +1101,10 @@ comp_done:
 	if (err) {
 		if (compfd != -1 && !pipe_mode)
 			unlink(tmpfile1);
-		fprintf(stderr, "Error compressing file: %s\n", filename);
+		if (filename)
+			fprintf(stderr, "Error compressing file: %s\n", filename);
+		else
+			fprintf(stderr, "Error compressing\n");
 	} else {
 		/*
 		 * Write a trailer of zero chunk length. */
@@ -1133,6 +1136,7 @@ comp_done:
 	}
 	if (dary != NULL) {
 		for (i = 0; i < nprocs; i++) {
+			if (!dary[i]) continue;
 			slab_free(NULL, dary[i]->uncompressed_chunk);
 			slab_free(NULL, dary[i]->cmp_seg);
 			if (enable_rabin_scan) {
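
Two notes on the code hunks above. The lzfx_compress.c change both widens the
error check (any nonzero LZFX return code now fails the call, not just -1) and
silences the diagnostic for LZFX_ESIZE, which presumably signals nothing worse
than a chunk that would not fit in the output buffer, i.e. incompressible data.
The main.c change is the crash fix named in the subject: dary is now zero-filled
by slab_calloc(), so when an algorithm init function fails partway through, the
cleanup loop can recognize never-initialized slots by their NULL value and skip
them. Below is a minimal self-contained sketch of that pattern, with plain
calloc()/free() standing in for pcompress's slab allocator and a hypothetical
init_one() standing in for a per-thread init function that can fail.

    #include <stdio.h>
    #include <stdlib.h>

    /*
     * Sketch of the crash-fix pattern in the main.c hunks above. Nothing
     * here is pcompress code; it only demonstrates the allocation and
     * cleanup discipline the patch adopts.
     */
    struct worker {
            char *buf;
    };

    static struct worker *
    init_one(int i)
    {
            struct worker *w;

            if (i == 2)     /* Simulate an init failure on the third slot. */
                    return (NULL);
            if ((w = calloc(1, sizeof (*w))) != NULL)
                    w->buf = malloc(64);
            return (w);
    }

    int
    main(void)
    {
            struct worker **dary;
            int i, nprocs = 4, err = 0;

            /*
             * calloc(), not malloc(): every slot starts out NULL, so the
             * array is safe to walk even if initialization stops early.
             */
            dary = calloc(nprocs, sizeof (struct worker *));
            if (dary == NULL)
                    return (1);

            for (i = 0; i < nprocs; i++) {
                    dary[i] = init_one(i);
                    if (dary[i] == NULL) {
                            fprintf(stderr, "Error initializing slot %d\n", i);
                            err = 1;        /* Bail out; later slots stay NULL. */
                            break;
                    }
            }

            /*
             * Cleanup mirrors the patched loop: skip slots that were never
             * initialized instead of dereferencing garbage pointers.
             */
            for (i = 0; i < nprocs; i++) {
                    if (!dary[i]) continue;
                    free(dary[i]->buf);
                    free(dary[i]);
            }
            free(dary);
            return (err);
    }

With the original malloc-style allocation the cleanup loop would read
indeterminate pointer values from the unfilled tail of the array, which is
exactly the crash the patch removes.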