diff --git a/main.c b/main.c
index e640877..34ca19a 100644
--- a/main.c
+++ b/main.c
@@ -88,6 +88,7 @@ static int do_compress = 0;
 static int do_uncompress = 0;
 static int cksum_bytes;
 static int cksum = 0;
+static int rab_blk_size = 0;
 static rabin_context_t *rctx;
 
 static void
@@ -138,13 +139,15 @@ usage(void)
 	    "   %s -E ... - This also implies '-D'.\n"
 	    "6) Number of threads can optionally be specified: -t <1 - 256 count>\n"
 	    "7) Other flags:\n"
-	    "   '-L'	- Enable LZP pre-compression. This improves compression ratio of all\n"
-	    "       	  algorithms with some extra CPU and very low RAM overhead.\n"
-	    "	'-S' <cksum>\n"
-	    "		- Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n"
-	    "		  Default one is SKEIN256.\n"
-	    "   '-M'	- Display memory allocator statistics\n"
-	    "   '-C'	- Display compression statistics\n\n",
+	    "   '-L'    - Enable LZP pre-compression. This improves compression ratio of all\n"
+	    "             algorithms with some extra CPU and very low RAM overhead.\n"
+	    "   '-S' <cksum>\n"
+	    "           - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n"
+	    "             Default one is SKEIN256.\n"
+	    "   '-B' <1..5>\n"
+	    "           - Specify a minimum Dedupe block size. 1 - 4K, 2 - 8K ... 5 - 64K.\n"
+	    "   '-M'    - Display memory allocator statistics\n"
+	    "   '-C'    - Display compression statistics\n\n",
 	    UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
 }
 
@@ -578,7 +581,7 @@ start_decompress(const char *filename, const char *to_filename)
 			}
 		}
 		if (enable_rabin_scan) {
-			tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, 0,
+			tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
 			    algo, enable_delta_encode);
 			if (tdat->rctx == NULL) {
 				UNCOMP_BAIL;
@@ -1130,7 +1133,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
 			}
 		}
 		if (enable_rabin_scan) {
-			tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, 0,
+			tdat->rctx = create_rabin_context(chunksize, compressed_chunksize, rab_blk_size,
 			    algo, enable_delta_encode);
 			if (tdat->rctx == NULL) {
 				COMP_BAIL;
@@ -1521,7 +1524,7 @@ main(int argc, char *argv[])
 	level = 6;
 	slab_init();
 
-	while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:")) != -1) {
+	while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:B:")) != -1) {
 		int ovr;
 
 		switch (opt) {
@@ -1555,6 +1558,20 @@ main(int argc, char *argv[])
 				err_exit(0, "Compression level should be in range 0 - 14\n");
 			break;
 
+		    case 'B': {
+			/*
+			 * Minimum Dedupe block size selector (1 .. 5). Parse with strtol
+			 * and check the end pointer so that empty or partially numeric
+			 * arguments (e.g. "3x") are rejected rather than truncated.
+			 */
+			char *endp;
+			long blk = strtol(optarg, &endp, 10);
+
+			if (endp == optarg || *endp != '\0' || blk < 1 || blk > 5)
+				err_exit(0, "Minimum Dedupe block size must be in range 1 (4k) - 5 (64k)\n");
+			/* Only store the value once it is known to be in range. */
+			rab_blk_size = (int)blk;
+			break;
+		    }
+
 		    case 'p':
 			pipe_mode = 1;
 			break;
diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c
index 67b9821..4da4cbd 100755
--- a/rabin/rabin_dedup.c
+++ b/rabin/rabin_dedup.c
@@ -88,6 +88,9 @@ rabin_min_blksz(uint64_t chunksize, int rab_blk_sz, const char *algo, int delta_
 	uint32_t min_blk;
 
 	min_blk = 1 << (rab_blk_sz + RAB_BLK_MIN_BITS);
+	if (rab_blk_sz > 1)
+		return (min_blk);
+
 	if (((memcmp(algo, "lzma", 4) == 0 || memcmp(algo, "adapt", 5) == 0) &&
 	      chunksize <= LZMA_WINDOW_MAX) || delta_flag) {
 		if (memcmp(algo, "lzfx", 4) == 0 || memcmp(algo, "lz4", 3) == 0 ||
@@ -220,6 +223,7 @@ create_rabin_context(uint64_t chunksize, uint64_t real_chunksize, int rab_blk_sz
 	 * x * polynomial_pow can we written as x << RAB_POLYNOMIAL_WIN_SIZE
 	 */
 
+	slab_cache_add(sizeof (rabin_blockentry_t));
 	ctx->current_window_data = current_window_data;
 	ctx->real_chunksize = real_chunksize;
 	reset_rabin_context(ctx);
@@ -408,7 +412,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 		 * if (fplist[fpos[1]] > fplist[fpos[0]]) fpos[0] = fpos[1];
 		 */
 		fpos[0] = fpos[(fplist[fpos[1]] > fplist[fpos[0]])];
-		if (len1 == SKETCH_BASIC_BLOCK_SZ) {
+		if (len1 == SKETCH_BASIC_BLOCK_SZ && ctx->delta_flag) {
 			uint32_t p1, p2, p3;
 			/*
 			 * Compute the super sketch value by summing all the representative
@@ -460,9 +464,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 			ctx->blocks[blknum]->crc = XXH_strong32(buf1+last_offset, length, 0);
 
 			// Accumulate the 2 sketch values into a combined similarity checksum
-			ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
-			ctx->blocks[blknum]->mean_n_length = cur_sketch / j;
-			memset(fplist, 0, fplist_sz);
+			if (ctx->delta_flag) {
+				ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
+				ctx->blocks[blknum]->mean_n_length = cur_sketch / j;
+				memset(fplist, 0, fplist_sz);
+			} else {
+				ctx->blocks[blknum]->cksum_n_offset = 0;
+				ctx->blocks[blknum]->mean_n_length = 0;
+			}
 			fpos[0] = 0;
 			len1 = 0;
 			cur_sketch = 0;
@@ -498,9 +507,14 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
 			ctx->blocks[blknum]->ref = 0;
 			ctx->blocks[blknum]->similar = 0;
 
-			j = (j > 0 ? j:1);
-			ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
-			ctx->blocks[blknum]->mean_n_length = cur_sketch / j;
+			if (ctx->delta_flag) {
+				j = (j > 0 ? j:1);
+				ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
+				ctx->blocks[blknum]->mean_n_length = cur_sketch / j;
+			} else {
+				ctx->blocks[blknum]->cksum_n_offset = 0;
+				ctx->blocks[blknum]->mean_n_length = 0;
+			}
 			ctx->blocks[blknum]->crc = XXH_strong32(buf1+last_offset, ctx->blocks[blknum]->length, 0);
 			blknum++;
 			last_offset = *size;
@@ -771,6 +785,7 @@ rabin_inverse_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size)
 	sz = 0;
 	ctx->valid = 1;
 
+	slab_cache_add(sizeof (rabin_blockentry_t));
 	for (blk = 0; blk < blknum; blk++) {
 		if (ctx->blocks[blk] == 0)
 			ctx->blocks[blk] = (rabin_blockentry_t *)slab_alloc(NULL, sizeof (rabin_blockentry_t));