/*
 * Adaptive-mode tallies: adapt_compress() bumps exactly one of these per
 * chunk, according to which algorithm's output it selected for that chunk.
 */
static unsigned int lzma_count = 0;
static unsigned int bzip2_count = 0;
static unsigned int ppmd_count = 0;

/*
 * Report and reset the adaptive-mode chunk counters.
 *
 * show: when non-zero, the three counters are written to stderr before
 *       being cleared.
 *
 * The counters are zeroed on every call, whether or not they were shown.
 */
void
adapt_stats(int show)
{
	if (show != 0) {
		fprintf(stderr,
		    "Adaptive mode stats:\n"
		    " BZIP2 chunk count: %u\n"
		    " PPMd chunk count: %u\n"
		    " LZMA chunk count: %u\n\n",
		    bzip2_count, ppmd_count, lzma_count);
	}

	/* Start the next run from a clean slate. */
	lzma_count = bzip2_count = ppmd_count = 0;
}
/*
 * Statistics hook for the bzip2 algorithm. No per-algorithm statistics are
 * collected, so this is a no-op that only exists to fit the stats function
 * pointer signature.
 *
 * show: ignored.
 */
void
bzip2_stats(int show)
{
	(void) show;
}

/*
 * Statistics hook for the lzma algorithm. No per-algorithm statistics are
 * collected, so this is a no-op that only exists to fit the stats function
 * pointer signature.
 *
 * show: ignored.
 */
void
lzma_stats(int show)
{
	(void) show;
}
*/ diff --git a/main.c b/main.c index 6d35a6a..801b652 100644 --- a/main.c +++ b/main.c @@ -67,14 +67,20 @@ static compress_func_ptr _compress_func; static compress_func_ptr _decompress_func; static init_func_ptr _init_func; static deinit_func_ptr _deinit_func; +static stats_func_ptr _stats_func; + static int main_cancel; static int adapt_mode = 0; static int pipe_mode = 0; static int nthreads = 0; -static int hide_stats = 1; +static int hide_mem_stats = 1; +static int hide_cmp_stats = 1; static unsigned int chunk_num; +static uint64_t largest_chunk, smallest_chunk, avg_chunk; static const char *exec_name; static const char *algo = NULL; +static int do_compress = 0; +static int do_uncompress = 0; static void usage(void) @@ -106,10 +112,27 @@ usage(void) "3) To operate as a pipe, read from stdin and write to stdout:\n" " %s <-c ...|-d ...> -p\n" "4) Number of threads can optionally be specified: -t <1 - 256 count>\n" - "5) Pass '-M' to display memory allocator statistics\n\n", + "5) Pass '-M' to display memory allocator statistics\n" + "6) Pass '-C' to display compression statistics\n\n", exec_name, exec_name, exec_name); } +void +show_compression_stats(uint64_t chunksize) +{ + chunk_num++; + fprintf(stderr, "\nCompression Statistics\n"); + fprintf(stderr, "======================\n"); + fprintf(stderr, "Total chunks : %u\n", chunk_num); + fprintf(stderr, "Best compressed chunk : %s(%.2f%%)\n", + bytes_to_size(smallest_chunk), (double)smallest_chunk/(double)chunksize*100); + fprintf(stderr, "Worst compressed chunk : %s(%.2f%%)\n", + bytes_to_size(largest_chunk), (double)largest_chunk/(double)chunksize*100); + avg_chunk /= chunk_num; + fprintf(stderr, "Avg compressed chunk : %s(%.2f%%)\n\n", + bytes_to_size(avg_chunk), (double)avg_chunk/(double)chunksize*100); +} + /* * This routine is called in multiple threads. Calls the decompression handler * as encoded in the file header. 
For adaptive mode the handler adapt_decompress() @@ -298,6 +321,10 @@ start_decompress(const char *filename, const char *to_filename) nprocs = nthreads; fprintf(stderr, "Scaling to %d threads\n", nprocs); + slab_cache_add(compressed_chunksize + CHDR_SZ); + slab_cache_add(chunksize); + slab_cache_add(sizeof (struct cmp_data)); + dary = (struct cmp_data **)slab_alloc(NULL, sizeof (struct cmp_data *) * nprocs); for (i = 0; i < nprocs; i++) { dary[i] = (struct cmp_data *)slab_alloc(NULL, sizeof (struct cmp_data)); @@ -384,6 +411,12 @@ start_decompress(const char *filename, const char *to_filename) break; } + if (tdat->len_cmp > largest_chunk) + largest_chunk = tdat->len_cmp; + if (tdat->len_cmp < smallest_chunk) + smallest_chunk = tdat->len_cmp; + avg_chunk += tdat->len_cmp; + /* * Now read compressed chunk including the crc64 checksum. */ @@ -449,7 +482,8 @@ uncomp_done: if (uncompfd != -1) close(uncompfd); } - slab_cleanup(hide_stats); + if (!hide_cmp_stats) show_compression_stats(chunksize); + slab_cleanup(hide_mem_stats); } static void * @@ -541,6 +575,13 @@ repeat: goto do_cancel; } + if (do_compress) { + if (tdat->len_cmp > largest_chunk) + largest_chunk = tdat->len_cmp; + if (tdat->len_cmp < smallest_chunk) + smallest_chunk = tdat->len_cmp; + avg_chunk += tdat->len_cmp; + } wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp); if (unlikely(wbytes != tdat->len_cmp)) { int i; @@ -747,6 +788,9 @@ start_compress(const char *filename, uint64_t chunksize, int level) chunk_num = 0; np = 0; bail = 0; + largest_chunk = 0; + smallest_chunk = chunksize; + avg_chunk = 0; /* * Read the first chunk into a spare buffer (a simple double-buffering). 
@@ -870,7 +914,9 @@ comp_done: if (uncompfd != -1) close(uncompfd); } - slab_cleanup(hide_stats); + if (!hide_cmp_stats) show_compression_stats(chunksize); + _stats_func(!hide_cmp_stats); + slab_cleanup(hide_mem_stats); } /* @@ -889,6 +935,7 @@ init_algo(const char *algo, int bail) _decompress_func = zlib_decompress; _init_func = zlib_init; _deinit_func = NULL; + _stats_func = zlib_stats; rv = 0; } else if (memcmp(algorithm, "lzma", 4) == 0) { @@ -896,6 +943,7 @@ init_algo(const char *algo, int bail) _decompress_func = lzma_decompress; _init_func = lzma_init; _deinit_func = lzma_deinit; + _stats_func = lzma_stats; rv = 0; } else if (memcmp(algorithm, "bzip2", 5) == 0) { @@ -903,6 +951,7 @@ init_algo(const char *algo, int bail) _decompress_func = bzip2_decompress; _init_func = bzip2_init; _deinit_func = NULL; + _stats_func = bzip2_stats; rv = 0; } else if (memcmp(algorithm, "ppmd", 4) == 0) { @@ -910,6 +959,7 @@ init_algo(const char *algo, int bail) _decompress_func = ppmd_decompress; _init_func = ppmd_init; _deinit_func = ppmd_deinit; + _stats_func = ppmd_stats; rv = 0; /* adapt2 and adapt ordering of the checks matters here. 
*/ @@ -918,6 +968,7 @@ init_algo(const char *algo, int bail) _decompress_func = adapt_decompress; _init_func = adapt2_init; _deinit_func = adapt_deinit; + _stats_func = adapt_stats; adapt_mode = 1; rv = 0; @@ -926,6 +977,7 @@ init_algo(const char *algo, int bail) _decompress_func = adapt_decompress; _init_func = adapt_init; _deinit_func = adapt_deinit; + _stats_func = adapt_stats; adapt_mode = 1; rv = 0; } @@ -940,14 +992,12 @@ main(int argc, char *argv[]) char *to_filename = NULL; ssize_t chunksize = DEFAULT_CHUNKSIZE; int opt, level, num_rem; - int do_compress = 0; - int do_uncompress = 0; exec_name = get_execname(argv[0]); level = 6; slab_init(); - while ((opt = getopt(argc, argv, "dc:s:l:pt:M")) != -1) { + while ((opt = getopt(argc, argv, "dc:s:l:pt:MC")) != -1) { int ovr; switch (opt) { @@ -992,7 +1042,11 @@ main(int argc, char *argv[]) break; case 'M': - hide_stats = 0; + hide_mem_stats = 0; + break; + + case 'C': + hide_cmp_stats = 0; break; case '?': diff --git a/pcompress.h b/pcompress.h index e101c02..180ca43 100644 --- a/pcompress.h +++ b/pcompress.h @@ -51,9 +51,10 @@ extern "C" { typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst, size_t *destlen, int level, void *data); -/* Pointer type for algo specific init/deinit functions. */ +/* Pointer type for algo specific init/deinit/stats functions. 
/*
 * Statistics hook for the ppmd algorithm. No per-algorithm statistics are
 * collected, so this is a no-op.
 *
 * show: ignored.
 */
void
ppmd_stats(int show)
{
	(void) show;
}

/*
 * Convert number of bytes into human readable format.
 *
 * The value is divided down by 1024 until it is below 1024 or the largest
 * supported unit (TB) is reached; the fractional part is truncated, so
 * 1536 yields "1 KB".
 *
 * bytes: the byte count to format.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call. The result must be consumed before calling again, and the function
 * is not reentrant.
 */
char *
bytes_to_size(uint64_t bytes)
{
	static char num[20];
	static const char *const suffix[] = { "B", "KB", "MB", "GB", "TB" };
	const size_t last = sizeof (suffix) / sizeof (suffix[0]) - 1;
	size_t unit = 0;

	/*
	 * Stop at TB: the largest possible result, "16777215 TB", then fits
	 * the buffer. Printing such values as raw bytes needs up to 23
	 * characters and would overrun num[].
	 */
	while (unit < last && bytes >= 1024) {
		bytes /= 1024;
		unit++;
	}

	/*
	 * uint64_t is not necessarily unsigned long long, so cast to match
	 * %llu. snprintf bounds the write to the buffer.
	 */
	snprintf(num, sizeof (num), "%llu %s",
	    (unsigned long long)bytes, suffix[unit]);
	return (num);
}

/*
 * Statistics hook for the zlib algorithm. No per-algorithm statistics are
 * collected, so this is a no-op.
 *
 * show: ignored.
 */
void
zlib_stats(int show)
{
	(void) show;
}