Add basic compression statistics and new option to display them.

This commit is contained in:
Moinak Ghosh 2012-05-31 21:36:33 +05:30
parent b3fe43da1d
commit e46d3d10b3
9 changed files with 148 additions and 9 deletions

View file

@ -35,6 +35,10 @@
#include <pcompress.h>
#include <allocator.h>
/*
 * Adaptive-mode chunk counters, one per candidate algorithm.
 * adapt_compress() selects one of them through its 'inc' pointer and adds 1
 * to it; adapt_init() and adapt_stats() set all three back to zero.
 * NOTE(review): these are plain unsigned ints with no visible
 * synchronization -- confirm whether adapt_compress() can run concurrently
 * in several threads.
 */
static unsigned int lzma_count = 0;
static unsigned int bzip2_count = 0;
static unsigned int ppmd_count = 0;
extern int lzma_compress(void *src, size_t srclen, void *dst,
size_t *destlen, int level, void *data);
extern int bzip2_compress(void *src, size_t srclen, void *dst,
@ -60,6 +64,20 @@ struct adapt_data {
int adapt_mode;
};
/*
 * Report and reset the adaptive-mode chunk counters.
 *
 * show: when non-zero, the BZIP2, PPMd and LZMA chunk counts are written to
 *       stderr before being cleared; when zero nothing is printed.
 *
 * The three counters are zeroed on every call, whether or not they were
 * printed.
 */
void
adapt_stats(int show)
{
	FILE *out = stderr;

	if (show != 0) {
		fprintf(out, "Adaptive mode stats:\n");
		fprintf(out, " BZIP2 chunk count: %u\n", bzip2_count);
		fprintf(out, " PPMd chunk count: %u\n", ppmd_count);
		fprintf(out, " LZMA chunk count: %u\n\n", lzma_count);
	}

	/* Start the next run from a clean slate. */
	ppmd_count = 0;
	bzip2_count = 0;
	lzma_count = 0;
}
int
adapt_init(void **data, int *level, ssize_t chunksize)
{
@ -74,6 +92,9 @@ adapt_init(void **data, int *level, ssize_t chunksize)
*data = adat;
if (*level > 9) *level = 9;
}
lzma_count = 0;
bzip2_count = 0;
ppmd_count = 0;
return (rv);
}
@ -120,6 +141,7 @@ adapt_compress(void *src, size_t srclen, void *dst,
{
struct adapt_data *adat = (struct adapt_data *)(data);
int rv, rv1, rv2;
unsigned int *inc;
size_t dst2len, dst3len, smaller_dstlen;
uchar_t *dst2, *smaller_dst, *larger_dst;
void *tmp;
@ -131,6 +153,7 @@ adapt_compress(void *src, size_t srclen, void *dst,
}
rv = COMPRESS_LZMA;
inc = &lzma_count;
dst2len = *dstlen;
dst3len = *dstlen;
rv1 = bzip2_compress(src, srclen, dst2, &dst2len, level, data);
@ -143,6 +166,7 @@ adapt_compress(void *src, size_t srclen, void *dst,
larger_dst = dst;
smaller_dstlen = dst2len;
smaller_dst = dst2;
inc = &bzip2_count;
} else {
larger_dst = dst2;
smaller_dstlen = *dstlen;
@ -157,9 +181,11 @@ adapt_compress(void *src, size_t srclen, void *dst,
rv = COMPRESS_PPMD;
smaller_dstlen = dst2len;
smaller_dst = larger_dst;
inc = &ppmd_count;
}
}
*inc += 1;
if (smaller_dst != dst) {
memcpy(dst, smaller_dst, smaller_dstlen);
*dstlen = smaller_dstlen;

View file

@ -38,6 +38,11 @@ slab_alloc_i(void *p, int items, int size) {
return (ptr);
}
/*
 * Statistics hook for bzip2. This implementation is an empty placeholder:
 * it prints nothing and ignores 'show'.
 */
void
bzip2_stats(int show)
{
	(void)show;	/* intentionally unused */
}
int
bzip2_init(void **data, int *level, ssize_t chunksize)
{

View file

@ -43,6 +43,11 @@ static ISzAlloc g_Alloc = {
NULL
};
/*
 * Statistics hook for LZMA. This implementation is an empty placeholder:
 * it prints nothing and ignores 'show'.
 */
void
lzma_stats(int show)
{
	(void)show;	/* intentionally unused */
}
/*
* The two functions below are not thread-safe, by design.
*/

70
main.c
View file

@ -67,14 +67,20 @@ static compress_func_ptr _compress_func;
static compress_func_ptr _decompress_func;
static init_func_ptr _init_func;
static deinit_func_ptr _deinit_func;
/* Statistics hook of the selected algorithm; assigned in init_algo(). */
static stats_func_ptr _stats_func;
static int main_cancel;
static int adapt_mode = 0;
static int pipe_mode = 0;
static int nthreads = 0;
static int hide_stats = 1;
/* Set to 0 by the '-M' option; handed to slab_cleanup(). */
static int hide_mem_stats = 1;
/* Set to 0 by the '-C' option; when 0 the compression statistics are shown. */
static int hide_cmp_stats = 1;
static unsigned int chunk_num;
/*
 * Compressed chunk size tracking, fed from tdat->len_cmp: the largest and
 * smallest sizes seen, and a running total kept in avg_chunk that
 * show_compression_stats() divides by the chunk count to get the average.
 */
static uint64_t largest_chunk, smallest_chunk, avg_chunk;
static const char *exec_name;
static const char *algo = NULL;
/* File-scope mode flags; the writer code checks do_compress before updating the chunk statistics. */
static int do_compress = 0;
static int do_uncompress = 0;
static void
usage(void)
@ -106,10 +112,27 @@ usage(void)
"3) To operate as a pipe, read from stdin and write to stdout:\n"
" %s <-c ...|-d ...> -p\n"
"4) Number of threads can optionally be specified: -t <1 - 256 count>\n"
"5) Pass '-M' to display memory allocator statistics\n\n",
"5) Pass '-M' to display memory allocator statistics\n"
"6) Pass '-C' to display compression statistics\n\n",
exec_name, exec_name, exec_name);
}
/*
 * Print the compression statistics summary to stderr.
 *
 * chunksize: the uncompressed chunk size; each reported size is also shown
 *            as a percentage of this value.
 *
 * Side effects on file-scope state: chunk_num is incremented by one before
 * it is reported, and avg_chunk (a running total on entry) is replaced by
 * the total divided by chunk_num. Calling this twice therefore does not
 * print the same numbers again.
 * NOTE(review): the increment suggests chunk_num holds a zero-based index
 * of the last chunk on entry -- confirm against the callers.
 */
void
show_compression_stats(uint64_t chunksize)
{
	FILE *out = stderr;
	const double whole = (double)chunksize;

	chunk_num += 1;

	fprintf(out, "\nCompression Statistics\n");
	fprintf(out, "======================\n");
	fprintf(out, "Total chunks : %u\n", chunk_num);

	fprintf(out, "Best compressed chunk : %s(%.2f%%)\n",
	    bytes_to_size(smallest_chunk),
	    (double)smallest_chunk / whole * 100);

	fprintf(out, "Worst compressed chunk : %s(%.2f%%)\n",
	    bytes_to_size(largest_chunk),
	    (double)largest_chunk / whole * 100);

	/* Turn the accumulated total into the per-chunk average. */
	avg_chunk = avg_chunk / chunk_num;

	fprintf(out, "Avg compressed chunk : %s(%.2f%%)\n\n",
	    bytes_to_size(avg_chunk),
	    (double)avg_chunk / whole * 100);
}
/*
* This routine is called in multiple threads. Calls the decompression handler
* as encoded in the file header. For adaptive mode the handler adapt_decompress()
@ -298,6 +321,10 @@ start_decompress(const char *filename, const char *to_filename)
nprocs = nthreads;
fprintf(stderr, "Scaling to %d threads\n", nprocs);
slab_cache_add(compressed_chunksize + CHDR_SZ);
slab_cache_add(chunksize);
slab_cache_add(sizeof (struct cmp_data));
dary = (struct cmp_data **)slab_alloc(NULL, sizeof (struct cmp_data *) * nprocs);
for (i = 0; i < nprocs; i++) {
dary[i] = (struct cmp_data *)slab_alloc(NULL, sizeof (struct cmp_data));
@ -384,6 +411,12 @@ start_decompress(const char *filename, const char *to_filename)
break;
}
if (tdat->len_cmp > largest_chunk)
largest_chunk = tdat->len_cmp;
if (tdat->len_cmp < smallest_chunk)
smallest_chunk = tdat->len_cmp;
avg_chunk += tdat->len_cmp;
/*
* Now read compressed chunk including the crc64 checksum.
*/
@ -449,7 +482,8 @@ uncomp_done:
if (uncompfd != -1) close(uncompfd);
}
slab_cleanup(hide_stats);
if (!hide_cmp_stats) show_compression_stats(chunksize);
slab_cleanup(hide_mem_stats);
}
static void *
@ -541,6 +575,13 @@ repeat:
goto do_cancel;
}
if (do_compress) {
if (tdat->len_cmp > largest_chunk)
largest_chunk = tdat->len_cmp;
if (tdat->len_cmp < smallest_chunk)
smallest_chunk = tdat->len_cmp;
avg_chunk += tdat->len_cmp;
}
wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp);
if (unlikely(wbytes != tdat->len_cmp)) {
int i;
@ -747,6 +788,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
chunk_num = 0;
np = 0;
bail = 0;
largest_chunk = 0;
smallest_chunk = chunksize;
avg_chunk = 0;
/*
* Read the first chunk into a spare buffer (a simple double-buffering).
@ -870,7 +914,9 @@ comp_done:
if (uncompfd != -1) close(uncompfd);
}
slab_cleanup(hide_stats);
if (!hide_cmp_stats) show_compression_stats(chunksize);
_stats_func(!hide_cmp_stats);
slab_cleanup(hide_mem_stats);
}
/*
@ -889,6 +935,7 @@ init_algo(const char *algo, int bail)
_decompress_func = zlib_decompress;
_init_func = zlib_init;
_deinit_func = NULL;
_stats_func = zlib_stats;
rv = 0;
} else if (memcmp(algorithm, "lzma", 4) == 0) {
@ -896,6 +943,7 @@ init_algo(const char *algo, int bail)
_decompress_func = lzma_decompress;
_init_func = lzma_init;
_deinit_func = lzma_deinit;
_stats_func = lzma_stats;
rv = 0;
} else if (memcmp(algorithm, "bzip2", 5) == 0) {
@ -903,6 +951,7 @@ init_algo(const char *algo, int bail)
_decompress_func = bzip2_decompress;
_init_func = bzip2_init;
_deinit_func = NULL;
_stats_func = bzip2_stats;
rv = 0;
} else if (memcmp(algorithm, "ppmd", 4) == 0) {
@ -910,6 +959,7 @@ init_algo(const char *algo, int bail)
_decompress_func = ppmd_decompress;
_init_func = ppmd_init;
_deinit_func = ppmd_deinit;
_stats_func = ppmd_stats;
rv = 0;
/* The order of the adapt2 and adapt checks matters here. */
@ -918,6 +968,7 @@ init_algo(const char *algo, int bail)
_decompress_func = adapt_decompress;
_init_func = adapt2_init;
_deinit_func = adapt_deinit;
_stats_func = adapt_stats;
adapt_mode = 1;
rv = 0;
@ -926,6 +977,7 @@ init_algo(const char *algo, int bail)
_decompress_func = adapt_decompress;
_init_func = adapt_init;
_deinit_func = adapt_deinit;
_stats_func = adapt_stats;
adapt_mode = 1;
rv = 0;
}
@ -940,14 +992,12 @@ main(int argc, char *argv[])
char *to_filename = NULL;
ssize_t chunksize = DEFAULT_CHUNKSIZE;
int opt, level, num_rem;
int do_compress = 0;
int do_uncompress = 0;
exec_name = get_execname(argv[0]);
level = 6;
slab_init();
while ((opt = getopt(argc, argv, "dc:s:l:pt:M")) != -1) {
while ((opt = getopt(argc, argv, "dc:s:l:pt:MC")) != -1) {
int ovr;
switch (opt) {
@ -992,7 +1042,11 @@ main(int argc, char *argv[])
break;
case 'M':
hide_stats = 0;
hide_mem_stats = 0;
break;
case 'C':
hide_cmp_stats = 0;
break;
case '?':

View file

@ -51,9 +51,10 @@ extern "C" {
typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst,
size_t *destlen, int level, void *data);
/* Pointer type for algo specific init/deinit functions. */
/* Pointer type for algo specific init/deinit/stats functions. */
/* Algorithm init hook: receives the data pointer slot, the level and the chunk size. */
typedef int (*init_func_ptr)(void **data, int *level, ssize_t chunksize);
/* Algorithm deinit hook: receives the data pointer slot. */
typedef int (*deinit_func_ptr)(void **data);
/* Algorithm statistics hook: 'show' non-zero asks for the statistics to be printed. */
typedef void (*stats_func_ptr)(int show);
extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, size_t size,
@ -87,10 +88,17 @@ extern int lzma_init(void **data, int *level, ssize_t chunksize);
extern int ppmd_init(void **data, int *level, ssize_t chunksize);
extern int bzip2_init(void **data, int *level, ssize_t chunksize);
extern int zlib_init(void **data, int *level, ssize_t chunksize);
extern int adapt_deinit(void **data);
extern int lzma_deinit(void **data);
extern int ppmd_deinit(void **data);
/*
 * Per-algorithm statistics hooks (stats_func_ptr signature). 'show' non-zero
 * requests that the statistics be printed.
 */
extern void adapt_stats(int show);
extern void ppmd_stats(int show);
extern void lzma_stats(int show);
extern void bzip2_stats(int show);
extern void zlib_stats(int show);
/*
* Per-thread data structure for compression and decompression threads.
*/

View file

@ -56,6 +56,11 @@ static ISzAlloc g_Alloc = {
NULL
};
/*
 * Statistics hook for PPMd. This implementation is an empty placeholder:
 * it prints nothing and ignores 'show'.
 */
void
ppmd_stats(int show)
{
	(void)show;	/* intentionally unused */
}
int
ppmd_init(void **data, int *level, ssize_t chunksize)
{

30
utils.c
View file

@ -138,6 +138,36 @@ parse_numeric(ssize_t *val, const char *str)
return (ovr);
}
/*
 * Convert number of bytes into human readable format.
 *
 * bytes: the size to format.
 *
 * Values below 1 KB are printed as "<n> B". Values below 1 MB, 1 GB and 1 TB
 * are printed in whole KB, MB and GB respectively (integer division, so the
 * fractional part is dropped). Values of 1 TB and above fall back to a plain
 * byte count.
 *
 * Returns a pointer to a static buffer which is overwritten by the next
 * call; use or copy the result before calling again.
 */
char *
bytes_to_size(uint64_t bytes)
{
	/*
	 * UINT64_MAX has 20 digits; with " B" and the terminating NUL the
	 * byte-count fallback needs 23 bytes, so 32 leaves headroom.
	 */
	static char num[32];
	const uint64_t kilobyte = 1024;
	const uint64_t megabyte = kilobyte * 1024;
	const uint64_t gigabyte = megabyte * 1024;
	const uint64_t terabyte = gigabyte * 1024;
	const char *unit = "B";
	uint64_t value = bytes;

	if (bytes < kilobyte) {
		/* Already in bytes. */
	} else if (bytes < megabyte) {
		value = bytes / kilobyte;
		unit = "KB";
	} else if (bytes < gigabyte) {
		value = bytes / megabyte;
		unit = "MB";
	} else if (bytes < terabyte) {
		value = bytes / gigabyte;
		unit = "GB";
	}

	/*
	 * uint64_t need not be unsigned long long, so cast to match %llu.
	 * snprintf bounds the write to the buffer.
	 */
	snprintf(num, sizeof (num), "%llu %s", (unsigned long long)value, unit);
	return (num);
}
/*
* Read/Write helpers to ensure a full chunk is read or written
* unless there is an error.

View file

@ -95,6 +95,7 @@ typedef unsigned long uintptr_t;
extern void err_exit(int show_errno, const char *format, ...);
extern const char *get_execname(const char *);
extern int parse_numeric(ssize_t *val, const char *str);
extern char *bytes_to_size(uint64_t bytes);
extern ssize_t Read(int fd, void *buf, size_t count);
extern ssize_t Write(int fd, const void *buf, size_t count);

View file

@ -45,6 +45,11 @@ zlib_init(void **data, int *level, ssize_t chunksize)
return (0);
}
/*
 * Statistics hook for zlib. This implementation is an empty placeholder:
 * it prints nothing and ignores 'show'.
 */
void
zlib_stats(int show)
{
	(void)show;	/* intentionally unused */
}
static
void zerr(int ret)
{