Use libbsc/ppmd for BMP files.

Fix extension-based hashing.
Do not append the .pz extension to filenames that already have it.
Some code formatting changes.
This commit is contained in:
Moinak Ghosh 2013-11-28 22:42:51 +05:30
parent bd530e3393
commit 306f145f22
9 changed files with 74 additions and 32 deletions

View file

@ -133,7 +133,8 @@ DELTA2OBJS = $(DELTA2SRCS:.c=.o)
ARCHIVESRCS = archive/pc_archive.c archive/pc_arc_filter.c utils/phash/phash.c \
utils/phash/lookupa.c utils/phash/recycle.c
ARCHIVEHDRS = pcompress.h utils/utils.h archive/pc_archive.h utils/phash/standard.h \
utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h
utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h \
utils/phash/extensions.h
ARCHIVEOBJS = $(ARCHIVESRCS:.c=.o)
PJPGSRCS = filters/packjpg/aricoder.cpp filters/packjpg/bitops.cpp filters/packjpg/packjpg.cpp \

View file

@ -269,14 +269,16 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
rv = ADAPT_COMPRESS_LZ4;
lz4_count++;
} else if (adat->adapt_mode == 2 && (PC_TYPE(btype) == TYPE_BINARY)) {
} else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY &&
PC_SUBTYPE(btype) != TYPE_BMP) {
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
if (rv < 0)
return (rv);
rv = ADAPT_COMPRESS_LZMA;
lzma_count++;
} else if (adat->adapt_mode == 1 && (PC_TYPE(btype) == TYPE_BINARY)) {
} else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY &&
PC_SUBTYPE(btype) != TYPE_BMP) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
if (rv < 0)
return (rv);
@ -285,7 +287,8 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
} else {
#ifdef ENABLE_PC_LIBBSC
if (adat->bsc_data && PC_SUBTYPE(btype) == TYPE_MARKUP) {
if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP ||
PC_SUBTYPE(btype) == TYPE_BMP)) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
if (rv < 0)
return (rv);

View file

@ -1355,7 +1355,7 @@ init_archive_mod() {
* comparison.
*/
for (j = 0; j < extlist[i].len; j++)
extnum = (extnum << 1) | extlist[i].ext[j];
extnum = (extnum << 8) | extlist[i].ext[j];
exthtab[slot].extnum = extnum;
exthtab[slot].type = extlist[i].type;
}
@ -1407,7 +1407,7 @@ detect_type_by_ext(const char *path, int pathlen)
* Pack given extension into 64-bit integer.
*/
for (i = 0; i < len; i++)
extnum = (extnum << 1) | tolower(ext[i]);
extnum = (extnum << 8) | tolower(ext[i]);
if (exthtab[slot].extnum == extnum)
return (exthtab[slot].type);
out:
@ -1503,6 +1503,13 @@ detect_type_by_data(uchar_t *buf, size_t len)
}
}
// BMP Files
if (buf[0] == 'B' && buf[1] == 'M') {
uint16_t typ = LE16(U16_P(buf + 14));
if (typ == 12 || typ == 64 || typ == 40 || typ == 128)
return (TYPE_BINARY|TYPE_BMP);
}
// MSDOS COM types
if (buf[0] == 0xe9 || buf[0] == 0xeb) {
if (LE16(U16_P(buf + 0x1fe)) == 0xaa55)

View file

@ -180,12 +180,15 @@ show_compression_stats(pc_ctx_t *pctx)
log_msg(LOG_INFO, 0, "No statistics to display.");
} else {
log_msg(LOG_INFO, 0, "Best compressed chunk : %s(%.2f%%)",
bytes_to_size(pctx->smallest_chunk), (double)pctx->smallest_chunk/(double)pctx->chunksize*100);
bytes_to_size(pctx->smallest_chunk),
(double)pctx->smallest_chunk/(double)pctx->chunksize*100);
log_msg(LOG_INFO, 0, "Worst compressed chunk : %s(%.2f%%)",
bytes_to_size(pctx->largest_chunk), (double)pctx->largest_chunk/(double)pctx->chunksize*100);
bytes_to_size(pctx->largest_chunk),
(double)pctx->largest_chunk/(double)pctx->chunksize*100);
pctx->avg_chunk /= pctx->chunk_num;
log_msg(LOG_INFO, 0, "Avg compressed chunk : %s(%.2f%%)\n",
bytes_to_size(pctx->avg_chunk), (double)pctx->avg_chunk/(double)pctx->chunksize*100);
bytes_to_size(pctx->avg_chunk),
(double)pctx->avg_chunk/(double)pctx->chunksize*100);
}
}
@ -202,7 +205,8 @@ show_compression_stats(pc_ctx_t *pctx)
*/
static int
preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
algo_props_t *props)
{
uchar_t *dest = (uchar_t *)dst, type = 0;
int64_t result;
@ -233,7 +237,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
}
}
if (pctx->lzp_preprocess) {
if (pctx->lzp_preprocess && PC_SUBTYPE(btype) != TYPE_BMP) {
int hashsize;
hashsize = lzp_hash_size(level);
@ -249,7 +253,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
}
}
if (pctx->enable_delta2_encode && props->delta2_span > 0) {
if (pctx->enable_delta2_encode && props->delta2_span > 0 &&
PC_SUBTYPE(btype) != TYPE_DNA_SEQ && PC_SUBTYPE(btype) != TYPE_BMP) {
_dstlen = fromlen;
result = delta2_encode((uchar_t *)from, fromlen, to,
&_dstlen, props->delta2_span);
@ -284,7 +289,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
if (result > -1 && _dstlen < srclen) {
*dest |= PREPROC_COMPRESSED;
*dstlen = _dstlen + 9;
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n", get_mb_s(srclen, strt, en)));
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
get_mb_s(srclen, strt, en)));
} else {
DEBUG_STAT_EN(fprintf(stderr, "Chunk did not compress.\n"));
memcpy(dest+1, src, srclen);
@ -304,7 +310,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
static int
preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
algo_props_t *props)
{
uchar_t *sorc = (uchar_t *)src, type;
int64_t result;
@ -323,7 +330,8 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
DEBUG_STAT_EN(en = get_wtime_millis());
if (result < 0) return (result);
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n", get_mb_s(srclen, strt, en)));
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
get_mb_s(srclen, strt, en)));
memcpy(src, dst, *dstlen);
srclen = *dstlen;
} else {
@ -368,7 +376,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
}
}
if (!(type & (PREPROC_COMPRESSED | PREPROC_TYPE_DELTA2 | PREPROC_TYPE_LZP | PREPROC_TYPE_DISPACK))
if (!(type & (PREPROC_COMPRESSED|PREPROC_TYPE_DELTA2|PREPROC_TYPE_LZP|PREPROC_TYPE_DISPACK))
&& type > 0) {
log_msg(LOG_ERR, 0, "Invalid preprocessing flags: %d", type);
return (-1);
@ -435,7 +443,8 @@ redo:
DEBUG_STAT_EN(strt = get_wtime_millis());
len = pctx->mac_bytes;
deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes, pctx->mac_bytes);
deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes,
pctx->mac_bytes);
memset(tdat->compressed_chunk + pctx->cksum_bytes, 0, pctx->mac_bytes);
hmac_reinit(&tdat->chunk_hmac);
hmac_update(&tdat->chunk_hmac, (uchar_t *)&tdat->len_cmp_be, sizeof (tdat->len_cmp_be));
@ -641,7 +650,8 @@ redo:
* If it does not match we set length of chunk to 0 to indicate
* exit to the writer thread.
*/
compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk, _chunksize, tdat->cksum_mt, 1);
compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk,
_chunksize, tdat->cksum_mt, 1);
if (memcmp(checksum, tdat->checksum, pctx->cksum_bytes) != 0) {
tdat->len_cmp = 0;
log_msg(LOG_ERR, 0, "ERROR: Chunk %d, checksums do not match.", tdat->id);
@ -887,7 +897,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
if (flags & FLAG_DEDUP_FIXED) {
if (version > 7) {
if (pctx->pipe_mode) {
log_msg(LOG_ERR, 0, "Global Deduplication is not supported with pipe mode.");
log_msg(LOG_ERR, 0, "Global Deduplication is not "
"supported with pipe mode.");
err = 1;
goto uncomp_done;
}
@ -1129,7 +1140,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
free(salt1);
memset(n1, 0, noncelen);
if (memcmp(hdr_hash2, hdr_hash1, pctx->mac_bytes) != 0) {
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password.");
log_msg(LOG_ERR, 0, "Header verification failed! File "
"tampered or wrong password.");
UNCOMP_BAIL;
}
} else if (version >= 5) {
@ -1158,7 +1170,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
d2 = htonl(level);
crc2 = lzma_crc32((uchar_t *)&d2, sizeof (level), crc2);
if (crc1 != crc2) {
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password.");
log_msg(LOG_ERR, 0, "Header verification failed! File tampered "
"or wrong password.");
UNCOMP_BAIL;
}
}
@ -1169,7 +1182,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
strcat(pctx->archive_temp_file, "/.data");
if ((pctx->archive_temp_fd = open(pctx->archive_temp_file,
O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) {
log_msg(LOG_ERR, 1, "Cannot open temporary data file in target directory.");
log_msg(LOG_ERR, 1, "Cannot open temporary data file in "
"target directory.");
UNCOMP_BAIL;
}
add_fname(pctx->archive_temp_file);
@ -2883,7 +2897,8 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
case 'e':
pctx->encrypt_type = get_crypto_alg(optarg);
if (pctx->encrypt_type == 0) {
log_msg(LOG_ERR, 0, "Invalid encryption algorithm. Should be AES or SALSA20.", optarg);
log_msg(LOG_ERR, 0, "Invalid encryption algorithm. "
"Should be AES or SALSA20.", optarg);
return (1);
}
break;
@ -3098,12 +3113,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
pctx->to_filename = NULL;
} else {
strcpy(apath, argv[my_optind]);
if (!endswith(apath, COMP_EXTN))
strcat(apath, COMP_EXTN);
pctx->to_filename = realpath(apath, NULL);
/* Check if compressed file exists */
if (pctx->to_filename != NULL) {
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename);
log_msg(LOG_ERR, 0, "Compressed file %s exists",
pctx->to_filename);
free((void *)(pctx->to_filename));
return (1);
}
@ -3111,12 +3128,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
}
} else {
strcpy(apath, pctx->filename);
if (!endswith(apath, COMP_EXTN))
strcat(apath, COMP_EXTN);
pctx->to_filename = realpath(apath, NULL);
/* Check if compressed file exists */
if (pctx->to_filename != NULL) {
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename);
log_msg(LOG_ERR, 0, "Compressed file %s exists",
pctx->to_filename);
free((void *)(pctx->to_filename));
return (1);
}

View file

@ -132,7 +132,6 @@ struct ext_entry {
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
{"xcf" , TYPE_BINARY, 3},
{"mo" , TYPE_BINARY, 2},
{"bmp" , TYPE_BINARY, 3},
{"pyo" , TYPE_BINARY, 3},
{"pyc" , TYPE_BINARY, 3},
{"wav" , TYPE_BINARY, 3},

View file

@ -122,7 +122,6 @@ pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
xcf,TYPE_BINARY
mo,TYPE_BINARY
bmp,TYPE_BINARY
pyo,TYPE_BINARY
pyc,TYPE_BINARY
wav,TYPE_BINARY

View file

@ -19,7 +19,7 @@ ub1 tab[] = {
0,87,0,0,113,0,125,183,82,0,124,88,40,125,0,0,
124,0,168,125,0,125,0,40,0,82,125,113,113,125,116,0,
0,0,113,85,0,88,0,0,42,27,0,0,0,40,183,61,
0,0,0,0,0,111,17,0,87,125,0,0,166,91,0,0,
0,0,0,0,0,111,0,0,87,125,0,0,127,91,0,0,
};
/* The hash function */

View file

@ -8,7 +8,7 @@
extern ub1 tab[];
#define PHASHLEN 0x80 /* length of hash mapping table */
#define PHASHNKEYS 133 /* How many keys were hashed */
#define PHASHNKEYS 132 /* How many keys were hashed */
#define PHASHRANGE 256 /* Range any input might map to */
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */

View file

@ -38,6 +38,7 @@
#include <sys/param.h>
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include <cpuid.h>
#ifdef __cplusplus
@ -269,7 +270,8 @@ typedef enum {
TYPE_DNA_SEQ = 168,
TYPE_MJPEG = 176,
TYPE_AUDIO_COMPRESSED = 184,
TYPE_EXE64 = 192
TYPE_EXE64 = 192,
TYPE_BMP = 200
} data_type_t;
/*
@ -384,6 +386,18 @@ roundup_pow_two(unsigned int v) {
return (v);
}
/*
 * Check whether the string haystack ends with the string needle.
 *
 * Returns 1 when the trailing strlen(needle) bytes of haystack are equal to
 * needle, and 0 otherwise. An empty needle matches every haystack. The
 * comparison is byte-wise and therefore case-sensitive. Both arguments must
 * be NUL-terminated strings; neither one is modified, so string literals
 * and const-qualified buffers can be passed directly.
 */
static inline int
endswith(const char *haystack, const char *needle) {
	size_t len = strlen(haystack);
	size_t nlen = strlen(needle);

	/* A suffix longer than the string itself can never match. */
	if (nlen > len)
		return (0);

	/* Compare only the tail of haystack that lines up with needle. */
	return (memcmp(haystack + (len - nlen), needle, nlen) == 0);
}
#ifdef __cplusplus
}
#endif