Use libbsc/ppmd for BMP files.

Fix extension-based hashing.
Do not append the .pz extension to filenames that already have it.
Some code formatting changes.
This commit is contained in:
Moinak Ghosh 2013-11-28 22:42:51 +05:30
parent bd530e3393
commit 306f145f22
9 changed files with 74 additions and 32 deletions

View file

@ -133,7 +133,8 @@ DELTA2OBJS = $(DELTA2SRCS:.c=.o)
ARCHIVESRCS = archive/pc_archive.c archive/pc_arc_filter.c utils/phash/phash.c \ ARCHIVESRCS = archive/pc_archive.c archive/pc_arc_filter.c utils/phash/phash.c \
utils/phash/lookupa.c utils/phash/recycle.c utils/phash/lookupa.c utils/phash/recycle.c
ARCHIVEHDRS = pcompress.h utils/utils.h archive/pc_archive.h utils/phash/standard.h \ ARCHIVEHDRS = pcompress.h utils/utils.h archive/pc_archive.h utils/phash/standard.h \
utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h \
utils/phash/extensions.h
ARCHIVEOBJS = $(ARCHIVESRCS:.c=.o) ARCHIVEOBJS = $(ARCHIVESRCS:.c=.o)
PJPGSRCS = filters/packjpg/aricoder.cpp filters/packjpg/bitops.cpp filters/packjpg/packjpg.cpp \ PJPGSRCS = filters/packjpg/aricoder.cpp filters/packjpg/bitops.cpp filters/packjpg/packjpg.cpp \

View file

@ -269,14 +269,16 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
rv = ADAPT_COMPRESS_LZ4; rv = ADAPT_COMPRESS_LZ4;
lz4_count++; lz4_count++;
} else if (adat->adapt_mode == 2 && (PC_TYPE(btype) == TYPE_BINARY)) { } else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY &&
PC_SUBTYPE(btype) != TYPE_BMP) {
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data); rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
if (rv < 0) if (rv < 0)
return (rv); return (rv);
rv = ADAPT_COMPRESS_LZMA; rv = ADAPT_COMPRESS_LZMA;
lzma_count++; lzma_count++;
} else if (adat->adapt_mode == 1 && (PC_TYPE(btype) == TYPE_BINARY)) { } else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY &&
PC_SUBTYPE(btype) != TYPE_BMP) {
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL); rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
if (rv < 0) if (rv < 0)
return (rv); return (rv);
@ -285,7 +287,8 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
} else { } else {
#ifdef ENABLE_PC_LIBBSC #ifdef ENABLE_PC_LIBBSC
if (adat->bsc_data && PC_SUBTYPE(btype) == TYPE_MARKUP) { if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP ||
PC_SUBTYPE(btype) == TYPE_BMP)) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data); rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
if (rv < 0) if (rv < 0)
return (rv); return (rv);

View file

@ -1355,7 +1355,7 @@ init_archive_mod() {
* comparison. * comparison.
*/ */
for (j = 0; j < extlist[i].len; j++) for (j = 0; j < extlist[i].len; j++)
extnum = (extnum << 1) | extlist[i].ext[j]; extnum = (extnum << 8) | extlist[i].ext[j];
exthtab[slot].extnum = extnum; exthtab[slot].extnum = extnum;
exthtab[slot].type = extlist[i].type; exthtab[slot].type = extlist[i].type;
} }
@ -1407,7 +1407,7 @@ detect_type_by_ext(const char *path, int pathlen)
* Pack given extension into 64-bit integer. * Pack given extension into 64-bit integer.
*/ */
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
extnum = (extnum << 1) | tolower(ext[i]); extnum = (extnum << 8) | tolower(ext[i]);
if (exthtab[slot].extnum == extnum) if (exthtab[slot].extnum == extnum)
return (exthtab[slot].type); return (exthtab[slot].type);
out: out:
@ -1503,6 +1503,13 @@ detect_type_by_data(uchar_t *buf, size_t len)
} }
} }
// BMP Files
if (buf[0] == 'B' && buf[1] == 'M') {
uint16_t typ = LE16(U16_P(buf + 14));
if (typ == 12 || typ == 64 || typ == 40 || typ == 128)
return (TYPE_BINARY|TYPE_BMP);
}
// MSDOS COM types // MSDOS COM types
if (buf[0] == 0xe9 || buf[0] == 0xeb) { if (buf[0] == 0xe9 || buf[0] == 0xeb) {
if (LE16(U16_P(buf + 0x1fe)) == 0xaa55) if (LE16(U16_P(buf + 0x1fe)) == 0xaa55)

View file

@ -180,12 +180,15 @@ show_compression_stats(pc_ctx_t *pctx)
log_msg(LOG_INFO, 0, "No statistics to display."); log_msg(LOG_INFO, 0, "No statistics to display.");
} else { } else {
log_msg(LOG_INFO, 0, "Best compressed chunk : %s(%.2f%%)", log_msg(LOG_INFO, 0, "Best compressed chunk : %s(%.2f%%)",
bytes_to_size(pctx->smallest_chunk), (double)pctx->smallest_chunk/(double)pctx->chunksize*100); bytes_to_size(pctx->smallest_chunk),
(double)pctx->smallest_chunk/(double)pctx->chunksize*100);
log_msg(LOG_INFO, 0, "Worst compressed chunk : %s(%.2f%%)", log_msg(LOG_INFO, 0, "Worst compressed chunk : %s(%.2f%%)",
bytes_to_size(pctx->largest_chunk), (double)pctx->largest_chunk/(double)pctx->chunksize*100); bytes_to_size(pctx->largest_chunk),
(double)pctx->largest_chunk/(double)pctx->chunksize*100);
pctx->avg_chunk /= pctx->chunk_num; pctx->avg_chunk /= pctx->chunk_num;
log_msg(LOG_INFO, 0, "Avg compressed chunk : %s(%.2f%%)\n", log_msg(LOG_INFO, 0, "Avg compressed chunk : %s(%.2f%%)\n",
bytes_to_size(pctx->avg_chunk), (double)pctx->avg_chunk/(double)pctx->chunksize*100); bytes_to_size(pctx->avg_chunk),
(double)pctx->avg_chunk/(double)pctx->chunksize*100);
} }
} }
@ -202,7 +205,8 @@ show_compression_stats(pc_ctx_t *pctx)
*/ */
static int static int
preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen, preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props) void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
algo_props_t *props)
{ {
uchar_t *dest = (uchar_t *)dst, type = 0; uchar_t *dest = (uchar_t *)dst, type = 0;
int64_t result; int64_t result;
@ -233,7 +237,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
} }
} }
if (pctx->lzp_preprocess) { if (pctx->lzp_preprocess && PC_SUBTYPE(btype) != TYPE_BMP) {
int hashsize; int hashsize;
hashsize = lzp_hash_size(level); hashsize = lzp_hash_size(level);
@ -249,7 +253,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
} }
} }
if (pctx->enable_delta2_encode && props->delta2_span > 0) { if (pctx->enable_delta2_encode && props->delta2_span > 0 &&
PC_SUBTYPE(btype) != TYPE_DNA_SEQ && PC_SUBTYPE(btype) != TYPE_BMP) {
_dstlen = fromlen; _dstlen = fromlen;
result = delta2_encode((uchar_t *)from, fromlen, to, result = delta2_encode((uchar_t *)from, fromlen, to,
&_dstlen, props->delta2_span); &_dstlen, props->delta2_span);
@ -284,7 +289,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
if (result > -1 && _dstlen < srclen) { if (result > -1 && _dstlen < srclen) {
*dest |= PREPROC_COMPRESSED; *dest |= PREPROC_COMPRESSED;
*dstlen = _dstlen + 9; *dstlen = _dstlen + 9;
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n", get_mb_s(srclen, strt, en))); DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
get_mb_s(srclen, strt, en)));
} else { } else {
DEBUG_STAT_EN(fprintf(stderr, "Chunk did not compress.\n")); DEBUG_STAT_EN(fprintf(stderr, "Chunk did not compress.\n"));
memcpy(dest+1, src, srclen); memcpy(dest+1, src, srclen);
@ -304,7 +310,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
static int static int
preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen, preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen,
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props) void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
algo_props_t *props)
{ {
uchar_t *sorc = (uchar_t *)src, type; uchar_t *sorc = (uchar_t *)src, type;
int64_t result; int64_t result;
@ -323,7 +330,8 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
DEBUG_STAT_EN(en = get_wtime_millis()); DEBUG_STAT_EN(en = get_wtime_millis());
if (result < 0) return (result); if (result < 0) return (result);
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n", get_mb_s(srclen, strt, en))); DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
get_mb_s(srclen, strt, en)));
memcpy(src, dst, *dstlen); memcpy(src, dst, *dstlen);
srclen = *dstlen; srclen = *dstlen;
} else { } else {
@ -435,7 +443,8 @@ redo:
DEBUG_STAT_EN(strt = get_wtime_millis()); DEBUG_STAT_EN(strt = get_wtime_millis());
len = pctx->mac_bytes; len = pctx->mac_bytes;
deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes, pctx->mac_bytes); deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes,
pctx->mac_bytes);
memset(tdat->compressed_chunk + pctx->cksum_bytes, 0, pctx->mac_bytes); memset(tdat->compressed_chunk + pctx->cksum_bytes, 0, pctx->mac_bytes);
hmac_reinit(&tdat->chunk_hmac); hmac_reinit(&tdat->chunk_hmac);
hmac_update(&tdat->chunk_hmac, (uchar_t *)&tdat->len_cmp_be, sizeof (tdat->len_cmp_be)); hmac_update(&tdat->chunk_hmac, (uchar_t *)&tdat->len_cmp_be, sizeof (tdat->len_cmp_be));
@ -641,7 +650,8 @@ redo:
* If it does not match we set length of chunk to 0 to indicate * If it does not match we set length of chunk to 0 to indicate
* exit to the writer thread. * exit to the writer thread.
*/ */
compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk, _chunksize, tdat->cksum_mt, 1); compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk,
_chunksize, tdat->cksum_mt, 1);
if (memcmp(checksum, tdat->checksum, pctx->cksum_bytes) != 0) { if (memcmp(checksum, tdat->checksum, pctx->cksum_bytes) != 0) {
tdat->len_cmp = 0; tdat->len_cmp = 0;
log_msg(LOG_ERR, 0, "ERROR: Chunk %d, checksums do not match.", tdat->id); log_msg(LOG_ERR, 0, "ERROR: Chunk %d, checksums do not match.", tdat->id);
@ -887,7 +897,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
if (flags & FLAG_DEDUP_FIXED) { if (flags & FLAG_DEDUP_FIXED) {
if (version > 7) { if (version > 7) {
if (pctx->pipe_mode) { if (pctx->pipe_mode) {
log_msg(LOG_ERR, 0, "Global Deduplication is not supported with pipe mode."); log_msg(LOG_ERR, 0, "Global Deduplication is not "
"supported with pipe mode.");
err = 1; err = 1;
goto uncomp_done; goto uncomp_done;
} }
@ -1129,7 +1140,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
free(salt1); free(salt1);
memset(n1, 0, noncelen); memset(n1, 0, noncelen);
if (memcmp(hdr_hash2, hdr_hash1, pctx->mac_bytes) != 0) { if (memcmp(hdr_hash2, hdr_hash1, pctx->mac_bytes) != 0) {
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password."); log_msg(LOG_ERR, 0, "Header verification failed! File "
"tampered or wrong password.");
UNCOMP_BAIL; UNCOMP_BAIL;
} }
} else if (version >= 5) { } else if (version >= 5) {
@ -1158,7 +1170,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
d2 = htonl(level); d2 = htonl(level);
crc2 = lzma_crc32((uchar_t *)&d2, sizeof (level), crc2); crc2 = lzma_crc32((uchar_t *)&d2, sizeof (level), crc2);
if (crc1 != crc2) { if (crc1 != crc2) {
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password."); log_msg(LOG_ERR, 0, "Header verification failed! File tampered "
"or wrong password.");
UNCOMP_BAIL; UNCOMP_BAIL;
} }
} }
@ -1169,7 +1182,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
strcat(pctx->archive_temp_file, "/.data"); strcat(pctx->archive_temp_file, "/.data");
if ((pctx->archive_temp_fd = open(pctx->archive_temp_file, if ((pctx->archive_temp_fd = open(pctx->archive_temp_file,
O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) { O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) {
log_msg(LOG_ERR, 1, "Cannot open temporary data file in target directory."); log_msg(LOG_ERR, 1, "Cannot open temporary data file in "
"target directory.");
UNCOMP_BAIL; UNCOMP_BAIL;
} }
add_fname(pctx->archive_temp_file); add_fname(pctx->archive_temp_file);
@ -2883,7 +2897,8 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
case 'e': case 'e':
pctx->encrypt_type = get_crypto_alg(optarg); pctx->encrypt_type = get_crypto_alg(optarg);
if (pctx->encrypt_type == 0) { if (pctx->encrypt_type == 0) {
log_msg(LOG_ERR, 0, "Invalid encryption algorithm. Should be AES or SALSA20.", optarg); log_msg(LOG_ERR, 0, "Invalid encryption algorithm. "
"Should be AES or SALSA20.", optarg);
return (1); return (1);
} }
break; break;
@ -3098,12 +3113,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
pctx->to_filename = NULL; pctx->to_filename = NULL;
} else { } else {
strcpy(apath, argv[my_optind]); strcpy(apath, argv[my_optind]);
if (!endswith(apath, COMP_EXTN))
strcat(apath, COMP_EXTN); strcat(apath, COMP_EXTN);
pctx->to_filename = realpath(apath, NULL); pctx->to_filename = realpath(apath, NULL);
/* Check if compressed file exists */ /* Check if compressed file exists */
if (pctx->to_filename != NULL) { if (pctx->to_filename != NULL) {
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename); log_msg(LOG_ERR, 0, "Compressed file %s exists",
pctx->to_filename);
free((void *)(pctx->to_filename)); free((void *)(pctx->to_filename));
return (1); return (1);
} }
@ -3111,12 +3128,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
} }
} else { } else {
strcpy(apath, pctx->filename); strcpy(apath, pctx->filename);
if (!endswith(apath, COMP_EXTN))
strcat(apath, COMP_EXTN); strcat(apath, COMP_EXTN);
pctx->to_filename = realpath(apath, NULL); pctx->to_filename = realpath(apath, NULL);
/* Check if compressed file exists */ /* Check if compressed file exists */
if (pctx->to_filename != NULL) { if (pctx->to_filename != NULL) {
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename); log_msg(LOG_ERR, 0, "Compressed file %s exists",
pctx->to_filename);
free((void *)(pctx->to_filename)); free((void *)(pctx->to_filename));
return (1); return (1);
} }

View file

@ -132,7 +132,6 @@ struct ext_entry {
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4}, {"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
{"xcf" , TYPE_BINARY, 3}, {"xcf" , TYPE_BINARY, 3},
{"mo" , TYPE_BINARY, 2}, {"mo" , TYPE_BINARY, 2},
{"bmp" , TYPE_BINARY, 3},
{"pyo" , TYPE_BINARY, 3}, {"pyo" , TYPE_BINARY, 3},
{"pyc" , TYPE_BINARY, 3}, {"pyc" , TYPE_BINARY, 3},
{"wav" , TYPE_BINARY, 3}, {"wav" , TYPE_BINARY, 3},

View file

@ -122,7 +122,6 @@ pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
xcf,TYPE_BINARY xcf,TYPE_BINARY
mo,TYPE_BINARY mo,TYPE_BINARY
bmp,TYPE_BINARY
pyo,TYPE_BINARY pyo,TYPE_BINARY
pyc,TYPE_BINARY pyc,TYPE_BINARY
wav,TYPE_BINARY wav,TYPE_BINARY

View file

@ -19,7 +19,7 @@ ub1 tab[] = {
0,87,0,0,113,0,125,183,82,0,124,88,40,125,0,0, 0,87,0,0,113,0,125,183,82,0,124,88,40,125,0,0,
124,0,168,125,0,125,0,40,0,82,125,113,113,125,116,0, 124,0,168,125,0,125,0,40,0,82,125,113,113,125,116,0,
0,0,113,85,0,88,0,0,42,27,0,0,0,40,183,61, 0,0,113,85,0,88,0,0,42,27,0,0,0,40,183,61,
0,0,0,0,0,111,17,0,87,125,0,0,166,91,0,0, 0,0,0,0,0,111,0,0,87,125,0,0,127,91,0,0,
}; };
/* The hash function */ /* The hash function */

View file

@ -8,7 +8,7 @@
extern ub1 tab[]; extern ub1 tab[];
#define PHASHLEN 0x80 /* length of hash mapping table */ #define PHASHLEN 0x80 /* length of hash mapping table */
#define PHASHNKEYS 133 /* How many keys were hashed */ #define PHASHNKEYS 132 /* How many keys were hashed */
#define PHASHRANGE 256 /* Range any input might map to */ #define PHASHRANGE 256 /* Range any input might map to */
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */ #define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */

View file

@ -38,6 +38,7 @@
#include <sys/param.h> #include <sys/param.h>
#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
#include <string.h>
#include <cpuid.h> #include <cpuid.h>
#ifdef __cplusplus #ifdef __cplusplus
@ -269,7 +270,8 @@ typedef enum {
TYPE_DNA_SEQ = 168, TYPE_DNA_SEQ = 168,
TYPE_MJPEG = 176, TYPE_MJPEG = 176,
TYPE_AUDIO_COMPRESSED = 184, TYPE_AUDIO_COMPRESSED = 184,
TYPE_EXE64 = 192 TYPE_EXE64 = 192,
TYPE_BMP = 200
} data_type_t; } data_type_t;
/* /*
@ -384,6 +386,18 @@ roundup_pow_two(unsigned int v) {
return (v); return (v);
} }
/*
 * Report whether the NUL-terminated string `haystack` ends with the
 * NUL-terminated string `needle`.
 *
 * Returns 1 when the trailing strlen(needle) bytes of `haystack` are equal
 * to `needle` (an empty `needle` therefore always matches), and 0 otherwise.
 * The comparison is byte-wise and case-sensitive.
 *
 * Neither argument is written through, so both are const-qualified. This
 * keeps existing callers working and lets string literals and const strings
 * be passed without casts, including from C++ translation units.
 */
static inline int
endswith(const char *haystack, const char *needle) {
	size_t len = strlen(haystack);
	size_t nlen = strlen(needle);

	/* A suffix longer than the string itself can never match. */
	if (nlen > len)
		return (0);

	/* Compare only the last nlen bytes of haystack against needle. */
	return (memcmp(haystack + (len - nlen), needle, nlen) == 0);
}
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif