Use libbsc/ppmd for BMP files.
Fix extension-based hashing. Do not append the .pz extension to filenames that already have it. Some code formatting changes.
This commit is contained in:
parent
bd530e3393
commit
306f145f22
9 changed files with 74 additions and 32 deletions
|
@ -133,7 +133,8 @@ DELTA2OBJS = $(DELTA2SRCS:.c=.o)
|
|||
ARCHIVESRCS = archive/pc_archive.c archive/pc_arc_filter.c utils/phash/phash.c \
|
||||
utils/phash/lookupa.c utils/phash/recycle.c
|
||||
ARCHIVEHDRS = pcompress.h utils/utils.h archive/pc_archive.h utils/phash/standard.h \
|
||||
utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h
|
||||
utils/phash/lookupa.h utils/phash/recycle.h utils/phash/phash.h archive/pc_arc_filter.h \
|
||||
utils/phash/extensions.h
|
||||
ARCHIVEOBJS = $(ARCHIVESRCS:.c=.o)
|
||||
|
||||
PJPGSRCS = filters/packjpg/aricoder.cpp filters/packjpg/bitops.cpp filters/packjpg/packjpg.cpp \
|
||||
|
|
|
@ -269,14 +269,16 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
rv = ADAPT_COMPRESS_LZ4;
|
||||
lz4_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 2 && (PC_TYPE(btype) == TYPE_BINARY)) {
|
||||
} else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY &&
|
||||
PC_SUBTYPE(btype) != TYPE_BMP) {
|
||||
rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->lzma_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
rv = ADAPT_COMPRESS_LZMA;
|
||||
lzma_count++;
|
||||
|
||||
} else if (adat->adapt_mode == 1 && (PC_TYPE(btype) == TYPE_BINARY)) {
|
||||
} else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY &&
|
||||
PC_SUBTYPE(btype) != TYPE_BMP) {
|
||||
rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype, NULL);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
|
@ -285,7 +287,8 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
|||
|
||||
} else {
|
||||
#ifdef ENABLE_PC_LIBBSC
|
||||
if (adat->bsc_data && PC_SUBTYPE(btype) == TYPE_MARKUP) {
|
||||
if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP ||
|
||||
PC_SUBTYPE(btype) == TYPE_BMP)) {
|
||||
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
|
||||
if (rv < 0)
|
||||
return (rv);
|
||||
|
|
|
@ -1355,7 +1355,7 @@ init_archive_mod() {
|
|||
* comparison.
|
||||
*/
|
||||
for (j = 0; j < extlist[i].len; j++)
|
||||
extnum = (extnum << 1) | extlist[i].ext[j];
|
||||
extnum = (extnum << 8) | extlist[i].ext[j];
|
||||
exthtab[slot].extnum = extnum;
|
||||
exthtab[slot].type = extlist[i].type;
|
||||
}
|
||||
|
@ -1407,7 +1407,7 @@ detect_type_by_ext(const char *path, int pathlen)
|
|||
* Pack given extension into 64-bit integer.
|
||||
*/
|
||||
for (i = 0; i < len; i++)
|
||||
extnum = (extnum << 1) | tolower(ext[i]);
|
||||
extnum = (extnum << 8) | tolower(ext[i]);
|
||||
if (exthtab[slot].extnum == extnum)
|
||||
return (exthtab[slot].type);
|
||||
out:
|
||||
|
@ -1503,6 +1503,13 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
|||
}
|
||||
}
|
||||
|
||||
// BMP Files
|
||||
if (buf[0] == 'B' && buf[1] == 'M') {
|
||||
uint16_t typ = LE16(U16_P(buf + 14));
|
||||
if (typ == 12 || typ == 64 || typ == 40 || typ == 128)
|
||||
return (TYPE_BINARY|TYPE_BMP);
|
||||
}
|
||||
|
||||
// MSDOS COM types
|
||||
if (buf[0] == 0xe9 || buf[0] == 0xeb) {
|
||||
if (LE16(U16_P(buf + 0x1fe)) == 0xaa55)
|
||||
|
|
61
pcompress.c
61
pcompress.c
|
@ -180,12 +180,15 @@ show_compression_stats(pc_ctx_t *pctx)
|
|||
log_msg(LOG_INFO, 0, "No statistics to display.");
|
||||
} else {
|
||||
log_msg(LOG_INFO, 0, "Best compressed chunk : %s(%.2f%%)",
|
||||
bytes_to_size(pctx->smallest_chunk), (double)pctx->smallest_chunk/(double)pctx->chunksize*100);
|
||||
bytes_to_size(pctx->smallest_chunk),
|
||||
(double)pctx->smallest_chunk/(double)pctx->chunksize*100);
|
||||
log_msg(LOG_INFO, 0, "Worst compressed chunk : %s(%.2f%%)",
|
||||
bytes_to_size(pctx->largest_chunk), (double)pctx->largest_chunk/(double)pctx->chunksize*100);
|
||||
bytes_to_size(pctx->largest_chunk),
|
||||
(double)pctx->largest_chunk/(double)pctx->chunksize*100);
|
||||
pctx->avg_chunk /= pctx->chunk_num;
|
||||
log_msg(LOG_INFO, 0, "Avg compressed chunk : %s(%.2f%%)\n",
|
||||
bytes_to_size(pctx->avg_chunk), (double)pctx->avg_chunk/(double)pctx->chunksize*100);
|
||||
bytes_to_size(pctx->avg_chunk),
|
||||
(double)pctx->avg_chunk/(double)pctx->chunksize*100);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -202,7 +205,8 @@ show_compression_stats(pc_ctx_t *pctx)
|
|||
*/
|
||||
static int
|
||||
preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t srclen,
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
|
||||
algo_props_t *props)
|
||||
{
|
||||
uchar_t *dest = (uchar_t *)dst, type = 0;
|
||||
int64_t result;
|
||||
|
@ -233,7 +237,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
}
|
||||
}
|
||||
|
||||
if (pctx->lzp_preprocess) {
|
||||
if (pctx->lzp_preprocess && PC_SUBTYPE(btype) != TYPE_BMP) {
|
||||
int hashsize;
|
||||
|
||||
hashsize = lzp_hash_size(level);
|
||||
|
@ -249,7 +253,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
}
|
||||
}
|
||||
|
||||
if (pctx->enable_delta2_encode && props->delta2_span > 0) {
|
||||
if (pctx->enable_delta2_encode && props->delta2_span > 0 &&
|
||||
PC_SUBTYPE(btype) != TYPE_DNA_SEQ && PC_SUBTYPE(btype) != TYPE_BMP) {
|
||||
_dstlen = fromlen;
|
||||
result = delta2_encode((uchar_t *)from, fromlen, to,
|
||||
&_dstlen, props->delta2_span);
|
||||
|
@ -284,7 +289,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
if (result > -1 && _dstlen < srclen) {
|
||||
*dest |= PREPROC_COMPRESSED;
|
||||
*dstlen = _dstlen + 9;
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n", get_mb_s(srclen, strt, en)));
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk compression speed %.3f MB/s\n",
|
||||
get_mb_s(srclen, strt, en)));
|
||||
} else {
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk did not compress.\n"));
|
||||
memcpy(dest+1, src, srclen);
|
||||
|
@ -304,7 +310,8 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
|||
|
||||
static int
|
||||
preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64_t srclen,
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data, algo_props_t *props)
|
||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
|
||||
algo_props_t *props)
|
||||
{
|
||||
uchar_t *sorc = (uchar_t *)src, type;
|
||||
int64_t result;
|
||||
|
@ -323,7 +330,8 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
|||
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||
|
||||
if (result < 0) return (result);
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n", get_mb_s(srclen, strt, en)));
|
||||
DEBUG_STAT_EN(fprintf(stderr, "Chunk decompression speed %.3f MB/s\n",
|
||||
get_mb_s(srclen, strt, en)));
|
||||
memcpy(src, dst, *dstlen);
|
||||
srclen = *dstlen;
|
||||
} else {
|
||||
|
@ -368,7 +376,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
|||
}
|
||||
}
|
||||
|
||||
if (!(type & (PREPROC_COMPRESSED | PREPROC_TYPE_DELTA2 | PREPROC_TYPE_LZP | PREPROC_TYPE_DISPACK))
|
||||
if (!(type & (PREPROC_COMPRESSED|PREPROC_TYPE_DELTA2|PREPROC_TYPE_LZP|PREPROC_TYPE_DISPACK))
|
||||
&& type > 0) {
|
||||
log_msg(LOG_ERR, 0, "Invalid preprocessing flags: %d", type);
|
||||
return (-1);
|
||||
|
@ -435,7 +443,8 @@ redo:
|
|||
|
||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||
len = pctx->mac_bytes;
|
||||
deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes, pctx->mac_bytes);
|
||||
deserialize_checksum(checksum, tdat->compressed_chunk + pctx->cksum_bytes,
|
||||
pctx->mac_bytes);
|
||||
memset(tdat->compressed_chunk + pctx->cksum_bytes, 0, pctx->mac_bytes);
|
||||
hmac_reinit(&tdat->chunk_hmac);
|
||||
hmac_update(&tdat->chunk_hmac, (uchar_t *)&tdat->len_cmp_be, sizeof (tdat->len_cmp_be));
|
||||
|
@ -641,7 +650,8 @@ redo:
|
|||
* If it does not match we set length of chunk to 0 to indicate
|
||||
* exit to the writer thread.
|
||||
*/
|
||||
compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk, _chunksize, tdat->cksum_mt, 1);
|
||||
compute_checksum(checksum, pctx->cksum, tdat->uncompressed_chunk,
|
||||
_chunksize, tdat->cksum_mt, 1);
|
||||
if (memcmp(checksum, tdat->checksum, pctx->cksum_bytes) != 0) {
|
||||
tdat->len_cmp = 0;
|
||||
log_msg(LOG_ERR, 0, "ERROR: Chunk %d, checksums do not match.", tdat->id);
|
||||
|
@ -887,7 +897,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
|||
if (flags & FLAG_DEDUP_FIXED) {
|
||||
if (version > 7) {
|
||||
if (pctx->pipe_mode) {
|
||||
log_msg(LOG_ERR, 0, "Global Deduplication is not supported with pipe mode.");
|
||||
log_msg(LOG_ERR, 0, "Global Deduplication is not "
|
||||
"supported with pipe mode.");
|
||||
err = 1;
|
||||
goto uncomp_done;
|
||||
}
|
||||
|
@ -1129,7 +1140,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
|||
free(salt1);
|
||||
memset(n1, 0, noncelen);
|
||||
if (memcmp(hdr_hash2, hdr_hash1, pctx->mac_bytes) != 0) {
|
||||
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password.");
|
||||
log_msg(LOG_ERR, 0, "Header verification failed! File "
|
||||
"tampered or wrong password.");
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
} else if (version >= 5) {
|
||||
|
@ -1158,7 +1170,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
|||
d2 = htonl(level);
|
||||
crc2 = lzma_crc32((uchar_t *)&d2, sizeof (level), crc2);
|
||||
if (crc1 != crc2) {
|
||||
log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password.");
|
||||
log_msg(LOG_ERR, 0, "Header verification failed! File tampered "
|
||||
"or wrong password.");
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
}
|
||||
|
@ -1169,7 +1182,8 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
|||
strcat(pctx->archive_temp_file, "/.data");
|
||||
if ((pctx->archive_temp_fd = open(pctx->archive_temp_file,
|
||||
O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) {
|
||||
log_msg(LOG_ERR, 1, "Cannot open temporary data file in target directory.");
|
||||
log_msg(LOG_ERR, 1, "Cannot open temporary data file in "
|
||||
"target directory.");
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
add_fname(pctx->archive_temp_file);
|
||||
|
@ -2883,7 +2897,8 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
case 'e':
|
||||
pctx->encrypt_type = get_crypto_alg(optarg);
|
||||
if (pctx->encrypt_type == 0) {
|
||||
log_msg(LOG_ERR, 0, "Invalid encryption algorithm. Should be AES or SALSA20.", optarg);
|
||||
log_msg(LOG_ERR, 0, "Invalid encryption algorithm. "
|
||||
"Should be AES or SALSA20.", optarg);
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
|
@ -3098,12 +3113,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
pctx->to_filename = NULL;
|
||||
} else {
|
||||
strcpy(apath, argv[my_optind]);
|
||||
strcat(apath, COMP_EXTN);
|
||||
if (!endswith(apath, COMP_EXTN))
|
||||
strcat(apath, COMP_EXTN);
|
||||
pctx->to_filename = realpath(apath, NULL);
|
||||
|
||||
/* Check if compressed file exists */
|
||||
if (pctx->to_filename != NULL) {
|
||||
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename);
|
||||
log_msg(LOG_ERR, 0, "Compressed file %s exists",
|
||||
pctx->to_filename);
|
||||
free((void *)(pctx->to_filename));
|
||||
return (1);
|
||||
}
|
||||
|
@ -3111,12 +3128,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
|||
}
|
||||
} else {
|
||||
strcpy(apath, pctx->filename);
|
||||
strcat(apath, COMP_EXTN);
|
||||
if (!endswith(apath, COMP_EXTN))
|
||||
strcat(apath, COMP_EXTN);
|
||||
pctx->to_filename = realpath(apath, NULL);
|
||||
|
||||
/* Check if compressed file exists */
|
||||
if (pctx->to_filename != NULL) {
|
||||
log_msg(LOG_ERR, 0, "Compressed file %s exists", pctx->to_filename);
|
||||
log_msg(LOG_ERR, 0, "Compressed file %s exists",
|
||||
pctx->to_filename);
|
||||
free((void *)(pctx->to_filename));
|
||||
return (1);
|
||||
}
|
||||
|
|
|
@ -132,7 +132,6 @@ struct ext_entry {
|
|||
{"zpaq" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ, 4},
|
||||
{"xcf" , TYPE_BINARY, 3},
|
||||
{"mo" , TYPE_BINARY, 2},
|
||||
{"bmp" , TYPE_BINARY, 3},
|
||||
{"pyo" , TYPE_BINARY, 3},
|
||||
{"pyc" , TYPE_BINARY, 3},
|
||||
{"wav" , TYPE_BINARY, 3},
|
||||
|
|
|
@ -122,7 +122,6 @@ pmd,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_PPMD
|
|||
zpaq,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ZPAQ
|
||||
xcf,TYPE_BINARY
|
||||
mo,TYPE_BINARY
|
||||
bmp,TYPE_BINARY
|
||||
pyo,TYPE_BINARY
|
||||
pyc,TYPE_BINARY
|
||||
wav,TYPE_BINARY
|
||||
|
|
|
@ -19,7 +19,7 @@ ub1 tab[] = {
|
|||
0,87,0,0,113,0,125,183,82,0,124,88,40,125,0,0,
|
||||
124,0,168,125,0,125,0,40,0,82,125,113,113,125,116,0,
|
||||
0,0,113,85,0,88,0,0,42,27,0,0,0,40,183,61,
|
||||
0,0,0,0,0,111,17,0,87,125,0,0,166,91,0,0,
|
||||
0,0,0,0,0,111,0,0,87,125,0,0,127,91,0,0,
|
||||
};
|
||||
|
||||
/* The hash function */
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
extern ub1 tab[];
|
||||
#define PHASHLEN 0x80 /* length of hash mapping table */
|
||||
#define PHASHNKEYS 133 /* How many keys were hashed */
|
||||
#define PHASHNKEYS 132 /* How many keys were hashed */
|
||||
#define PHASHRANGE 256 /* Range any input might map to */
|
||||
#define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include <sys/param.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <cpuid.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -269,7 +270,8 @@ typedef enum {
|
|||
TYPE_DNA_SEQ = 168,
|
||||
TYPE_MJPEG = 176,
|
||||
TYPE_AUDIO_COMPRESSED = 184,
|
||||
TYPE_EXE64 = 192
|
||||
TYPE_EXE64 = 192,
|
||||
TYPE_BMP = 200
|
||||
} data_type_t;
|
||||
|
||||
/*
|
||||
|
@ -384,6 +386,18 @@ roundup_pow_two(unsigned int v) {
|
|||
return (v);
|
||||
}
|
||||
|
||||
/*
 * Check whether the NUL-terminated string `haystack` ends with the
 * NUL-terminated string `needle`.
 *
 * The comparison is byte-wise and case-sensitive. An empty `needle`
 * matches any `haystack`. Neither argument is modified, so both are
 * const-qualified; this also lets string literals be passed safely
 * when this header is compiled as C++.
 *
 * Returns 1 if `haystack` ends with `needle`, 0 otherwise.
 */
static inline int
endswith(const char *haystack, const char *needle) {
	size_t len = strlen(haystack);
	size_t nlen = strlen(needle);

	/* A suffix longer than the string itself can never match. */
	if (nlen > len)
		return (0);

	/* Compare only the trailing nlen bytes of haystack. */
	return (memcmp(haystack + (len - nlen), needle, nlen) == 0);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue