From e90c52e516f608998c4e589fdda99a6d7870ec63 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Wed, 13 Nov 2013 23:28:01 +0530 Subject: [PATCH] Work in progress changes for packJPG encoding and decoding. Enhance custom LibArchive filter functionality. --- archive/pc_arc_filter.c | 183 +++++++++++++++++++++++++++--------- archive/pc_arc_filter.h | 16 +++- archive/pc_archive.c | 160 ++++++++++++++++++++++++------- archive/pjpg_helper.cpp | 23 +++++ filters/packjpg/packjpg.cpp | 2 +- pcompress.c | 9 +- utils/phash/extensions.h | 8 +- utils/phash/extensions.txt | 8 +- utils/utils.h | 6 +- 9 files changed, 318 insertions(+), 97 deletions(-) diff --git a/archive/pc_arc_filter.c b/archive/pc_arc_filter.c index dd7ed44..051b5fa 100644 --- a/archive/pc_arc_filter.c +++ b/archive/pc_arc_filter.c @@ -43,88 +43,183 @@ #include "pc_archive.h" #define PACKJPG_DEF_BUFSIZ (512 * 1024) -#define JPG_SIZE_LIMIT (100 * 1024 * 1024) +#define JPG_SIZE_LIMIT (50 * 1024 * 1024) struct packjpg_filter_data { - uchar_t *buff; - size_t bufflen; + uchar_t *buff, *in_buff; + size_t bufflen, in_bufflen; }; extern size_t packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf); -int packjpg_filter(struct filter_info *fi, void *filter_private); +int64_t packjpg_filter(struct filter_info *fi, void *filter_private); void -add_filters_by_ext() +add_filters_by_type(struct type_data *typetab) { struct packjpg_filter_data *pjdat; + int slot; pjdat = (struct packjpg_filter_data *)malloc(sizeof (struct packjpg_filter_data)); pjdat->buff = (uchar_t *)malloc(PACKJPG_DEF_BUFSIZ); pjdat->bufflen = PACKJPG_DEF_BUFSIZ; - if (insert_filter_data(packjpg_filter, pjdat, "pjg") != 0) { - free(pjdat->buff); - free(pjdat); - log_msg(LOG_WARN, 0, "Failed to add filter module for packJPG."); - } + pjdat->in_buff = NULL; + pjdat->in_bufflen = 0; + + slot = TYPE_JPEG >> 3; + typetab[slot].filter_private = pjdat; + typetab[slot].filter_func = packjpg_filter; + typetab[slot].filter_name = "packJPG"; + slot = 
TYPE_PACKJPG >> 3; + typetab[slot].filter_private = pjdat; + typetab[slot].filter_func = packjpg_filter; + typetab[slot].filter_name = "packJPG"; } -/* a short reminder about input/output stream types - for the pjglib_init_streams() function - - if input is file - ---------------- - in_scr -> name of input file - in_type -> 0 - in_size -> ignore - - if input is memory - ------------------ - in_scr -> array containg data - in_type -> 1 - in_size -> size of data array - - if input is *FILE (f.e. stdin) - ------------------------------ - in_src -> stream pointer - in_type -> 2 - in_size -> ignore - - vice versa for output streams! */ +static ssize_t +copy_archive_data(struct archive *ar, uchar_t *out_buf) +{ + int64_t offset; + const void *buff; + size_t size, tot; + int r; -int + tot = 0; + for (;;) { + r = archive_read_data_block(ar, &buff, &size, &offset); + if (r == ARCHIVE_EOF) + break; + if (r != ARCHIVE_OK) + return (0); + memcpy(out_buf + offset, buff, size); + tot += size; + } + return (tot); +} + +static ssize_t +write_archive_data(struct archive *aw, uchar_t *out_buf, size_t len, int block_size) +{ + int64_t offset; + uchar_t *buff; + int r; + size_t tot; + + buff = out_buf; + offset = 0; + tot = len; + while (len > 0) { + if (len < block_size) + block_size = len; + r = (int)archive_write_data_block(aw, buff, block_size, offset); + if (r < ARCHIVE_WARN) + r = ARCHIVE_WARN; + if (r != ARCHIVE_OK) { + return (r); + } + offset += block_size; + len -= block_size; + } + return (tot); +} + +/* + * Helper routine to bridge to packJPG C++ lib, without changing packJPG itself. 
+ */ +ssize_t packjpg_filter(struct filter_info *fi, void *filter_private) { struct packjpg_filter_data *pjdat = (struct packjpg_filter_data *)filter_private; uchar_t *mapbuf, *out; - size_t len; + size_t len, in_size = 0; len = archive_entry_size(fi->entry); if (len > JPG_SIZE_LIMIT) // Bork on massive JPEGs - return (-1); + return (FILTER_RETURN_SKIP); - mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fi->fd, 0); - if (mapbuf == NULL) - return (-1); + if (fi->compressing) { + mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fi->fd, 0); + if (mapbuf == NULL) { + log_msg(LOG_ERR, 1, "Mmap failed in packJPG filter."); + return (FILTER_RETURN_ERROR); + } + /* + * We are trying to compress and this is not a jpeg. Skip. + */ + if (mapbuf[0] != 0xFF && mapbuf[1] != 0xD8) { + munmap(mapbuf, len); + return (FILTER_RETURN_SKIP); + } + + } else { + + /* + * Allocate input buffer and read archive data stream for the entry + * into this buffer. + */ + if (pjdat->in_bufflen < len) { + if (pjdat->in_buff) free(pjdat->in_buff); + pjdat->in_bufflen = len; + pjdat->in_buff = malloc(pjdat->in_bufflen); + if (pjdat->in_buff == NULL) { + log_msg(LOG_ERR, 1, "Out of memory."); + return (FILTER_RETURN_ERROR); + } + } + + in_size = copy_archive_data(fi->source_arc, pjdat->in_buff); + if (in_size != len) { + log_msg(LOG_ERR, 0, "Failed to read archive data."); + return (FILTER_RETURN_ERROR); + } + in_size = U64_P(pjdat->in_buff); + mapbuf = pjdat->in_buff + 8; + + /* + * We are trying to decompress and this is not a packJPG file. + * Write the raw data and skip. 
+ */ + if (mapbuf[0] != 'J' && mapbuf[1] != 'S') { + return (write_archive_data(fi->target_arc, mapbuf, in_size, + fi->block_size)); + } + } if (pjdat->bufflen < len) { free(pjdat->buff); - pjdat->bufflen = len; + pjdat->bufflen = len; // Include size for compressed len pjdat->buff = malloc(pjdat->bufflen); if (pjdat->buff == NULL) { log_msg(LOG_ERR, 1, "Out of memory."); munmap(mapbuf, len); - return (-1); + return (FILTER_RETURN_ERROR); } } /* - * Helper routine to bridge to packJPG C++ lib, without changing packJPG itself. + * Compression case. + */ + if (fi->compressing) { + ssize_t rv; + + out = pjdat->buff; + if ((len = packjpg_filter_process(mapbuf, len, &out)) == 0) { + return (FILTER_RETURN_SKIP); + } + in_size = LE64(len); + rv = archive_write_data(fi->target_arc, &in_size, 8); + if (rv != 8) + return (rv); + return (archive_write_data(fi->target_arc, out, len)); + } + + /* + * Decompression case. */ out = pjdat->buff; - if ((len = packjpg_filter_process(mapbuf, len, &out)) == 0) { - return (-1); + if ((len = packjpg_filter_process(mapbuf, in_size, &out)) == 0) { + return (FILTER_RETURN_ERROR); } - return (archive_write_data(fi->target_arc, out, len)); + return (write_archive_data(fi->target_arc, out, len, fi->block_size)); } diff --git a/archive/pc_arc_filter.h b/archive/pc_arc_filter.h index 40e9d9d..60c22ca 100644 --- a/archive/pc_arc_filter.h +++ b/archive/pc_arc_filter.h @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -36,15 +37,26 @@ extern "C" { #endif +#define FILTER_RETURN_SKIP (1) +#define FILTER_RETURN_ERROR (-1) + struct filter_info { + struct archive *source_arc; struct archive *target_arc; struct archive_entry *entry; int fd; + int compressing, block_size; }; -typedef int (*filter_func_ptr)(struct filter_info *fi, void *filter_private); +typedef ssize_t (*filter_func_ptr)(struct filter_info *fi, void *filter_private); -void add_filters_by_ext(); +struct type_data { + void *filter_private; + filter_func_ptr 
filter_func; + char *filter_name; +}; + +void add_filters_by_type(struct type_data *typetab); #ifdef __cplusplus } diff --git a/archive/pc_archive.c b/archive/pc_archive.c index 2be92c4..5d0575e 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -60,10 +60,10 @@ pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; static struct ext_hash_entry { uint64_t extnum; int type; - void *filter_private; - filter_func_ptr filter_func; } *exthtab = NULL; +struct type_data typetab[NUM_SUB_TYPES]; + /* AE_IFREG Regular file AE_IFLNK Symbolic link @@ -74,12 +74,12 @@ AE_IFDIR Directory AE_IFIFO Named pipe (fifo) */ -#define ARC_ENTRY_OVRHEAD 500 -#define ARC_SCRATCH_BUFF_SIZE (64 *1024) +#define ARC_ENTRY_OVRHEAD 1024 #define MMAP_SIZE (1024 * 1024) #define SORT_BUF_SIZE (65536) #define NAMELEN 4 #define TEMP_MMAP_SIZE (128 * 1024) +#define AW_BLOCK_SIZE (256 * 1024) typedef struct member_entry { char name[NAMELEN]; @@ -104,7 +104,7 @@ static struct arc_list_state { pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER; -static int detect_type_by_ext(char *path, int pathlen); +static int detect_type_by_ext(const char *path, int pathlen); static int detect_type_by_data(uchar_t *buf, size_t len); /* @@ -164,6 +164,12 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len uchar_t *tbuf; tbuf = pctx->arc_buf + pctx->arc_buf_pos; + + /* + * Determine if we should return the accumulated data to the caller. + * This is done if the data type changes and at least some minimum amount + * of data has accumulated in the buffer. 
+ */ if (pctx->btype != pctx->ctype) { if (pctx->btype == TYPE_UNKNOWN || pctx->arc_buf_pos == 0) { pctx->btype = pctx->ctype; @@ -718,12 +724,23 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf) err = nftw(fn->filename, add_pathname, 1024, FTW_PHYS); } else { int tflag; + struct FTW ftwbuf; + char *pos; if (S_ISLNK(sb.st_mode)) tflag = FTW_SL; else tflag = FTW_F; - add_pathname(fn->filename, &sb, tflag, NULL); + + /* + * Find out basename to mimic FTW. + */ + pos = strrchr(fn->filename, PATHSEP_CHAR); + if (pos) + ftwbuf.base = pos - fn->filename + 1; + else + ftwbuf.base = 0; + add_pathname(fn->filename, &sb, tflag, &ftwbuf); a_state.arc_size = sb.st_size; } if (a_state.bufpos > 0) { @@ -766,6 +783,7 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf) return (-1); } archive_write_set_format_pax_restricted(arc); + archive_write_set_bytes_per_block(arc, 0); archive_write_open(arc, pctx, arc_open_callback, creat_write_callback, creat_close_callback); pctx->archive_ctx = arc; @@ -774,7 +792,7 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf) pctx->temp_mmap_buf = mmap(NULL, pctx->temp_mmap_len, PROT_READ, MAP_SHARED, pctx->archive_members_fd, 0); if (pctx->temp_mmap_buf == NULL) { - log_msg(LOG_WARN, 1, "Unable to mmap pathlist file, switching to slower read()."); + log_msg(LOG_WARN, 1, "Unable to mmap pathlist file, switching to read()."); pctx->temp_mmap_len = 0; } pctx->temp_mmap_pos = 0; @@ -810,6 +828,28 @@ setup_extractor(pc_ctx_t *pctx) return (0); } +static ssize_t +process_by_filter(int fd, int typ, struct archive *target_arc, + struct archive *source_arc, struct archive_entry *entry, int cmp) +{ + struct filter_info fi; + int64_t wrtn; + + fi.source_arc = source_arc; + fi.target_arc = target_arc; + fi.entry = entry; + fi.fd = fd; + fi.compressing = cmp; + fi.block_size = AW_BLOCK_SIZE; + wrtn = (*(typetab[(typ >> 3)].filter_func))(&fi, typetab[(typ >> 3)].filter_private); + close(fd); + if (wrtn == FILTER_RETURN_ERROR) { + log_msg(LOG_ERR, 0, "Error 
invoking filter module: %s", + typetab[(typ >> 3)].filter_name); + } + return (wrtn); +} + /* * Routines to archive members and write the file data to the callback. Portions of * the following code is adapted from some of the Libarchive bsdtar code. @@ -834,6 +874,20 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry, return (-1); } + if (typ != TYPE_UNKNOWN) { + if (typetab[(typ >> 3)].filter_func != NULL) { + int64_t rv; + + rv = process_by_filter(fd, typ, arc, NULL, entry, 1); + if (rv == FILTER_RETURN_ERROR) + return (-1); + else if (rv == FILTER_RETURN_SKIP) + lseek(fd, 0, SEEK_SET); + else + return (ARCHIVE_OK); + } + } + /* * Use mmap for copying file data. Not necessarily for performance, but it saves on * resident memory use. @@ -847,6 +901,7 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry, len = bytes_to_write; else len = MMAP_SIZE; +do_map: mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, offset); if (mapbuf == NULL) { /* Mmap failed; this is bad. */ @@ -858,8 +913,28 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry, src = mapbuf; wlen = len; - if (typ == TYPE_UNKNOWN) + if (typ == TYPE_UNKNOWN) { pctx->ctype = detect_type_by_data(src, len); + if (typ != TYPE_UNKNOWN) { + if (typetab[(typ >> 3)].filter_func != NULL) { + int64_t rv; + munmap(mapbuf, len); + + rv = process_by_filter(fd, typ, arc, NULL, entry, 1); + if (rv == FILTER_RETURN_ERROR) { + return (-1); + } else if (rv == FILTER_RETURN_SKIP) { + lseek(fd, 0, SEEK_SET); + typ = TYPE_COMPRESSED; + offset = 0; + goto do_map; + } else { + return (ARCHIVE_OK); + } + } + } + } + typ = TYPE_COMPRESSED; // Need to avoid calling detect_type_by_data subsequently. /* * Write the entire mmap-ed buffer. Since we are writing to the compressor @@ -983,10 +1058,10 @@ archiver_thread_func(void *dat) { * libarchive refuses to extract these files on Linux, no matter what I try. * Bug? 
* - * In this case the file basename is changed and a custom extended attribute - * is set to indicate extraction to change it back. + * In this case the file basename is changed and a custom flag is set to + * indicate extraction to change it back. */ - if (bnchars[0] == '.' && bnchars[1] == '_') { + if (bnchars[0] == '.' && bnchars[1] == '_' && archive_entry_filetype(entry) == AE_IFREG) { char *pos = strstr(name, "._"); char name[] = "@.", value[] = "m"; if (pos) { @@ -1001,6 +1076,8 @@ archiver_thread_func(void *dat) { if (archive_entry_filetype(entry) != AE_IFREG) { archive_entry_set_size(entry, 0); + } else { + archive_entry_set_size(entry, archive_entry_size(entry)); } if (pctx->verbose) log_msg(LOG_INFO, 0, "%5d/%5d %8d %s", ctr, pctx->archive_members_count, @@ -1015,6 +1092,7 @@ archiver_thread_func(void *dat) { ent = spare_entry; spare_entry = NULL; } + archive_write_finish_entry(arc); archive_entry_clear(entry); ctr++; } @@ -1045,13 +1123,29 @@ start_archiver(pc_ctx_t *pctx) { * routines, so we have to handle here. 
*/ static int -copy_data_out(struct archive *ar, struct archive *aw) +copy_data_out(struct archive *ar, struct archive *aw, struct archive_entry *entry, + int typ) { int64_t offset; const void *buff; size_t size; int r; + if (typ != TYPE_UNKNOWN) { + if (typetab[(typ >> 3)].filter_func != NULL) { + int64_t rv; + + rv = process_by_filter(-1, typ, aw, ar, entry, 0); + if (rv == FILTER_RETURN_ERROR) { + archive_set_error(ar, archive_errno(aw), + "%s", archive_error_string(aw)); + return (ARCHIVE_FATAL); + } else { + return (ARCHIVE_OK); + } + } + } + for (;;) { r = archive_read_data_block(ar, &buff, &size, &offset); if (r == ARCHIVE_EOF) @@ -1071,19 +1165,20 @@ copy_data_out(struct archive *ar, struct archive *aw) static int archive_extract_entry(struct archive *a, struct archive_entry *entry, - struct archive *ad) + struct archive *ad, int typ) { int r, r2; - + r = archive_write_header(ad, entry); if (r < ARCHIVE_WARN) r = ARCHIVE_WARN; - if (r != ARCHIVE_OK) + if (r != ARCHIVE_OK) { /* If _write_header failed, copy the error. */ archive_copy_error(a, ad); - else if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0) + } else if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0) { /* Otherwise, pour data into the entry. 
*/ - r = copy_data_out(a, ad); + r = copy_data_out(a, ad, entry, typ); + } r2 = archive_write_finish_entry(ad); if (r2 < ARCHIVE_WARN) r2 = ARCHIVE_WARN; @@ -1153,6 +1248,7 @@ extractor_thread_func(void *dat) { while ((rv = archive_read_next_header(arc, &entry)) != ARCHIVE_EOF) { const char *xt_name, *xt_value; size_t xt_size; + int typ; if (rv != ARCHIVE_OK) log_msg(LOG_WARN, 0, "%s", archive_error_string(arc)); @@ -1167,6 +1263,12 @@ extractor_thread_func(void *dat) { continue; } + typ = TYPE_UNKNOWN; + if (archive_entry_filetype(entry) == AE_IFREG) { + const char *fpath = archive_entry_pathname(entry); + typ = detect_type_by_ext(fpath, strlen(fpath)); + } + /* * Workaround for libarchive weirdness on Non MAC OS X platforms for filenames * starting with '._'. See above ... @@ -1191,7 +1293,7 @@ extractor_thread_func(void *dat) { } #endif - rv = archive_extract_entry(arc, entry, awd); + rv = archive_extract_entry(arc, entry, awd, typ); if (rv != ARCHIVE_OK) { log_msg(LOG_WARN, 0, "%s: %s", archive_entry_pathname(entry), archive_error_string(arc)); @@ -1251,11 +1353,10 @@ init_archive_mod() { extnum = (extnum << 1) | extlist[i].ext[j]; exthtab[slot].extnum = extnum; exthtab[slot].type = extlist[i].type; - exthtab[slot].filter_func = NULL; - exthtab[slot].filter_private = NULL; } - add_filters_by_ext(); + memset(typetab, 0, sizeof (typetab)); + add_filters_by_type(typetab); inited = 1; } else { rv = 1; @@ -1271,9 +1372,9 @@ init_archive_mod() { * outside the hash table range then the function returns unknown type. 
*/ static int -detect_type_by_ext(char *path, int pathlen) +detect_type_by_ext(const char *path, int pathlen) { - char *ext = NULL; + const char *ext = NULL; ub4 slot; int i, len; uint64_t extnum; @@ -1340,16 +1441,3 @@ detect_type_by_data(uchar_t *buf, size_t len) return (TYPE_UNKNOWN); } - -int -insert_filter_data(filter_func_ptr func, void *filter_private, const char *ext) -{ - ub4 slot = phash(ext, strlen(ext)); - if (slot >= PHASHNKEYS || slot < 0) { - log_msg(LOG_WARN, 0, "Cannot add filter for unknown extension: %s", ext); - return (-1); - } - exthtab[slot].filter_func = func; - exthtab[slot].filter_private = filter_private; - return (0); -} diff --git a/archive/pjpg_helper.cpp b/archive/pjpg_helper.cpp index 10fa59c..2c70ada 100644 --- a/archive/pjpg_helper.cpp +++ b/archive/pjpg_helper.cpp @@ -39,6 +39,29 @@ extern "C" { typedef unsigned char uchar_t; +/* a short reminder about input/output stream types + for the pjglib_init_streams() function + + if input is file + ---------------- + in_scr -> name of input file + in_type -> 0 + in_size -> ignore + + if input is memory + ------------------ + in_scr -> array containg data + in_type -> 1 + in_size -> size of data array + + if input is *FILE (f.e. stdin) + ------------------------------ + in_src -> stream pointer + in_type -> 2 + in_size -> ignore + + vice versa for output streams! 
*/ + size_t packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf) { diff --git a/filters/packjpg/packjpg.cpp b/filters/packjpg/packjpg.cpp index f30da99..feeac00 100644 --- a/filters/packjpg/packjpg.cpp +++ b/filters/packjpg/packjpg.cpp @@ -3384,7 +3384,7 @@ INTERN bool unpack_pjg( void ) // init arithmetic compression decoder = new aricoder( str_in, 0 ); - + // decode JPG header if ( !pjg_decode_generic( decoder, &hdrdata, &hdrs ) ) return false; // retrieve padbit from stream diff --git a/pcompress.c b/pcompress.c index 242b5cd..627e16e 100644 --- a/pcompress.c +++ b/pcompress.c @@ -1544,13 +1544,12 @@ plain_index: o_chunksize = _chunksize; /* Compress data chunk. */ - if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0 && - PC_SUBTYPE(pctx->btype) == TYPE_CMP_MAX) { + if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0) { rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz, _chunksize, compressed_chunk + index_size_cmp, &_chunksize, tdat->level, 0, pctx->btype, tdat->data, tdat->props); - } else if (_chunksize > 0 && PC_SUBTYPE(pctx->btype) == TYPE_CMP_MAX) { + } else if (_chunksize > 0) { DEBUG_STAT_EN(double strt, en); DEBUG_STAT_EN(strt = get_wtime_millis()); @@ -1915,8 +1914,10 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev /* * Adjust chunk size for small files. We then get an archive with * a single chunk for the entire file. + * This is not valid for archive mode since we cannot accurately estimate + * final archive size. */ - if (sbuf.st_size <= chunksize) { + if (sbuf.st_size <= chunksize && !(pctx->archive_mode)) { chunksize = sbuf.st_size; pctx->enable_rabin_split = 0; // Do not split for whole files. 
pctx->nthreads = 1; diff --git a/utils/phash/extensions.h b/utils/phash/extensions.h index c4a5b49..55925e2 100644 --- a/utils/phash/extensions.h +++ b/utils/phash/extensions.h @@ -99,8 +99,8 @@ struct ext_entry { {"xpi" , TYPE_BINARY|TYPE_EXE, 3}, {"off" , TYPE_BINARY|TYPE_EXE, 3}, {"pdf" , TYPE_BINARY, 3}, - {"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, - {"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 4}, + {"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3}, + {"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4}, {"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3}, {"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3}, @@ -114,8 +114,8 @@ struct ext_entry { {"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4}, {"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3}, - {"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, - {"pjg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, + {"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3}, + {"pjg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKJPG, 3}, {"gz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 2}, {"tgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3}, {"bz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 3}, diff --git a/utils/phash/extensions.txt b/utils/phash/extensions.txt index a349510..db2785e 100644 --- a/utils/phash/extensions.txt +++ b/utils/phash/extensions.txt @@ -89,8 +89,8 @@ com,TYPE_BINARY|TYPE_EXE xpi,TYPE_BINARY|TYPE_EXE off,TYPE_BINARY|TYPE_EXE pdf,TYPE_BINARY -jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX -jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX +jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG +jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ mp3,TYPE_BINARY|TYPE_COMPRESSED wma,TYPE_BINARY|TYPE_COMPRESSED @@ -104,8 +104,8 @@ ofr,TYPE_BINARY|TYPE_COMPRESSED flac,TYPE_BINARY|TYPE_COMPRESSED pac,TYPE_BINARY|TYPE_COMPRESSED 
gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW -jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX -pjg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX +jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG +pjg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKJPG gz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ tgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ bz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2 diff --git a/utils/utils.h b/utils/utils.h index 464581d..1b01f92 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -242,8 +242,9 @@ typedef enum { /* * Sub-types. */ +#define NUM_SUB_TYPES 20 TYPE_EXE = 8, - TYPE_CMP_MAX = 16, + TYPE_JPEG = 16, TYPE_MARKUP = 24, TYPE_COMPRESSED_GZ = 32, TYPE_COMPRESSED_LZW = 40, @@ -260,7 +261,8 @@ typedef enum { TYPE_COMPRESSED_RAR = 128, TYPE_COMPRESSED_LZ = 136, TYPE_COMPRESSED_PPMD = 144, - TYPE_COMPRESSED_ZPAQ = 152 + TYPE_COMPRESSED_ZPAQ = 152, + TYPE_PACKJPG = 160 } data_type_t; /*