Work in progress changes for packJPG encoding and decoding.

Enhance custom LibArchive filter functionality.
This commit is contained in:
Moinak Ghosh 2013-11-13 23:28:01 +05:30
parent 75dfa6a6fb
commit e90c52e516
9 changed files with 318 additions and 97 deletions

View file

@ -43,88 +43,183 @@
#include "pc_archive.h" #include "pc_archive.h"
#define PACKJPG_DEF_BUFSIZ (512 * 1024) #define PACKJPG_DEF_BUFSIZ (512 * 1024)
#define JPG_SIZE_LIMIT (100 * 1024 * 1024) #define JPG_SIZE_LIMIT (50 * 1024 * 1024)
struct packjpg_filter_data { struct packjpg_filter_data {
uchar_t *buff; uchar_t *buff, *in_buff;
size_t bufflen; size_t bufflen, in_bufflen;
}; };
extern size_t packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf); extern size_t packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf);
int packjpg_filter(struct filter_info *fi, void *filter_private); int64_t packjpg_filter(struct filter_info *fi, void *filter_private);
void void
add_filters_by_ext() add_filters_by_type(struct type_data *typetab)
{ {
struct packjpg_filter_data *pjdat; struct packjpg_filter_data *pjdat;
int slot;
pjdat = (struct packjpg_filter_data *)malloc(sizeof (struct packjpg_filter_data)); pjdat = (struct packjpg_filter_data *)malloc(sizeof (struct packjpg_filter_data));
pjdat->buff = (uchar_t *)malloc(PACKJPG_DEF_BUFSIZ); pjdat->buff = (uchar_t *)malloc(PACKJPG_DEF_BUFSIZ);
pjdat->bufflen = PACKJPG_DEF_BUFSIZ; pjdat->bufflen = PACKJPG_DEF_BUFSIZ;
if (insert_filter_data(packjpg_filter, pjdat, "pjg") != 0) { pjdat->in_buff = NULL;
free(pjdat->buff); pjdat->in_bufflen = 0;
free(pjdat);
log_msg(LOG_WARN, 0, "Failed to add filter module for packJPG."); slot = TYPE_JPEG >> 3;
} typetab[slot].filter_private = pjdat;
typetab[slot].filter_func = packjpg_filter;
typetab[slot].filter_name = "packJPG";
slot = TYPE_PACKJPG >> 3;
typetab[slot].filter_private = pjdat;
typetab[slot].filter_func = packjpg_filter;
typetab[slot].filter_name = "packJPG";
} }
/* a short reminder about input/output stream types static ssize_t
for the pjglib_init_streams() function copy_archive_data(struct archive *ar, uchar_t *out_buf)
{
int64_t offset;
const void *buff;
size_t size, tot;
int r;
if input is file tot = 0;
---------------- for (;;) {
in_scr -> name of input file r = archive_read_data_block(ar, &buff, &size, &offset);
in_type -> 0 if (r == ARCHIVE_EOF)
in_size -> ignore break;
if (r != ARCHIVE_OK)
return (0);
memcpy(out_buf + offset, buff, size);
tot += size;
}
return (tot);
}
if input is memory static ssize_t
------------------ write_archive_data(struct archive *aw, uchar_t *out_buf, size_t len, int block_size)
in_scr -> array containg data {
in_type -> 1 int64_t offset;
in_size -> size of data array uchar_t *buff;
int r;
size_t tot;
if input is *FILE (f.e. stdin) buff = out_buf;
------------------------------ offset = 0;
in_src -> stream pointer tot = len;
in_type -> 2 while (len > 0) {
in_size -> ignore if (len < block_size)
block_size = len;
r = (int)archive_write_data_block(aw, buff, block_size, offset);
if (r < ARCHIVE_WARN)
r = ARCHIVE_WARN;
if (r != ARCHIVE_OK) {
return (r);
}
offset += block_size;
len -= block_size;
}
return (tot);
}
vice versa for output streams! */ /*
* Helper routine to bridge to packJPG C++ lib, without changing packJPG itself.
int */
ssize_t
packjpg_filter(struct filter_info *fi, void *filter_private) packjpg_filter(struct filter_info *fi, void *filter_private)
{ {
struct packjpg_filter_data *pjdat = (struct packjpg_filter_data *)filter_private; struct packjpg_filter_data *pjdat = (struct packjpg_filter_data *)filter_private;
uchar_t *mapbuf, *out; uchar_t *mapbuf, *out;
size_t len; size_t len, in_size = 0;
len = archive_entry_size(fi->entry); len = archive_entry_size(fi->entry);
if (len > JPG_SIZE_LIMIT) // Bork on massive JPEGs if (len > JPG_SIZE_LIMIT) // Bork on massive JPEGs
return (-1); return (FILTER_RETURN_SKIP);
mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fi->fd, 0); if (fi->compressing) {
if (mapbuf == NULL) mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fi->fd, 0);
return (-1); if (mapbuf == NULL) {
log_msg(LOG_ERR, 1, "Mmap failed in packJPG filter.");
return (FILTER_RETURN_ERROR);
}
/*
* We are trying to compress and this is not a jpeg. Skip.
*/
if (mapbuf[0] != 0xFF && mapbuf[1] != 0xD8) {
munmap(mapbuf, len);
return (FILTER_RETURN_SKIP);
}
} else {
/*
* Allocate input buffer and read archive data stream for the entry
* into this buffer.
*/
if (pjdat->in_bufflen < len) {
if (pjdat->in_buff) free(pjdat->in_buff);
pjdat->in_bufflen = len;
pjdat->in_buff = malloc(pjdat->in_bufflen);
if (pjdat->in_buff == NULL) {
log_msg(LOG_ERR, 1, "Out of memory.");
return (FILTER_RETURN_ERROR);
}
}
in_size = copy_archive_data(fi->source_arc, pjdat->in_buff);
if (in_size != len) {
log_msg(LOG_ERR, 0, "Failed to read archive data.");
return (FILTER_RETURN_ERROR);
}
in_size = U64_P(pjdat->in_buff);
mapbuf = pjdat->in_buff + 8;
/*
* We are trying to decompress and this is not a packJPG file.
* Write the raw data and skip.
*/
if (mapbuf[0] != 'J' && mapbuf[1] != 'S') {
return (write_archive_data(fi->target_arc, mapbuf, in_size,
fi->block_size));
}
}
if (pjdat->bufflen < len) { if (pjdat->bufflen < len) {
free(pjdat->buff); free(pjdat->buff);
pjdat->bufflen = len; pjdat->bufflen = len; // Include size for compressed len
pjdat->buff = malloc(pjdat->bufflen); pjdat->buff = malloc(pjdat->bufflen);
if (pjdat->buff == NULL) { if (pjdat->buff == NULL) {
log_msg(LOG_ERR, 1, "Out of memory."); log_msg(LOG_ERR, 1, "Out of memory.");
munmap(mapbuf, len); munmap(mapbuf, len);
return (-1); return (FILTER_RETURN_ERROR);
} }
} }
/* /*
* Helper routine to bridge to packJPG C++ lib, without changing packJPG itself. * Compression case.
*/
if (fi->compressing) {
ssize_t rv;
out = pjdat->buff;
if ((len = packjpg_filter_process(mapbuf, len, &out)) == 0) {
return (FILTER_RETURN_SKIP);
}
in_size = LE64(len);
rv = archive_write_data(fi->target_arc, &in_size, 8);
if (rv != 8)
return (rv);
return (archive_write_data(fi->target_arc, out, len));
}
/*
* Decompression case.
*/ */
out = pjdat->buff; out = pjdat->buff;
if ((len = packjpg_filter_process(mapbuf, len, &out)) == 0) { if ((len = packjpg_filter_process(mapbuf, in_size, &out)) == 0) {
return (-1); return (FILTER_RETURN_ERROR);
} }
return (archive_write_data(fi->target_arc, out, len)); return (write_archive_data(fi->target_arc, out, len, fi->block_size));
} }

View file

@ -28,6 +28,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <inttypes.h>
#include <unistd.h> #include <unistd.h>
#include <archive.h> #include <archive.h>
#include <archive_entry.h> #include <archive_entry.h>
@ -36,15 +37,26 @@
extern "C" { extern "C" {
#endif #endif
#define FILTER_RETURN_SKIP (1)
#define FILTER_RETURN_ERROR (-1)
struct filter_info { struct filter_info {
struct archive *source_arc;
struct archive *target_arc; struct archive *target_arc;
struct archive_entry *entry; struct archive_entry *entry;
int fd; int fd;
int compressing, block_size;
}; };
typedef int (*filter_func_ptr)(struct filter_info *fi, void *filter_private); typedef ssize_t (*filter_func_ptr)(struct filter_info *fi, void *filter_private);
void add_filters_by_ext(); struct type_data {
void *filter_private;
filter_func_ptr filter_func;
char *filter_name;
};
void add_filters_by_type(struct type_data *typetab);
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -60,10 +60,10 @@ pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct ext_hash_entry { static struct ext_hash_entry {
uint64_t extnum; uint64_t extnum;
int type; int type;
void *filter_private;
filter_func_ptr filter_func;
} *exthtab = NULL; } *exthtab = NULL;
struct type_data typetab[NUM_SUB_TYPES];
/* /*
AE_IFREG Regular file AE_IFREG Regular file
AE_IFLNK Symbolic link AE_IFLNK Symbolic link
@ -74,12 +74,12 @@ AE_IFDIR Directory
AE_IFIFO Named pipe (fifo) AE_IFIFO Named pipe (fifo)
*/ */
#define ARC_ENTRY_OVRHEAD 500 #define ARC_ENTRY_OVRHEAD 1024
#define ARC_SCRATCH_BUFF_SIZE (64 *1024)
#define MMAP_SIZE (1024 * 1024) #define MMAP_SIZE (1024 * 1024)
#define SORT_BUF_SIZE (65536) #define SORT_BUF_SIZE (65536)
#define NAMELEN 4 #define NAMELEN 4
#define TEMP_MMAP_SIZE (128 * 1024) #define TEMP_MMAP_SIZE (128 * 1024)
#define AW_BLOCK_SIZE (256 * 1024)
typedef struct member_entry { typedef struct member_entry {
char name[NAMELEN]; char name[NAMELEN];
@ -104,7 +104,7 @@ static struct arc_list_state {
pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER;
static int detect_type_by_ext(char *path, int pathlen); static int detect_type_by_ext(const char *path, int pathlen);
static int detect_type_by_data(uchar_t *buf, size_t len); static int detect_type_by_data(uchar_t *buf, size_t len);
/* /*
@ -164,6 +164,12 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len
uchar_t *tbuf; uchar_t *tbuf;
tbuf = pctx->arc_buf + pctx->arc_buf_pos; tbuf = pctx->arc_buf + pctx->arc_buf_pos;
/*
* Determine if we should return the accumulated data to the caller.
* This is done if the data type changes and at least some minimum amount
* of data has accumulated in the buffer.
*/
if (pctx->btype != pctx->ctype) { if (pctx->btype != pctx->ctype) {
if (pctx->btype == TYPE_UNKNOWN || pctx->arc_buf_pos == 0) { if (pctx->btype == TYPE_UNKNOWN || pctx->arc_buf_pos == 0) {
pctx->btype = pctx->ctype; pctx->btype = pctx->ctype;
@ -718,12 +724,23 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf)
err = nftw(fn->filename, add_pathname, 1024, FTW_PHYS); err = nftw(fn->filename, add_pathname, 1024, FTW_PHYS);
} else { } else {
int tflag; int tflag;
struct FTW ftwbuf;
char *pos;
if (S_ISLNK(sb.st_mode)) if (S_ISLNK(sb.st_mode))
tflag = FTW_SL; tflag = FTW_SL;
else else
tflag = FTW_F; tflag = FTW_F;
add_pathname(fn->filename, &sb, tflag, NULL);
/*
* Find out basename to mimic FTW.
*/
pos = strrchr(fn->filename, PATHSEP_CHAR);
if (pos)
ftwbuf.base = pos - fn->filename + 1;
else
ftwbuf.base = 0;
add_pathname(fn->filename, &sb, tflag, &ftwbuf);
a_state.arc_size = sb.st_size; a_state.arc_size = sb.st_size;
} }
if (a_state.bufpos > 0) { if (a_state.bufpos > 0) {
@ -766,6 +783,7 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf)
return (-1); return (-1);
} }
archive_write_set_format_pax_restricted(arc); archive_write_set_format_pax_restricted(arc);
archive_write_set_bytes_per_block(arc, 0);
archive_write_open(arc, pctx, arc_open_callback, archive_write_open(arc, pctx, arc_open_callback,
creat_write_callback, creat_close_callback); creat_write_callback, creat_close_callback);
pctx->archive_ctx = arc; pctx->archive_ctx = arc;
@ -774,7 +792,7 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf)
pctx->temp_mmap_buf = mmap(NULL, pctx->temp_mmap_len, PROT_READ, pctx->temp_mmap_buf = mmap(NULL, pctx->temp_mmap_len, PROT_READ,
MAP_SHARED, pctx->archive_members_fd, 0); MAP_SHARED, pctx->archive_members_fd, 0);
if (pctx->temp_mmap_buf == NULL) { if (pctx->temp_mmap_buf == NULL) {
log_msg(LOG_WARN, 1, "Unable to mmap pathlist file, switching to slower read()."); log_msg(LOG_WARN, 1, "Unable to mmap pathlist file, switching to read().");
pctx->temp_mmap_len = 0; pctx->temp_mmap_len = 0;
} }
pctx->temp_mmap_pos = 0; pctx->temp_mmap_pos = 0;
@ -810,6 +828,28 @@ setup_extractor(pc_ctx_t *pctx)
return (0); return (0);
} }
static ssize_t
process_by_filter(int fd, int typ, struct archive *target_arc,
struct archive *source_arc, struct archive_entry *entry, int cmp)
{
struct filter_info fi;
int64_t wrtn;
fi.source_arc = source_arc;
fi.target_arc = target_arc;
fi.entry = entry;
fi.fd = fd;
fi.compressing = cmp;
fi.block_size = AW_BLOCK_SIZE;
wrtn = (*(typetab[(typ >> 3)].filter_func))(&fi, typetab[(typ >> 3)].filter_private);
close(fd);
if (wrtn == FILTER_RETURN_ERROR) {
log_msg(LOG_ERR, 0, "Error invoking filter module: %s",
typetab[(typ >> 3)].filter_name);
}
return (wrtn);
}
/* /*
* Routines to archive members and write the file data to the callback. Portions of * Routines to archive members and write the file data to the callback. Portions of
* the following code is adapted from some of the Libarchive bsdtar code. * the following code is adapted from some of the Libarchive bsdtar code.
@ -834,6 +874,20 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
return (-1); return (-1);
} }
if (typ != TYPE_UNKNOWN) {
if (typetab[(typ >> 3)].filter_func != NULL) {
int64_t rv;
rv = process_by_filter(fd, typ, arc, NULL, entry, 1);
if (rv == FILTER_RETURN_ERROR)
return (-1);
else if (rv == FILTER_RETURN_SKIP)
lseek(fd, 0, SEEK_SET);
else
return (ARCHIVE_OK);
}
}
/* /*
* Use mmap for copying file data. Not necessarily for performance, but it saves on * Use mmap for copying file data. Not necessarily for performance, but it saves on
* resident memory use. * resident memory use.
@ -847,6 +901,7 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
len = bytes_to_write; len = bytes_to_write;
else else
len = MMAP_SIZE; len = MMAP_SIZE;
do_map:
mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, offset); mapbuf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, offset);
if (mapbuf == NULL) { if (mapbuf == NULL) {
/* Mmap failed; this is bad. */ /* Mmap failed; this is bad. */
@ -858,8 +913,28 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
src = mapbuf; src = mapbuf;
wlen = len; wlen = len;
if (typ == TYPE_UNKNOWN) if (typ == TYPE_UNKNOWN) {
pctx->ctype = detect_type_by_data(src, len); pctx->ctype = detect_type_by_data(src, len);
if (typ != TYPE_UNKNOWN) {
if (typetab[(typ >> 3)].filter_func != NULL) {
int64_t rv;
munmap(mapbuf, len);
rv = process_by_filter(fd, typ, arc, NULL, entry, 1);
if (rv == FILTER_RETURN_ERROR) {
return (-1);
} else if (rv == FILTER_RETURN_SKIP) {
lseek(fd, 0, SEEK_SET);
typ = TYPE_COMPRESSED;
offset = 0;
goto do_map;
} else {
return (ARCHIVE_OK);
}
}
}
}
typ = TYPE_COMPRESSED; // Need to avoid calling detect_type_by_data subsequently.
/* /*
* Write the entire mmap-ed buffer. Since we are writing to the compressor * Write the entire mmap-ed buffer. Since we are writing to the compressor
@ -983,10 +1058,10 @@ archiver_thread_func(void *dat) {
* libarchive refuses to extract these files on Linux, no matter what I try. * libarchive refuses to extract these files on Linux, no matter what I try.
* Bug? * Bug?
* *
* In this case the file basename is changed and a custom extended attribute * In this case the file basename is changed and a custom flag is set to
* is set to indicate extraction to change it back. * indicate extraction to change it back.
*/ */
if (bnchars[0] == '.' && bnchars[1] == '_') { if (bnchars[0] == '.' && bnchars[1] == '_' && archive_entry_filetype(entry) == AE_IFREG) {
char *pos = strstr(name, "._"); char *pos = strstr(name, "._");
char name[] = "@.", value[] = "m"; char name[] = "@.", value[] = "m";
if (pos) { if (pos) {
@ -1001,6 +1076,8 @@ archiver_thread_func(void *dat) {
if (archive_entry_filetype(entry) != AE_IFREG) { if (archive_entry_filetype(entry) != AE_IFREG) {
archive_entry_set_size(entry, 0); archive_entry_set_size(entry, 0);
} else {
archive_entry_set_size(entry, archive_entry_size(entry));
} }
if (pctx->verbose) if (pctx->verbose)
log_msg(LOG_INFO, 0, "%5d/%5d %8d %s", ctr, pctx->archive_members_count, log_msg(LOG_INFO, 0, "%5d/%5d %8d %s", ctr, pctx->archive_members_count,
@ -1015,6 +1092,7 @@ archiver_thread_func(void *dat) {
ent = spare_entry; ent = spare_entry;
spare_entry = NULL; spare_entry = NULL;
} }
archive_write_finish_entry(arc);
archive_entry_clear(entry); archive_entry_clear(entry);
ctr++; ctr++;
} }
@ -1045,13 +1123,29 @@ start_archiver(pc_ctx_t *pctx) {
* routines, so we have to handle here. * routines, so we have to handle here.
*/ */
static int static int
copy_data_out(struct archive *ar, struct archive *aw) copy_data_out(struct archive *ar, struct archive *aw, struct archive_entry *entry,
int typ)
{ {
int64_t offset; int64_t offset;
const void *buff; const void *buff;
size_t size; size_t size;
int r; int r;
if (typ != TYPE_UNKNOWN) {
if (typetab[(typ >> 3)].filter_func != NULL) {
int64_t rv;
rv = process_by_filter(-1, typ, aw, ar, entry, 0);
if (rv == FILTER_RETURN_ERROR) {
archive_set_error(ar, archive_errno(aw),
"%s", archive_error_string(aw));
return (ARCHIVE_FATAL);
} else {
return (ARCHIVE_OK);
}
}
}
for (;;) { for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset); r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF) if (r == ARCHIVE_EOF)
@ -1071,19 +1165,20 @@ copy_data_out(struct archive *ar, struct archive *aw)
static int static int
archive_extract_entry(struct archive *a, struct archive_entry *entry, archive_extract_entry(struct archive *a, struct archive_entry *entry,
struct archive *ad) struct archive *ad, int typ)
{ {
int r, r2; int r, r2;
r = archive_write_header(ad, entry); r = archive_write_header(ad, entry);
if (r < ARCHIVE_WARN) if (r < ARCHIVE_WARN)
r = ARCHIVE_WARN; r = ARCHIVE_WARN;
if (r != ARCHIVE_OK) if (r != ARCHIVE_OK) {
/* If _write_header failed, copy the error. */ /* If _write_header failed, copy the error. */
archive_copy_error(a, ad); archive_copy_error(a, ad);
else if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0) } else if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0) {
/* Otherwise, pour data into the entry. */ /* Otherwise, pour data into the entry. */
r = copy_data_out(a, ad); r = copy_data_out(a, ad, entry, typ);
}
r2 = archive_write_finish_entry(ad); r2 = archive_write_finish_entry(ad);
if (r2 < ARCHIVE_WARN) if (r2 < ARCHIVE_WARN)
r2 = ARCHIVE_WARN; r2 = ARCHIVE_WARN;
@ -1153,6 +1248,7 @@ extractor_thread_func(void *dat) {
while ((rv = archive_read_next_header(arc, &entry)) != ARCHIVE_EOF) { while ((rv = archive_read_next_header(arc, &entry)) != ARCHIVE_EOF) {
const char *xt_name, *xt_value; const char *xt_name, *xt_value;
size_t xt_size; size_t xt_size;
int typ;
if (rv != ARCHIVE_OK) if (rv != ARCHIVE_OK)
log_msg(LOG_WARN, 0, "%s", archive_error_string(arc)); log_msg(LOG_WARN, 0, "%s", archive_error_string(arc));
@ -1167,6 +1263,12 @@ extractor_thread_func(void *dat) {
continue; continue;
} }
typ = TYPE_UNKNOWN;
if (archive_entry_filetype(entry) == AE_IFREG) {
const char *fpath = archive_entry_pathname(entry);
typ = detect_type_by_ext(fpath, strlen(fpath));
}
/* /*
* Workaround for libarchive weirdness on Non MAC OS X platforms for filenames * Workaround for libarchive weirdness on Non MAC OS X platforms for filenames
* starting with '._'. See above ... * starting with '._'. See above ...
@ -1191,7 +1293,7 @@ extractor_thread_func(void *dat) {
} }
#endif #endif
rv = archive_extract_entry(arc, entry, awd); rv = archive_extract_entry(arc, entry, awd, typ);
if (rv != ARCHIVE_OK) { if (rv != ARCHIVE_OK) {
log_msg(LOG_WARN, 0, "%s: %s", archive_entry_pathname(entry), log_msg(LOG_WARN, 0, "%s: %s", archive_entry_pathname(entry),
archive_error_string(arc)); archive_error_string(arc));
@ -1251,11 +1353,10 @@ init_archive_mod() {
extnum = (extnum << 1) | extlist[i].ext[j]; extnum = (extnum << 1) | extlist[i].ext[j];
exthtab[slot].extnum = extnum; exthtab[slot].extnum = extnum;
exthtab[slot].type = extlist[i].type; exthtab[slot].type = extlist[i].type;
exthtab[slot].filter_func = NULL;
exthtab[slot].filter_private = NULL;
} }
add_filters_by_ext(); memset(typetab, 0, sizeof (typetab));
add_filters_by_type(typetab);
inited = 1; inited = 1;
} else { } else {
rv = 1; rv = 1;
@ -1271,9 +1372,9 @@ init_archive_mod() {
* outside the hash table range then the function returns unknown type. * outside the hash table range then the function returns unknown type.
*/ */
static int static int
detect_type_by_ext(char *path, int pathlen) detect_type_by_ext(const char *path, int pathlen)
{ {
char *ext = NULL; const char *ext = NULL;
ub4 slot; ub4 slot;
int i, len; int i, len;
uint64_t extnum; uint64_t extnum;
@ -1340,16 +1441,3 @@ detect_type_by_data(uchar_t *buf, size_t len)
return (TYPE_UNKNOWN); return (TYPE_UNKNOWN);
} }
int
insert_filter_data(filter_func_ptr func, void *filter_private, const char *ext)
{
ub4 slot = phash(ext, strlen(ext));
if (slot >= PHASHNKEYS || slot < 0) {
log_msg(LOG_WARN, 0, "Cannot add filter for unknown extension: %s", ext);
return (-1);
}
exthtab[slot].filter_func = func;
exthtab[slot].filter_private = filter_private;
return (0);
}

View file

@ -39,6 +39,29 @@ extern "C" {
typedef unsigned char uchar_t; typedef unsigned char uchar_t;
/* a short reminder about input/output stream types
for the pjglib_init_streams() function
if input is file
----------------
in_scr -> name of input file
in_type -> 0
in_size -> ignore
if input is memory
------------------
in_scr -> array containing data
in_type -> 1
in_size -> size of data array
if input is *FILE (f.e. stdin)
------------------------------
in_src -> stream pointer
in_type -> 2
in_size -> ignore
vice versa for output streams! */
size_t size_t
packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf) packjpg_filter_process(uchar_t *in_buf, size_t len, uchar_t **out_buf)
{ {

View file

@ -1544,13 +1544,12 @@ plain_index:
o_chunksize = _chunksize; o_chunksize = _chunksize;
/* Compress data chunk. */ /* Compress data chunk. */
if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0 && if ((pctx->lzp_preprocess || pctx->enable_delta2_encode) && _chunksize > 0) {
PC_SUBTYPE(pctx->btype) == TYPE_CMP_MAX) {
rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz, rv = preproc_compress(pctx, tdat->compress, tdat->uncompressed_chunk + dedupe_index_sz,
_chunksize, compressed_chunk + index_size_cmp, &_chunksize, _chunksize, compressed_chunk + index_size_cmp, &_chunksize,
tdat->level, 0, pctx->btype, tdat->data, tdat->props); tdat->level, 0, pctx->btype, tdat->data, tdat->props);
} else if (_chunksize > 0 && PC_SUBTYPE(pctx->btype) == TYPE_CMP_MAX) { } else if (_chunksize > 0) {
DEBUG_STAT_EN(double strt, en); DEBUG_STAT_EN(double strt, en);
DEBUG_STAT_EN(strt = get_wtime_millis()); DEBUG_STAT_EN(strt = get_wtime_millis());
@ -1915,8 +1914,10 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
/* /*
* Adjust chunk size for small files. We then get an archive with * Adjust chunk size for small files. We then get an archive with
* a single chunk for the entire file. * a single chunk for the entire file.
* This is not valid for archive mode since we cannot accurately estimate
* final archive size.
*/ */
if (sbuf.st_size <= chunksize) { if (sbuf.st_size <= chunksize && !(pctx->archive_mode)) {
chunksize = sbuf.st_size; chunksize = sbuf.st_size;
pctx->enable_rabin_split = 0; // Do not split for whole files. pctx->enable_rabin_split = 0; // Do not split for whole files.
pctx->nthreads = 1; pctx->nthreads = 1;

View file

@ -99,8 +99,8 @@ struct ext_entry {
{"xpi" , TYPE_BINARY|TYPE_EXE, 3}, {"xpi" , TYPE_BINARY|TYPE_EXE, 3},
{"off" , TYPE_BINARY|TYPE_EXE, 3}, {"off" , TYPE_BINARY|TYPE_EXE, 3},
{"pdf" , TYPE_BINARY, 3}, {"pdf" , TYPE_BINARY, 3},
{"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, {"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
{"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 4}, {"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4},
{"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3}, {"png" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
{"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"mp3" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"wma" , TYPE_BINARY|TYPE_COMPRESSED, 3},
@ -114,8 +114,8 @@ struct ext_entry {
{"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4}, {"flac" , TYPE_BINARY|TYPE_COMPRESSED, 4},
{"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"pac" , TYPE_BINARY|TYPE_COMPRESSED, 3},
{"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3}, {"gif" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW, 3},
{"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, {"jp2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3},
{"pjg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX, 3}, {"pjg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKJPG, 3},
{"gz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 2}, {"gz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 2},
{"tgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3}, {"tgz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3},
{"bz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 3}, {"bz2" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2, 3},

View file

@ -89,8 +89,8 @@ com,TYPE_BINARY|TYPE_EXE
xpi,TYPE_BINARY|TYPE_EXE xpi,TYPE_BINARY|TYPE_EXE
off,TYPE_BINARY|TYPE_EXE off,TYPE_BINARY|TYPE_EXE
pdf,TYPE_BINARY pdf,TYPE_BINARY
jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ png,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
mp3,TYPE_BINARY|TYPE_COMPRESSED mp3,TYPE_BINARY|TYPE_COMPRESSED
wma,TYPE_BINARY|TYPE_COMPRESSED wma,TYPE_BINARY|TYPE_COMPRESSED
@ -104,8 +104,8 @@ ofr,TYPE_BINARY|TYPE_COMPRESSED
flac,TYPE_BINARY|TYPE_COMPRESSED flac,TYPE_BINARY|TYPE_COMPRESSED
pac,TYPE_BINARY|TYPE_COMPRESSED pac,TYPE_BINARY|TYPE_COMPRESSED
gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW gif,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZW
jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX jp2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG
pjg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_CMP_MAX pjg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_PACKJPG
gz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ gz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
tgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ tgz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ
bz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2 bz2,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_BZ2

View file

@ -242,8 +242,9 @@ typedef enum {
/* /*
* Sub-types. * Sub-types.
*/ */
#define NUM_SUB_TYPES 20
TYPE_EXE = 8, TYPE_EXE = 8,
TYPE_CMP_MAX = 16, TYPE_JPEG = 16,
TYPE_MARKUP = 24, TYPE_MARKUP = 24,
TYPE_COMPRESSED_GZ = 32, TYPE_COMPRESSED_GZ = 32,
TYPE_COMPRESSED_LZW = 40, TYPE_COMPRESSED_LZW = 40,
@ -260,7 +261,8 @@ typedef enum {
TYPE_COMPRESSED_RAR = 128, TYPE_COMPRESSED_RAR = 128,
TYPE_COMPRESSED_LZ = 136, TYPE_COMPRESSED_LZ = 136,
TYPE_COMPRESSED_PPMD = 144, TYPE_COMPRESSED_PPMD = 144,
TYPE_COMPRESSED_ZPAQ = 152 TYPE_COMPRESSED_ZPAQ = 152,
TYPE_PACKJPG = 160
} data_type_t; } data_type_t;
/* /*