A new Dictionary preprocessor for text files.
This commit is contained in:
parent
73307c3996
commit
66a482c968
5 changed files with 975 additions and 214 deletions
|
@ -59,7 +59,7 @@ analyze_buffer(void *src, uint64_t srclen, analyzer_ctx_t *actx)
|
||||||
if (cur_byte != ' ')
|
if (cur_byte != ' ')
|
||||||
prev_byte = cur_byte;
|
prev_byte = cur_byte;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Heuristics for detecting BINARY vs generic TEXT vs XML data at various
|
* Heuristics for detecting BINARY vs generic TEXT vs XML data at various
|
||||||
* significance levels.
|
* significance levels.
|
||||||
|
@ -72,7 +72,7 @@ analyze_buffer(void *src, uint64_t srclen, analyzer_ctx_t *actx)
|
||||||
} else {
|
} else {
|
||||||
actx->forty_pct.btype = TYPE_TEXT;
|
actx->forty_pct.btype = TYPE_TEXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tot_8b > FIFTY_PCT(srclen)) {
|
if (tot_8b > FIFTY_PCT(srclen)) {
|
||||||
actx->fifty_pct.btype = TYPE_BINARY;
|
actx->fifty_pct.btype = TYPE_BINARY;
|
||||||
} else {
|
} else {
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -39,11 +39,8 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void *new_dict_context();
|
int dict_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
||||||
void delete_dict_context(void *dict_ctx);
|
int dict_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
||||||
|
|
||||||
int dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
|
||||||
int dict_decode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -510,7 +510,9 @@ meta_ctx_create(void *pc, int file_version, int comp_fd)
|
||||||
pc_ctx_t *pctx = (pc_ctx_t *)pc;
|
pc_ctx_t *pctx = (pc_ctx_t *)pc;
|
||||||
meta_ctx_t *mctx;
|
meta_ctx_t *mctx;
|
||||||
|
|
||||||
mctx = (meta_ctx_t *)malloc(sizeof (meta_ctx_t));
|
slab_cache_add(METADATA_CHUNK_SIZE + METADATA_HDR_SZ);
|
||||||
|
slab_cache_add(sizeof (meta_ctx_t));
|
||||||
|
mctx = (meta_ctx_t *)slab_alloc(NULL, sizeof (meta_ctx_t));
|
||||||
if (!mctx) {
|
if (!mctx) {
|
||||||
log_msg(LOG_ERR, 1, "Failed to allocate metadata context.");
|
log_msg(LOG_ERR, 1, "Failed to allocate metadata context.");
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
|
14
pcompress.c
14
pcompress.c
|
@ -259,15 +259,13 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
||||||
int b_type;
|
int b_type;
|
||||||
|
|
||||||
if (analyzed)
|
if (analyzed)
|
||||||
b_type = PC_TYPE(actx.one_pct.btype);
|
b_type = PC_TYPE(actx.forty_pct.btype);
|
||||||
else
|
else
|
||||||
b_type = PC_TYPE(analyze_buffer_simple(from, fromlen));
|
b_type = PC_TYPE(analyze_buffer_simple(from, fromlen));
|
||||||
|
|
||||||
if (b_type == TYPE_TEXT) {
|
if (b_type == TYPE_TEXT) {
|
||||||
void *dct = new_dict_context();
|
|
||||||
_dstlen = fromlen;
|
_dstlen = fromlen;
|
||||||
result = dict_encode(dct, from, fromlen, to, &_dstlen);
|
result = dict_encode(from, fromlen, to, &_dstlen);
|
||||||
delete_dict_context(dct);
|
|
||||||
if (result != -1) {
|
if (result != -1) {
|
||||||
uchar_t *tmp;
|
uchar_t *tmp;
|
||||||
tmp = from;
|
tmp = from;
|
||||||
|
@ -346,7 +344,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
||||||
_dstlen = srclen;
|
_dstlen = srclen;
|
||||||
DEBUG_STAT_EN(strt = get_wtime_millis());
|
DEBUG_STAT_EN(strt = get_wtime_millis());
|
||||||
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr,
|
result = cmp_func(src, srclen, dest+9, &_dstlen, level, chdr,
|
||||||
(dict?TYPE_TEXT:btype), data);
|
btype, data);
|
||||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
|
|
||||||
if (result > -1 && _dstlen < srclen) {
|
if (result > -1 && _dstlen < srclen) {
|
||||||
|
@ -407,6 +405,7 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
||||||
memcpy(src, dst, _dstlen);
|
memcpy(src, dst, _dstlen);
|
||||||
srclen = _dstlen;
|
srclen = _dstlen;
|
||||||
*dstlen = _dstlen;
|
*dstlen = _dstlen;
|
||||||
|
_dstlen = _dstlen1;
|
||||||
} else {
|
} else {
|
||||||
log_msg(LOG_ERR, 0, "Delta2 decoding failed.");
|
log_msg(LOG_ERR, 0, "Delta2 decoding failed.");
|
||||||
return (result);
|
return (result);
|
||||||
|
@ -436,13 +435,12 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type & PREPROC_TYPE_DICT) {
|
if (type & PREPROC_TYPE_DICT) {
|
||||||
void *dct = new_dict_context();
|
result = dict_decode(src, srclen, dst, &_dstlen);
|
||||||
result = dict_decode(dct, src, srclen, dst, &_dstlen);
|
|
||||||
delete_dict_context(dct);
|
|
||||||
if (result != -1) {
|
if (result != -1) {
|
||||||
memcpy(src, dst, _dstlen);
|
memcpy(src, dst, _dstlen);
|
||||||
srclen = _dstlen;
|
srclen = _dstlen;
|
||||||
*dstlen = _dstlen;
|
*dstlen = _dstlen;
|
||||||
|
_dstlen = _dstlen1;
|
||||||
} else {
|
} else {
|
||||||
log_msg(LOG_ERR, 0, "DICT decoding failed.");
|
log_msg(LOG_ERR, 0, "DICT decoding failed.");
|
||||||
return (result);
|
return (result);
|
||||||
|
|
Loading…
Reference in a new issue