Update, simplify analyzer function to indicate text data for Dict filter.
Fix archive header writing bug. Strip ^M chars from dict filter files. Include DICT preprocessing type. Fix a bunch of bugs found by Xcode.
This commit is contained in:
parent
4fedebc607
commit
071a9e2b26
13 changed files with 617 additions and 448 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -24,3 +24,4 @@ libtool
|
||||||
stamp-h1
|
stamp-h1
|
||||||
.libs
|
.libs
|
||||||
buildtmp
|
buildtmp
|
||||||
|
*.dSYM
|
||||||
|
|
|
@ -31,9 +31,11 @@ GPP=@GPP@
|
||||||
LIBVER=1
|
LIBVER=1
|
||||||
MAINSRCS = utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
|
MAINSRCS = utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \
|
||||||
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
|
adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \
|
||||||
utils/xxhash_base.c utils/heap.c utils/cpuid.c pcompress.c
|
utils/xxhash_base.c utils/heap.c utils/cpuid.c filters/analyzer/analyzer.c \
|
||||||
|
pcompress.c
|
||||||
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heap.h \
|
MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heap.h \
|
||||||
utils/cpuid.h utils/xxhash.h archive/pc_archive.h filters/dispack/dis.hpp
|
utils/cpuid.h utils/xxhash.h archive/pc_archive.h filters/dispack/dis.hpp \
|
||||||
|
filters/analyzer/analyzer.h
|
||||||
MAINOBJS = $(MAINSRCS:.c=.o)
|
MAINOBJS = $(MAINSRCS:.c=.o)
|
||||||
|
|
||||||
PROGSRCS = main.c
|
PROGSRCS = main.c
|
||||||
|
@ -233,7 +235,7 @@ BASE_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT_P
|
||||||
-I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ -I./rabin/global \
|
-I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ -I./rabin/global \
|
||||||
-I./crypto/keccak -I./filters/transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) \
|
-I./crypto/keccak -I./filters/transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) \
|
||||||
-I./crypto/xsalsa20 -I./archive -pedantic -Wall -I./filters -fno-strict-aliasing \
|
-I./crypto/xsalsa20 -I./archive -pedantic -Wall -I./filters -fno-strict-aliasing \
|
||||||
-Wno-unused-but-set-variable -Wno-enum-compare \
|
-Wno-unused-but-set-variable -Wno-enum-compare -I./filters/analyzer \
|
||||||
@COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ -I@LIBARCHIVE_DIR@/libarchive -I./filters/packjpg \
|
@COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ -I@LIBARCHIVE_DIR@/libarchive -I./filters/packjpg \
|
||||||
-I./filters/packpnm @ENABLE_WAVPACK@
|
-I./filters/packpnm @ENABLE_WAVPACK@
|
||||||
COMMON_CPPFLAGS = $(BASE_CPPFLAGS) -std=gnu99
|
COMMON_CPPFLAGS = $(BASE_CPPFLAGS) -std=gnu99
|
||||||
|
|
|
@ -175,7 +175,7 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len
|
||||||
pctx->btype = pctx->ctype;
|
pctx->btype = pctx->ctype;
|
||||||
} else {
|
} else {
|
||||||
if (pctx->arc_buf_pos < pctx->min_chunk) {
|
if (pctx->arc_buf_pos < pctx->min_chunk) {
|
||||||
uint32_t diff = pctx->min_chunk - pctx->arc_buf_pos;
|
int diff = pctx->min_chunk - (int)(pctx->arc_buf_pos);
|
||||||
if (len > diff)
|
if (len > diff)
|
||||||
pctx->btype = pctx->ctype;
|
pctx->btype = pctx->ctype;
|
||||||
else
|
else
|
||||||
|
@ -918,9 +918,10 @@ copy_file_data(pc_ctx_t *pctx, struct archive *arc, struct archive_entry *entry,
|
||||||
size_t sz, offset, len;
|
size_t sz, offset, len;
|
||||||
ssize_t bytes_to_write;
|
ssize_t bytes_to_write;
|
||||||
uchar_t *mapbuf;
|
uchar_t *mapbuf;
|
||||||
int rv, fd;
|
int rv, fd, typ1;
|
||||||
const char *fpath;
|
const char *fpath;
|
||||||
|
|
||||||
|
typ1 = typ;
|
||||||
offset = 0;
|
offset = 0;
|
||||||
rv = 0;
|
rv = 0;
|
||||||
sz = archive_entry_size(entry);
|
sz = archive_entry_size(entry);
|
||||||
|
@ -1014,6 +1015,11 @@ do_map:
|
||||||
} else {
|
} else {
|
||||||
return (ARCHIVE_OK);
|
return (ARCHIVE_OK);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (write_header(arc, entry) == -1) {
|
||||||
|
close(fd);
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (write_header(arc, entry) == -1) {
|
if (write_header(arc, entry) == -1) {
|
||||||
|
@ -1029,7 +1035,7 @@ do_map:
|
||||||
* stage there is no need for blocking.
|
* stage there is no need for blocking.
|
||||||
*/
|
*/
|
||||||
wrtn = archive_write_data(arc, src, wlen);
|
wrtn = archive_write_data(arc, src, wlen);
|
||||||
if (wrtn < wlen) {
|
if (wrtn < (ssize_t)wlen) {
|
||||||
/* Write failed; this is bad */
|
/* Write failed; this is bad */
|
||||||
log_msg(LOG_ERR, 0, "Data write error: %s", archive_error_string(arc));
|
log_msg(LOG_ERR, 0, "Data write error: %s", archive_error_string(arc));
|
||||||
rv = -1;
|
rv = -1;
|
||||||
|
|
1
config
1
config
|
@ -714,7 +714,6 @@ echo "*************** Running configure in libarchive ****************"
|
||||||
(cd $libarchive_dir
|
(cd $libarchive_dir
|
||||||
CC=${GCC} ./configure --disable-bsdtar --disable-bsdcpio --without-zlib --without-bz2lib --without-lzmadec --without-lzma --without-lzo2 --without-nettle --without-openssl --without-xml2 --without-expat --disable-silent-rules --enable-shared=no --enable-static=yes --enable-xattr
|
CC=${GCC} ./configure --disable-bsdtar --disable-bsdcpio --without-zlib --without-bz2lib --without-lzmadec --without-lzma --without-lzo2 --without-nettle --without-openssl --without-xml2 --without-expat --disable-silent-rules --enable-shared=no --enable-static=yes --enable-xattr
|
||||||
[ $? -ne 0 ] && exit $?
|
[ $? -ne 0 ] && exit $?
|
||||||
cp Makefile Makefile.orig
|
|
||||||
cat Makefile | sed '
|
cat Makefile | sed '
|
||||||
s@$(BUILT_SOURCES)@@
|
s@$(BUILT_SOURCES)@@
|
||||||
s@$(libarchive_test_SOURCES)@@
|
s@$(libarchive_test_SOURCES)@@
|
||||||
|
|
69
filters/analyzer/analyzer.c
Normal file
69
filters/analyzer/analyzer.c
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
|
* algorithm lossless compression and decompression program.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012-2014 Moinak Ghosh. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program.
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
analyze_buffer(void *src, uint64_t srclen, int btype, int adapt_mode)
|
||||||
|
{
|
||||||
|
uchar_t *src1 = (uchar_t *)src;
|
||||||
|
int stype = PC_SUBTYPE(btype);
|
||||||
|
|
||||||
|
if (btype == TYPE_UNKNOWN || stype == TYPE_ARCHIVE_TAR) {
|
||||||
|
uint32_t freq[256], freq0x80[2] = {0};
|
||||||
|
uint64_t i, alphabetNum = 0, tot8b = 0;
|
||||||
|
uchar_t cur_byte;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Count number of 8-bit binary bytes and XML tags in source.
|
||||||
|
*/
|
||||||
|
tot8b = 0;
|
||||||
|
for (i = 0; i < srclen; i++) {
|
||||||
|
cur_byte = src1[i];
|
||||||
|
tot8b += (cur_byte & 0x80); // This way for possible auto-vectorization
|
||||||
|
freq[cur_byte]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
freq0x80[i>>7]+=freq[i];
|
||||||
|
|
||||||
|
for(i = 'a'; i <= 'z'; i++)
|
||||||
|
alphabetNum+=freq[i];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Heuristics for detecting BINARY vs generic TEXT
|
||||||
|
*/
|
||||||
|
tot8b /= 0x80;
|
||||||
|
if (tot8b < (srclen>>2 + srclen>>3)) {
|
||||||
|
btype = TYPE_TEXT;
|
||||||
|
if (freq0x80[1]<(srclen>>3) && (freq[' ']>(srclen>>7))
|
||||||
|
&& (freq['a']+freq['e']+freq['t']>(srclen>>4))
|
||||||
|
&& alphabetNum>(srclen>>2)) {
|
||||||
|
btype |= TYPE_ENGLISH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (btype);
|
||||||
|
}
|
30
filters/analyzer/analyzer.h
Normal file
30
filters/analyzer/analyzer.h
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
|
* algorithm lossless compression and decompression program.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2012-2014 Moinak Ghosh. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this program.
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ANALYZER_H
|
||||||
|
#define _ANALYZER_H
|
||||||
|
|
||||||
|
/*
 * Examine srclen bytes at src and return a possibly refined data type.
 * A btype that is neither TYPE_UNKNOWN nor has subtype TYPE_ARCHIVE_TAR is
 * returned unchanged. Otherwise the result may become TYPE_TEXT, optionally
 * OR'ed with TYPE_ENGLISH, based on byte-frequency heuristics.
 * adapt_mode is not used by the current implementation.
 */
int analyze_buffer(void *src, uint64_t srclen, int btype, int adapt_mode);
|
||||||
|
|
||||||
|
#endif
|
|
@ -432,7 +432,7 @@ delta2_encode_real(uchar_t *src, uint64_t srclen, uchar_t *dst, uint64_t *dstlen
|
||||||
val = gtot1 + (srclen - (pos - src));
|
val = gtot1 + (srclen - (pos - src));
|
||||||
}
|
}
|
||||||
*dstlen = pos2 - dst;
|
*dstlen = pos2 - dst;
|
||||||
return (val);
|
return ((int)val);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -1,66 +1,66 @@
|
||||||
#ifndef _DATATYPE_H
|
#ifndef _DATATYPE_H
|
||||||
#define _DATATYPE_H
|
#define _DATATYPE_H
|
||||||
|
|
||||||
#define CSA_VERSION 8
|
#define CSA_VERSION 8
|
||||||
|
|
||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
typedef uint16_t u16;
|
typedef uint16_t u16;
|
||||||
typedef uint32_t u32;
|
typedef uint32_t u32;
|
||||||
typedef uint64_t u64;
|
typedef uint64_t u64;
|
||||||
typedef int i32;
|
typedef int i32;
|
||||||
typedef int64_t i64;
|
typedef int64_t i64;
|
||||||
|
|
||||||
const u32 KB=1024;
|
const u32 KB=1024;
|
||||||
const u32 MB=1048576;
|
const u32 MB=1048576;
|
||||||
const u32 MinBlockSize=8*KB;
|
const u32 MinBlockSize=8*1024;
|
||||||
|
|
||||||
|
|
||||||
const u32 MaxChunkBits=21;
|
const u32 MaxChunkBits=21;
|
||||||
const u32 MaxChunkSize=(1<<(MaxChunkBits-1));
|
const u32 MaxChunkSize=(1<<(21-1));
|
||||||
const u32 MaxDictSize=512*MB;//Don't change
|
const u32 MaxDictSize=512*1048576;//Don't change
|
||||||
const u32 DefaultOutStreamBlockSize=128*KB;
|
const u32 DefaultOutStreamBlockSize=128*1024;
|
||||||
const u32 DefaultInBufferSize=MaxChunkSize; //Should >=MaxChunkSize
|
// Must be >= MaxChunkSize, i.e. 1<<(MaxChunkBits-1). Spelled as a literal
// expression, like MaxChunkSize, rather than by referencing the const variable.
const u32 DefaultInBufferSize=(1<<(21-1));
|
||||||
#define DLT_CHANNEL_MAX 5
|
#define DLT_CHANNEL_MAX 5
|
||||||
const u32 DltIndex[DLT_CHANNEL_MAX]={1,2,3,4,8};
|
const u32 DltIndex[DLT_CHANNEL_MAX]={1,2,3,4,8};
|
||||||
|
|
||||||
|
|
||||||
#define SAFEFREE(x) do{if ((x)!=NULL) free(x);x=NULL;}while(0)
|
#define SAFEFREE(x) do{if ((x)!=NULL) free(x);x=NULL;}while(0)
|
||||||
|
|
||||||
|
|
||||||
#define ENCODE 1
|
#define ENCODE 1
|
||||||
#define DECODE 2
|
#define DECODE 2
|
||||||
|
|
||||||
|
|
||||||
/*****ERRORS*****************/
|
/*****ERRORS*****************/
|
||||||
#define NO_ERROR 0
|
#define NO_ERROR 0
|
||||||
#define CANT_OPEN_FILE (-100)
|
#define CANT_OPEN_FILE (-100)
|
||||||
#define CANT_CREATE_FILE (-99)
|
#define CANT_CREATE_FILE (-99)
|
||||||
#define NOT_CSC_FILE (-98)
|
#define NOT_CSC_FILE (-98)
|
||||||
#define VERSION_INVALID (-97)
|
#define VERSION_INVALID (-97)
|
||||||
#define CSC_FILE_INVALID (-95)
|
#define CSC_FILE_INVALID (-95)
|
||||||
#define DECODE_ERROR (-96)
|
#define DECODE_ERROR (-96)
|
||||||
#define CANT_ALLOC_MEM (-94)
|
#define CANT_ALLOC_MEM (-94)
|
||||||
#define ALREADY_INITIALIZED (-93)
|
#define ALREADY_INITIALIZED (-93)
|
||||||
#define OPERATION_ERROR (-92)
|
#define OPERATION_ERROR (-92)
|
||||||
#define FILE_DIDNT_OPEN (-91)
|
#define FILE_DIDNT_OPEN (-91)
|
||||||
/*****ERRORS*****************/
|
/*****ERRORS*****************/
|
||||||
|
|
||||||
/******Block Type*************/
|
/******Block Type*************/
|
||||||
#define DT_NONE 0
|
#define DT_NONE 0
|
||||||
#define DT_HARD 0x05
|
#define DT_HARD 0x05
|
||||||
#define DT_EXE 0x04
|
#define DT_EXE 0x04
|
||||||
#define DT_BAD 0x03
|
#define DT_BAD 0x03
|
||||||
#define DT_NORMAL 0x02
|
#define DT_NORMAL 0x02
|
||||||
#define DT_SKIP 0x01
|
#define DT_SKIP 0x01
|
||||||
#define DT_AUDIO 0x06
|
#define DT_AUDIO 0x06
|
||||||
#define DT_RGB 0x07
|
#define DT_RGB 0x07
|
||||||
#define DT_FAST 0x08
|
#define DT_FAST 0x08
|
||||||
#define SIG_EOF 0x09
|
#define SIG_EOF 0x09
|
||||||
#define DT_ENGTXT 0x0A
|
#define DT_ENGTXT 0x0A
|
||||||
#define DT_DLT 0x10
|
#define DT_DLT 0x10
|
||||||
#define DT_MAXINDEX 0x1F
|
#define DT_MAXINDEX 0x1F
|
||||||
/******Block Type*************/
|
/******Block Type*************/
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,278 +1,306 @@
|
||||||
/*
|
/*
|
||||||
* This file is a part of Pcompress, a chunked parallel multi-
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
* algorithm lossless compression and decompression program.
|
* algorithm lossless compression and decompression program.
|
||||||
*
|
*
|
||||||
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
||||||
* Use is subject to license terms.
|
* Use is subject to license terms.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or
|
* This program is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
* License as published by the Free Software Foundation; either
|
* License as published by the Free Software Foundation; either
|
||||||
* version 3 of the License, or (at your option) any later version.
|
* version 3 of the License, or (at your option) any later version.
|
||||||
*
|
*
|
||||||
* This program is distributed in the hope that it will be useful,
|
* This program is distributed in the hope that it will be useful,
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
* Lesser General Public License for more details.
|
* Lesser General Public License for more details.
|
||||||
*
|
*
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
* License along with this program.
|
* License along with this program.
|
||||||
* If not, see <http://www.gnu.org/licenses/>.
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
*
|
*
|
||||||
* moinakg@gmail.com, http://moinakg.wordpress.com/
|
* moinakg@gmail.com, http://moinakg.wordpress.com/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dict filter for text files. Adapted from Public Domain sources
|
* Dict filter for text files. Adapted from Public Domain sources
|
||||||
* of Fu Siyuan's CSC 3.2 archiver.
|
* of Fu Siyuan's CSC 3.2 archiver.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "DictFilter.h"
|
#include "DictFilter.h"
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
const u32 wordNum = 123;
|
class DictFilter
|
||||||
|
{
|
||||||
u8 wordList[wordNum][8] =
|
public:
|
||||||
{
|
~DictFilter();
|
||||||
"",
|
DictFilter();
|
||||||
"ac","ad","ai","al","am",
|
|
||||||
"an","ar","as","at","ea",
|
u32 Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize);
|
||||||
"ec","ed","ee","el","en",
|
void Inverse_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize);
|
||||||
"er","es","et","id","ie",
|
|
||||||
"ig","il","in","io","is",
|
private:
|
||||||
"it","of","ol","on","oo",
|
typedef struct
|
||||||
"or","os","ou","ow","ul",
|
{
|
||||||
"un","ur","us","ba","be",
|
u32 next[26];
|
||||||
"ca","ce","co","ch","de",
|
u8 symbol;
|
||||||
"di","ge","gh","ha","he",
|
} CTreeNode;
|
||||||
"hi","ho","ra","re","ri",
|
CTreeNode wordTree[MAX_WORDTREE_NODE_NUM];
|
||||||
"ro","rs","la","le","li",
|
u32 nodeMum;
|
||||||
"lo","ld","ll","ly","se",
|
u8 maxSymbol;
|
||||||
"si","so","sh","ss","st",
|
//Used for DICT transformer. Words are stored in trees.
|
||||||
"ma","me","mi","ne","nc",
|
|
||||||
"nd","ng","nt","pa","pe",
|
u32 wordIndex[256];
|
||||||
"ta","te","ti","to","th",
|
//Used for DICT untransformer.choose words by symbols.
|
||||||
"tr","wa","ve",
|
void MakeWordTree(); //Init the DICT transformer
|
||||||
"all","and","but","dow",
|
|
||||||
"for","had","hav","her",
|
u32 x0,x1;
|
||||||
"him","his","man","mor",
|
u32 i,k;
|
||||||
"not","now","one","out",
|
};
|
||||||
"she","the","was","wer",
|
|
||||||
"whi","whe","wit","you",
|
const u32 wordNum = 123;
|
||||||
"any","are",
|
|
||||||
"that","said","with","have",
|
u8 wordList[wordNum][8] =
|
||||||
"this","from","were","tion",
|
{
|
||||||
};
|
"",
|
||||||
|
"ac","ad","ai","al","am",
|
||||||
|
"an","ar","as","at","ea",
|
||||||
void
|
"ec","ed","ee","el","en",
|
||||||
DictFilter::MakeWordTree()
|
"er","es","et","id","ie",
|
||||||
{
|
"ig","il","in","io","is",
|
||||||
u32 i,j;
|
"it","of","ol","on","oo",
|
||||||
u32 treePos;
|
"or","os","ou","ow","ul",
|
||||||
u8 symbolIndex = 0x82;
|
"un","ur","us","ba","be",
|
||||||
|
"ca","ce","co","ch","de",
|
||||||
nodeMum = 1;
|
"di","ge","gh","ha","he",
|
||||||
|
"hi","ho","ra","re","ri",
|
||||||
memset(wordTree,0,sizeof(wordTree));
|
"ro","rs","la","le","li",
|
||||||
|
"lo","ld","ll","ly","se",
|
||||||
for (i = 1; i < wordNum; i++) {
|
"si","so","sh","ss","st",
|
||||||
treePos = 0;
|
"ma","me","mi","ne","nc",
|
||||||
for(j = 0; wordList[i][j] != 0; j++) {
|
"nd","ng","nt","pa","pe",
|
||||||
u32 idx = wordList[i][j] - 'a';
|
"ta","te","ti","to","th",
|
||||||
if (wordTree[treePos].next[idx]) {
|
"tr","wa","ve",
|
||||||
treePos = wordTree[treePos].next[idx];
|
"all","and","but","dow",
|
||||||
} else {
|
"for","had","hav","her",
|
||||||
wordTree[treePos].next[idx] = nodeMum;
|
"him","his","man","mor",
|
||||||
treePos = nodeMum;
|
"not","now","one","out",
|
||||||
nodeMum++;
|
"she","the","was","wer",
|
||||||
}
|
"whi","whe","wit","you",
|
||||||
}
|
"any","are",
|
||||||
wordIndex[symbolIndex] = i;
|
"that","said","with","have",
|
||||||
wordTree[treePos].symbol = symbolIndex++;
|
"this","from","were","tion",
|
||||||
}
|
};
|
||||||
|
|
||||||
maxSymbol=symbolIndex;
|
|
||||||
|
void
|
||||||
}
|
DictFilter::MakeWordTree()
|
||||||
|
{
|
||||||
|
u32 i,j;
|
||||||
DictFilter::DictFilter()
|
u32 treePos;
|
||||||
{
|
u8 symbolIndex = 0x82;
|
||||||
MakeWordTree();
|
|
||||||
}
|
nodeMum = 1;
|
||||||
|
|
||||||
|
memset(wordTree,0,sizeof(wordTree));
|
||||||
|
|
||||||
DictFilter::~DictFilter()
|
for (i = 1; i < wordNum; i++) {
|
||||||
{
|
treePos = 0;
|
||||||
}
|
for(j = 0; wordList[i][j] != 0; j++) {
|
||||||
|
u32 idx = wordList[i][j] - 'a';
|
||||||
|
if (wordTree[treePos].next[idx]) {
|
||||||
u32
|
treePos = wordTree[treePos].next[idx];
|
||||||
DictFilter::Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
} else {
|
||||||
{
|
wordTree[treePos].next[idx] = nodeMum;
|
||||||
if (size < 16384)
|
treePos = nodeMum;
|
||||||
return 0;
|
nodeMum++;
|
||||||
|
}
|
||||||
u32 i,j,treePos = 0;
|
}
|
||||||
u32 lastSymbol = 0;
|
wordIndex[symbolIndex] = i;
|
||||||
u32 dstSize = 0;
|
wordTree[treePos].symbol = symbolIndex++;
|
||||||
u32 idx;
|
}
|
||||||
|
|
||||||
|
maxSymbol=symbolIndex;
|
||||||
for(i = 0; i < size-5;) {
|
|
||||||
if (src[i] >= 'a' && src[i] <= 'z') {
|
}
|
||||||
|
|
||||||
u32 matchSymbol = 0,longestWord = 0;
|
|
||||||
treePos = 0;
|
DictFilter::DictFilter()
|
||||||
for(j = 0;;) {
|
{
|
||||||
idx = src[i+j] - 'a';
|
MakeWordTree();
|
||||||
if (idx < 0 || idx > 25)
|
}
|
||||||
break;
|
|
||||||
if (wordTree[treePos].next[idx] == 0)
|
|
||||||
break;
|
|
||||||
|
DictFilter::~DictFilter()
|
||||||
treePos=wordTree[treePos].next[idx];
|
{
|
||||||
j++;
|
}
|
||||||
if (wordTree[treePos].symbol) {
|
|
||||||
matchSymbol = wordTree[treePos].symbol;
|
|
||||||
longestWord = j;
|
u32
|
||||||
}
|
DictFilter::Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
||||||
}
|
{
|
||||||
|
if (size < 16384)
|
||||||
if (matchSymbol) {
|
return 0;
|
||||||
dst[dstSize++] = matchSymbol;
|
|
||||||
i += longestWord;
|
u32 i,j,treePos = 0;
|
||||||
continue;
|
u32 lastSymbol = 0;
|
||||||
}
|
u32 dstSize = 0;
|
||||||
lastSymbol = 0;
|
u32 idx;
|
||||||
dst[dstSize++] = src[i];
|
|
||||||
i++;
|
|
||||||
} else {
|
for(i = 0; i < size-5;) {
|
||||||
if (src[i] >= 0x82) {
|
if (src[i] >= 'a' && src[i] <= 'z') {
|
||||||
dst[dstSize++] = 254;
|
|
||||||
dst[dstSize++] = src[i];
|
u32 matchSymbol = 0,longestWord = 0;
|
||||||
}
|
treePos = 0;
|
||||||
else
|
for(j = 0;;) {
|
||||||
dst[dstSize++] = src[i];
|
idx = src[i+j] - 'a';
|
||||||
|
if (idx < 0 || idx > 25)
|
||||||
lastSymbol = 0;
|
break;
|
||||||
treePos = 0;
|
if (wordTree[treePos].next[idx] == 0)
|
||||||
i++;
|
break;
|
||||||
}
|
|
||||||
|
treePos=wordTree[treePos].next[idx];
|
||||||
}
|
j++;
|
||||||
|
if (wordTree[treePos].symbol) {
|
||||||
for (; i<size; i++) {
|
matchSymbol = wordTree[treePos].symbol;
|
||||||
if (src[i] >= 0x82) {
|
longestWord = j;
|
||||||
dst[dstSize++] = 254;
|
}
|
||||||
dst[dstSize++] = src[i];
|
}
|
||||||
}
|
|
||||||
else
|
if (matchSymbol) {
|
||||||
dst[dstSize++] = src[i];
|
dst[dstSize++] = matchSymbol;
|
||||||
}
|
i += longestWord;
|
||||||
|
continue;
|
||||||
if (dstSize > size*0.82)
|
}
|
||||||
return 0;
|
lastSymbol = 0;
|
||||||
|
dst[dstSize++] = src[i];
|
||||||
*dstsize = dstSize;
|
i++;
|
||||||
return 1;
|
} else {
|
||||||
}
|
if (src[i] >= 0x82) {
|
||||||
|
dst[dstSize++] = 254;
|
||||||
void
|
dst[dstSize++] = src[i];
|
||||||
DictFilter::Inverse_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
}
|
||||||
{
|
else
|
||||||
|
dst[dstSize++] = src[i];
|
||||||
u32 i = 0,j;
|
|
||||||
u32 dstPos = 0,idx;
|
lastSymbol = 0;
|
||||||
|
treePos = 0;
|
||||||
while(dstPos < *dstsize && i < size) {
|
i++;
|
||||||
if (src[i] >= 0x82 && src[i] < maxSymbol) {
|
}
|
||||||
idx = wordIndex[src[i]];
|
|
||||||
for(j=0; wordList[idx][j]; j++)
|
}
|
||||||
dst[dstPos++] = wordList[idx][j];
|
|
||||||
}
|
for (; i<size; i++) {
|
||||||
else if (src[i] == 254 && (i+1 < size && src[i+1] >= 0x82)) {
|
if (src[i] >= 0x82) {
|
||||||
i++;
|
dst[dstSize++] = 254;
|
||||||
dst[dstPos++] = src[i];
|
dst[dstSize++] = src[i];
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
dst[dstPos++] = src[i];
|
dst[dstSize++] = src[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
i++;
|
if (dstSize > size*0.82)
|
||||||
}
|
return 0;
|
||||||
*dstsize = dstPos;
|
|
||||||
}
|
*dstsize = dstSize;
|
||||||
|
return 1;
|
||||||
#ifdef __cplusplus
|
}
|
||||||
extern "C" {
|
|
||||||
#endif
|
void
|
||||||
|
DictFilter::Inverse_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
||||||
void *
|
{
|
||||||
new_dict_context()
|
|
||||||
{
|
u32 i = 0,j;
|
||||||
DictFilter *df = new DictFilter();
|
u32 dstPos = 0,idx;
|
||||||
return (static_cast<void *>(df));
|
|
||||||
}
|
while(dstPos < *dstsize && i < size) {
|
||||||
|
if (src[i] >= 0x82 && src[i] < maxSymbol) {
|
||||||
void
|
idx = wordIndex[src[i]];
|
||||||
delete_dict_context(void *dict_ctx)
|
for(j=0; wordList[idx][j]; j++)
|
||||||
{
|
dst[dstPos++] = wordList[idx][j];
|
||||||
if (dict_ctx) {
|
}
|
||||||
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
else if (src[i] == 254 && (i+1 < size && src[i+1] >= 0x82)) {
|
||||||
delete df;
|
i++;
|
||||||
}
|
dst[dstPos++] = src[i];
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
int
|
dst[dstPos++] = src[i];
|
||||||
dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
}
|
||||||
{
|
|
||||||
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
i++;
|
||||||
u32 fl = fromlen;
|
}
|
||||||
u32 dl = *dstlen;
|
*dstsize = dstPos;
|
||||||
u8 *dst;
|
}
|
||||||
|
|
||||||
if (fromlen > UINT32_MAX)
|
#ifdef __cplusplus
|
||||||
return (-1);
|
extern "C" {
|
||||||
U32_P(to) = LE32(fromlen);
|
#endif
|
||||||
dst = to + 4;
|
|
||||||
dl -= 4;
|
void *
|
||||||
if (df->Forward_Dict(from, fl, dst, &dl)) {
|
new_dict_context()
|
||||||
*dstlen = dl + 4;
|
{
|
||||||
return (0);
|
DictFilter *df = new DictFilter();
|
||||||
}
|
return (static_cast<void *>(df));
|
||||||
return (-1);
|
}
|
||||||
}
|
|
||||||
|
void
|
||||||
int
|
delete_dict_context(void *dict_ctx)
|
||||||
dict_decode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
{
|
||||||
{
|
if (dict_ctx) {
|
||||||
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
||||||
u32 fl = fromlen;
|
delete df;
|
||||||
u32 dl;
|
}
|
||||||
u8 *src;
|
}
|
||||||
|
|
||||||
dl = U32_P(from);
|
int
|
||||||
if (dl > *dstlen) {
|
dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
log_msg(LOG_ERR, 0, "Destination overflow in dict_decode.");
|
{
|
||||||
return (-1);
|
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
||||||
}
|
u32 fl = fromlen;
|
||||||
*dstlen = dl;
|
u32 dl = *dstlen;
|
||||||
src = from + 4;
|
u8 *dst;
|
||||||
fl -= 4;
|
|
||||||
|
if (fromlen > UINT32_MAX)
|
||||||
df->Inverse_Dict(src, fl, to, &dl);
|
return (-1);
|
||||||
if (dl < *dstlen)
|
U32_P(to) = LE32(fromlen);
|
||||||
return (-1);
|
dst = to + 4;
|
||||||
return (0);
|
dl -= 4;
|
||||||
}
|
if (df->Forward_Dict(from, fl, dst, &dl)) {
|
||||||
|
*dstlen = dl + 4;
|
||||||
#ifdef __cplusplus
|
return (0);
|
||||||
}
|
}
|
||||||
#endif
|
return (-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dict_decode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
|
{
|
||||||
|
DictFilter *df = static_cast<DictFilter *>(dict_ctx);
|
||||||
|
u32 fl = fromlen;
|
||||||
|
u32 dl;
|
||||||
|
u8 *src;
|
||||||
|
|
||||||
|
dl = U32_P(from);
|
||||||
|
if (dl > *dstlen) {
|
||||||
|
log_msg(LOG_ERR, 0, "Destination overflow in dict_decode.");
|
||||||
|
return (-1);
|
||||||
|
}
|
||||||
|
*dstlen = dl;
|
||||||
|
src = from + 4;
|
||||||
|
fl -= 4;
|
||||||
|
|
||||||
|
df->Inverse_Dict(src, fl, to, &dl);
|
||||||
|
if (dl < *dstlen)
|
||||||
|
return (-1);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -1,80 +1,52 @@
|
||||||
/*
|
/*
|
||||||
* This file is a part of Pcompress, a chunked parallel multi-
|
* This file is a part of Pcompress, a chunked parallel multi-
|
||||||
* algorithm lossless compression and decompression program.
|
* algorithm lossless compression and decompression program.
|
||||||
*
|
*
|
||||||
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
|
||||||
* Use is subject to license terms.
|
* Use is subject to license terms.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or
|
* This program is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
* License as published by the Free Software Foundation; either
|
* License as published by the Free Software Foundation; either
|
||||||
* version 3 of the License, or (at your option) any later version.
|
* version 3 of the License, or (at your option) any later version.
|
||||||
*
|
*
|
||||||
* This program is distributed in the hope that it will be useful,
|
* This program is distributed in the hope that it will be useful,
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
* Lesser General Public License for more details.
|
* Lesser General Public License for more details.
|
||||||
*
|
*
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
* License along with this program.
|
* License along with this program.
|
||||||
* If not, see <http://www.gnu.org/licenses/>.
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
*
|
*
|
||||||
* moinakg@gmail.com, http://moinakg.wordpress.com/
|
* moinakg@gmail.com, http://moinakg.wordpress.com/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dict filter for text files. Adapted from Public Domain sources
|
* Dict filter for text files. Adapted from Public Domain sources
|
||||||
* of Fu Siyuan's CSC 3.2 archiver.
|
* of Fu Siyuan's CSC 3.2 archiver.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _FILTERS_H
|
#ifndef _FILTERS_H
|
||||||
#define _FILTERS_H
|
#define _FILTERS_H
|
||||||
|
|
||||||
#include <utils.h>
|
#include <utils.h>
|
||||||
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
#define MAX_WORDTREE_NODE_NUM 300 //Enough now!
|
#define MAX_WORDTREE_NODE_NUM 300 //Enough now!
|
||||||
|
|
||||||
class DictFilter
|
#ifdef __cplusplus
|
||||||
{
|
extern "C" {
|
||||||
public:
|
#endif
|
||||||
~DictFilter();
|
|
||||||
DictFilter();
|
void *new_dict_context();
|
||||||
|
void delete_dict_context(void *dict_ctx);
|
||||||
u32 Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize);
|
|
||||||
void Inverse_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize);
|
int dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
||||||
|
int dict_decode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
||||||
private:
|
|
||||||
typedef struct
|
#ifdef __cplusplus
|
||||||
{
|
}
|
||||||
u32 next[26];
|
#endif
|
||||||
u8 symbol;
|
|
||||||
} CTreeNode;
|
#endif
|
||||||
CTreeNode wordTree[MAX_WORDTREE_NODE_NUM];
|
|
||||||
u32 nodeMum;
|
|
||||||
u8 maxSymbol;
|
|
||||||
//Used for DICT transformer. Words are stored in trees.
|
|
||||||
|
|
||||||
u32 wordIndex[256];
|
|
||||||
//Used for DICT untransformer.choose words by symbols.
|
|
||||||
void MakeWordTree(); //Init the DICT transformer
|
|
||||||
|
|
||||||
u32 x0,x1;
|
|
||||||
u32 i,k;
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void *new_dict_context();
|
|
||||||
void delete_dict_context(void *dict_ctx);
|
|
||||||
|
|
||||||
int dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
|
||||||
int dict_decode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
73
pcompress.c
73
pcompress.c
|
@ -56,6 +56,8 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <pc_archive.h>
|
#include <pc_archive.h>
|
||||||
#include <filters/dispack/dis.hpp>
|
#include <filters/dispack/dis.hpp>
|
||||||
|
#include "analyzer.h"
|
||||||
|
#include "filters/dict/DictFilter.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use 8MB chunks by default.
|
* We use 8MB chunks by default.
|
||||||
|
@ -204,7 +206,7 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
||||||
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
|
void *dst, uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data,
|
||||||
algo_props_t *props)
|
algo_props_t *props)
|
||||||
{
|
{
|
||||||
uchar_t *dest = (uchar_t *)dst, type = 0;
|
uchar_t *dest = (uchar_t *)dst, type = 0, atype;
|
||||||
int64_t result;
|
int64_t result;
|
||||||
uint64_t _dstlen, fromlen;
|
uint64_t _dstlen, fromlen;
|
||||||
uchar_t *from, *to;
|
uchar_t *from, *to;
|
||||||
|
@ -238,13 +240,45 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The analyzer is run below only for non-archive mode. When archiving the
|
||||||
|
* archiver thread runs analyzer on incremental blocks and sets the type
|
||||||
|
* accordingly.
|
||||||
|
*/
|
||||||
|
atype = btype;
|
||||||
|
/*
|
||||||
|
* Run an analyzer on the data. At present the analyzer only tries
|
||||||
|
* to detect if this is text for running the dict filter.
|
||||||
|
*/
|
||||||
|
if (pctx->enable_analyzer) {
|
||||||
|
atype = analyze_buffer(src, srclen, btype, pctx->adapt_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enabling LZP also enables the DICT filter since we are dealing with text
|
||||||
|
* in any case.
|
||||||
|
*/
|
||||||
|
if (pctx->lzp_preprocess && PC_TYPE(atype) == TYPE_TEXT) {
|
||||||
|
void *dct = new_dict_context();
|
||||||
|
_dstlen = fromlen;
|
||||||
|
result = dict_encode(dct, from, fromlen, to, &_dstlen);
|
||||||
|
delete_dict_context(dct);
|
||||||
|
if (result != -1) {
|
||||||
|
uchar_t *tmp;
|
||||||
|
tmp = from;
|
||||||
|
from = to;
|
||||||
|
to = tmp;
|
||||||
|
fromlen = _dstlen;
|
||||||
|
type |= PREPROC_TYPE_DICT;
|
||||||
|
}
|
||||||
|
}
|
||||||
#ifndef _MPLV2_LICENSE_
|
#ifndef _MPLV2_LICENSE_
|
||||||
if (pctx->lzp_preprocess && stype != TYPE_BMP && stype != TYPE_TIFF) {
|
if (pctx->lzp_preprocess && stype != TYPE_BMP && stype != TYPE_TIFF) {
|
||||||
int hashsize;
|
int hashsize;
|
||||||
|
|
||||||
hashsize = lzp_hash_size(level);
|
hashsize = lzp_hash_size(level);
|
||||||
result = lzp_compress((const uchar_t *)from, to, fromlen,
|
result = lzp_compress((const uchar_t *)from, to, fromlen,
|
||||||
hashsize, LZP_DEFAULT_LZPMINLEN, 0);
|
hashsize, LZP_DEFAULT_LZPMINLEN, 0);
|
||||||
if (result >= 0 && result < srclen) {
|
if (result >= 0 && result < srclen) {
|
||||||
uchar_t *tmp;
|
uchar_t *tmp;
|
||||||
tmp = from;
|
tmp = from;
|
||||||
|
@ -375,6 +409,20 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (type & PREPROC_TYPE_DICT) {
|
||||||
|
void *dct = new_dict_context();
|
||||||
|
result = dict_decode(dct, src, srclen, dst, &_dstlen);
|
||||||
|
delete_dict_context(dct);
|
||||||
|
if (result != -1) {
|
||||||
|
memcpy(src, dst, _dstlen);
|
||||||
|
srclen = _dstlen;
|
||||||
|
*dstlen = _dstlen;
|
||||||
|
} else {
|
||||||
|
log_msg(LOG_ERR, 0, "DICT decoding failed.");
|
||||||
|
return (result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (type & PREPROC_TYPE_DISPACK) {
|
if (type & PREPROC_TYPE_DISPACK) {
|
||||||
result = dispack_decode((uchar_t *)src, srclen, (uchar_t *)dst, &_dstlen1);
|
result = dispack_decode((uchar_t *)src, srclen, (uchar_t *)dst, &_dstlen1);
|
||||||
if (result != -1) {
|
if (result != -1) {
|
||||||
|
@ -689,13 +737,13 @@ cont:
|
||||||
* Compressed length: 8 bytes.
|
* Compressed length: 8 bytes.
|
||||||
* Checksum: Up to 64 bytes.
|
* Checksum: Up to 64 bytes.
|
||||||
* Chunk flags: 1 byte.
|
* Chunk flags: 1 byte.
|
||||||
*
|
*
|
||||||
* Chunk Flags, 8 bits:
|
* Chunk Flags, 8 bits:
|
||||||
* I I I I I I I I
|
* I I I I I I I I
|
||||||
* | | | | | |
|
* | | | | | |
|
||||||
* | '-----' | | `- 0 - Uncompressed
|
* | '-----' | | `- 0 - Uncompressed
|
||||||
* | | | | 1 - Compressed
|
* | | | | 1 - Compressed
|
||||||
* | | | |
|
* | | | |
|
||||||
* | | | `---- 1 - Chunk was Deduped
|
* | | | `---- 1 - Chunk was Deduped
|
||||||
* | | `------- 1 - Chunk was pre-compressed
|
* | | `------- 1 - Chunk was pre-compressed
|
||||||
* | |
|
* | |
|
||||||
|
@ -1070,7 +1118,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename)
|
||||||
memset(zero, 0, MAX_PW_LEN);
|
memset(zero, 0, MAX_PW_LEN);
|
||||||
fd = open(pctx->pwd_file, O_RDWR);
|
fd = open(pctx->pwd_file, O_RDWR);
|
||||||
if (fd != -1) {
|
if (fd != -1) {
|
||||||
pw_len = lseek(fd, 0, SEEK_END);
|
pw_len = (int)lseek(fd, 0, SEEK_END);
|
||||||
if (pw_len != -1) {
|
if (pw_len != -1) {
|
||||||
if (pw_len > MAX_PW_LEN) pw_len = MAX_PW_LEN-1;
|
if (pw_len > MAX_PW_LEN) pw_len = MAX_PW_LEN-1;
|
||||||
lseek(fd, 0, SEEK_SET);
|
lseek(fd, 0, SEEK_SET);
|
||||||
|
@ -1552,9 +1600,11 @@ redo:
|
||||||
dedupe_index_sz = 0;
|
dedupe_index_sz = 0;
|
||||||
type = COMPRESSED;
|
type = COMPRESSED;
|
||||||
|
|
||||||
|
|
||||||
/* Perform Dedup if enabled. */
|
/* Perform Dedup if enabled. */
|
||||||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
||||||
dedupe_context_t *rctx;
|
dedupe_context_t *rctx;
|
||||||
|
uint64_t rb = tdat->rbytes;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute checksum of original uncompressed chunk. When doing dedup
|
* Compute checksum of original uncompressed chunk. When doing dedup
|
||||||
|
@ -1569,8 +1619,9 @@ redo:
|
||||||
rctx = tdat->rctx;
|
rctx = tdat->rctx;
|
||||||
reset_dedupe_context(tdat->rctx);
|
reset_dedupe_context(tdat->rctx);
|
||||||
rctx->cbuf = tdat->uncompressed_chunk;
|
rctx->cbuf = tdat->uncompressed_chunk;
|
||||||
dedupe_index_sz = dedupe_compress(tdat->rctx, tdat->cmp_seg, &(tdat->rbytes), 0,
|
dedupe_index_sz = dedupe_compress(tdat->rctx, tdat->cmp_seg, &rb, 0,
|
||||||
NULL, tdat->cksum_mt);
|
NULL, tdat->cksum_mt);
|
||||||
|
tdat->rbytes = rb;
|
||||||
if (!rctx->valid) {
|
if (!rctx->valid) {
|
||||||
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
memcpy(tdat->uncompressed_chunk, tdat->cmp_seg, rbytes);
|
||||||
tdat->rbytes = rbytes;
|
tdat->rbytes = rbytes;
|
||||||
|
@ -1744,6 +1795,10 @@ plain_index:
|
||||||
tdat->len_cmp += (pctx->cksum_bytes + pctx->mac_bytes);
|
tdat->len_cmp += (pctx->cksum_bytes + pctx->mac_bytes);
|
||||||
rbytes = tdat->len_cmp - len_cmp; // HDR size for HMAC
|
rbytes = tdat->len_cmp - len_cmp; // HDR size for HMAC
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In adaptive mode the return value from the compression function indicates
|
||||||
|
* which algorithm was used on the chunk. We have to store that.
|
||||||
|
*/
|
||||||
if (pctx->adapt_mode)
|
if (pctx->adapt_mode)
|
||||||
type |= (rv << 4);
|
type |= (rv << 4);
|
||||||
|
|
||||||
|
@ -2750,7 +2805,8 @@ init_algo(pc_ctx_t *pctx, const char *algo, int bail)
|
||||||
pctx->_deinit_func = adapt_deinit;
|
pctx->_deinit_func = adapt_deinit;
|
||||||
pctx->_stats_func = adapt_stats;
|
pctx->_stats_func = adapt_stats;
|
||||||
pctx->_props_func = adapt_props;
|
pctx->_props_func = adapt_props;
|
||||||
pctx->adapt_mode = 1;
|
pctx->adapt_mode = 2;
|
||||||
|
pctx->enable_analyzer = 1;
|
||||||
rv = 0;
|
rv = 0;
|
||||||
|
|
||||||
} else if (memcmp(algorithm, "adapt", 5) == 0) {
|
} else if (memcmp(algorithm, "adapt", 5) == 0) {
|
||||||
|
@ -2761,6 +2817,7 @@ init_algo(pc_ctx_t *pctx, const char *algo, int bail)
|
||||||
pctx->_stats_func = adapt_stats;
|
pctx->_stats_func = adapt_stats;
|
||||||
pctx->_props_func = adapt_props;
|
pctx->_props_func = adapt_props;
|
||||||
pctx->adapt_mode = 1;
|
pctx->adapt_mode = 1;
|
||||||
|
pctx->enable_analyzer = 1;
|
||||||
rv = 0;
|
rv = 0;
|
||||||
#ifdef ENABLE_PC_LIBBSC
|
#ifdef ENABLE_PC_LIBBSC
|
||||||
} else if (memcmp(algorithm, "libbsc", 6) == 0) {
|
} else if (memcmp(algorithm, "libbsc", 6) == 0) {
|
||||||
|
@ -2770,7 +2827,6 @@ init_algo(pc_ctx_t *pctx, const char *algo, int bail)
|
||||||
pctx->_deinit_func = libbsc_deinit;
|
pctx->_deinit_func = libbsc_deinit;
|
||||||
pctx->_stats_func = libbsc_stats;
|
pctx->_stats_func = libbsc_stats;
|
||||||
pctx->_props_func = libbsc_props;
|
pctx->_props_func = libbsc_props;
|
||||||
pctx->adapt_mode = 1;
|
|
||||||
rv = 0;
|
rv = 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3337,6 +3393,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
}
|
}
|
||||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode || pctx->dispack_preprocess) {
|
if (pctx->lzp_preprocess || pctx->enable_delta2_encode || pctx->dispack_preprocess) {
|
||||||
pctx->preprocess_mode = 1;
|
pctx->preprocess_mode = 1;
|
||||||
|
pctx->enable_analyzer = 1;
|
||||||
}
|
}
|
||||||
if (pctx->chunksize == 0) {
|
if (pctx->chunksize == 0) {
|
||||||
if (pctx->level < 9) {
|
if (pctx->level < 9) {
|
||||||
|
|
|
@ -60,13 +60,14 @@ extern "C" {
|
||||||
#define CHSIZE_MASK 0x80
|
#define CHSIZE_MASK 0x80
|
||||||
#define BZIP2_A_NUM 16
|
#define BZIP2_A_NUM 16
|
||||||
#define LZMA_A_NUM 32
|
#define LZMA_A_NUM 32
|
||||||
#define CHUNK_FLAG_DEDUP 2
|
#define CHUNK_FLAG_DEDUP 2
|
||||||
#define CHUNK_FLAG_PREPROC 4
|
#define CHUNK_FLAG_PREPROC 4
|
||||||
#define COMP_EXTN ".pz"
|
#define COMP_EXTN ".pz"
|
||||||
|
|
||||||
#define PREPROC_TYPE_LZP 1
|
#define PREPROC_TYPE_LZP 1
|
||||||
#define PREPROC_TYPE_DELTA2 2
|
#define PREPROC_TYPE_DELTA2 2
|
||||||
#define PREPROC_TYPE_DISPACK 4
|
#define PREPROC_TYPE_DISPACK 4
|
||||||
|
#define PREPROC_TYPE_DICT 8
|
||||||
#define PREPROC_COMPRESSED 128
|
#define PREPROC_COMPRESSED 128
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -212,6 +213,7 @@ typedef struct pc_ctx {
|
||||||
int delta2_nstrides;
|
int delta2_nstrides;
|
||||||
int enable_rabin_split;
|
int enable_rabin_split;
|
||||||
int enable_fixed_scan;
|
int enable_fixed_scan;
|
||||||
|
int enable_analyzer;
|
||||||
int preprocess_mode;
|
int preprocess_mode;
|
||||||
int lzp_preprocess;
|
int lzp_preprocess;
|
||||||
int dispack_preprocess;
|
int dispack_preprocess;
|
||||||
|
@ -275,7 +277,7 @@ struct cmp_data {
|
||||||
uchar_t *compressed_chunk;
|
uchar_t *compressed_chunk;
|
||||||
uchar_t *uncompressed_chunk;
|
uchar_t *uncompressed_chunk;
|
||||||
dedupe_context_t *rctx;
|
dedupe_context_t *rctx;
|
||||||
uint64_t rbytes;
|
int64_t rbytes;
|
||||||
uint64_t chunksize;
|
uint64_t chunksize;
|
||||||
uint64_t len_cmp, len_cmp_be;
|
uint64_t len_cmp, len_cmp_be;
|
||||||
uchar_t checksum[CKSUM_MAX_BYTES];
|
uchar_t checksum[CKSUM_MAX_BYTES];
|
||||||
|
|
|
@ -383,14 +383,18 @@ get_total_ram()
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
|
#define NANO_SEC (1000000000ULL)
|
||||||
int
|
int
|
||||||
clock_gettime(int clk_id, struct timespec *ts)
|
clock_gettime(int clk_id, struct timespec *ts)
|
||||||
{
|
{
|
||||||
if (clk_id == CLOCK_MONOTONIC) {
|
if (clk_id == CLOCK_MONOTONIC) {
|
||||||
uint64_t abstime = mach_absolute_time();
|
uint64_t nanotime = mach_absolute_time() *
|
||||||
return (abstime * sTimebaseInfo.numer / sTimebaseInfo.denom);
|
sTimebaseInfo.numer / sTimebaseInfo.denom;
|
||||||
|
ts->tv_sec = nanotime / NANO_SEC;
|
||||||
|
ts->tv_nsec = nanotime % NANO_SEC;
|
||||||
|
return (0);
|
||||||
}
|
}
|
||||||
return (0);
|
return (EINVAL);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -543,8 +547,7 @@ log_msg(log_level_t log_level, int show_errno, const char *format, ...)
|
||||||
fputs(msg, stderr);
|
fputs(msg, stderr);
|
||||||
|
|
||||||
} else if (ldest.type == LOG_FILE) {
|
} else if (ldest.type == LOG_FILE) {
|
||||||
int rv;
|
(void) write(ldest.fd, msg, strlen(msg));
|
||||||
rv = write(ldest.fd, msg, strlen(msg));
|
|
||||||
} else {
|
} else {
|
||||||
ldest.cb(msg);
|
ldest.cb(msg);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue