A bunch of improvements and fixes.
- Fix heap corruption in DICT Filter.
- Make the default Dedup block size 8KB.
- Revamp executable file handling: Part#1.
- Develop a new E8E9 filter that works better than Dispack on raw data blocks.
- Remove block-based Dispack encoding. A file-specific Dispack filter is to be added.
- Improve file-header-based executable file detection.
- Introduce a new sorting algorithm for filenames without an extension.
This commit is contained in:
parent
4c62e4db60
commit
f970b41e34
8 changed files with 206 additions and 186 deletions
|
@ -198,17 +198,18 @@ creat_write_callback(struct archive *arc, void *ctx, const void *buf, size_t len
|
||||||
} else {
|
} else {
|
||||||
if (pctx->arc_buf_pos < pctx->min_chunk) {
|
if (pctx->arc_buf_pos < pctx->min_chunk) {
|
||||||
int diff = pctx->min_chunk - (int)(pctx->arc_buf_pos);
|
int diff = pctx->min_chunk - (int)(pctx->arc_buf_pos);
|
||||||
if (len >= diff)
|
if (len >= diff) {
|
||||||
pctx->btype = pctx->ctype;
|
pctx->btype = pctx->ctype;
|
||||||
else
|
} else {
|
||||||
pctx->ctype = pctx->btype;
|
pctx->ctype = pctx->btype;
|
||||||
|
}
|
||||||
pctx->interesting = 1;
|
pctx->interesting = 1;
|
||||||
} else {
|
} else {
|
||||||
pctx->arc_writing = 0;
|
pctx->arc_writing = 0;
|
||||||
Sem_Post(&(pctx->read_sem));
|
Sem_Post(&(pctx->read_sem));
|
||||||
Sem_Wait(&(pctx->write_sem));
|
Sem_Wait(&(pctx->write_sem));
|
||||||
tbuf = pctx->arc_buf + pctx->arc_buf_pos;
|
|
||||||
pctx->arc_writing = 1;
|
pctx->arc_writing = 1;
|
||||||
|
tbuf = pctx->arc_buf;
|
||||||
pctx->btype = pctx->ctype;
|
pctx->btype = pctx->ctype;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -258,6 +259,7 @@ archiver_read(void *ctx, void *buf, uint64_t count)
|
||||||
pctx->btype = TYPE_UNKNOWN;
|
pctx->btype = TYPE_UNKNOWN;
|
||||||
Sem_Post(&(pctx->write_sem));
|
Sem_Post(&(pctx->write_sem));
|
||||||
Sem_Wait(&(pctx->read_sem));
|
Sem_Wait(&(pctx->read_sem));
|
||||||
|
|
||||||
pctx->arc_buf = NULL;
|
pctx->arc_buf = NULL;
|
||||||
return (pctx->arc_buf_pos);
|
return (pctx->arc_buf_pos);
|
||||||
}
|
}
|
||||||
|
@ -722,8 +724,50 @@ add_pathname(const char *fpath, const struct stat *sb,
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
if (!dot) {
|
if (!dot) {
|
||||||
while (basename[i] != '\0' && i < NAMELEN) {
|
int plen = strlen(fpath);
|
||||||
member->name[i] = basename[i]; i++;
|
int nsep;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Filenames without an extension are sorted based on
|
||||||
|
* their entire path characteristics. This mostly avoids
|
||||||
|
* unwanted mixing of different file types if we just
|
||||||
|
* sort by filename.
|
||||||
|
*
|
||||||
|
* For every path separator we take the first character
|
||||||
|
* of the directory name limited by NAMELEN chars. Counting
|
||||||
|
* is backward from the basename itself. If less than
|
||||||
|
* NAMELEN path separators are present (i.e. fewer than
|
||||||
|
* NAMELEN level dir nesting) then remaining chars are filled
|
||||||
|
* from the basename.
|
||||||
|
*/
|
||||||
|
nsep = 0;
|
||||||
|
for (i = 0; i < plen; i++) {
|
||||||
|
if (fpath[i] == '/') {
|
||||||
|
nsep++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nsep < NAMELEN) {
|
||||||
|
int diff = NAMELEN - nsep;
|
||||||
|
nsep = NAMELEN-1;
|
||||||
|
i = ftwbuf->base + diff;
|
||||||
|
while (diff > 0) {
|
||||||
|
member->name[nsep] = fpath[i];
|
||||||
|
nsep--;
|
||||||
|
i--;
|
||||||
|
diff--;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
nsep = NAMELEN-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = ftwbuf->base;
|
||||||
|
while (nsep > -1 && i > 0) {
|
||||||
|
if (fpath[i-1] == '/') {
|
||||||
|
member->name[nsep] = fpath[i];
|
||||||
|
nsep--;
|
||||||
|
}
|
||||||
|
i--;
|
||||||
}
|
}
|
||||||
// Clear 64-bit MSB
|
// Clear 64-bit MSB
|
||||||
member->size &= 0x7FFFFFFFFFFFFFFF;
|
member->size &= 0x7FFFFFFFFFFFFFFF;
|
||||||
|
@ -1807,6 +1851,8 @@ out:
|
||||||
static int
|
static int
|
||||||
detect_type_by_data(uchar_t *buf, size_t len)
|
detect_type_by_data(uchar_t *buf, size_t len)
|
||||||
{
|
{
|
||||||
|
uint16_t leval;
|
||||||
|
|
||||||
// At least a few bytes.
|
// At least a few bytes.
|
||||||
if (len < 10) return (TYPE_UNKNOWN);
|
if (len < 10) return (TYPE_UNKNOWN);
|
||||||
|
|
||||||
|
@ -1875,16 +1921,24 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
||||||
} else {
|
} else {
|
||||||
uint32_t off = LE32(U32_P(buf + 0x3c));
|
uint32_t off = LE32(U32_P(buf + 0x3c));
|
||||||
// This is non-MSDOS, check whether PE
|
// This is non-MSDOS, check whether PE
|
||||||
if (off < len - 3) {
|
if (off < len - 100) {
|
||||||
if (buf[off] == 'P' && buf[off+1] == 'E' &&
|
if (buf[off] == 'P' && buf[off+1] == 'E' &&
|
||||||
buf[off+2] == '\0' && buf[off+3] == '\0') {
|
buf[off+2] == '\0' && buf[off+3] == '\0') {
|
||||||
|
uint16_t id;
|
||||||
|
|
||||||
// This is a PE executable.
|
// This is a PE executable.
|
||||||
// Check 32/64-bit.
|
// Check 32/64-bit.
|
||||||
|
off = LE32(U32_P(buf + 0x3c))+24;
|
||||||
|
id = LE16(U16_P(buf + off));
|
||||||
|
if (id == 0x010b || id == 0x020b) {
|
||||||
off = LE32(U32_P(buf + 0x3c))+4;
|
off = LE32(U32_P(buf + 0x3c))+4;
|
||||||
if (LE16(U16_P(buf + off)) == 0x8664) {
|
id = LE16(U16_P(buf + off));
|
||||||
|
if (id == 0x8664)
|
||||||
return (TYPE_BINARY|TYPE_EXE64);
|
return (TYPE_BINARY|TYPE_EXE64);
|
||||||
} else {
|
else
|
||||||
return (TYPE_BINARY|TYPE_EXE32);
|
return (TYPE_BINARY|TYPE_EXE32);
|
||||||
|
} else {
|
||||||
|
return (TYPE_BINARY);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return (TYPE_BINARY|TYPE_EXE32);
|
return (TYPE_BINARY|TYPE_EXE32);
|
||||||
|
@ -1922,6 +1976,21 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
||||||
return (TYPE_BINARY);
|
return (TYPE_BINARY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// x86 Unix format object files (COFF)
|
||||||
|
leval = LE16(U16_P(buf));
|
||||||
|
if (leval == 0502 || leval == 0503 || leval == 0510 || leval == 0511 ||
|
||||||
|
leval == 0512 || leval == 0514 || leval == 0522) {
|
||||||
|
return (TYPE_BINARY|TYPE_EXE32);
|
||||||
|
}
|
||||||
|
|
||||||
|
// AMD64 COFF
|
||||||
|
if (leval == 0x8664)
|
||||||
|
return (TYPE_BINARY|TYPE_EXE64);
|
||||||
|
|
||||||
|
// Intel BIOS ROM images
|
||||||
|
if (*buf == 0x55 && *(buf + 1) == 0xaa)
|
||||||
|
return (TYPE_BINARY|TYPE_EXE32);
|
||||||
|
|
||||||
if (U16_P(buf + 2) == COM_MAGIC || U16_P(buf + 4) == COM_MAGIC ||
|
if (U16_P(buf + 2) == COM_MAGIC || U16_P(buf + 4) == COM_MAGIC ||
|
||||||
U16_P(buf + 4) == COM_MAGIC || U16_P(buf + 5) == COM_MAGIC ||
|
U16_P(buf + 4) == COM_MAGIC || U16_P(buf + 5) == COM_MAGIC ||
|
||||||
U16_P(buf + 13) == COM_MAGIC || U16_P(buf + 18) == COM_MAGIC ||
|
U16_P(buf + 13) == COM_MAGIC || U16_P(buf + 18) == COM_MAGIC ||
|
||||||
|
|
|
@ -156,6 +156,8 @@ DictFilter::Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
||||||
|
|
||||||
|
|
||||||
for(i = 0; i < size-5;) {
|
for(i = 0; i < size-5;) {
|
||||||
|
if (dstSize > *dstsize-4)
|
||||||
|
return (0);
|
||||||
if (src[i] >= 'a' && src[i] <= 'z') {
|
if (src[i] >= 'a' && src[i] <= 'z') {
|
||||||
|
|
||||||
u32 matchSymbol = 0,longestWord = 0;
|
u32 matchSymbol = 0,longestWord = 0;
|
||||||
|
@ -199,6 +201,8 @@ DictFilter::Forward_Dict(u8 *src, u32 size, u8 *dst, u32 *dstsize)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; i<size; i++) {
|
for (; i<size; i++) {
|
||||||
|
if (dstSize > *dstsize-4)
|
||||||
|
return (0);
|
||||||
if (src[i] >= 0x82) {
|
if (src[i] >= 0x82) {
|
||||||
dst[dstSize++] = 254;
|
dst[dstSize++] = 254;
|
||||||
dst[dstSize++] = src[i];
|
dst[dstSize++] = src[i];
|
||||||
|
@ -282,7 +286,7 @@ dict_encode(void *dict_ctx, uchar_t *from, uint64_t fromlen, uchar_t *to, uint64
|
||||||
dst = to + 4;
|
dst = to + 4;
|
||||||
dl -= 4;
|
dl -= 4;
|
||||||
if (df->Forward_Dict(from, fl, dst, &dl)) {
|
if (df->Forward_Dict(from, fl, dst, &dl)) {
|
||||||
*dstlen = dl + 8;
|
*dstlen = dl + 4;
|
||||||
DEBUG_STAT_EN(en = get_wtime_millis());
|
DEBUG_STAT_EN(en = get_wtime_millis());
|
||||||
DEBUG_STAT_EN(fprintf(stderr, "DICT: fromlen: %" PRIu64 ", dstlen: %" PRIu64 "\n",
|
DEBUG_STAT_EN(fprintf(stderr, "DICT: fromlen: %" PRIu64 ", dstlen: %" PRIu64 "\n",
|
||||||
fromlen, *dstlen));
|
fromlen, *dstlen));
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
#include "types.hpp"
|
#include "types.hpp"
|
||||||
#include "dis.hpp"
|
#include "dis.hpp"
|
||||||
|
#include <utils.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -151,7 +152,6 @@ using namespace std;
|
||||||
#define DISFILTER_BLOCK (32768)
|
#define DISFILTER_BLOCK (32768)
|
||||||
#define DISFILTERED 1
|
#define DISFILTERED 1
|
||||||
#define ORIGSIZE 2
|
#define ORIGSIZE 2
|
||||||
#define E8E9 4
|
|
||||||
#define NORMAL_HDR (1 + 2)
|
#define NORMAL_HDR (1 + 2)
|
||||||
#define EXTENDED_HDR (1 + 2 + 2)
|
#define EXTENDED_HDR (1 + 2 + 2)
|
||||||
// Dispack min reduction should be 8%, otherwise we abort
|
// Dispack min reduction should be 8%, otherwise we abort
|
||||||
|
@ -927,139 +927,86 @@ is_x86_code(uchar_t *buf, int len)
|
||||||
return (freq[0x8b] > avgFreq && freq[0x00] > avgFreq * 2 && freq[0xE8] > 6);
|
return (freq[0x8b] > avgFreq && freq[0x00] > avgFreq * 2 && freq[0xE8] > 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* E8E9 Filter from CSC 3.2 (Fu Siyuan). This is applied to blocks that can't
|
|
||||||
* be Disfiltered.
|
|
||||||
*/
|
|
||||||
class EFilter
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
static void Forward_E89(sU8 *src, sU32 size)
|
|
||||||
{
|
|
||||||
sU32 i,j;
|
|
||||||
sS32 c;
|
|
||||||
|
|
||||||
E89init();
|
|
||||||
for(i=0, j=0; i < size; i++) {
|
|
||||||
c = E89forward(src[i]);
|
|
||||||
if (c >= 0) src[j++]=c;
|
|
||||||
}
|
|
||||||
while((c = E89flush()) >= 0) src[j++] = c;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void Inverse_E89( sU8* src, sU32 size)
|
|
||||||
{
|
|
||||||
sU32 i,j;
|
|
||||||
sS32 c;
|
|
||||||
|
|
||||||
E89init();
|
|
||||||
for(i=0, j=0; i < size; i++) {
|
|
||||||
c = E89inverse(src[i]);
|
|
||||||
if (c >= 0) src[j++]=c;
|
|
||||||
}
|
|
||||||
while((c = E89flush()) >= 0) src[j++] = c;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
static sU32 x0,x1;
|
|
||||||
static sU32 i,k;
|
|
||||||
static sU8 cs; // cache size, F8 - 5 bytes
|
|
||||||
|
|
||||||
~EFilter() {}
|
|
||||||
EFilter() {}
|
|
||||||
|
|
||||||
static void E89init(void)
|
|
||||||
{
|
|
||||||
cs = 0xFF;
|
|
||||||
x0 = x1 = 0;
|
|
||||||
i = 0;
|
|
||||||
k = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
static sS32 E89cache_byte(sS32 c)
|
|
||||||
{
|
|
||||||
sS32 d = cs&0x80 ? -1 : (sU8)(x1);
|
|
||||||
x1 >>= 8;
|
|
||||||
x1 |= (x0<<24);
|
|
||||||
x0 >>= 8;
|
|
||||||
x0 |= (c<<24);
|
|
||||||
cs <<= 1; i++;
|
|
||||||
return d;
|
|
||||||
}
|
|
||||||
|
|
||||||
static sU32 E89xswap(sU32 x)
|
|
||||||
{
|
|
||||||
x<<=7;
|
|
||||||
return (x>>24)|((sU8)(x>>16)<<8)|((sU8)(x>>8)<<16)|((sU8)(x)<<(24-7));
|
|
||||||
}
|
|
||||||
|
|
||||||
static sU32 E89yswap(sU32 x)
|
|
||||||
{
|
|
||||||
x = ((sU8)(x>>24)<<7)|((sU8)(x>>16)<<8)|((sU8)(x>>8)<<16)|(x<<24);
|
|
||||||
return x>>7;
|
|
||||||
}
|
|
||||||
|
|
||||||
static sS32 E89forward(sS32 c)
|
|
||||||
{
|
|
||||||
sU32 x;
|
|
||||||
if(i >= k) {
|
|
||||||
if((x1&0xFE000000) == 0xE8000000) {
|
|
||||||
k = i+4;
|
|
||||||
x= x0 - 0xFF000000;
|
|
||||||
if( x<0x02000000 ) {
|
|
||||||
x = (x+i) & 0x01FFFFFF;
|
|
||||||
x = E89xswap(x);
|
|
||||||
x0 = x + 0xFF000000;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return E89cache_byte(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static sS32 E89inverse(sS32 c)
|
|
||||||
{
|
|
||||||
sU32 x;
|
|
||||||
if(i >= k) {
|
|
||||||
if((x1&0xFE000000) == 0xE8000000) {
|
|
||||||
k = i+4;
|
|
||||||
x = x0 - 0xFF000000;
|
|
||||||
if(x < 0x02000000) {
|
|
||||||
x = E89yswap(x);
|
|
||||||
x = (x-i) & 0x01FFFFFF;
|
|
||||||
x0 = x + 0xFF000000;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return E89cache_byte(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static sS32 E89flush(void)
|
|
||||||
{
|
|
||||||
sS32 d;
|
|
||||||
if(cs != 0xFF) {
|
|
||||||
while(cs & 0x80) E89cache_byte(0),++cs;
|
|
||||||
d = E89cache_byte(0); ++cs;
|
|
||||||
return d;
|
|
||||||
} else {
|
|
||||||
E89init();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Linker weirdo!
|
|
||||||
*/
|
|
||||||
sU32 EFilter::x0;
|
|
||||||
sU32 EFilter::x1;
|
|
||||||
sU32 EFilter::i;
|
|
||||||
sU32 EFilter::k;
|
|
||||||
sU8 EFilter::cs;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* E8 E9 Call/Jmp transform routines. Convert relative Call and Jmp addresses
|
||||||
|
* to absolute values to improve compression. A couple of tricks are employed:
|
||||||
|
* 1) Avoid transforming zero addresses or where adding the current offset
|
||||||
|
* to the presumed address results in a zero result. This avoids a bunch of
|
||||||
|
* false positives.
|
||||||
|
* 2) Store transformed values in big-endian format. This improves compression.
|
||||||
|
*/
|
||||||
|
/*
 * Forward E8/E9 transform, applied in place on src[0 .. sz-1].
 *
 * A position i is treated as an x86 CALL/JMP rel32 when src[i] is 0xE8 or
 * 0xE9 and src[i+4] is 0x00 or 0xFF. The low 24 bits of its little-endian
 * operand (src[i+1 .. i+3]) are replaced by (operand + i) mod 2^24, written
 * back in big-endian byte order. Operands that are zero, or that become zero
 * after adding i, are left untouched.
 *
 * Returns 0 on success and -1 if sz does not fit in 32 bits. Buffers shorter
 * than 5 bytes cannot hold a candidate and are returned unchanged.
 *
 * NOTE(review): a non-zero operand whose sum with i wraps to zero is left in
 * little-endian form, while Inverse_E89() reads the operand as big-endian.
 * That case does not look reversible -- confirm before relying on it.
 */
int
Forward_E89(uint8_t *src, uint64_t sz)
{
	uint32_t i;
	uint32_t size;

	if (sz > UINT32_MAX) {
		return (-1);
	}

	/*
	 * A candidate needs 5 bytes. Bail out on anything shorter so that the
	 * unsigned "size - 4" loop bound below cannot wrap around.
	 */
	if (sz < 5) {
		return (0);
	}

	size = (uint32_t)sz;
	for (i = 0; i < size - 4; i++) {
		uint32_t off;

		if ((src[i] & 0xfe) != 0xe8) {
			continue;
		}
		if (src[i+4] != 0 && src[i+4] != 0xff) {
			continue;
		}

		off = (uint32_t)src[i+1] | ((uint32_t)src[i+2] << 8) |
		    ((uint32_t)src[i+3] << 16);
		if (off == 0) {
			continue;
		}

		off = (off + i) & 0xffffff;
		if (off == 0) {
			continue;
		}

		/* Store most significant byte first. */
		src[i+1] = (uint8_t)(off >> 16);
		src[i+2] = (uint8_t)(off >> 8);
		src[i+3] = (uint8_t)off;
	}
	return (0);
}
|
||||||
|
|
||||||
|
/*
 * Inverse E8/E9 transform, applied in place on src[0 .. sz-1].
 *
 * Candidates are recognised exactly as in Forward_E89(): src[i] is 0xE8 or
 * 0xE9 and src[i+4] is 0x00 or 0xFF. The 24-bit operand is read big-endian
 * from src[i+1 .. i+3], i is subtracted (mod 2^24) and the result is written
 * back little-endian. Operands that are zero before or after the subtraction
 * are left untouched.
 *
 * The buffer is walked from the last candidate position down to offset 0,
 * i.e. in the opposite order to Forward_E89(), so overlapping candidates are
 * undone last-applied-first. Offset 0 must be visited because the forward
 * pass transforms it as well.
 *
 * Returns 0 on success and -1 if sz does not fit in 32 bits. Buffers shorter
 * than 5 bytes cannot hold a candidate and are returned unchanged.
 *
 * NOTE(review): Forward_E89() leaves an operand in little-endian form when
 * adding the offset wraps it to zero; this routine would still read it as
 * big-endian. That case does not look reversible -- confirm.
 */
int
Inverse_E89(uint8_t *src, uint64_t sz)
{
	uint32_t i;
	uint32_t size;

	if (sz > UINT32_MAX) {
		return (-1);
	}

	/*
	 * Guard against unsigned wrap-around of "size - 4" on short input.
	 */
	if (sz < 5) {
		return (0);
	}

	size = (uint32_t)sz;

	/* Counts i down from size-5 to 0 inclusive without going negative. */
	for (i = size - 4; i-- > 0; ) {
		uint32_t val;

		if ((src[i] & 0xfe) != 0xe8) {
			continue;
		}
		if (src[i+4] != 0 && src[i+4] != 0xff) {
			continue;
		}

		val = (uint32_t)src[i+3] | ((uint32_t)src[i+2] << 8) |
		    ((uint32_t)src[i+1] << 16);
		if (val == 0) {
			continue;
		}

		val = (val - i) & 0xffffff;
		if (val == 0) {
			continue;
		}

		/* Restore least significant byte first. */
		src[i+1] = (uint8_t)val;
		src[i+2] = (uint8_t)(val >> 8);
		src[i+3] = (uint8_t)(val >> 16);
	}
	return (0);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 32-bit x86 executable packer top-level routines. Detected x86 executable data
|
* 32-bit x86 executable packer top-level routines. Detected x86 executable data
|
||||||
* are passed through these encoding routines. The data chunk is split into 32KB
|
* are passed through these encoding routines. The data chunk is split into 32KB
|
||||||
|
@ -1067,7 +1014,7 @@ extern "C" {
|
||||||
* a block contains valid x86 code by trying to estimate some instruction metrics.
|
* a block contains valid x86 code by trying to estimate some instruction metrics.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen, int stype)
|
||||||
{
|
{
|
||||||
uchar_t *pos, *hdr, type, *pos_to, *to_last;
|
uchar_t *pos, *hdr, type, *pos_to, *to_last;
|
||||||
sU32 len;
|
sU32 len;
|
||||||
|
@ -1094,7 +1041,6 @@ dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
sU16 origsize;
|
sU16 origsize;
|
||||||
sU32 out;
|
sU32 out;
|
||||||
sU8 *rv;
|
sU8 *rv;
|
||||||
int dis_tried;
|
|
||||||
|
|
||||||
if (len > DISFILTER_BLOCK)
|
if (len > DISFILTER_BLOCK)
|
||||||
sz = DISFILTER_BLOCK;
|
sz = DISFILTER_BLOCK;
|
||||||
|
@ -1113,11 +1059,9 @@ dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
}
|
}
|
||||||
|
|
||||||
out = sz;
|
out = sz;
|
||||||
dis_tried = 0;
|
|
||||||
if (is_x86_code(pos, sz)) {
|
if (is_x86_code(pos, sz)) {
|
||||||
ctx.ResetCtx(0, sz);
|
ctx.ResetCtx(0, sz);
|
||||||
rv = DisFilter(ctx, pos, sz, 0, pos_to, out);
|
rv = DisFilter(ctx, pos, sz, 0, pos_to, out);
|
||||||
dis_tried = 1;
|
|
||||||
} else {
|
} else {
|
||||||
rv = NULL;
|
rv = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1126,15 +1070,6 @@ dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
memcpy(pos_to, pos, origsize);
|
memcpy(pos_to, pos, origsize);
|
||||||
|
|
||||||
/*
|
|
||||||
* If Dispack failed, we apply a simple E8E9 filter
|
|
||||||
* on the block.
|
|
||||||
*/
|
|
||||||
if (dis_tried) {
|
|
||||||
EFilter::Forward_E89(pos_to, origsize);
|
|
||||||
type |= E8E9;
|
|
||||||
}
|
|
||||||
*hdr = type;
|
*hdr = type;
|
||||||
hdr++;
|
hdr++;
|
||||||
U16_P(hdr) = LE16(origsize);
|
U16_P(hdr) = LE16(origsize);
|
||||||
|
@ -1218,8 +1153,6 @@ dispack_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
|
||||||
* This only happens if this block was detected as x86 instruction
|
* This only happens if this block was detected as x86 instruction
|
||||||
* stream and Dispack was tried but it failed.
|
* stream and Dispack was tried but it failed.
|
||||||
*/
|
*/
|
||||||
if (type & E8E9)
|
|
||||||
EFilter::Inverse_E89(pos_to, cmpsz);
|
|
||||||
pos += cmpsz;
|
pos += cmpsz;
|
||||||
pos_to += cmpsz;
|
pos_to += cmpsz;
|
||||||
len -= cmpsz;
|
len -= cmpsz;
|
||||||
|
|
|
@ -31,9 +31,13 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *_dstlen);
|
int dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *_dstlen,
|
||||||
|
int stype);
|
||||||
int dispack_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
int dispack_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen);
|
||||||
|
|
||||||
|
int Forward_E89(uint8_t *src, uint64_t sz);
|
||||||
|
int Inverse_E89(uint8_t *src, uint64_t sz);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
47
pcompress.c
47
pcompress.c
|
@ -232,22 +232,23 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If Dispack is enabled it has to be done first since Dispack analyses the
|
* Dispack is used for 32-bit EXE files via a libarchive filter routine.
|
||||||
* x86 instruction stream in the raw data.
|
* However if Dispack fails or 64-bit exes are detected we apply an E8E9
|
||||||
* AR archives are typically static libraries. So we Dispack them unconditionally.
|
* CALL/JMP transform filter.
|
||||||
* TODO: Is this too much to assume in the generic case? Can we look inside ar archives?
|
|
||||||
*/
|
*/
|
||||||
if (pctx->dispack_preprocess && (stype == TYPE_EXE32 || stype == TYPE_EXE64 ||
|
if (pctx->exe_preprocess) {
|
||||||
stype == TYPE_ARCHIVE_AR)) {
|
if (stype == TYPE_EXE32 || stype == TYPE_EXE64 ||
|
||||||
|
stype == TYPE_ARCHIVE_AR) {
|
||||||
_dstlen = fromlen;
|
_dstlen = fromlen;
|
||||||
result = dispack_encode((uchar_t *)from, fromlen, to, &_dstlen);
|
memcpy(to, from, fromlen);
|
||||||
if (result != -1) {
|
if (Forward_E89(to, fromlen) == 0) {
|
||||||
uchar_t *tmp;
|
uchar_t *tmp;
|
||||||
tmp = from;
|
tmp = from;
|
||||||
from = to;
|
from = to;
|
||||||
to = tmp;
|
to = tmp;
|
||||||
fromlen = _dstlen;
|
fromlen = _dstlen;
|
||||||
type |= PREPROC_TYPE_DISPACK;
|
type |= PREPROC_TYPE_E8E9;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -449,7 +450,18 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type & PREPROC_TYPE_DISPACK) {
|
if (type & PREPROC_TYPE_E8E9) {
|
||||||
|
_dstlen1 = srclen;
|
||||||
|
memcpy(dst, src, srclen);
|
||||||
|
result = Inverse_E89(dst, srclen);
|
||||||
|
if (result != -1) {
|
||||||
|
*dstlen = _dstlen1;
|
||||||
|
} else {
|
||||||
|
log_msg(LOG_ERR, 0, "E8E9 decoding failed.");
|
||||||
|
return (result);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (type & PREPROC_TYPE_DISPACK) { // Backward compatibility
|
||||||
result = dispack_decode((uchar_t *)src, srclen, (uchar_t *)dst, &_dstlen1);
|
result = dispack_decode((uchar_t *)src, srclen, (uchar_t *)dst, &_dstlen1);
|
||||||
if (result != -1) {
|
if (result != -1) {
|
||||||
*dstlen = _dstlen1;
|
*dstlen = _dstlen1;
|
||||||
|
@ -1769,7 +1781,6 @@ redo:
|
||||||
dedupe_index_sz = 0;
|
dedupe_index_sz = 0;
|
||||||
type = COMPRESSED;
|
type = COMPRESSED;
|
||||||
|
|
||||||
|
|
||||||
/* Perform Dedup if enabled. */
|
/* Perform Dedup if enabled. */
|
||||||
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
if ((pctx->enable_rabin_scan || pctx->enable_fixed_scan)) {
|
||||||
dedupe_context_t *rctx;
|
dedupe_context_t *rctx;
|
||||||
|
@ -3106,7 +3117,6 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
char *pos;
|
char *pos;
|
||||||
struct filter_flags ff;
|
struct filter_flags ff;
|
||||||
|
|
||||||
|
|
||||||
pctx->level = -1;
|
pctx->level = -1;
|
||||||
err = 0;
|
err = 0;
|
||||||
pctx->keylen = DEFAULT_KEYLEN;
|
pctx->keylen = DEFAULT_KEYLEN;
|
||||||
|
@ -3171,7 +3181,6 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'B':
|
case 'B':
|
||||||
pctx->advanced_opts = 1;
|
|
||||||
pctx->rab_blk_size = atoi(optarg);
|
pctx->rab_blk_size = atoi(optarg);
|
||||||
if (pctx->rab_blk_size < 0 || pctx->rab_blk_size > 5) {
|
if (pctx->rab_blk_size < 0 || pctx->rab_blk_size > 5) {
|
||||||
log_msg(LOG_ERR, 0, "Average Dedupe block size must be in range 0 (2k), 1 (4k) .. 5 (64k)");
|
log_msg(LOG_ERR, 0, "Average Dedupe block size must be in range 0 (2k), 1 (4k) .. 5 (64k)");
|
||||||
|
@ -3293,7 +3302,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
|
|
||||||
case 'x':
|
case 'x':
|
||||||
pctx->advanced_opts = 1;
|
pctx->advanced_opts = 1;
|
||||||
pctx->dispack_preprocess = 1;
|
pctx->exe_preprocess = 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'T':
|
case 'T':
|
||||||
|
@ -3415,11 +3424,11 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dispack, PackJPG and WavPack are only valid when archiving files.
|
* EXE, PackJPG and WavPack are only valid when archiving files.
|
||||||
*/
|
*/
|
||||||
if ((pctx->dispack_preprocess || ff.enable_packjpg || ff.enable_wavpack)
|
if ((pctx->exe_preprocess || ff.enable_packjpg || ff.enable_wavpack)
|
||||||
&& !pctx->archive_mode) {
|
&& !pctx->archive_mode) {
|
||||||
log_msg(LOG_ERR, 0, "Dispack Executable Preprocessor and PackJPG are "
|
log_msg(LOG_ERR, 0, "Executable File Preprocessor and PackJPG are "
|
||||||
"only valid when archiving.");
|
"only valid when archiving.");
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
@ -3597,7 +3606,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
init_filters(&ff);
|
init_filters(&ff);
|
||||||
pctx->enable_packjpg = ff.enable_packjpg;
|
pctx->enable_packjpg = ff.enable_packjpg;
|
||||||
pctx->enable_wavpack = ff.enable_wavpack;
|
pctx->enable_wavpack = ff.enable_wavpack;
|
||||||
if (pctx->level > 8) pctx->dispack_preprocess = 1;
|
if (pctx->level > 8) pctx->exe_preprocess = 1;
|
||||||
if (pctx->meta_stream != -1)
|
if (pctx->meta_stream != -1)
|
||||||
pctx->meta_stream = 1;
|
pctx->meta_stream = 1;
|
||||||
else
|
else
|
||||||
|
@ -3622,7 +3631,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
|
||||||
}
|
}
|
||||||
if (pctx->level > 9) pctx->delta2_nstrides = NSTRIDES_EXTRA;
|
if (pctx->level > 9) pctx->delta2_nstrides = NSTRIDES_EXTRA;
|
||||||
}
|
}
|
||||||
if (pctx->lzp_preprocess || pctx->enable_delta2_encode || pctx->dispack_preprocess) {
|
if (pctx->lzp_preprocess || pctx->enable_delta2_encode || pctx->exe_preprocess) {
|
||||||
pctx->preprocess_mode = 1;
|
pctx->preprocess_mode = 1;
|
||||||
pctx->enable_analyzer = 1;
|
pctx->enable_analyzer = 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,7 @@ extern "C" {
|
||||||
#define PREPROC_TYPE_DELTA2 2
|
#define PREPROC_TYPE_DELTA2 2
|
||||||
#define PREPROC_TYPE_DISPACK 4
|
#define PREPROC_TYPE_DISPACK 4
|
||||||
#define PREPROC_TYPE_DICT 8
|
#define PREPROC_TYPE_DICT 8
|
||||||
|
#define PREPROC_TYPE_E8E9 16
|
||||||
#define PREPROC_COMPRESSED 128
|
#define PREPROC_COMPRESSED 128
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -220,7 +221,7 @@ typedef struct pc_ctx {
|
||||||
int enable_analyzer;
|
int enable_analyzer;
|
||||||
int preprocess_mode;
|
int preprocess_mode;
|
||||||
int lzp_preprocess;
|
int lzp_preprocess;
|
||||||
int dispack_preprocess;
|
int exe_preprocess;
|
||||||
int encrypt_type;
|
int encrypt_type;
|
||||||
int archive_mode;
|
int archive_mode;
|
||||||
int enable_archive_sort;
|
int enable_archive_sort;
|
||||||
|
|
|
@ -74,7 +74,7 @@
|
||||||
//Use prime constant from Bulat Ziganshin's REP. Seems to work best across wide range of data.
|
//Use prime constant from Bulat Ziganshin's REP. Seems to work best across wide range of data.
|
||||||
#define RAB_POLYNOMIAL_CONST 153191
|
#define RAB_POLYNOMIAL_CONST 153191
|
||||||
#define POLY_MASK (0xffffffffffULL)
|
#define POLY_MASK (0xffffffffffULL)
|
||||||
#define RAB_BLK_DEFAULT 1
|
#define RAB_BLK_DEFAULT 2
|
||||||
#define RAB_BLK_MIN_BITS 11
|
#define RAB_BLK_MIN_BITS 11
|
||||||
#define LZMA_WINDOW_MAX (128L * 1024L * 1024L)
|
#define LZMA_WINDOW_MAX (128L * 1024L * 1024L)
|
||||||
#define RAB_POLYNOMIAL_WIN_SIZE 16
|
#define RAB_POLYNOMIAL_WIN_SIZE 16
|
||||||
|
|
|
@ -255,7 +255,7 @@ Read_Adjusted(int fd, uchar_t *buf, uint64_t count, int64_t *rabin_count, void *
|
||||||
rcount = Read(fd, buf2, count);
|
rcount = Read(fd, buf2, count);
|
||||||
if (rcount > 0) {
|
if (rcount > 0) {
|
||||||
rcount += *rabin_count;
|
rcount += *rabin_count;
|
||||||
if (rcount == count) {
|
if (rcount == count + *rabin_count) {
|
||||||
uint64_t rc, rbc;
|
uint64_t rc, rbc;
|
||||||
rc = rcount;
|
rc = rcount;
|
||||||
rbc = *rabin_count;
|
rbc = *rabin_count;
|
||||||
|
|
Loading…
Reference in a new issue