From 0192790c02c64fb66e1e208c0681bc63e165a31c Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 24 Nov 2013 19:45:58 +0530 Subject: [PATCH] Add Dispack filter with auto-detection of x86 executables in archive mode. More elaborate magic header based detection of 32-bit and 64-bit x86 binaries. Always use fast-mode LZ4 in Adaptive modes. --- Makefile.in | 12 +- adaptive_compress.c | 7 +- archive/pc_archive.c | 70 ++- filters/dispack/dis.cpp | 1067 ++++++++++++++++++++++++++++++++++++ filters/dispack/dis.hpp | 41 ++ filters/dispack/types.hpp | 51 ++ pcompress.c | 39 +- pcompress.h | 2 + utils/phash/extensions.h | 12 +- utils/phash/extensions.txt | 12 +- utils/phash/phash.c | 16 +- utils/phash/phash.h | 2 +- utils/utils.h | 7 +- 13 files changed, 1293 insertions(+), 45 deletions(-) create mode 100644 filters/dispack/dis.cpp create mode 100644 filters/dispack/dis.hpp create mode 100644 filters/dispack/types.hpp diff --git a/Makefile.in b/Makefile.in index 0041d9f..7fa005a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -30,7 +30,7 @@ MAINSRCS = utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \ adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \ utils/xxhash_base.c utils/heap.c utils/cpuid.c pcompress.c MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heap.h \ - utils/cpuid.h utils/xxhash.h archive/pc_archive.h + utils/cpuid.h utils/xxhash.h archive/pc_archive.h filters/dispack/dis.hpp MAINOBJS = $(MAINSRCS:.c=.o) PROGSRCS = main.c @@ -142,6 +142,10 @@ PJPGHDRS = filters/packjpg/aricoder.h filters/packjpg/bitops.h filters/packjpg/d filters/packjpg/packjpglib.h filters/packjpg/pjpgtbl.h PJPGOBJS = $(PJPGSRCS:.cpp=.o) +DISPACKSRCS = filters/dispack/dis.cpp +DISPACKHDRS = filters/dispack/dis.hpp filters/dispack/types.hpp +DISPACKOBJS = $(DISPACKSRCS:.cpp=.o) + SKEIN_BLOCK_C = crypto/skein/skein_block.c SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s SKEIN_BLOCK_SRC = @SKEIN_BLOCK@ @@ -218,7 +222,7 @@ OBJS = $(MAINOBJS) 
$(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \ $(SKEIN_BLOCK_OBJ) @SHA2ASM_OBJS@ @SHA2_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ $(TRANSP_OBJS) $(CRYPTO_OBJS) $(ZLIB_OBJS) $(BZLIB_OBJS) $(XXHASH_OBJS) $(BLAKE2_OBJS) \ -@CRYPTO_COMPAT_OBJS@ $(CRYPTO_ASM_OBJS) $(ARCHIVEOBJS) $(PJPGOBJS) +@CRYPTO_COMPAT_OBJS@ $(CRYPTO_ASM_OBJS) $(ARCHIVEOBJS) $(PJPGOBJS) $(DISPACKOBJS) DEBUG_LINK = g++ -pthread @LIBBSCGEN_OPT@ @EXTRA_OPT_FLAGS@ -fopenmp -fPIC DEBUG_COMPILE = gcc -g -c @EXTRA_OPT_FLAGS@ -fPIC @@ -296,6 +300,10 @@ $(PJPGOBJS): $(PJPGSRCS) $(PJPGHDRS) $(COMPILE_cpp) $(COMMON_VEC_FLAGS) @SSE_OPT_FLAGS@ -O2 -fsched-spec-load \ $(VEC_FLAGS) -DBUILD_LIB $(COMMON_CPPFLAGS_cpp) $(@:.o=.cpp) -o $@ +$(DISPACKOBJS): $(DISPACKSRCS) $(DISPACKHDRS) + $(COMPILE_cpp) $(COMMON_VEC_FLAGS) @SSE_OPT_FLAGS@ -O2 -fsched-spec-load \ + $(VEC_FLAGS) $(COMMON_CPPFLAGS_cpp) $(@:.o=.cpp) -o $@ + $(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC) $(COMPILE) $(SKEIN_FLAGS) $(SKEIN_BLOCK_SRC) -o $@ diff --git a/adaptive_compress.c b/adaptive_compress.c index 568006d..74b6f98 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -124,7 +124,7 @@ adapt_init(void **data, int *level, int nthreads, uint64_t chunksize, int file_version, compress_op_t op) { struct adapt_data *adat = (struct adapt_data *)(*data); - int rv = 0; + int rv = 0, lv = 1; if (!adat) { adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data)); @@ -137,7 +137,7 @@ adapt_init(void **data, int *level, int nthreads, uint64_t chunksize, * compression level. */ if (rv == 0) - rv = lz4_init(&(adat->lz4_data), 1, nthreads, chunksize, file_version, op); + rv = lz4_init(&(adat->lz4_data), &lv, nthreads, chunksize, file_version, op); adat->lzma_data = NULL; adat->bsc_data = NULL; *data = adat; @@ -178,8 +178,9 @@ adapt2_init(void **data, int *level, int nthreads, uint64_t chunksize, * otherwise incompressible data. 
So we always use it at the lowest and fastest * compression level. */ + lv = 1; if (rv == 0) - rv = lz4_init(&(adat->lz4_data), 1, nthreads, chunksize, file_version, op); + rv = lz4_init(&(adat->lz4_data), &lv, nthreads, chunksize, file_version, op); *data = adat; if (*level > 9) *level = 9; } diff --git a/archive/pc_archive.c b/archive/pc_archive.c index 93ca24a..44e981f 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -915,6 +915,7 @@ do_map: if (typ == TYPE_UNKNOWN) { pctx->ctype = detect_type_by_data(src, len); + typ = pctx->ctype; if (typ != TYPE_UNKNOWN) { if (typetab[(typ >> 3)].filter_func != NULL) { int64_t rv; @@ -1428,6 +1429,9 @@ out: /* TTA1 packed into 32-bit integer. */ # define TTA1 (0x54544131) + +/* Magic for different MSDOS COM file types. */ +# define COM_MAGIC (0xcd21) #else /* 0x7fELF packed into 32-bit integer. */ # define ELFINT (0x464c457fU) @@ -1443,6 +1447,9 @@ out: /* TTA1 packed into 32-bit integer. */ # define TTA1 (0x31415454) + +/* Magic for different MSDOS COM file types. */ +# define COM_MAGIC (0x21cd) #endif /* @@ -1454,12 +1461,63 @@ detect_type_by_data(uchar_t *buf, size_t len) // At least a few bytes. if (len < 16) return (TYPE_UNKNOWN); - if (U32_P(buf) == ELFINT) - return (TYPE_BINARY|TYPE_EXE); // Regular ELF - if ((buf[0] == 'M' || buf[0] == 'L') && buf[1] == 'Z') - return (TYPE_BINARY|TYPE_EXE); // MSDOS Exe - if (buf[0] == 0xe9) - return (TYPE_BINARY|TYPE_EXE); // MSDOS COM + if (U32_P(buf) == ELFINT) { // Regular ELF, check for 32/64-bit, core dump + if (*(buf + 16) != 4) { + if (*(buf + 4) == 2) { + return (TYPE_BINARY|TYPE_EXE64); + } else { + return (TYPE_BINARY|TYPE_EXE32); + } + } else { + return (TYPE_BINARY); + } + } + if (buf[1] == 'Z') { + // Check for MSDOS/Windows Exe types + if (buf[0] == 'L') { + return (TYPE_BINARY|TYPE_EXE32); + } else if (buf[0] == 'M') { + // If relocation table is less than 0x40 bytes into file then + // it is a 32-bit MSDOS exe. 
+ if (LE16(U16_P(buf + 0x18)) < 0x40) { + return (TYPE_BINARY|TYPE_EXE32); + } else { + uint32_t off = LE32(U32_P(buf + 0x3c)); + // This is non-MSDOS, check whether PE + if (off < len - 3) { + if (buf[off] == 'P' && buf[off+1] == 'E' && + buf[off+2] == '\0' && buf[off+3] == '\0') { + // This is a PE executable. + // Check 32/64-bit. + off = LE32(U32_P(buf + 0x3c))+4; + if (LE16(U16_P(buf + off)) == 0x8664) { + return (TYPE_BINARY|TYPE_EXE64); + } else { + return (TYPE_BINARY|TYPE_EXE32); + } + } else { + return (TYPE_BINARY|TYPE_EXE32); + } + } + } + } + } + + // MSDOS COM types + if (buf[0] == 0xe9 || buf[0] == 0xeb) { + if (LE16(U16_P(buf + 0x1fe)) == 0xaa55) + return (TYPE_BINARY|TYPE_EXE32); // MSDOS COM + else + return (TYPE_BINARY); + } + if (U16_P(buf + 2) == COM_MAGIC || U16_P(buf + 4) == COM_MAGIC || + U16_P(buf + 4) == COM_MAGIC || U16_P(buf + 5) == COM_MAGIC || + U16_P(buf + 13) == COM_MAGIC || U16_P(buf + 18) == COM_MAGIC || + U16_P(buf + 23) == COM_MAGIC || U16_P(buf + 30) == COM_MAGIC || + U16_P(buf + 70) == COM_MAGIC) { + return (TYPE_BINARY|TYPE_EXE32); // MSDOS COM + } + if (U32_P(buf) == TZINT) return (TYPE_BINARY); // Timezone data if (U32_P(buf) == PPMINT) diff --git a/filters/dispack/dis.cpp b/filters/dispack/dis.cpp new file mode 100644 index 0000000..3e14378 --- /dev/null +++ b/filters/dispack/dis.cpp @@ -0,0 +1,1067 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. + * If not, see <http://www.gnu.org/licenses/>. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#include "types.hpp" +#include "dis.hpp" +#include +#include +#include +#include +#include + +/* Version history: + * + * 1.00 (Nov 2009) Initial release + * 1.01 (Jan 2011) Don't assert on bytes > MAXINSTR when dealing with jump tables + * 1.02 (Nov 2013) (Moinak Ghosh) Changes to integrate with Pcompress. + * Adapted and modified from: + * http://www.farbrausch.de/~fg/code/disfilter/ + */ + +/****************************************************************************/ + +/* This is a filter for x86 binary code, intended to improve its compressibility + * by standard algorithms. The basic ideas are quite old; for example, the LZX + * algorithm used in Microsoft .CAB files uses a special preprocessor that + * converts the target address in CALL opcodes from a relative offset to an + * absolute address. This simple transform greatly helps both LZ-based and + * statistical coders: the same function being called repeatedly now results + * in the same byte sequence for the call being repeated, instead of having + * a different encoding every time. The preprocessor doesn't really understand + * the instruction stream; it just looks for a 0xE8 byte (the opcode for near + * call) and adds the current position to the 4 bytes that follow it. + * + * Most modern compressors include this filter or variations, to be used on .EXE + * files; newer variants usually try to detect whether the target offset would be + * within the executable image to reduce the number of false positives. 
Another + * common modification stores the transformed offsets in big endian byte order: + * this clusters the high bits (which are likely to be similar along a stretch of + * code) together with the opcode, again yielding somewhat better compression. + * + * However, all this is based on a very limited understanding of x86 binary code. + * It is possible to do significantly better with a more thorough understanding of the + * bytestream and its underlying structure. This algorithm borrows heavily from the + * Split-Stream^2 method described in [1] (or, more precisely, an earlier variant + * published sometime in 2004; I don't remember the details anymore). It also introduces + * some (to my knowledge) novel ideas, though. + * + * The basic idea behind Split-Stream is to disassemble the target program, + * splitting it into several distinct streams that can be coded separately. Examples + * of such streams are the opcodes themselves, 8 bit immediates, 32 bit immediates, + * jump and call target addresses, and so on - the idea being that the individual + * fields are highly correlated amongst themselves, but largely independent of each + * other. Splitting the streams reduces the context dilution (the inclusion of + * irrelevant values in the context used for prediction) that otherwise harms compression + * in compiled code. Since the actual compressor in kkrunchy is an LZ-based dictionary + * coder and not a context coder, there's no easy way to mix multiple models or use + * alphabets with more than 256 symbols; hence the streams are simply stored sequentially, + * with a small header denoting the size of each. This interface sacrifices some + * compression potential, but has the advantage that the filter consumes and produces + * simple bytestreams; kkrunchy actually compresses the (several hundred bytes long) + * unfiltering code along with the transformed code, so part of the decompressor is + * stored in compressed form. 
This results in a somewhat peculiar "bootstrapping" + * decompression process but saved roughly 200 bytes when it was originally written; + * a big enough gain to be worth it when targeting 64k executables. + * + * The actual list of streams that are identified can be found below (the "Streams" enum). + * To categorize which byte belongs where, the code needs to be disassembled. This + * is simpler than it sounds, given the complexity of x86 instruction encoding; + * luckily, there's no need to fully "understand" each instruction. We mainly need to + * be able to identify the opcode, the addressing mode used, and the presence of + * immediate data fields. This is implemented using a mostly table-driven disassembler. + * Since the original decoder was heavily optimized for size and the tables need to be + * included with the decoder, the encoding is very compact: It mainly consists of two + * tables of 256 entries each with 4 bits per entry used - the first table describing + * one-byte opcodes, the second for two-byte opcodes (when this code was written, there + * were no three-byte opcodes yet). There are some simplifications present in the tables + * and the disassembler, where doing so poses no problems. For example, all prefixes + * are treated as one-byte opcodes with no operands; this is incorrect, but as long as + * the encoder and decoder agree on it, there's no problem. There's also no need to + * distinguish between different instructions when they all have the same addressing modes + * and combination of immediate operands. All this gets rid of a lot of special cases. + * There is one significant deviation from the PPMexe paper [1], though: the code + * is very careful never to assume that its parsing of the instruction stream is correct, + * and absolutely no irreversible transforms take place (such as the instruction + * rescheduling in [1]). Unrecognizable and invalid opcodes are preserved. 
This is done + * by using a very uncommon opcode as an escape code, encapsulating otherwise invalid + * sequences within the bytestream. This property is critical in practice: code sections + * often contain jump tables and other data that isn't decodable as an x86 instruction + * stream. Corrupting such data during the compression process is unacceptable. + * + * The target addresses of near jumps and calls of course still get converted from + * relative to absolute; additionally, all values larger than 8 bit are stored in big + * endian byte order. Both transforms are trivial to undo on the decoder side and yield + * notable improvements in compression ratio. Additionally, the last 255 call targets + * are kept in an array that's updated using the "move to front" heuristic. If a target + * occurs repeatedly (as is common in practice), the offset doesn't need to be coded at + * all; instead the position in the array is transmitted. (This is the ST_CALL_IDX + * stream). Additionally, the instruction stream is analyzed to identify potential + * call targets (i.e. start addresses of functions) even before they are first + * referenced: if a RET or INT3 opcode is found in the instruction stream, the filter + * assumes that the next instruction is likely to start a new function (MSVC++ uses + * INT3 opcodes to fill the "no man's land" between functions) and adds its address to + * the function table automatically. Typical overall hit rates for the function table + * are between 70 and 80 per cent - so only a quarter of all call target addresses ever + * needs to be stored explicitly. + * + * The most common type of data intermixed with code sections is jump tables and + * virtual function tables. Generally speaking, any data inside the code section is + * bad for the filter; its statistics are very different from the binary code being + * encoded which hurts compression, and it causes the disassembler to lose sync + * temporarily. 
To work around this problem, the encoder tries to identify jump + * tables, using another escape code to identify them in the output stream. The + * heuristic used here is rather simple, but works very well: When an instruction + * is expected, the encoder looks at the next 12 bytes. If they evaluate to + * addresses within the code section when interpreted as 3 dwords, the encoder assumes + * that it has found a jump table (or vtable). Jump table entries are encoded the + * same way that call targets are. + * + * [1] "PPMexe: Program Compression" + * M. Drinic, D. Kirovski, and H. Vo, MS Research + * ACM Transactions on Programming Languages and Systems, Vol.29, (no.1), 2007. + * http://research.microsoft.com/en-us/um/people/darkok/papers/TOPLAS.pdf + */ + +#define DISFILTER_BLOCK (32768) +#define DISFILTERED 1 +#define ORIGSIZE 2 +#define CLEAR_DISFILTER 0xfe +#define NORMAL_HDR (1 + 2) +#define EXTENDED_HDR (1 + 2 + 2) +// Dispack min reduction should be 8%, otherwise we abort +#define DIS_MIN_REDUCE (2622) + +#define MAXINSTR 15 // maximum size of a single instruction in bytes (actually, decodeable ones are shorter) + +enum Opcodes +{ + // 1-byte opcodes of special interest (for one reason or another) + OP_2BYTE = 0x0f, // start of 2-byte opcode + OP_OSIZE = 0x66, // operand size prefix + OP_CALLF = 0x9a, + OP_RETNI = 0xc2, // ret near+immediate + OP_RETN = 0xc3, + OP_ENTER = 0xc8, + OP_INT3 = 0xcc, + OP_INTO = 0xce, + OP_CALLN = 0xe8, + OP_JMPF = 0xea, + OP_ICEBP = 0xf1, + + // escape codes we use (these need to be 1-byte opcodes without an address or immediate operand!) 
+ ESCAPE = OP_ICEBP, + JUMPTAB = OP_INTO +}; + +// formats +enum InstructionFormat +{ + // encoding mode + fNM = 0x0, // no ModRM + fAM = 0x1, // no ModRM, "address mode" (jumps or direct addresses) + fMR = 0x2, // ModRM present + fMEXTRA = 0x3, // ModRM present, includes extra bits for opcode + fMODE = 0x3, // bitmask for mode + + // no ModRM: size of immediate operand + fNI = 0x0, // no immediate + fBI = 0x4, // byte immediate + fWI = 0x8, // word immediate + fDI = 0xc, // dword immediate + fTYPE = 0xc, // type mask + + // address mode: type of address operand + fAD = 0x0, // absolute address + fDA = 0x4, // dword absolute jump target + fBR = 0x8, // byte relative jump target + fDR = 0xc, // dword relative jump target + + // others + fERR = 0xf // denotes invalid opcodes +}; + +enum Streams +{ + ST_OP, // prefixes, first byte of opcode + ST_SIB, // SIB byte + ST_CALL_IDX, // call table index + ST_DISP8_R0, // byte displacement on ModRM, reg no. 0 and following + ST_DISP8_R1, ST_DISP8_R2, ST_DISP8_R3, ST_DISP8_R4, ST_DISP8_R5, ST_DISP8_R6, ST_DISP8_R7, + ST_JUMP8, // short jump + ST_IMM8, // 8-bit immediate + ST_IMM16, // 16-bit immediate + ST_IMM32, // 32-bit immediate + ST_DISP32, // 32-bit displacement + ST_ADDR32, // 32-bit direct address + ST_CALL32, // 32-bit call target + ST_JUMP32, // 32-bit jump target + + ST_MAX, + + // these components of the instruction stream are also identified + // seperately, but stored together with another stream since there's + // high correlation between them (or just because one streams provides + // good context to predict the other) + ST_MODRM = ST_OP, // ModRM byte + ST_OP2 = ST_OP, // second byte of opcode + ST_AJUMP32 = ST_JUMP32, // absolute jump target + ST_JUMPTBL_COUNT = ST_OP +}; + +/****************************************************************************/ + +// These helper functions assume that this code is being compiled on a +// little-endian platform with no alignment restrictions on data accesses. 
+// If this isn't a safe assumption, change these functions appropriately. +// All byte order dependent operations end up calling them. +// +// I also use the VC++ _byteswap intrinsics to implement big endian stores; +// if your compiler doesn't have them, it should be trivial to get rid of them. + +static inline sU8 Load8(const sU8 *s) { return *s; } +static inline sU16 Load16(const sU8 *s) { return *((const sU16 *) s); } +static inline sU16 Load16B(const sU8 *s) { return _byteswap_ushort(Load16(s)); } +static inline sU32 Load32(const sU8 *s) { return *((const sU32 *) s); } +static inline sU32 Load32B(const sU8 *s) { return _byteswap_ulong(Load32(s)); } + +static inline void Store8(sU8 *d,sU8 v) { *d = v; } +static inline void Store16(sU8 *d,sU16 v) { *((sU16 *) d) = v; } +static inline void Store16B(sU8 *d,sU16 v) { *((sU16 *) d) = _byteswap_ushort(v); } +static inline void Store32(sU8 *d,sU32 v) { *((sU32 *) d) = v; } +static inline void Store32B(sU8 *d,sU32 v) { *((sU32 *) d) = _byteswap_ulong(v); } + +static inline sU8 Fetch8(sU8 *&s) { return *s++; } +static inline sU16 Fetch16(sU8 *&s) { sU16 v = Load16(s); s += 2; return v; } +static inline sU16 Fetch16B(sU8 *&s) { sU16 v = Load16B(s); s += 2; return v; } +static inline sU32 Fetch32(sU8 *&s) { sU32 v = Load32(s); s += 4; return v; } +static inline sU32 Fetch32B(sU8 *&s) { sU32 v = Load32B(s); s += 4; return v; } + +static inline sU8 Write8(sU8 *&d,sU8 v) { Store8(d,v); d += 1; return v; } +static inline sU16 Write16(sU8 *&d,sU16 v) { Store16(d,v); d += 2; return v; } +static inline sU32 Write32(sU8 *&d,sU32 v) { Store32(d,v); d += 4; return v; } + +/****************************************************************************/ + +static sU32 MoveToFront(sU32 *table,sInt pos,sU32 val) +{ + for(;pos > 0;pos--) + table[pos] = table[pos-1]; + + table[0] = val; + return val; +} + +static inline void AddMTF(sU32 *mtf,sU32 val) +{ + MoveToFront(mtf,255,val); +} + +static sInt FindMTF(sU32 *mtf,sU32 val) +{ + 
for(sInt i=0;i<255;i++) + { + if(mtf[i] == val) + { + MoveToFront(mtf,i,val); + return i; + } + } + + AddMTF(mtf,val); + return -1; +} + +/****************************************************************************/ + +struct DataBuffer +{ + sInt Size,Max; + sU8 *Data; + + DataBuffer() + { + Max = 256; + Data = (sU8 *) malloc(Max); + ResetBuffer(); + } + + void ResetBuffer() + { + Size = 0; + } + + ~DataBuffer() + { + free(Data); + } + + sU8 *Add(sInt bytes) + { + if(Size+bytes>Max) + { + Max = (Max*2 < Size+bytes) ? Size+bytes : Max*2; + Data = (sU8 *) realloc(Data,Max); + } + + sU8 *ret = Data+Size; + Size += bytes; + return ret; + } +}; + +/****************************************************************************/ + +// 1-byte opcodes +sU8 Table1[256] = +{ + // 0 1 2 3 4 5 6 7 8 9 a b c d e f + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI, // 0 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI, // 1 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI, // 2 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI, // 3 + + fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // 4 + fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // 5 + fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fDI,fMR|fDI,fNM|fBI,fMR|fBI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // 6 + fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR, // 7 + + 
fMR|fBI,fMR|fDI,fMR|fBI,fMR|fBI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 8 + fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fAM|fDA,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // 9 + fAM|fAD,fAM|fAD,fAM|fAD,fAM|fAD,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fBI,fNM|fDI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // a + fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fDI,fNM|fDI,fNM|fDI,fNM|fDI,fNM|fDI,fNM|fDI,fNM|fDI,fNM|fDI, // b + + fMR|fBI,fMR|fBI,fNM|fWI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fBI,fMR|fDI,fNM|fBI,fNM|fNI,fNM|fWI,fNM|fNI,fNM|fNI,fNM|fBI,fERR ,fNM|fNI, // c + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fBI,fNM|fBI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // d + fAM|fBR,fAM|fBR,fAM|fBR,fAM|fBR,fNM|fBI,fNM|fBI,fNM|fBI,fNM|fBI,fAM|fDR,fAM|fDR,fAM|fAD,fAM|fBR,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // e + fNM|fNI,fERR ,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fMEXTRA,fMEXTRA,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fMEXTRA,fMEXTRA, // f +}; + +/****************************************************************************/ + +// 2-byte opcodes +sU8 Table2[256] = +{ + // 0 1 2 3 4 5 6 7 8 9 a b c d e f + fERR ,fERR ,fERR ,fERR ,fERR ,fERR ,fNM|fNI,fERR ,fNM|fNI,fNM|fNI,fERR ,fERR ,fERR ,fERR ,fERR ,fERR , // 0 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fERR ,fERR ,fERR ,fERR ,fERR ,fERR ,fERR , // 1 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fERR ,fERR ,fERR ,fERR ,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 2 + fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fERR ,fNM|fNI,fERR ,fERR ,fERR ,fERR ,fERR ,fERR ,fERR ,fERR , // 3 + + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 4 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 
5 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 6 + fMR|fBI,fMR|fBI,fMR|fBI,fMR|fBI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fNI,fERR ,fERR ,fERR ,fERR ,fERR ,fERR ,fMR|fNI,fMR|fNI, // 7 + + fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR,fAM|fDR, // 8 + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // 9 + fNM|fNI,fNM|fNI,fNM|fNI,fMR|fNI,fMR|fBI,fMR|fNI,fMR|fNI,fMR|fNI,fERR ,fERR ,fERR ,fMR|fNI,fMR|fBI,fMR|fNI,fERR ,fMR|fNI, // a + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fERR ,fERR ,fERR ,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // b + + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI,fNM|fNI, // c + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // d + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // e + fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fERR , // f +}; + +/****************************************************************************/ + +// escape opcodes using ModRM byte to get more variants +sU8 TableX[32] = +{ + // 0 1 2 3 4 5 6 7 + fMR|fBI,fERR ,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // escapes for 0xf6 + fMR|fDI,fERR ,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI,fMR|fNI, // escapes for 0xf7 + fMR|fNI,fMR|fNI,fERR ,fERR ,fERR ,fERR ,fERR ,fERR , // escapes for 0xfe + fMR|fNI,fMR|fNI,fMR|fNI,fERR ,fMR|fNI,fERR ,fMR|fNI,fERR , // escapes for 0xff +}; + +/****************************************************************************/ 
+/****************************************************************************/ + +struct DisFilterCtx +{ + DataBuffer Buffer[ST_MAX]; + sU32 FuncTable[256]; + sBool NextIsFunc; + + sU32 CodeStart,CodeEnd; + + DisFilterCtx(sU32 codeStart,sU32 codeEnd) + { + ResetCtx(codeStart, codeEnd); + } + + void ResetCtx(sU32 codeStart,sU32 codeEnd) + { + NextIsFunc = sTRUE; + for(sInt i=0;i<256;i++) + FuncTable[i] = 0; + + CodeStart = codeStart; + CodeEnd = codeEnd; + for (sInt i=0; i= CodeStart && codedAddr < CodeEnd) + count++; + else + break; + } + + if(count < 3) // if it's less than 3 entries, it's probably not a jump table. + count = 0; + + return count; + } + + sInt ProcessInstr(sU8 *instr,sU32 memory) + { + if(sInt nJump = DetectJumpTable(instr,memory)) + { + // probable jump table with nJump entries + sInt remaining = nJump; + + while(remaining) + { + sInt count = (remaining < 256) ? remaining : 256; + Put8(ST_OP,JUMPTAB); + Put8(ST_JUMPTBL_COUNT,count-1); + + for(sInt i=0;i> 3) & 7) | ((code & 0x01) << 3) | ((code & 0x08) << 1)]; + + if(flags != fERR) + { + if(o16) + Put8(ST_OP,OP_OSIZE); + + Put8(ST_OP,code); + if(code == OP_2BYTE) + Put8(ST_OP2,code2); + + if(code == OP_CALLF || code == OP_JMPF || code == OP_ENTER) + { + // far call/jump have a *48-bit* immediate address. we deal with it here by copying the segment index + // manually and encoding the rest as a normal 32-bit direct address. + // similarly, enter has a word operand and a byte operand. again, we code the word here, and + // deal with the byte later during the normal flow. 
+ Copy16(ST_IMM16,instr); + } + + if((flags & fMODE) == fMR) + { + sInt modrm = Copy8(ST_MODRM,instr); + sInt sib = 0; + + if((modrm & 0x07) == 4 && modrm < 0xc0) + sib = Copy8(ST_SIB,instr); + + if((modrm & 0xc0) == 0x40) // register+byte displacement + Copy8(ST_DISP8_R0 + (modrm & 0x07),instr); + + if((modrm & 0xc0) == 0x80 || (modrm & 0xc7) == 0x05 || (modrm < 0x40 && (sib & 0x07) == 5)) + { + // register+dword displacement + Copy32((modrm & 0xc7) == 0x05 ? ST_ADDR32 : ST_DISP32,instr); + } + } + + if((flags & fMODE) == fAM) + { + switch(flags & fTYPE) + { + case fAD: Copy32(ST_ADDR32,instr); break; + case fDA: Copy32(ST_AJUMP32,instr); break; + case fBR: Copy8(ST_JUMP8,instr); break; + + case fDR: + { + sU32 target = Fetch32(instr); + target += (instr - start) + memory; + if(code != OP_CALLN) // not a near call + Put32(ST_JUMP32,target); + else + { + sInt ind = FindMTF(FuncTable,target); + Put8(ST_CALL_IDX,ind+1); + if(ind == -1) + Put32(ST_CALL32,target); + } + } + break; + } + } + else + { + switch(flags & fTYPE) + { + case fBI: Copy8(ST_IMM8,instr); break; + case fWI: Copy16(ST_IMM16,instr); break; + + case fDI: + if(!o16) + Copy32(ST_IMM32,instr); + else + Copy16(ST_IMM16,instr); + break; + } + } + + return instr - start; + } + else // couldn't decode instruction + { + Put8(ST_OP,ESCAPE); // escape code + Put8(ST_OP,*start); // the unrecognized opcode + return 1; + } + } + + sU8 *Flush(sU8 *out, sU32 &sz) + { + sU32 size = 0; + + if (sz < ST_MAX * 16) + return (NULL); + size = ST_MAX * 4; // 4 bytes per stream to encode the size + for(sInt i=0;i= sz) return (NULL); // Check for output overflow + } + + // Output ptr is supplied by caller + sU8 *outPtr = out; + + for(sInt i=0;i streamEnd[strm]) return sFALSE +#define CheckDst(size) if(dest+size > destEnd) return sFALSE +#define CheckSrcDst(strm,size) if(stream[strm]+size > streamEnd[strm] || dest+size > destEnd) return sFALSE + +#define Copy8Chk(strm) do { CheckSrcDst(strm,1); Copy8 (dest,stream[strm]); } 
while(0) +#define Copy16Chk(strm) do { CheckSrcDst(strm,2); Copy16(dest,stream[strm]); } while(0) +#define Copy32Chk(strm) do { CheckSrcDst(strm,4); Copy32(dest,stream[strm]); } while(0) + +static sBool +DisUnFilter(sU8 *source,sU32 sourceSize,sU8 *dest,sU32 destSize,sU32 memStart) +{ + sU8 *stream[ST_MAX]; + sU8 *streamEnd[ST_MAX]; + sU32 funcTable[256]; + + // read header (list of stream sizes) + if(sourceSize < ST_MAX*4) + return sFALSE; + + sU8 *hdr = source; + sU8 *cur = source + ST_MAX*4; + for(sInt i=0;i> 3) & 7) | ((code & 0x01) << 3) | ((code & 0x08) << 1)]; + + if((modrm & 0x07) == 4 && modrm < 0xc0) + { + CheckSrcDst(ST_SIB,1); + sib = Copy8(dest,stream[ST_SIB]); + } + + if((modrm & 0xc0) == 0x40) // register+byte displacement + { + sInt st = (modrm & 0x07) + ST_DISP8_R0; + Copy8Chk(st); + } + + if((modrm & 0xc0) == 0x80 || (modrm & 0xc7) == 0x05 || (modrm < 0x40 && (sib & 0x07) == 0x05)) + { + sInt st = (modrm & 0xc7) == 5 ? ST_ADDR32 : ST_DISP32; + Copy32Chk(st); + } + } + + if((flags & fMODE) == fAM) + { + switch(flags & fTYPE) + { + case fAD: Copy32Chk(ST_ADDR32); break; + case fDA: Copy32Chk(ST_AJUMP32); break; + case fBR: Copy8Chk(ST_JUMP8); break; + + case fDR: + { + sU32 target; + if(code == OP_CALLN) + { + CheckSrc(ST_CALL_IDX,1); + sInt ind = Fetch8(stream[ST_CALL_IDX]); + if(ind) + target = MoveToFront(funcTable,ind-1,funcTable[ind-1]); + else + { + CheckSrc(ST_CALL32,4); + target = Fetch32B(stream[ST_CALL32]); + AddMTF(funcTable,target); + } + } + else + { + CheckSrc(ST_JUMP32,4); + target = Fetch32B(stream[ST_JUMP32]); + } + + target -= (dest - start) + 4 + memory; + CheckDst(4); + Write32(dest,target); + } + break; + } + } + else + { + switch(flags & fTYPE) + { + case fBI: Copy8Chk(ST_IMM8); break; + case fWI: Copy16Chk(ST_IMM16); break; + + case fDI: + if(!o16) + Copy32Chk(ST_IMM32); + else + Copy16Chk(ST_IMM16); + break; + } + } + } + } + + return sTRUE; +} + +/* + * Try to estimate if the given data block contains 32-bit x86 instructions 
+ * especially of the call and jmp variety.
+ * TODO: This is a very rough estimation and can probably be improved.
+ *
+ * Counts 0xe8/0xe9 opcodes whose 4th and 5th following bytes are either both
+ * 0xff or both 0 (the two high bytes of a little-endian 32-bit operand).
+ * Returns non-zero when such opcodes make up at least 0.3% of the buffer
+ * length and at least 10% of them are of the 0xff form; returns 0 otherwise.
+ */
+static int
+is_x86_code(uchar_t *buf, int len)
+{
+	int e8e9 = 0, ff = 0;
+	uchar_t *pos, *last;
+
+	/*
+	 * An opcode plus its 32-bit operand needs 5 bytes. Bail out on anything
+	 * shorter so that "buf + len - 4" can never point in front of the buffer.
+	 */
+	if (buf == NULL || len < 5)
+		return (0);
+
+	pos = buf;
+	last = buf + len - 4;
+	while (pos < last) {
+		if (*pos == 0xe8 || *pos == 0xe9) {
+			if (pos[3] == 0xff && pos[4] == 0xff) {
+				e8e9++;
+				ff++;
+				pos += 4;
+			} else if (pos[3] == 0 && pos[4] == 0) {
+				e8e9++;
+				pos += 4;
+			} else {
+				pos++;
+			}
+		} else {
+			pos++;
+		}
+	}
+
+	/* No candidate opcodes at all: never evaluate ff/e8e9 as 0/0. */
+	if (e8e9 == 0)
+		return (0);
+	return ((double)e8e9/len >= 0.003 && (double)ff/e8e9 >= 0.1);
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * 32-bit x86 executable packer top-level routines. Detected x86 executable data
+ * are passed through these encoding routines. The data chunk is split into 32KB
+ * blocks and each block is separately Dispack-ed. The code tries to detect if
+ * a block contains valid x86 code by trying to estimate some instruction metrics.
+ */
+
+/*
+ * Encode fromlen bytes at "from" into "to".
+ *
+ * On entry *dstlen is the capacity of "to"; on success it is set to the number
+ * of bytes produced. *dstlen is only meaningful when 0 is returned.
+ *
+ * Every block is emitted as: one type byte, a 16-bit payload size stored via
+ * LE16(), a second 16-bit original size when the block is shorter than
+ * DISFILTER_BLOCK (ORIGSIZE set), and then the payload. The payload is the
+ * DisFilter() output when DISFILTERED is set, otherwise a verbatim copy.
+ *
+ * Returns 0 on success and -1 when an argument is NULL, the input is shorter
+ * than one block, the output does not fit, or fewer than DIS_MIN_REDUCE bytes
+ * were saved.
+ */
+int
+dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
+{
+	uchar_t *pos, *hdr, type, *pos_to, *to_last;
+	uint64_t len;
+
+	if (from == NULL || to == NULL || dstlen == NULL)
+		return (-1);
+	if (fromlen < DISFILTER_BLOCK)
+		return (-1);
+
+	pos = from;
+	len = fromlen;
+	pos_to = to;
+	to_last = to + *dstlen;
+	while (len > 0) {
+		DisFilterCtx ctx(0, DISFILTER_BLOCK);
+		sU32 sz, hdrsz;
+		sU16 origsize;
+		sU32 out;
+		sU8 *rv;
+
+		if (len > DISFILTER_BLOCK)
+			sz = DISFILTER_BLOCK;
+		else
+			sz = len;
+
+		/*
+		 * The header is written before the payload size is known, so make
+		 * sure it fits into the destination before touching it.
+		 */
+		hdrsz = (sz < DISFILTER_BLOCK) ? EXTENDED_HDR : NORMAL_HDR;
+		if ((uint64_t)(to_last - pos_to) < hdrsz)
+			return (-1);
+
+		hdr = pos_to;
+		type = 0;
+		origsize = sz;
+		if (sz < DISFILTER_BLOCK) {
+			type |= ORIGSIZE;
+			U16_P(hdr + NORMAL_HDR) = LE16(origsize);
+		}
+		pos_to += hdrsz;
+
+		/*
+		 * NOTE(review): "out" is handed to DisFilter() initialised to sz and
+		 * read back afterwards; presumably it is capacity-in/size-out.
+		 * Confirm DisFilter() never writes more than that many bytes at
+		 * pos_to, since the room check below only happens after the call.
+		 */
+		out = sz;
+		if (is_x86_code(pos, sz)) {
+			ctx.ResetCtx(0, sz);
+			rv = DisFilter(ctx, pos, sz, 0, pos_to, out);
+		} else {
+			rv = NULL;
+		}
+		if (rv != pos_to || sz == out) {
+			/* Not filtered or no gain: store the block verbatim. */
+			if (pos_to + origsize >= to_last) {
+				return (-1);
+			}
+			type &= CLEAR_DISFILTER;
+			*hdr = type;
+			hdr++;
+			U16_P(hdr) = LE16(origsize);
+			memcpy(pos_to, pos, origsize);
+			pos_to += origsize;
+		} else {
+			sU16 csize;
+
+			if (pos_to + out >= to_last) {
+				return (-1);
+			}
+			type |= DISFILTERED;
+			*hdr = type;
+			hdr++;
+			csize = out;
+			U16_P(hdr) = LE16(csize);
+			pos_to += csize;
+		}
+		pos += sz;
+		len -= sz;
+	}
+	*dstlen = pos_to - to;
+
+	/*
+	 * Compare before subtracting: both values are unsigned, so an output
+	 * larger than the input would wrap around and be reported as a success.
+	 */
+	if (*dstlen > fromlen || (fromlen - *dstlen) < DIS_MIN_REDUCE) {
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Reverse dispack_encode(): walk the block headers in "from" and rebuild the
+ * original bytes in "to".
+ *
+ * On entry *dstlen is the capacity of "to"; on success it is set to the number
+ * of bytes written.
+ *
+ * Returns 0 on success and -1 when an argument is NULL, the input is
+ * truncated or inconsistent, the output does not fit, or DisUnFilter() fails.
+ */
+int
+dispack_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen)
+{
+	uchar_t *pos, type, *pos_to, *to_last;
+	uint64_t len;
+
+	if (from == NULL || to == NULL || dstlen == NULL)
+		return (-1);
+
+	pos = from;
+	len = fromlen;
+	pos_to = to;
+	to_last = to + *dstlen;
+	while (len > 0) {
+		sU32 sz, cmpsz;
+
+		/*
+		 * Type byte plus 16-bit payload size. "len" is unsigned, so it has
+		 * to be checked before every subtraction or a truncated stream
+		 * wraps it around and the loop reads past the input.
+		 */
+		if (len < 1 + sizeof (sU16))
+			return (-1);
+		type = *pos++;
+		len--;
+		sz = DISFILTER_BLOCK;
+		cmpsz = LE16(U16_P(pos));
+		pos += 2;
+		len -= 2;
+		if (type & ORIGSIZE) {
+			if (len < sizeof (sU16))
+				return (-1);
+			sz = LE16(U16_P(pos));
+			pos += 2;
+			len -= 2;
+		}
+
+		/* The payload must lie entirely inside the remaining input. */
+		if (cmpsz > len)
+			return (-1);
+
+		if (type & DISFILTERED) {
+			if (sz > (uint64_t)(to_last - pos_to))
+				return (-1);
+			if (DisUnFilter(pos, cmpsz, pos_to, sz, 0) != sTRUE)
+				return (-1);
+			pos += cmpsz;
+			pos_to += sz;
+			len -= cmpsz;
+		} else {
+			if (cmpsz > (uint64_t)(to_last - pos_to))
+				return (-1);
+			memcpy(pos_to, pos, cmpsz);
+			pos += cmpsz;
+			pos_to += cmpsz;
+			len -= cmpsz;
+		}
+	}
+	*dstlen = pos_to - to;
+	return (0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/filters/dispack/dis.hpp b/filters/dispack/dis.hpp
new file mode 100644
index 0000000..fc6bb23
--- /dev/null
+++ b/filters/dispack/dis.hpp
@@ -0,0 +1,41 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. + * If not, see <http://www.gnu.org/licenses/>. + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#ifndef __DIS_HPP__ +#define __DIS_HPP__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int dispack_encode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *_dstlen); +int dispack_decode(uchar_t *from, uint64_t fromlen, uchar_t *to, uint64_t *dstlen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/filters/dispack/types.hpp b/filters/dispack/types.hpp new file mode 100644 index 0000000..f672e1f --- /dev/null +++ b/filters/dispack/types.hpp @@ -0,0 +1,51 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. + * If not, see <http://www.gnu.org/licenses/>. 
+ * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + */ + +#include +#include +#include + +#ifndef __TYPES_HPP__ +#define __TYPES_HPP__ + +typedef unsigned char sU8; +typedef signed char sS8; +typedef unsigned short sU16; +typedef signed short sS16; +typedef unsigned int sU32; +typedef signed int sS32; +typedef uint64_t sU64; +typedef int64_t sS64; +typedef int sInt; +typedef char sChar; +typedef bool sBool; +typedef float sF32; +typedef double sF64; + +#define sTRUE true +#define sFALSE false + +#define _byteswap_ushort htons +#define _byteswap_ulong htonl +#endif diff --git a/pcompress.c b/pcompress.c index abf4bfe..dac13d7 100644 --- a/pcompress.c +++ b/pcompress.c @@ -55,6 +55,7 @@ #include #include #include +#include /* * We use 8MB chunks by default. @@ -215,6 +216,23 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t fromlen = srclen; result = 0; + /* + * If Dispack is enabled it has to be done first since Dispack analyses the + * x86 instruction stream in the raw data. + */ + if (pctx->dispack_preprocess && PC_SUBTYPE(btype) == TYPE_EXE32) { + _dstlen = fromlen; + result = dispack_encode((uchar_t *)from, fromlen, to, &_dstlen); + if (result != -1) { + uchar_t *tmp; + tmp = from; + from = to; + to = tmp; + fromlen = _dstlen; + type |= PREPROC_TYPE_DISPACK; + } + } + if (pctx->lzp_preprocess) { int hashsize; @@ -335,7 +353,23 @@ preproc_decompress(pc_ctx_t *pctx, compress_func_ptr dec_func, void *src, uint64 *dstlen = result; } - if (!(type & (PREPROC_COMPRESSED | PREPROC_TYPE_DELTA2 | PREPROC_TYPE_LZP)) && type > 0) { + /* + * If Dispack is enabled it has to be done first since Dispack analyses the + * x86 instruction stream in the raw data. 
+ */ + if (type & PREPROC_TYPE_DISPACK) { + result = dispack_decode((uchar_t *)src, srclen, (uchar_t *)dst, &_dstlen); + if (result != -1) { + memcpy(src, dst, _dstlen); + srclen = _dstlen; + *dstlen = _dstlen; + } else { + return (result); + } + } + + if (!(type & (PREPROC_COMPRESSED | PREPROC_TYPE_DELTA2 | PREPROC_TYPE_LZP | PREPROC_TYPE_DISPACK)) + && type > 0) { log_msg(LOG_ERR, 0, "Invalid preprocessing flags: %d", type); return (-1); } @@ -3153,8 +3187,9 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) if (pctx->level > 9) ff.enable_packjpg = 1; init_filters(&ff); pctx->enable_packjpg = ff.enable_packjpg; + if (pctx->level > 8) pctx->dispack_preprocess = 1; } - if (pctx->lzp_preprocess || pctx->enable_delta2_encode) { + if (pctx->lzp_preprocess || pctx->enable_delta2_encode || pctx->dispack_preprocess) { pctx->preprocess_mode = 1; } } else if (pctx->do_uncompress) { diff --git a/pcompress.h b/pcompress.h index 45d02bf..38172c1 100644 --- a/pcompress.h +++ b/pcompress.h @@ -60,6 +60,7 @@ extern "C" { #define PREPROC_TYPE_LZP 1 #define PREPROC_TYPE_DELTA2 2 +#define PREPROC_TYPE_DISPACK 4 #define PREPROC_COMPRESSED 128 /* @@ -205,6 +206,7 @@ typedef struct pc_ctx { int enable_fixed_scan; int preprocess_mode; int lzp_preprocess; + int dispack_preprocess; int encrypt_type; int archive_mode; int verbose; diff --git a/utils/phash/extensions.h b/utils/phash/extensions.h index c6c765f..98679c7 100644 --- a/utils/phash/extensions.h +++ b/utils/phash/extensions.h @@ -88,16 +88,6 @@ struct ext_entry { {"upp" , TYPE_TEXT, 3}, {"mom" , TYPE_TEXT, 3}, {"tmac" , TYPE_TEXT, 4}, - {"exe" , TYPE_BINARY|TYPE_EXE, 3}, - {"dll" , TYPE_BINARY|TYPE_EXE, 3}, - {"bin" , TYPE_BINARY|TYPE_EXE, 3}, - {"o" , TYPE_BINARY|TYPE_EXE, 1}, - {"a" , TYPE_BINARY|TYPE_EXE, 1}, - {"obj" , TYPE_BINARY|TYPE_EXE, 3}, - {"so" , TYPE_BINARY|TYPE_EXE, 2}, - {"com" , TYPE_BINARY|TYPE_EXE, 3}, - {"xpi" , TYPE_BINARY|TYPE_EXE, 3}, - {"off" , TYPE_BINARY|TYPE_EXE, 3}, {"pdf" , TYPE_BINARY, 3}, 
{"jpg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 3}, {"jpeg" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG, 4}, @@ -152,5 +142,7 @@ struct ext_entry { {"SVGZ" , TYPE_BINARY, 4}, {"ODT" , TYPE_BINARY, 3}, {"3DM" , TYPE_BINARY, 3}, + {"chm" , TYPE_BINARY, 3}, + {"CHM" , TYPE_BINARY, 3}, }; #endif diff --git a/utils/phash/extensions.txt b/utils/phash/extensions.txt index ccde73f..8c43b42 100644 --- a/utils/phash/extensions.txt +++ b/utils/phash/extensions.txt @@ -78,16 +78,6 @@ am,TYPE_TEXT upp,TYPE_TEXT mom,TYPE_TEXT tmac,TYPE_TEXT -exe,TYPE_BINARY|TYPE_EXE -dll,TYPE_BINARY|TYPE_EXE -bin,TYPE_BINARY|TYPE_EXE -o,TYPE_BINARY|TYPE_EXE -a,TYPE_BINARY|TYPE_EXE -obj,TYPE_BINARY|TYPE_EXE -so,TYPE_BINARY|TYPE_EXE -com,TYPE_BINARY|TYPE_EXE -xpi,TYPE_BINARY|TYPE_EXE -off,TYPE_BINARY|TYPE_EXE pdf,TYPE_BINARY jpg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG jpeg,TYPE_BINARY|TYPE_COMPRESSED|TYPE_JPEG @@ -142,3 +132,5 @@ swf,TYPE_BINARY SVGZ,TYPE_BINARY,TYPE_COMPRESSED|TYPE_COMPRESSED_GZ ODT,TYPE_BINARY,TYPE_COMPRESSED|TYPE_COMPRESSED_ZIP 3DM,TYPE_BINARY +chm,TYPE_BINARY +CHM,TYPE_BINARY diff --git a/utils/phash/phash.c b/utils/phash/phash.c index 3196f2f..2c13753 100644 --- a/utils/phash/phash.c +++ b/utils/phash/phash.c @@ -12,14 +12,14 @@ /* small adjustments to _a_ to make values distinct */ ub1 tab[] = { -125,0,0,220,235,125,82,0,113,0,0,7,0,0,82,0, -0,0,7,124,0,0,82,0,0,125,0,7,0,220,125,120, -0,0,0,0,22,0,0,113,0,113,113,0,0,125,85,0, -113,0,11,113,125,7,0,0,0,40,0,113,85,0,0,125, -0,113,0,0,113,0,125,183,40,27,7,15,58,183,113,0, -124,0,0,22,125,220,0,40,0,87,87,125,113,0,183,125, -0,125,87,7,0,85,0,0,59,229,85,7,135,116,0,146, -0,0,82,0,0,0,200,0,56,125,0,0,61,202,0,0, +125,0,0,87,7,113,82,120,113,0,0,113,0,0,113,125, +0,0,7,113,0,113,0,0,0,7,0,131,0,85,0,22, +0,113,0,0,85,0,0,113,0,113,125,113,0,7,22,0, +82,0,0,113,125,125,0,0,0,0,0,113,22,0,0,125, +0,87,0,0,113,0,125,183,82,0,124,88,40,125,0,0, +124,0,168,125,0,125,0,40,0,82,125,113,113,125,116,0, 
+0,0,113,85,0,88,0,0,42,27,0,0,0,40,183,61, +0,0,0,0,0,111,17,0,87,125,0,0,166,91,0,0, }; /* The hash function */ diff --git a/utils/phash/phash.h b/utils/phash/phash.h index aa7445c..74bd726 100644 --- a/utils/phash/phash.h +++ b/utils/phash/phash.h @@ -8,7 +8,7 @@ extern ub1 tab[]; #define PHASHLEN 0x80 /* length of hash mapping table */ -#define PHASHNKEYS 141 /* How many keys were hashed */ +#define PHASHNKEYS 133 /* How many keys were hashed */ #define PHASHRANGE 256 /* Range any input might map to */ #define PHASHSALT 0x9e3779b9 /* internal, initialize normal hash */ diff --git a/utils/utils.h b/utils/utils.h index 973b97d..7bb0d6f 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -245,8 +245,8 @@ typedef enum { /* * Sub-types. */ -#define NUM_SUB_TYPES 20 - TYPE_EXE = 8, +#define NUM_SUB_TYPES 24 + TYPE_EXE32 = 8, TYPE_JPEG = 16, TYPE_MARKUP = 24, TYPE_COMPRESSED_GZ = 32, @@ -268,7 +268,8 @@ typedef enum { TYPE_PACKJPG = 160, TYPE_DNA_SEQ = 168, TYPE_MJPEG = 176, - TYPE_AUDIO_COMPRESSED = 184 + TYPE_AUDIO_COMPRESSED = 184, + TYPE_EXE64 = 192 } data_type_t; /*