From 5521955a944fb6017d9ec4643f4dc48d21d1527c Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Wed, 18 Dec 2013 23:00:39 +0530 Subject: [PATCH] Detect AR archives and set the type. Re-use a less common type code for AR. Use Dispack generically for all executables and AR archives. --- archive/pc_archive.c | 2 ++ pcompress.c | 5 ++++- utils/phash/extensions.h | 4 ++-- utils/phash/extensions.txt | 4 ++-- utils/utils.h | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/archive/pc_archive.c b/archive/pc_archive.c index 8708950..d118d1d 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -1467,6 +1467,8 @@ detect_type_by_data(uchar_t *buf, size_t len) // At least a few bytes. if (len < 16) return (TYPE_UNKNOWN); + if (memcmp(buf, "!<arch>\n", 8) == 0) + return (TYPE_BINARY|TYPE_ARCHIVE_AR); if (U32_P(buf) == ELFINT) { // Regular ELF, check for 32/64-bit, core dump if (*(buf + 16) != 4) { if (*(buf + 4) == 2) { diff --git a/pcompress.c b/pcompress.c index 4cd9625..6eed6e3 100644 --- a/pcompress.c +++ b/pcompress.c @@ -226,8 +226,11 @@ preproc_compress(pc_ctx_t *pctx, compress_func_ptr cmp_func, void *src, uint64_t /* * If Dispack is enabled it has to be done first since Dispack analyses the * x86 instruction stream in the raw data. + * AR archives are typically static libraries. So we Dispack them unconditionally. + * TODO: Is this too much to assume in the generic case? Can we look inside ar archives? 
*/ - if (pctx->dispack_preprocess && stype == TYPE_EXE32) { + if (pctx->dispack_preprocess && (stype == TYPE_EXE32 || stype == TYPE_EXE64 || + stype == TYPE_ARCHIVE_AR)) { _dstlen = fromlen; result = dispack_encode((uchar_t *)from, fromlen, to, &_dstlen); if (result != -1) { diff --git a/utils/phash/extensions.h b/utils/phash/extensions.h index e4c239b..7275e5e 100644 --- a/utils/phash/extensions.h +++ b/utils/phash/extensions.h @@ -121,8 +121,8 @@ struct ext_entry { {"arc" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC, 3}, {"jar" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ, 3}, {"lz" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ, 2}, - {"lzh" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3}, - {"lha" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH, 3}, + {"lzh" , TYPE_BINARY|TYPE_COMPRESSED, 3}, + {"lha" , TYPE_BINARY|TYPE_COMPRESSED, 3}, {"lzma" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA, 4}, {"lzo" , TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO, 3}, {"dmg" , TYPE_BINARY, 3}, diff --git a/utils/phash/extensions.txt b/utils/phash/extensions.txt index 22a8943..323af6c 100644 --- a/utils/phash/extensions.txt +++ b/utils/phash/extensions.txt @@ -111,8 +111,8 @@ arj,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARJ arc,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_ARC jar,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_GZ lz,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZ -lzh,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH -lha,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LH +lzh,TYPE_BINARY|TYPE_COMPRESSED +lha,TYPE_BINARY|TYPE_COMPRESSED lzma,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZMA lzo,TYPE_BINARY|TYPE_COMPRESSED|TYPE_COMPRESSED_LZO dmg,TYPE_BINARY diff --git a/utils/utils.h b/utils/utils.h index dbac54f..bdf5021 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -256,7 +256,7 @@ typedef enum { TYPE_COMPRESSED_ZIP = 56, TYPE_COMPRESSED_ARJ = 64, TYPE_COMPRESSED_ARC = 72, - TYPE_COMPRESSED_LH = 80, + TYPE_ARCHIVE_AR = 80, 
TYPE_COMPRESSED_LZMA = 88, TYPE_COMPRESSED_LZO = 96, TYPE_AVI = 104,