Detect some DICOM formats and use BSC for DICOM data.

This commit is contained in:
Moinak Ghosh 2014-01-01 19:44:58 +05:30
parent ea345a902a
commit 683c3e48b5
3 changed files with 27 additions and 4 deletions

View file

@ -215,12 +215,15 @@ adapt_deinit(void **data)
return (rv);
}
/*
* Identify the types that BSC can compress better than others.
*
* Extracts the subtype from btype via PC_SUBTYPE() and compares it against
* a fixed list of subtypes. Returns non-zero when the subtype is in the
* list and zero otherwise.
*/
int
is_bsc_type(int btype)
{
int stype = PC_SUBTYPE(btype);
/*
* The comparison results (each 0 or 1) are combined with bitwise |, so
* every comparison is evaluated; there is no short-circuiting.
*/
return ((stype == TYPE_MARKUP) | (stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) |
/*
* NOTE(review): the next two lines look like the removed and the added
* version of the same diff line; only the second one tests TYPE_DICOM.
*/
(stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI));
(stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI) | (stype == TYPE_DICOM));
}
int
@ -237,8 +240,8 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
double tagcnt, pct_tag;
uchar_t cur_byte, prev_byte;
/*
* Count number of 8-bit binary bytes and XML tags in source.
*/
* Count number of 8-bit binary bytes and XML tags in source.
*/
tot8b = 0;
tag1 = 0;
tag2 = 0;
@ -255,6 +258,9 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
prev_byte = cur_byte;
}
/*
* Heuristics for detecting BINARY vs generic TEXT vs XML data.
*/
tot8b /= 0x80;
tagcnt = tag1 + tag2 + tag3;
pct_tag = tagcnt / (double)srclen;

View file

@ -1493,6 +1493,22 @@ detect_type_by_data(uchar_t *buf, size_t len)
return (TYPE_BINARY|TYPE_ARCHIVE_TAR);
if (memcmp(buf, "%PDF-", 5) == 0)
return (TYPE_BINARY|TYPE_PDF);
// Try to detect DICOM medical image file. BSC compresses these better.
if (len > 127) {
size_t i;
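// Heuristic: treat the buffer as DICOM if "DICM" or "ISO_IR " starts within the first 128 bytes.
// NOTE(review): the DICOM file format puts the "DICM" prefix at offset 128 (after the 128-byte
// preamble), which this scan (i < 128) never reaches; also, with len == 128 the 7-byte memcmp
// near i == 127 can read past the end of buf. Confirm and tighten the bounds.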
for (i = 0; i < 128; i++) {
if (buf[i] == 'D')
if (memcmp(&buf[i], "DICM", 4) == 0)
return (TYPE_BINARY|TYPE_DICOM);
if (buf[i] == 'I')
if (memcmp(&buf[i], "ISO_IR ", 7) == 0)
return (TYPE_BINARY|TYPE_DICOM);
}
}
if (U32_P(buf) == ELFINT) { // Regular ELF, check for 32/64-bit, core dump
if (*(buf + 16) != 4) {
if (*(buf + 4) == 2) {

View file

@ -274,7 +274,8 @@ typedef enum {
TYPE_BMP = 200,
TYPE_TIFF = 208,
TYPE_PDF = 216,
TYPE_ARCHIVE_TAR = 224
TYPE_ARCHIVE_TAR = 224,
TYPE_DICOM = 232
} data_type_t;
/*