Detect some DICOM formats and use BSC for DICOM data.
This commit is contained in:
parent
ea345a902a
commit
683c3e48b5
3 changed files with 27 additions and 4 deletions
|
@ -215,12 +215,15 @@ adapt_deinit(void **data)
|
||||||
return (rv);
|
return (rv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Identify the types that BSC can compress better than others.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
is_bsc_type(int btype)
|
is_bsc_type(int btype)
|
||||||
{
|
{
|
||||||
int stype = PC_SUBTYPE(btype);
|
int stype = PC_SUBTYPE(btype);
|
||||||
return ((stype == TYPE_MARKUP) | (stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) |
|
return ((stype == TYPE_MARKUP) | (stype == TYPE_BMP) | (stype == TYPE_DNA_SEQ) |
|
||||||
(stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI));
|
(stype == TYPE_MP4) | (stype == TYPE_FLAC) | (stype == TYPE_AVI) | (stype == TYPE_DICOM));
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -237,8 +240,8 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
||||||
double tagcnt, pct_tag;
|
double tagcnt, pct_tag;
|
||||||
uchar_t cur_byte, prev_byte;
|
uchar_t cur_byte, prev_byte;
|
||||||
/*
|
/*
|
||||||
* Count number of 8-bit binary bytes and XML tags in source.
|
* Count number of 8-bit binary bytes and XML tags in source.
|
||||||
*/
|
*/
|
||||||
tot8b = 0;
|
tot8b = 0;
|
||||||
tag1 = 0;
|
tag1 = 0;
|
||||||
tag2 = 0;
|
tag2 = 0;
|
||||||
|
@ -255,6 +258,9 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
|
||||||
prev_byte = cur_byte;
|
prev_byte = cur_byte;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Heuristics for detecting BINARY vs generic TEXT vs XML data.
|
||||||
|
*/
|
||||||
tot8b /= 0x80;
|
tot8b /= 0x80;
|
||||||
tagcnt = tag1 + tag2 + tag3;
|
tagcnt = tag1 + tag2 + tag3;
|
||||||
pct_tag = tagcnt / (double)srclen;
|
pct_tag = tagcnt / (double)srclen;
|
||||||
|
|
|
@ -1493,6 +1493,22 @@ detect_type_by_data(uchar_t *buf, size_t len)
|
||||||
return (TYPE_BINARY|TYPE_ARCHIVE_TAR);
|
return (TYPE_BINARY|TYPE_ARCHIVE_TAR);
|
||||||
if (memcmp(buf, "%PDF-", 5) == 0)
|
if (memcmp(buf, "%PDF-", 5) == 0)
|
||||||
return (TYPE_BINARY|TYPE_PDF);
|
return (TYPE_BINARY|TYPE_PDF);
|
||||||
|
|
||||||
|
// Try to detect DICOM medical image files. BSC compresses these better.
|
||||||
|
if (len > 127) {
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
// Heuristic: treat the data as DICOM if "DICM" or "ISO_IR " starts anywhere within the first 128 bytes
|
||||||
|
for (i = 0; i < 128; i++) {
|
||||||
|
if (buf[i] == 'D')
|
||||||
|
if (memcmp(&buf[i], "DICM", 4) == 0)
|
||||||
|
return (TYPE_BINARY|TYPE_DICOM);
|
||||||
|
if (buf[i] == 'I')
|
||||||
|
if (memcmp(&buf[i], "ISO_IR ", 7) == 0)
|
||||||
|
return (TYPE_BINARY|TYPE_DICOM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (U32_P(buf) == ELFINT) { // Regular ELF, check for 32/64-bit, core dump
|
if (U32_P(buf) == ELFINT) { // Regular ELF, check for 32/64-bit, core dump
|
||||||
if (*(buf + 16) != 4) {
|
if (*(buf + 16) != 4) {
|
||||||
if (*(buf + 4) == 2) {
|
if (*(buf + 4) == 2) {
|
||||||
|
|
|
@ -274,7 +274,8 @@ typedef enum {
|
||||||
TYPE_BMP = 200,
|
TYPE_BMP = 200,
|
||||||
TYPE_TIFF = 208,
|
TYPE_TIFF = 208,
|
||||||
TYPE_PDF = 216,
|
TYPE_PDF = 216,
|
||||||
TYPE_ARCHIVE_TAR = 224
|
TYPE_ARCHIVE_TAR = 224,
|
||||||
|
TYPE_DICOM = 232
|
||||||
} data_type_t;
|
} data_type_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in a new issue