Add ASM version of Skein for x64 platforms with auto-detection

Error checking for checksum flag when decompressing
Update comments and READMEs
This commit is contained in:
Moinak Ghosh 2012-09-01 14:40:15 +05:30
parent eda312ce1e
commit 4ba840b255
7 changed files with 1387 additions and 5 deletions

View file

@ -8,6 +8,12 @@ The simplest process to build and install this utility is:
make
make install
In order to remove all binaries:
make clean
Remove all binaries and the generated Makefile:
make distclean
Strictly speaking the 'make install' step is not required to
run the utility as it is a single stand alone program for now.

View file

@ -62,8 +62,11 @@ LZPSRCS = lzp/lzp.c
LZPHDRS = lzp/lzp.h
LZPOBJS = $(LZPSRCS:.c=.o)
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein_block.c \
crypto/skein/skein.c crypto/skein/skein_debug.c
SKEIN_BLOCK_C = crypto/skein/skein_block.c
SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s
SKEIN_BLOCK_SRC = @SKEIN_BLOCK@
SKEIN_BLOCK_OBJ = crypto/skein/skein_block.o
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein.c crypto/skein/skein_debug.c
SKEINHDRS = crypto/skein/brg_endian.h crypto/skein/SHA3api_ref.h \
crypto/skein/skein.h crypto/skein/skein_port.h crypto/skein/brg_types.h \
crypto/skein/skein_debug.h crypto/skein/skein_iv.h
@ -87,7 +90,7 @@ COMMON_VEC_FLAGS = -ftree-vectorize
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @LIBBSCLFLAGS@
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ)
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
@ -146,6 +149,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
# Build the Skein block function from whichever source the config script
# substituted into SKEIN_BLOCK_SRC (portable C or x64 assembly). The source
# must stay the first prerequisite so that $< names it; the headers are listed
# so that a header change triggers a rebuild, as for the other Skein objects.
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC) $(SKEINHDRS)
	$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $< -o $@
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@

View file

@ -86,6 +86,15 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
'-L' - Enable LZP pre-compression. This improves compression ratio of all
algorithms with some extra CPU and very low RAM overhead. Using
delta encoding in conjunction with this may not always be beneficial.
'-S' <cksum>
- Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512.
The default is SKEIN256. The implementation actually uses SKEIN
512-256. This is 25% slower than simple CRC64 but is many times more
robust than CRC64 in detecting data integrity errors. SKEIN is a
finalist in the NIST SHA-3 standard selection process and is one of
the fastest in the group, especially on x86 platforms. BLAKE is faster
than SKEIN on a few platforms.
SKEIN 512-256 is about 60% faster than SHA 512-256 on x64 platforms.
'-M' - Display memory allocator statistics
'-C' - Display compression statistics

21
config
View file

@ -72,6 +72,25 @@ else
typ="RELEASE"
fi
# Choose the Skein block implementation that gets substituted into the
# Makefile: the portable C source by default, or the x64 assembly source
# when the machine reports a 64-bit x86 architecture.
OS=$(uname)
skeinblock='\$\(SKEIN_BLOCK_C\)'
if [ "$OS" = "Linux" ]
then
	# Use the machine hardware name (e.g. x86_64). The kernel release string
	# from 'uname -r' only embeds the architecture on some distributions, so
	# it is not a reliable way to detect a 64-bit x86 system.
	plat=$(uname -m)
elif [ "$OS" = "SunOS" ]
then
	plat=$(isainfo -v)
else
	echo "Unsupported OS: $OS"
	exit 1
fi

# Quote the value so that multi-line output is passed through unmodified.
echo "$plat" | egrep 'x86_64|amd64' > /dev/null
if [ $? -eq 0 ]
then
	skeinblock='\$\(SKEIN_BLOCK_ASM\)'
fi
linkvar="LINK"
compilevar="COMPILE"
compilecppvar="COMPILE_cpp"
@ -83,6 +102,7 @@ rabinoptvar="RABIN_OPT"
noslabcppflagsvar="NO_SLAB_CPPFLAGS"
debugstatscppflagsvar="DEBUG_STATS_CPPFLAGS"
prefixvar="PREFIX"
skeinblockvar="SKEIN_BLOCK"
libbscdirvar="LIBBSCDIR"
libbsclibvar="LIBBSCLIB"
@ -115,5 +135,6 @@ s#@${libbsclflagsvar}@#${libbsclflags}#g
s#@${libbscwrapobjvar}@#${libbscwrapobj}#g
s#@${libbscgenoptvar}@#${libbscgenopt}#g
s#@${libbsccppflagsvar}@#${libbsccppflags}#g
s#@${skeinblockvar}@#${skeinblock}#g
" > Makefile

File diff suppressed because it is too large Load diff

5
main.c
View file

@ -535,7 +535,10 @@ start_decompress(const char *filename, const char *to_filename)
}
cksum = flags & CKSUM_MASK;
get_checksum_props(NULL, &cksum, &cksum_bytes);
if (get_checksum_props(NULL, &cksum, &cksum_bytes) == -1) {
fprintf(stderr, "Invalid checksum algorithm code: %d. File corrupt ?\n", cksum);
UNCOMP_BAIL;
}
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
if (nthreads > 0 && nthreads < nprocs)

11
utils.c
View file

@ -335,12 +335,15 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes)
Skein_512_Update(&ctx, buf, bytes);
Skein_512_Final(&ctx, cksum_buf);
} else {
fprintf(stderr, "Invalid checksum algorithm code: %d\n", cksum);
return (-1);
}
return (0);
}
/*
* Check if either the given checksum name or id is valid and
* return its properties.
*/
int
get_checksum_props(char *name, int *cksum, int *cksum_bytes)
{
@ -357,6 +360,12 @@ get_checksum_props(char *name, int *cksum, int *cksum_bytes)
return (-1);
}
/*
* Endian independent way of storing the checksum bytes. This is actually
* storing in little endian format and a copy can be avoided in x86 land.
* However unsightly ifdefs are avoided here since this is not so performance
* critical.
*/
void
serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
{