Add ASM version of Skein for x64 platforms with auto-detection
Error checking for checksum flag when decompressing. Update comments and READMEs.
This commit is contained in:
parent
eda312ce1e
commit
4ba840b255
7 changed files with 1387 additions and 5 deletions
6
INSTALL
6
INSTALL
|
@ -8,6 +8,12 @@ The simplest process to build and install this utility is:
|
|||
make
|
||||
make install
|
||||
|
||||
In order to remove all binaries:
|
||||
make clean
|
||||
|
||||
Remove all binaries and the generated Makefile:
|
||||
make distclean
|
||||
|
||||
Strictly speaking the 'make install' step is not required to
|
||||
run the utility as it is a single stand alone program for now.
|
||||
|
||||
|
|
12
Makefile.in
12
Makefile.in
|
@ -62,8 +62,11 @@ LZPSRCS = lzp/lzp.c
|
|||
LZPHDRS = lzp/lzp.h
|
||||
LZPOBJS = $(LZPSRCS:.c=.o)
|
||||
|
||||
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein_block.c \
|
||||
crypto/skein/skein.c crypto/skein/skein_debug.c
|
||||
SKEIN_BLOCK_C = crypto/skein/skein_block.c
|
||||
SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s
|
||||
SKEIN_BLOCK_SRC = @SKEIN_BLOCK@
|
||||
SKEIN_BLOCK_OBJ = crypto/skein/skein_block.o
|
||||
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein.c crypto/skein/skein_debug.c
|
||||
SKEINHDRS = crypto/skein/brg_endian.h crypto/skein/SHA3api_ref.h \
|
||||
crypto/skein/skein.h crypto/skein/skein_port.h crypto/skein/brg_types.h \
|
||||
crypto/skein/skein_debug.h crypto/skein/skein_iv.h
|
||||
|
@ -87,7 +90,7 @@ COMMON_VEC_FLAGS = -ftree-vectorize
|
|||
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @LIBBSCLFLAGS@
|
||||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
|
||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ)
|
||||
|
||||
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
||||
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
||||
|
@ -146,6 +149,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
|
|||
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@
|
||||
|
||||
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
|
||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||
|
||||
|
|
|
@ -86,6 +86,15 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
|
|||
'-L' - Enable LZP pre-compression. This improves compression ratio of all
|
||||
algorithms with some extra CPU and very low RAM overhead. Using
|
||||
delta encoding in conjunction with this may not always be beneficial.
|
||||
'-S' <cksum>
|
||||
- Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512
|
||||
Default one is SKEIN256. The implementation actually uses SKEIN
|
||||
512-256. This is 25% slower than simple CRC64 but is many times more
|
||||
robust than CRC64 in detecting data integrity errors. SKEIN is a
|
||||
finalist in the NIST SHA-3 standard selection process and is one of
|
||||
the fastest in the group, especially on x86 platforms. BLAKE is faster
|
||||
than SKEIN on a few platforms.
|
||||
SKEIN 512-256 is about 60% faster than SHA 512-256 on x64 platforms.
|
||||
'-M' - Display memory allocator statistics
|
||||
'-C' - Display compression statistics
|
||||
|
||||
|
|
21
config
21
config
|
@ -72,6 +72,25 @@ else
|
|||
typ="RELEASE"
|
||||
fi
|
||||
|
||||
OS=$(uname)
|
||||
skeinblock='\$\(SKEIN_BLOCK_C\)'
|
||||
if [ "$OS" = "Linux" ]
|
||||
then
|
||||
plat=$(uname -r)
|
||||
elif [ "$OS" = "SunOS" ]
|
||||
then
|
||||
plat=$(isainfo -v)
|
||||
else
|
||||
echo "Unsupported OS: $OS"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo $plat | egrep 'x86_64|amd64' > /dev/null
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
skeinblock='\$\(SKEIN_BLOCK_ASM\)'
|
||||
fi
|
||||
|
||||
linkvar="LINK"
|
||||
compilevar="COMPILE"
|
||||
compilecppvar="COMPILE_cpp"
|
||||
|
@ -83,6 +102,7 @@ rabinoptvar="RABIN_OPT"
|
|||
noslabcppflagsvar="NO_SLAB_CPPFLAGS"
|
||||
debugstatscppflagsvar="DEBUG_STATS_CPPFLAGS"
|
||||
prefixvar="PREFIX"
|
||||
skeinblockvar="SKEIN_BLOCK"
|
||||
|
||||
libbscdirvar="LIBBSCDIR"
|
||||
libbsclibvar="LIBBSCLIB"
|
||||
|
@ -115,5 +135,6 @@ s#@${libbsclflagsvar}@#${libbsclflags}#g
|
|||
s#@${libbscwrapobjvar}@#${libbscwrapobj}#g
|
||||
s#@${libbscgenoptvar}@#${libbscgenopt}#g
|
||||
s#@${libbsccppflagsvar}@#${libbsccppflags}#g
|
||||
s#@${skeinblockvar}@#${skeinblock}#g
|
||||
" > Makefile
|
||||
|
||||
|
|
1328
crypto/skein/skein_block_x64.s
Normal file
1328
crypto/skein/skein_block_x64.s
Normal file
File diff suppressed because it is too large
Load diff
5
main.c
5
main.c
|
@ -535,7 +535,10 @@ start_decompress(const char *filename, const char *to_filename)
|
|||
}
|
||||
|
||||
cksum = flags & CKSUM_MASK;
|
||||
get_checksum_props(NULL, &cksum, &cksum_bytes);
|
||||
if (get_checksum_props(NULL, &cksum, &cksum_bytes) == -1) {
|
||||
fprintf(stderr, "Invalid checksum algorithm code: %d. File corrupt ?\n", cksum);
|
||||
UNCOMP_BAIL;
|
||||
}
|
||||
|
||||
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
if (nthreads > 0 && nthreads < nprocs)
|
||||
|
|
11
utils.c
11
utils.c
|
@ -335,12 +335,15 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes)
|
|||
Skein_512_Update(&ctx, buf, bytes);
|
||||
Skein_512_Final(&ctx, cksum_buf);
|
||||
} else {
|
||||
fprintf(stderr, "Invalid checksum algorithm code: %d\n", cksum);
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if either the given checksum name or id is valid and
|
||||
* return its properties.
|
||||
*/
|
||||
int
|
||||
get_checksum_props(char *name, int *cksum, int *cksum_bytes)
|
||||
{
|
||||
|
@ -357,6 +360,12 @@ get_checksum_props(char *name, int *cksum, int *cksum_bytes)
|
|||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Endian independent way of storing the checksum bytes. This is actually
|
||||
* storing in little endian format and a copy can be avoided in x86 land.
|
||||
* However unsightly ifdefs are avoided here since this is not so performance
|
||||
* critical.
|
||||
*/
|
||||
void
|
||||
serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue