Add support for Skein512 and Skein256 checksums

Import Skein code from NIST CD submission
Make checksum algorithms pluggable
Fix handling of huge buffers (>2GB) in LZP
Cleanup of some buffer sizing code
Speed up CRC64 calculation in dedupe chunking
This commit is contained in:
Moinak Ghosh 2012-08-31 22:36:06 +05:30
parent f03834278a
commit eda312ce1e
20 changed files with 3115 additions and 81 deletions

View file

@ -62,6 +62,13 @@ LZPSRCS = lzp/lzp.c
LZPHDRS = lzp/lzp.h
LZPOBJS = $(LZPSRCS:.c=.o)
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein_block.c \
crypto/skein/skein.c crypto/skein/skein_debug.c
SKEINHDRS = crypto/skein/brg_endian.h crypto/skein/SHA3api_ref.h \
crypto/skein/skein.h crypto/skein/skein_port.h crypto/skein/brg_types.h \
crypto/skein/skein_debug.h crypto/skein/skein_iv.h
SKEINOBJS = $(SKEINSRCS:.c=.o)
LIBBSCWRAP = libbsc_compress.c
LIBBSCWRAPOBJ = libbsc_compress.o
LIBBSCDIR = @LIBBSCDIR@
@ -75,12 +82,12 @@ BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ lzp/*~
RM = rm -f
COMMON_CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT_PROPS \
-DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 \
-I./lzp @LIBBSCCPPFLAGS@
-I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein
COMMON_VEC_FLAGS = -ftree-vectorize
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @LIBBSCLFLAGS@
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
@ -139,6 +146,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
$(LIBBSCLIB):
(cd $(LIBBSCDIR); make)

115
crypto/skein/SHA3api_ref.c Normal file
View file

@ -0,0 +1,115 @@
/***********************************************************************
**
** Implementation of the AHS API using the Skein hash function.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
************************************************************************/
#include <string.h> /* get the memcpy/memset functions */
#include "skein.h" /* get the Skein API definitions */
#include "SHA3api_ref.h"/* get the AHS API definitions */
/******************************************************************/
/* AHS API code */
/******************************************************************/
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* select the context size and init the context */
HashReturn Init(hashState *state, int hashbitlen)
{
#if SKEIN_256_NIST_MAX_HASH_BITS
if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS)
{
Skein_Assert(hashbitlen > 0,BAD_HASHLEN);
state->statebits = 64*SKEIN_256_STATE_WORDS;
return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen);
}
#endif
if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS)
{
state->statebits = 64*SKEIN_512_STATE_WORDS;
return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen);
}
else
{
state->statebits = 64*SKEIN1024_STATE_WORDS;
return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen);
}
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process data to be hashed */
HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
{
/* only the final Update() call is allowed do partial bytes, else assert an error */
Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, FAIL);
Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
if ((databitlen & 7) == 0) /* partial bytes? */
{
switch ((state->statebits >> 8) & 3)
{
case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3);
case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3);
case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3);
default: return FAIL;
}
}
else
{ /* handle partial final byte */
size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */
u08b_t b,mask;
mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */
b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */
switch ((state->statebits >> 8) & 3)
{
case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */
Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */
break;
case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */
Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */
break;
case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */
Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */
break;
default: return FAIL;
}
Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */
return SUCCESS;
}
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize hash computation and output the result (hashbitlen bits) */
HashReturn Final(hashState *state, BitSequence *hashval)
{
Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
switch ((state->statebits >> 8) & 3)
{
case 2: return Skein_512_Final(&state->u.ctx_512,hashval);
case 1: return Skein_256_Final(&state->u.ctx_256,hashval);
case 0: return Skein1024_Final(&state->u.ctx1024,hashval);
default: return FAIL;
}
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* all-in-one hash function */
HashReturn Hash(int hashbitlen, const BitSequence *data, /* all-in-one call */
DataLength databitlen,BitSequence *hashval)
{
hashState state;
HashReturn r = Init(&state,hashbitlen);
if (r == SUCCESS)
{ /* these calls do not fail when called properly */
r = Update(&state,data,databitlen);
Final(&state,hashval);
}
return r;
}

View file

@ -0,0 +1,66 @@
#ifndef _AHS_API_H_
#define _AHS_API_H_
/***********************************************************************
**
** Interface declarations of the AHS API using the Skein hash function.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
************************************************************************/
#include "skein.h"
typedef enum
{
SUCCESS = SKEIN_SUCCESS,
FAIL = SKEIN_FAIL,
BAD_HASHLEN = SKEIN_BAD_HASHLEN
}
HashReturn;
typedef size_t DataLength; /* bit count type */
typedef u08b_t BitSequence; /* bit stream type */
typedef struct
{
uint_t statebits; /* 256, 512, or 1024 */
union
{
Skein_Ctxt_Hdr_t h; /* common header "overlay" */
Skein_256_Ctxt_t ctx_256;
Skein_512_Ctxt_t ctx_512;
Skein1024_Ctxt_t ctx1024;
} u;
}
hashState;
/* "incremental" hashing API */
HashReturn Init (hashState *state, int hashbitlen);
HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
HashReturn Final (hashState *state, BitSequence *hashval);
/* "all-in-one" call */
HashReturn Hash (int hashbitlen, const BitSequence *data,
DataLength databitlen, BitSequence *hashval);
/*
** Re-define the compile-time constants below to change the selection
** of the Skein state size in the Init() function in SHA3api_ref.c.
**
** That is, the NIST API does not allow for explicit selection of the
** Skein block size, so it must be done implicitly in the Init() function.
** The selection is controlled by these constants.
*/
#ifndef SKEIN_256_NIST_MAX_HASHBITS
#define SKEIN_256_NIST_MAX_HASHBITS (0)
#endif
#ifndef SKEIN_512_NIST_MAX_HASHBITS
#define SKEIN_512_NIST_MAX_HASHBITS (512)
#endif
#endif /* ifdef _AHS_API_H_ */

148
crypto/skein/brg_endian.h Normal file
View file

@ -0,0 +1,148 @@
/*
---------------------------------------------------------------------------
Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue 20/10/2006
*/
#ifndef BRG_ENDIAN_H
#define BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* Include files where endian defines and byteswap functions may reside */
#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined(AVR)
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if !defined(PLATFORM_BYTE_ORDER)
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 ) || defined( AVR )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
#endif
#endif
/* special handler for IA64, which may be either endianness (?) */
/* here we assume little-endian, but this may need to be changed */
#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
# define PLATFORM_MUST_ALIGN (1)
#ifndef PLATFORM_BYTE_ORDER
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#endif
#ifndef PLATFORM_MUST_ALIGN
# define PLATFORM_MUST_ALIGN (0)
#endif
#endif /* ifndef BRG_ENDIAN_H */

188
crypto/skein/brg_types.h Normal file
View file

@ -0,0 +1,188 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue 09/09/2006
The unsigned integer types defined here are of the form uint_<nn>t where
<nn> is the length of the type; for example, the unsigned 32-bit type is
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
defined in the inttypes.h and stdint.h headers since attempts to use these
types have shown that support for them is still highly variable. However,
since the latter are of the form uint<nn>_t, a regular expression search
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
can be used to convert the types used here to the C99 standard types.
*/
#ifndef BRG_TYPES_H
#define BRG_TYPES_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <limits.h>
#ifndef BRG_UI8
# define BRG_UI8
# if UCHAR_MAX == 255u
typedef unsigned char uint_8t;
# else
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI16
# define BRG_UI16
# if USHRT_MAX == 65535u
typedef unsigned short uint_16t;
# else
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
# endif
#endif
#ifndef BRG_UI32
# define BRG_UI32
# if UINT_MAX == 4294967295u
# define li_32(h) 0x##h##u
typedef unsigned int uint_32t;
# elif ULONG_MAX == 4294967295u
# define li_32(h) 0x##h##ul
typedef unsigned long uint_32t;
# elif defined( _CRAY )
# error This code needs 32-bit data types, which Cray machines do not provide
# else
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI64
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( __sun ) && defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
# if UINT_MAX == 18446744073709551615u
# define BRG_UI64
# define li_64(h) 0x##h##u
typedef unsigned int uint_64t;
# endif
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
# if ULONG_MAX == 18446744073709551615ul
# define BRG_UI64
# define li_64(h) 0x##h##ul
typedef unsigned long uint_64t;
# endif
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
# if ULLONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
# if ULONG_LONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# elif defined(__GNUC__) /* DLW: avoid mingw problem with -ansi */
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
#endif
#if defined( NEED_UINT_64T ) && !defined( BRG_UI64 )
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
#endif
#ifndef RETURN_VALUES
# define RETURN_VALUES
# if defined( DLL_EXPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllexport ) void __stdcall
# define INT_RETURN __declspec( dllexport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllexport__ ) void
# define INT_RETURN __declspec( __dllexport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( DLL_IMPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllimport ) void __stdcall
# define INT_RETURN __declspec( dllimport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllimport__ ) void
# define INT_RETURN __declspec( __dllimport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( __WATCOMC__ )
# define VOID_RETURN void __cdecl
# define INT_RETURN int __cdecl
# else
# define VOID_RETURN void
# define INT_RETURN int
# endif
#endif
/* These defines are used to declare buffers in a way that allows
faster operations on longer variables to be used. In all these
defines 'size' must be a power of 2 and >= 8
dec_unit_type(size,x) declares a variable 'x' of length
'size' bits
dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
bytes defined as an array of variables
each of 'size' bits (bsize must be a
multiple of size / 8)
ptr_cast(x,size) casts a pointer to a pointer to a
varaiable of length 'size' bits
*/
#define ui_type(size) uint_##size##t
#define dec_unit_type(size,x) typedef ui_type(size) x
#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
#define ptr_cast(x,size) ((ui_type(size)*)(x))
#if defined(__cplusplus)
}
#endif
#endif

753
crypto/skein/skein.c Normal file
View file

@ -0,0 +1,753 @@
/***********************************************************************
**
** Implementation of the Skein hash function.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
************************************************************************/
#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
#include <string.h> /* get the memcpy/memset functions */
#include "skein.h" /* get the Skein API definitions */
#include "skein_iv.h" /* get precomputed IVs */
/*****************************************************************/
/* External function to process blkCnt (nonzero) full block(s) of data. */
void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
/*****************************************************************/
/* 256-bit Skein */
/*****************************************************************/
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a straight hashing operation */
int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
{
union
{
u08b_t b[SKEIN_256_STATE_BYTES];
u64b_t w[SKEIN_256_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
switch (hashBitLen)
{ /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break;
case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break;
case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break;
case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break;
#endif
default:
/* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */
Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
/* compute the initial chaining values from config block */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
break;
}
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */
/* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */
/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
{
union
{
u08b_t b[SKEIN_256_STATE_BYTES];
u64b_t w[SKEIN_256_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */
{
memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
}
else /* here to pre-process a key */
{
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */
ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
Skein_256_Update(ctx,key,keyBytes); /* hash the key */
Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
#if SKEIN_NEED_SWAP
{
uint_t i;
for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */
ctx->X[i] = Skein_Swap64(ctx->X[i]);
}
#endif
}
/* build/process the config block, type == CONFIG (could be precomputed for each key) */
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx,CFG_FINAL);
memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
Skein_Show_Key(256,&ctx->h,key,keyBytes);
/* compute the initial chaining values from config block */
Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
/* The chaining vars ctx->X are now initialized */
/* Set up to process the data message portion of the hash (default) */
ctx->h.bCnt = 0; /* buffer b[] starts out empty */
Skein_Start_New_Type(ctx,MSG);
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */
int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
{
size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES)
{
if (ctx->h.bCnt) /* finish up any buffered message data */
{
n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
if (n)
{
Skein_assert(n < msgByteCnt); /* check on our logic here */
memcpy(&ctx->b[ctx->h.bCnt],msg,n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
}
Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES);
ctx->h.bCnt = 0;
}
/* now process any remaining full blocks, directly from input message data */
if (msgByteCnt > SKEIN_256_BLOCK_BYTES)
{
n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */
Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES);
msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
msg += n * SKEIN_256_BLOCK_BYTES;
}
Skein_assert(ctx->h.bCnt == 0);
}
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt)
{
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the result */
int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN_256_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN_256_BLOCK_BYTES)
n = SKEIN_256_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein_256_API_CodeSize(void)
{
return ((u08b_t *) Skein_256_API_CodeSize) -
((u08b_t *) Skein_256_Init);
}
#endif
/*****************************************************************/
/* 512-bit Skein */
/*****************************************************************/
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a straight hashing operation */
int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
{
union
{
u08b_t b[SKEIN_512_STATE_BYTES];
u64b_t w[SKEIN_512_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
switch (hashBitLen)
{ /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break;
case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break;
case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break;
case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break;
#endif
default:
/* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */
Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
/* compute the initial chaining values from config block */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
break;
}
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */
/* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */
/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
{
union
{
u08b_t b[SKEIN_512_STATE_BYTES];
u64b_t w[SKEIN_512_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */
{
memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
}
else /* here to pre-process a key */
{
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */
ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
Skein_512_Update(ctx,key,keyBytes); /* hash the key */
Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
#if SKEIN_NEED_SWAP
{
uint_t i;
for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */
ctx->X[i] = Skein_Swap64(ctx->X[i]);
}
#endif
}
/* build/process the config block, type == CONFIG (could be precomputed for each key) */
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx,CFG_FINAL);
memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
Skein_Show_Key(512,&ctx->h,key,keyBytes);
/* compute the initial chaining values from config block */
Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
/* The chaining vars ctx->X are now initialized */
/* Set up to process the data message portion of the hash (default) */
ctx->h.bCnt = 0; /* buffer b[] starts out empty */
Skein_Start_New_Type(ctx,MSG);
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */
int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
{
size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
{
if (ctx->h.bCnt) /* finish up any buffered message data */
{
n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
if (n)
{
Skein_assert(n < msgByteCnt); /* check on our logic here */
memcpy(&ctx->b[ctx->h.bCnt],msg,n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
}
Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
ctx->h.bCnt = 0;
}
/* now process any remaining full blocks, directly from input message data */
if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
{
n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */
Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
msg += n * SKEIN_512_BLOCK_BYTES;
}
Skein_assert(ctx->h.bCnt == 0);
}
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt)
{
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the result */
int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN_512_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN_512_BLOCK_BYTES)
n = SKEIN_512_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein_512_API_CodeSize(void)
{
return ((u08b_t *) Skein_512_API_CodeSize) -
((u08b_t *) Skein_512_Init);
}
#endif
/*****************************************************************/
/* 1024-bit Skein */
/*****************************************************************/
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a straight hashing operation */
int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
{
union
{
u08b_t b[SKEIN1024_STATE_BYTES];
u64b_t w[SKEIN1024_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
switch (hashBitLen)
{ /* use pre-computed values, where available */
#ifndef SKEIN_NO_PRECOMP
case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break;
case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break;
case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break;
#endif
default:
/* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */
Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
/* compute the initial chaining values from config block */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
break;
}
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */
/* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */
/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
{
union
{
u08b_t b[SKEIN1024_STATE_BYTES];
u64b_t w[SKEIN1024_STATE_WORDS];
} cfg; /* config block */
Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
/* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */
{
memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
}
else /* here to pre-process a key */
{
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */
ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
Skein1024_Update(ctx,key,keyBytes); /* hash the key */
Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
#if SKEIN_NEED_SWAP
{
uint_t i;
for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */
ctx->X[i] = Skein_Swap64(ctx->X[i]);
}
#endif
}
/* build/process the config block, type == CONFIG (could be precomputed for each key) */
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx,CFG_FINAL);
memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
Skein_Show_Key(1024,&ctx->h,key,keyBytes);
/* compute the initial chaining values from config block */
Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
/* The chaining vars ctx->X are now initialized */
/* Set up to process the data message portion of the hash (default) */
ctx->h.bCnt = 0; /* buffer b[] starts out empty */
Skein_Start_New_Type(ctx,MSG);
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */
int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
{
size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES)
{
if (ctx->h.bCnt) /* finish up any buffered message data */
{
n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
if (n)
{
Skein_assert(n < msgByteCnt); /* check on our logic here */
memcpy(&ctx->b[ctx->h.bCnt],msg,n);
msgByteCnt -= n;
msg += n;
ctx->h.bCnt += n;
}
Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES);
ctx->h.bCnt = 0;
}
/* now process any remaining full blocks, directly from input message data */
if (msgByteCnt > SKEIN1024_BLOCK_BYTES)
{
n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */
Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES);
msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
msg += n * SKEIN1024_BLOCK_BYTES;
}
Skein_assert(ctx->h.bCnt == 0);
}
/* copy any remaining source message data bytes into b[] */
if (msgByteCnt)
{
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
ctx->h.bCnt += msgByteCnt;
}
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the result */
int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN1024_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN1024_BLOCK_BYTES)
n = SKEIN1024_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein1024_API_CodeSize(void)
{
return ((u08b_t *) Skein1024_API_CodeSize) -
((u08b_t *) Skein1024_Init);
}
#endif
/**************** Functions to support MAC/tree hashing ***************/
/* (this code is identical for Optimized and Reference versions) */
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the block, no OUTPUT stage */
int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
{
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the block, no OUTPUT stage */
int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
{
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* finalize the hash computation and output the block, no OUTPUT stage */
int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
{
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */
return SKEIN_SUCCESS;
}
#if SKEIN_TREE_HASH
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* just do the OUTPUT stage */
int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN_256_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN_256_BLOCK_BYTES)
n = SKEIN_256_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* just do the OUTPUT stage */
int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN_512_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN_512_BLOCK_BYTES)
n = SKEIN_512_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* just do the OUTPUT stage */
int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
{
size_t i,n,byteCnt;
u64b_t X[SKEIN1024_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
/* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
/* run Threefish in "counter mode" to generate output */
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
{
((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
Skein_Start_New_Type(ctx,OUT_FINAL);
Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
if (n >= SKEIN1024_BLOCK_BYTES)
n = SKEIN1024_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
}
return SKEIN_SUCCESS;
}
#endif

327
crypto/skein/skein.h Normal file
View file

@ -0,0 +1,327 @@
#ifndef _SKEIN_H_
#define _SKEIN_H_ 1
/**************************************************************************
**
** Interface declarations and internal definitions for Skein hashing.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
***************************************************************************
**
** The following compile-time switches may be defined to control some
** tradeoffs between speed, code size, error checking, and security.
**
** The "default" note explains what happens when the switch is not defined.
**
** SKEIN_DEBUG -- make callouts from inside Skein code
** to examine/display intermediate values.
** [default: no callouts (no overhead)]
**
** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
** code. If not defined, most error checking
** is disabled (for performance). Otherwise,
** the switch value is interpreted as:
** 0: use assert() to flag errors
** 1: return SKEIN_FAIL to flag errors
**
***************************************************************************/
#ifdef __cplusplus
extern "C"
{
#endif
#include <stddef.h> /* get size_t definition */
#include "skein_port.h" /* get platform-specific definitions */
enum
{
SKEIN_SUCCESS = 0, /* return codes from Skein calls */
SKEIN_FAIL = 1,
SKEIN_BAD_HASHLEN = 2
};
#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */
#define SKEIN_256_STATE_WORDS ( 4)
#define SKEIN_512_STATE_WORDS ( 8)
#define SKEIN1024_STATE_WORDS (16)
#define SKEIN_MAX_STATE_WORDS (16)
#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)
#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS)
#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS)
#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS)
#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
typedef struct
{
size_t hashBitLen; /* size of hash result, in bits */
size_t bCnt; /* current byte count in buffer b[] */
u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */
} Skein_Ctxt_Hdr_t;
typedef struct /* 256-bit Skein hash context structure */
{
Skein_Ctxt_Hdr_t h; /* common header context variables */
u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */
u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
} Skein_256_Ctxt_t;
typedef struct /* 512-bit Skein hash context structure */
{
Skein_Ctxt_Hdr_t h; /* common header context variables */
u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */
u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
} Skein_512_Ctxt_t;
typedef struct /* 1024-bit Skein hash context structure */
{
Skein_Ctxt_Hdr_t h; /* common header context variables */
u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */
u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
} Skein1024_Ctxt_t;
/* Skein APIs for (incremental) "straight hashing" */
int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen);
int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen);
int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
/*
** Skein APIs for "extended" initialization: MAC keys, tree hashing.
** After an InitExt() call, just use Update/Final calls as with Init().
**
** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
** the results of InitExt() are identical to calling Init().
** The function Init() may be called once to "precompute" the IV for
** a given hashBitLen value, then by saving a copy of the context
** the IV computation may be avoided in later calls.
** Similarly, the function InitExt() may be called once per MAC key
** to precompute the MAC IV, then a copy of the context saved and
** reused for each new MAC computation.
**/
int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
/*
** Skein APIs for MAC and tree hash:
** Final_Pad: pad, do final block, but no OUTPUT type
** Output: do just the output stage
*/
int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
#ifndef SKEIN_TREE_HASH
#define SKEIN_TREE_HASH (1)
#endif
#if SKEIN_TREE_HASH
int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
#endif
/*****************************************************************
** "Internal" Skein definitions
** -- not needed for sequential hashing API, but will be
** helpful for other uses of Skein (e.g., tree hash mode).
** -- included here so that they can be shared between
** reference and optimized code.
******************************************************************/
/* tweak word T[1]: bit field starting positions */
#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */
#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */
#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */
#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */
#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */
#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */
/* tweak word T[1]: flag bit definition(s) */
#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST)
#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL)
#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD)
/* tweak word T[1]: tree level bit field mask */
#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL)
/* tweak word T[1]: block type field */
#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */
#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */
#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */
#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */
#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */
#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */
#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */
#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */
#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */
#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
#define SKEIN_VERSION (1)
#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/
#endif
#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32))
#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE)
#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
#define SKEIN_CFG_STR_LEN (4*8)
/* bit field definitions in config block treeInfo word */
#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0)
#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8)
#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \
( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
(((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
(((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */
/*
** Skein macros for getting/setting tweak words, etc.
** These are useful for partial input bytes, hash tree init/update, etc.
**/
#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0)
#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1)
#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
/* set both tweak words at once */
#define Skein_Set_T0_T1(ctxPtr,T0,T1) \
{ \
Skein_Set_T0(ctxPtr,(T0)); \
Skein_Set_T1(ctxPtr,(T1)); \
}
#define Skein_Set_Type(ctxPtr,BLK_TYPE) \
Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */
#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \
{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; }
#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; }
#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
/*****************************************************************
** "Internal" Skein definitions for debugging and error checking
******************************************************************/
#ifdef SKEIN_DEBUG /* examine/display intermediate values? */
#include "skein_debug.h"
#else /* default is no callouts */
#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
#define Skein_Show_Round(bits,ctx,r,X)
#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
#define Skein_Show_Final(bits,ctx,cnt,outPtr)
#define Skein_Show_Key(bits,ctx,key,keyBytes)
#endif
#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? */
#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
#define Skein_assert(x)
#elif defined(SKEIN_ASSERT)
#include <assert.h>
#define Skein_Assert(x,retCode) assert(x)
#define Skein_assert(x) assert(x)
#else
#include <assert.h>
#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
#define Skein_assert(x) assert(x) /* internal error */
#endif
/*****************************************************************
** Skein block function constants (shared across Ref and Opt code)
******************************************************************/
enum
{
/* Skein_256 round rotation constants */
R_256_0_0=14, R_256_0_1=16,
R_256_1_0=52, R_256_1_1=57,
R_256_2_0=23, R_256_2_1=40,
R_256_3_0= 5, R_256_3_1=37,
R_256_4_0=25, R_256_4_1=33,
R_256_5_0=46, R_256_5_1=12,
R_256_6_0=58, R_256_6_1=22,
R_256_7_0=32, R_256_7_1=32,
/* Skein_512 round rotation constants */
R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
/* Skein1024 round rotation constants */
R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37,
R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52,
R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17,
R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25,
R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, R1024_4_6=44, R1024_4_7=30,
R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41,
R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25,
R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20
};
#ifndef SKEIN_ROUNDS
#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */
#define SKEIN_512_ROUNDS_TOTAL (72)
#define SKEIN1024_ROUNDS_TOTAL (80)
#else /* allow command-line define in range 8*(5..14) */
#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5))
#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))
#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5))
#endif
#ifdef __cplusplus
}
#endif
#endif /* ifndef _SKEIN_H_ */

689
crypto/skein/skein_block.c Normal file
View file

@ -0,0 +1,689 @@
/***********************************************************************
**
** Implementation of the Skein block functions.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
** Compile-time switches:
**
** SKEIN_USE_ASM -- set bits (256/512/1024) to select which
** versions use ASM code for block processing
** [default: use C for all block sizes]
**
************************************************************************/
#include <string.h>
#include "skein.h"
#ifndef SKEIN_USE_ASM
#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
#endif
#ifndef SKEIN_LOOP
#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
#endif
#define BLK_BITS (WCNT*64) /* some useful definitions for code here */
#define KW_TWK_BASE (0)
#define KW_KEY_BASE (3)
#define ks (kw + KW_KEY_BASE)
#define ts (kw + KW_TWK_BASE)
#ifdef SKEIN_DEBUG
#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
#else
#define DebugSaveTweak(ctx)
#endif
/***************************** Skein_256 ******************************/
#if !(SKEIN_USE_ASM & 256)
void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
{ /* do it in C */
enum
{
WCNT = SKEIN_256_STATE_WORDS
};
#undef RCNT
#define RCNT (SKEIN_256_ROUNDS_TOTAL/8)
#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
#else
#define SKEIN_UNROLL_256 (0)
#endif
#if SKEIN_UNROLL_256
#if (RCNT % SKEIN_UNROLL_256)
#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
#endif
size_t r;
u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
#else
u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif
u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */
u64b_t w [WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG
const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */
Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
#endif
Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1];
do {
/* this implementation only supports 2**64 input bytes (no carry out here) */
ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */
ks[0] = ctx->X[0];
ks[1] = ctx->X[1];
ks[2] = ctx->X[2];
ks[3] = ctx->X[3];
ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
X0 = w[0] + ks[0]; /* do the first full key injection */
X1 = w[1] + ks[1] + ts[0];
X2 = w[2] + ks[2] + ts[1];
X3 = w[3] + ks[3];
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */
blkPtr += SKEIN_256_BLOCK_BYTES;
/* run the rounds */
#define Round256(p0,p1,p2,p3,ROT,rNum) \
X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
#if SKEIN_UNROLL_256 == 0
#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \
Round256(p0,p1,p2,p3,ROT,rNum) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
#define I256(R) \
X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \
X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
X3 += ks[((R)+4) % 5] + (R)+1; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
#else /* looping version */
#define R256(p0,p1,p2,p3,ROT,rNum) \
Round256(p0,p1,p2,p3,ROT,rNum) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
#define I256(R) \
X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
X3 += ks[r+(R)+3] + r+(R) ; \
ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\
ts[r + (R)+2 ] = ts[r+(R)-1]; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */
#endif
{
#define R256_8_rounds(R) \
R256(0,1,2,3,R_256_0,8*(R) + 1); \
R256(0,3,2,1,R_256_1,8*(R) + 2); \
R256(0,1,2,3,R_256_2,8*(R) + 3); \
R256(0,3,2,1,R_256_3,8*(R) + 4); \
I256(2*(R)); \
R256(0,1,2,3,R_256_4,8*(R) + 5); \
R256(0,3,2,1,R_256_5,8*(R) + 6); \
R256(0,1,2,3,R_256_6,8*(R) + 7); \
R256(0,3,2,1,R_256_7,8*(R) + 8); \
I256(2*(R)+1);
R256_8_rounds( 0);
#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
#if R256_Unroll_R( 1)
R256_8_rounds( 1);
#endif
#if R256_Unroll_R( 2)
R256_8_rounds( 2);
#endif
#if R256_Unroll_R( 3)
R256_8_rounds( 3);
#endif
#if R256_Unroll_R( 4)
R256_8_rounds( 4);
#endif
#if R256_Unroll_R( 5)
R256_8_rounds( 5);
#endif
#if R256_Unroll_R( 6)
R256_8_rounds( 6);
#endif
#if R256_Unroll_R( 7)
R256_8_rounds( 7);
#endif
#if R256_Unroll_R( 8)
R256_8_rounds( 8);
#endif
#if R256_Unroll_R( 9)
R256_8_rounds( 9);
#endif
#if R256_Unroll_R(10)
R256_8_rounds(10);
#endif
#if R256_Unroll_R(11)
R256_8_rounds(11);
#endif
#if R256_Unroll_R(12)
R256_8_rounds(12);
#endif
#if R256_Unroll_R(13)
R256_8_rounds(13);
#endif
#if R256_Unroll_R(14)
R256_8_rounds(14);
#endif
#if (SKEIN_UNROLL_256 > 14)
#error "need more unrolling in Skein_256_Process_Block"
#endif
}
/* do the final "feedforward" xor, update context chaining vars */
ctx->X[0] = X0 ^ w[0];
ctx->X[1] = X1 ^ w[1];
ctx->X[2] = X2 ^ w[2];
ctx->X[3] = X3 ^ w[3];
Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
ts[1] &= ~SKEIN_T1_FLAG_FIRST;
}
while (--blkCnt);
ctx->h.T[0] = ts[0];
ctx->h.T[1] = ts[1];
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein_256_Process_Block_CodeSize(void)
{
return ((u08b_t *) Skein_256_Process_Block_CodeSize) -
((u08b_t *) Skein_256_Process_Block);
}
uint_t Skein_256_Unroll_Cnt(void)
{
return SKEIN_UNROLL_256;
}
#endif
#endif
/***************************** Skein_512 ******************************/
#if !(SKEIN_USE_ASM & 512)
void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
{ /* do it in C */
enum
{
WCNT = SKEIN_512_STATE_WORDS
};
#undef RCNT
#define RCNT (SKEIN_512_ROUNDS_TOTAL/8)
#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
#else
#define SKEIN_UNROLL_512 (0)
#endif
#if SKEIN_UNROLL_512
#if (RCNT % SKEIN_UNROLL_512)
#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
#endif
size_t r;
u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
#else
u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif
u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */
u64b_t w [WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG
const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */
Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
#endif
Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1];
do {
/* this implementation only supports 2**64 input bytes (no carry out here) */
ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */
ks[0] = ctx->X[0];
ks[1] = ctx->X[1];
ks[2] = ctx->X[2];
ks[3] = ctx->X[3];
ks[4] = ctx->X[4];
ks[5] = ctx->X[5];
ks[6] = ctx->X[6];
ks[7] = ctx->X[7];
ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
X0 = w[0] + ks[0]; /* do the first full key injection */
X1 = w[1] + ks[1];
X2 = w[2] + ks[2];
X3 = w[3] + ks[3];
X4 = w[4] + ks[4];
X5 = w[5] + ks[5] + ts[0];
X6 = w[6] + ks[6] + ts[1];
X7 = w[7] + ks[7];
blkPtr += SKEIN_512_BLOCK_BYTES;
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
/* run the rounds */
#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
#if SKEIN_UNROLL_512 == 0
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
#define I512(R) \
X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \
X1 += ks[((R)+2) % 9]; \
X2 += ks[((R)+3) % 9]; \
X3 += ks[((R)+4) % 9]; \
X4 += ks[((R)+5) % 9]; \
X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \
X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \
X7 += ks[((R)+8) % 9] + (R)+1; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
#else /* looping version */
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
#define I512(R) \
X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
X1 += ks[r+(R)+1]; \
X2 += ks[r+(R)+2]; \
X3 += ks[r+(R)+3]; \
X4 += ks[r+(R)+4]; \
X5 += ks[r+(R)+5] + ts[r+(R)+0]; \
X6 += ks[r+(R)+6] + ts[r+(R)+1]; \
X7 += ks[r+(R)+7] + r+(R) ; \
ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \
ts[r + (R)+2] = ts[r+(R)-1]; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */
#endif /* end of looped code definitions */
{
#define R512_8_rounds(R) /* do 8 full rounds */ \
R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \
R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \
R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \
R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \
I512(2*(R)); \
R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \
R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \
R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \
R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \
I512(2*(R)+1); /* and key injection */
R512_8_rounds( 0);
#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
#if R512_Unroll_R( 1)
R512_8_rounds( 1);
#endif
#if R512_Unroll_R( 2)
R512_8_rounds( 2);
#endif
#if R512_Unroll_R( 3)
R512_8_rounds( 3);
#endif
#if R512_Unroll_R( 4)
R512_8_rounds( 4);
#endif
#if R512_Unroll_R( 5)
R512_8_rounds( 5);
#endif
#if R512_Unroll_R( 6)
R512_8_rounds( 6);
#endif
#if R512_Unroll_R( 7)
R512_8_rounds( 7);
#endif
#if R512_Unroll_R( 8)
R512_8_rounds( 8);
#endif
#if R512_Unroll_R( 9)
R512_8_rounds( 9);
#endif
#if R512_Unroll_R(10)
R512_8_rounds(10);
#endif
#if R512_Unroll_R(11)
R512_8_rounds(11);
#endif
#if R512_Unroll_R(12)
R512_8_rounds(12);
#endif
#if R512_Unroll_R(13)
R512_8_rounds(13);
#endif
#if R512_Unroll_R(14)
R512_8_rounds(14);
#endif
#if (SKEIN_UNROLL_512 > 14)
#error "need more unrolling in Skein_512_Process_Block"
#endif
}
/* do the final "feedforward" xor, update context chaining vars */
ctx->X[0] = X0 ^ w[0];
ctx->X[1] = X1 ^ w[1];
ctx->X[2] = X2 ^ w[2];
ctx->X[3] = X3 ^ w[3];
ctx->X[4] = X4 ^ w[4];
ctx->X[5] = X5 ^ w[5];
ctx->X[6] = X6 ^ w[6];
ctx->X[7] = X7 ^ w[7];
Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
ts[1] &= ~SKEIN_T1_FLAG_FIRST;
}
while (--blkCnt);
ctx->h.T[0] = ts[0];
ctx->h.T[1] = ts[1];
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein_512_Process_Block_CodeSize(void)
{
return ((u08b_t *) Skein_512_Process_Block_CodeSize) -
((u08b_t *) Skein_512_Process_Block);
}
uint_t Skein_512_Unroll_Cnt(void)
{
return SKEIN_UNROLL_512;
}
#endif
#endif
/***************************** Skein1024 ******************************/
#if !(SKEIN_USE_ASM & 1024)
void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
{ /* do it in C, always looping (unrolled is bigger AND slower!) */
enum
{
WCNT = SKEIN1024_STATE_WORDS
};
#undef RCNT
#define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
#else
#define SKEIN_UNROLL_1024 (0)
#endif
#if (SKEIN_UNROLL_1024 != 0)
#if (RCNT % SKEIN_UNROLL_1024)
#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
#endif
size_t r;
u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
#else
u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif
u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */
X08,X09,X10,X11,X12,X13,X14,X15;
u64b_t w [WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG
const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */
Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03;
Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07;
Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
#endif
Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1];
do {
/* this implementation only supports 2**64 input bytes (no carry out here) */
ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */
ks[ 0] = ctx->X[ 0];
ks[ 1] = ctx->X[ 1];
ks[ 2] = ctx->X[ 2];
ks[ 3] = ctx->X[ 3];
ks[ 4] = ctx->X[ 4];
ks[ 5] = ctx->X[ 5];
ks[ 6] = ctx->X[ 6];
ks[ 7] = ctx->X[ 7];
ks[ 8] = ctx->X[ 8];
ks[ 9] = ctx->X[ 9];
ks[10] = ctx->X[10];
ks[11] = ctx->X[11];
ks[12] = ctx->X[12];
ks[13] = ctx->X[13];
ks[14] = ctx->X[14];
ks[15] = ctx->X[15];
ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^
ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^
ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^
ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
X00 = w[ 0] + ks[ 0]; /* do the first full key injection */
X01 = w[ 1] + ks[ 1];
X02 = w[ 2] + ks[ 2];
X03 = w[ 3] + ks[ 3];
X04 = w[ 4] + ks[ 4];
X05 = w[ 5] + ks[ 5];
X06 = w[ 6] + ks[ 6];
X07 = w[ 7] + ks[ 7];
X08 = w[ 8] + ks[ 8];
X09 = w[ 9] + ks[ 9];
X10 = w[10] + ks[10];
X11 = w[11] + ks[11];
X12 = w[12] + ks[12];
X13 = w[13] + ks[13] + ts[0];
X14 = w[14] + ks[14] + ts[1];
X15 = w[15] + ks[15];
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \
X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \
X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \
X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \
X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \
#if SKEIN_UNROLL_1024 == 0
#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
#define I1024(R) \
X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \
X01 += ks[((R)+ 2) % 17]; \
X02 += ks[((R)+ 3) % 17]; \
X03 += ks[((R)+ 4) % 17]; \
X04 += ks[((R)+ 5) % 17]; \
X05 += ks[((R)+ 6) % 17]; \
X06 += ks[((R)+ 7) % 17]; \
X07 += ks[((R)+ 8) % 17]; \
X08 += ks[((R)+ 9) % 17]; \
X09 += ks[((R)+10) % 17]; \
X10 += ks[((R)+11) % 17]; \
X11 += ks[((R)+12) % 17]; \
X12 += ks[((R)+13) % 17]; \
X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \
X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \
X15 += ks[((R)+16) % 17] + (R)+1; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
#else /* looping version */
#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr);
#define I1024(R) \
X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \
X01 += ks[r+(R)+ 1]; \
X02 += ks[r+(R)+ 2]; \
X03 += ks[r+(R)+ 3]; \
X04 += ks[r+(R)+ 4]; \
X05 += ks[r+(R)+ 5]; \
X06 += ks[r+(R)+ 6]; \
X07 += ks[r+(R)+ 7]; \
X08 += ks[r+(R)+ 8]; \
X09 += ks[r+(R)+ 9]; \
X10 += ks[r+(R)+10]; \
X11 += ks[r+(R)+11]; \
X12 += ks[r+(R)+12]; \
X13 += ks[r+(R)+13] + ts[r+(R)+0]; \
X14 += ks[r+(R)+14] + ts[r+(R)+1]; \
X15 += ks[r+(R)+15] + r+(R) ; \
ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \
ts[r + (R)+ 2] = ts[r+(R)-1]; \
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */
#endif
{
#define R1024_8_rounds(R) /* do 8 full rounds */ \
R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \
R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \
R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \
I1024(2*(R)); \
R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \
R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \
R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \
R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \
I1024(2*(R)+1);
R1024_8_rounds( 0);
#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
#if R1024_Unroll_R( 1)
R1024_8_rounds( 1);
#endif
#if R1024_Unroll_R( 2)
R1024_8_rounds( 2);
#endif
#if R1024_Unroll_R( 3)
R1024_8_rounds( 3);
#endif
#if R1024_Unroll_R( 4)
R1024_8_rounds( 4);
#endif
#if R1024_Unroll_R( 5)
R1024_8_rounds( 5);
#endif
#if R1024_Unroll_R( 6)
R1024_8_rounds( 6);
#endif
#if R1024_Unroll_R( 7)
R1024_8_rounds( 7);
#endif
#if R1024_Unroll_R( 8)
R1024_8_rounds( 8);
#endif
#if R1024_Unroll_R( 9)
R1024_8_rounds( 9);
#endif
#if R1024_Unroll_R(10)
R1024_8_rounds(10);
#endif
#if R1024_Unroll_R(11)
R1024_8_rounds(11);
#endif
#if R1024_Unroll_R(12)
R1024_8_rounds(12);
#endif
#if R1024_Unroll_R(13)
R1024_8_rounds(13);
#endif
#if R1024_Unroll_R(14)
R1024_8_rounds(14);
#endif
#if (SKEIN_UNROLL_1024 > 14)
#error "need more unrolling in Skein_1024_Process_Block"
#endif
}
/* do the final "feedforward" xor, update context chaining vars */
ctx->X[ 0] = X00 ^ w[ 0];
ctx->X[ 1] = X01 ^ w[ 1];
ctx->X[ 2] = X02 ^ w[ 2];
ctx->X[ 3] = X03 ^ w[ 3];
ctx->X[ 4] = X04 ^ w[ 4];
ctx->X[ 5] = X05 ^ w[ 5];
ctx->X[ 6] = X06 ^ w[ 6];
ctx->X[ 7] = X07 ^ w[ 7];
ctx->X[ 8] = X08 ^ w[ 8];
ctx->X[ 9] = X09 ^ w[ 9];
ctx->X[10] = X10 ^ w[10];
ctx->X[11] = X11 ^ w[11];
ctx->X[12] = X12 ^ w[12];
ctx->X[13] = X13 ^ w[13];
ctx->X[14] = X14 ^ w[14];
ctx->X[15] = X15 ^ w[15];
Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
ts[1] &= ~SKEIN_T1_FLAG_FIRST;
blkPtr += SKEIN1024_BLOCK_BYTES;
}
while (--blkCnt);
ctx->h.T[0] = ts[0];
ctx->h.T[1] = ts[1];
}
#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
size_t Skein1024_Process_Block_CodeSize(void)
{
return ((u08b_t *) Skein1024_Process_Block_CodeSize) -
((u08b_t *) Skein1024_Process_Block);
}
uint_t Skein1024_Unroll_Cnt(void)
{
return SKEIN_UNROLL_1024;
}
#endif
#endif

247
crypto/skein/skein_debug.c Normal file
View file

@ -0,0 +1,247 @@
/***********************************************************************
**
** Debug output functions for Skein hashing.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
************************************************************************/
#include <stdio.h>
#ifdef SKEIN_DEBUG /* only instantiate this code if SKEIN_DEBUG is on */
#include "skein.h"
static const char INDENT[] = " "; /* how much to indent on new line */
uint_t skein_DebugFlag = 0; /* off by default. Must be set externally */
static void Show64_step(size_t cnt,const u64b_t *X,size_t step)
{
size_t i,j;
for (i=j=0;i < cnt;i++,j+=step)
{
if (i % 4 == 0) printf(INDENT);
printf(" %08X.%08X ",(uint_32t)(X[j] >> 32),(uint_32t)X[j]);
if (i % 4 == 3 || i==cnt-1) printf("\n");
fflush(stdout);
}
}
#define Show64(cnt,X) Show64_step(cnt,X,1)
static void Show64_flag(size_t cnt,const u64b_t *X)
{
size_t xptr = (size_t) X;
size_t step = (xptr & 1) ? 2 : 1;
if (step != 1)
{
X = (const u64b_t *) (xptr & ~1);
}
Show64_step(cnt,X,step);
}
static void Show08(size_t cnt,const u08b_t *b)
{
size_t i;
for (i=0;i < cnt;i++)
{
if (i %16 == 0) printf(INDENT);
else if (i % 4 == 0) printf(" ");
printf(" %02X",b[i]);
if (i %16 == 15 || i==cnt-1) printf("\n");
fflush(stdout);
}
}
static const char *AlgoHeader(uint_t bits)
{
if (skein_DebugFlag & SKEIN_DEBUG_THREEFISH)
switch (bits)
{
case 256: return ":Threefish-256: ";
case 512: return ":Threefish-512: ";
case 1024: return ":Threefish-1024:";
}
else
switch (bits)
{
case 256: return ":Skein-256: ";
case 512: return ":Skein-512: ";
case 1024: return ":Skein-1024:";
}
return NULL;
}
void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr)
{
if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
if (skein_DebugFlag & SKEIN_DEBUG_FINAL)
{
printf("\n%s Final output=\n",AlgoHeader(bits));
Show08(cnt,outPtr);
printf(" ++++++++++\n");
fflush(stdout);
}
}
/* show state after a round (or "pseudo-round") */
void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X)
{
static uint_t injectNum=0; /* not multi-thread safe! */
if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
if (skein_DebugFlag)
{
if (r >= SKEIN_RND_SPECIAL)
{ /* a key injection (or feedforward) point */
injectNum = (r == SKEIN_RND_KEY_INITIAL) ? 0 : injectNum+1;
if ( skein_DebugFlag & SKEIN_DEBUG_INJECT ||
((skein_DebugFlag & SKEIN_DEBUG_FINAL) && r == SKEIN_RND_FEED_FWD))
{
printf("\n%s",AlgoHeader(bits));
switch (r)
{
case SKEIN_RND_KEY_INITIAL:
printf(" [state after initial key injection]");
break;
case SKEIN_RND_KEY_INJECT:
printf(" [state after key injection #%02d]",injectNum);
break;
case SKEIN_RND_FEED_FWD:
printf(" [state after plaintext feedforward]");
injectNum = 0;
break;
}
printf("=\n");
Show64(bits/64,X);
if (r== SKEIN_RND_FEED_FWD)
printf(" ----------\n");
}
}
else if (skein_DebugFlag & SKEIN_DEBUG_ROUNDS)
{
uint_t j;
u64b_t p[SKEIN_MAX_STATE_WORDS];
const u08b_t *perm;
const static u08b_t PERM_256 [4][ 4] = { { 0,1,2,3 }, { 0,3,2,1 }, { 0,1,2,3 }, { 0,3,2,1 } };
const static u08b_t PERM_512 [4][ 8] = { { 0,1,2,3,4,5,6,7 },
{ 2,1,4,7,6,5,0,3 },
{ 4,1,6,3,0,5,2,7 },
{ 6,1,0,7,2,5,4,3 }
};
const static u08b_t PERM_1024[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
{ 0, 9, 2,13, 6,11, 4,15,10, 7,12, 3,14, 5, 8, 1 },
{ 0, 7, 2, 5, 4, 3, 6, 1,12,15,14,13, 8,11,10, 9 },
{ 0,15, 2,11, 6,13, 4, 9,14, 1, 8, 5,10, 3,12, 7 }
};
if ((skein_DebugFlag & SKEIN_DEBUG_PERMUTE) && (r & 3))
{
printf("\n%s [state after round %2d (permuted)]=\n",AlgoHeader(bits),(int)r);
switch (bits)
{
case 256: perm = PERM_256 [r&3]; break;
case 512: perm = PERM_512 [r&3]; break;
default: perm = PERM_1024[r&3]; break;
}
for (j=0;j<bits/64;j++)
p[j] = X[perm[j]];
Show64(bits/64,p);
}
else
{
printf("\n%s [state after round %2d]=\n",AlgoHeader(bits),(int)r);
Show64(bits/64,X);
}
}
}
}
/* show state after a round (or "pseudo-round"), given a list of pointers */
void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[])
{
uint_t i;
u64b_t X[SKEIN_MAX_STATE_WORDS];
for (i=0;i<bits/64;i++) /* copy over the words */
X[i] = X_ptr[i][0];
Skein_Show_Round(bits,h,r,X);
}
/* show the state at the start of a block */
void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr,
const u64b_t *wPtr, const u64b_t *ksPtr, const u64b_t *tsPtr)
{
uint_t n;
if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
if (skein_DebugFlag)
{
if (skein_DebugFlag & SKEIN_DEBUG_HDR)
{
printf("\n%s Block: outBits=%4d. T0=%06X.",AlgoHeader(bits),(uint_t) h->hashBitLen,(uint_t)h->T[0]);
printf(" Type=");
n = (uint_t) ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) >> SKEIN_T1_POS_BLK_TYPE);
switch (n)
{
case SKEIN_BLK_TYPE_KEY: printf("KEY. "); break;
case SKEIN_BLK_TYPE_CFG: printf("CFG. "); break;
case SKEIN_BLK_TYPE_PERS: printf("PERS."); break;
case SKEIN_BLK_TYPE_PK : printf("PK. "); break;
case SKEIN_BLK_TYPE_KDF: printf("KDF. "); break;
case SKEIN_BLK_TYPE_MSG: printf("MSG. "); break;
case SKEIN_BLK_TYPE_OUT: printf("OUT. "); break;
default: printf("0x%02X.",n); break;
}
printf(" Flags=");
printf((h->T[1] & SKEIN_T1_FLAG_FIRST) ? " First":" ");
printf((h->T[1] & SKEIN_T1_FLAG_FINAL) ? " Final":" ");
printf((h->T[1] & SKEIN_T1_FLAG_BIT_PAD) ? " Pad" :" ");
n = (uint_t) ((h->T[1] & SKEIN_T1_TREE_LVL_MASK) >> SKEIN_T1_POS_TREE_LVL);
if (n)
printf(" TreeLevel = %02X",n);
printf("\n");
fflush(stdout);
}
if (skein_DebugFlag & SKEIN_DEBUG_TWEAK)
{
printf(" Tweak:\n");
Show64(2,h->T);
}
if (skein_DebugFlag & SKEIN_DEBUG_STATE)
{
printf(" %s words:\n",(skein_DebugFlag & SKEIN_DEBUG_THREEFISH)?"Key":"State");
Show64(bits/64,X);
}
if (skein_DebugFlag & SKEIN_DEBUG_KEYSCHED)
{
printf(" Tweak schedule:\n");
Show64_flag(3,tsPtr);
printf(" Key schedule:\n");
Show64_flag((bits/64)+1,ksPtr);
}
if (skein_DebugFlag & SKEIN_DEBUG_INPUT_64)
{
printf(" Input block (words):\n");
Show64(bits/64,wPtr);
}
if (skein_DebugFlag & SKEIN_DEBUG_INPUT_08)
{
printf(" Input block (bytes):\n");
Show08(bits/8,blkPtr);
}
}
}
void Skein_Show_Key(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes)
{
if (keyBytes)
if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
if (skein_DebugFlag & SKEIN_DEBUG_KEY)
{
printf("\n%s MAC key = %4u bytes\n",AlgoHeader(bits),(unsigned) keyBytes);
Show08(keyBytes,key);
}
}
#endif

View file

@ -0,0 +1,48 @@
#ifndef _SKEIN_DEBUG_H_
#define _SKEIN_DEBUG_H_
/***********************************************************************
**
** Interface definitions for Skein hashing debug output.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
************************************************************************/
#ifdef SKEIN_DEBUG
/* callout functions used inside Skein code */
void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr,
const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr);
void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X);
void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]);
void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr);
void Skein_Show_Key (uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes);
extern uint_t skein_DebugFlag; /* flags to control debug output (0 --> none) */
#define SKEIN_RND_SPECIAL (1000u)
#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u)
#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u)
#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u)
/* flag bits: skein_DebugFlag */
#define SKEIN_DEBUG_KEY (1u << 1) /* show MAC key */
#define SKEIN_DEBUG_CONFIG (1u << 2) /* show config block processing */
#define SKEIN_DEBUG_STATE (1u << 3) /* show input state during Show_Block() */
#define SKEIN_DEBUG_TWEAK (1u << 4) /* show input state during Show_Block() */
#define SKEIN_DEBUG_KEYSCHED (1u << 5) /* show expanded key schedule */
#define SKEIN_DEBUG_INPUT_64 (1u << 6) /* show input block as 64-bit words */
#define SKEIN_DEBUG_INPUT_08 (1u << 7) /* show input block as 8-bit bytes */
#define SKEIN_DEBUG_INJECT (1u << 8) /* show state after key injection & feedforward points */
#define SKEIN_DEBUG_ROUNDS (1u << 9) /* show state after all rounds */
#define SKEIN_DEBUG_FINAL (1u <<10) /* show final output of Skein */
#define SKEIN_DEBUG_HDR (1u <<11) /* show block header */
#define SKEIN_DEBUG_THREEFISH (1u <<12) /* use Threefish name instead of Skein */
#define SKEIN_DEBUG_PERMUTE (1u <<13) /* use word permutations */
#define SKEIN_DEBUG_ALL ((~0u) & ~(SKEIN_DEBUG_THREEFISH | SKEIN_DEBUG_PERMUTE))
#define THREEFISH_DEBUG_ALL (SKEIN_DEBUG_ALL | SKEIN_DEBUG_THREEFISH)
#endif /* SKEIN_DEBUG */
#endif /* _SKEIN_DEBUG_H_ */

199
crypto/skein/skein_iv.h Normal file
View file

@ -0,0 +1,199 @@
#ifndef _SKEIN_IV_H_
#define _SKEIN_IV_H_
#include "skein.h" /* get Skein macros and types */
/*
***************** Pre-computed Skein IVs *******************
**
** NOTE: these values are not "magic" constants, but
** are generated using the Threefish block function.
** They are pre-computed here only for speed; i.e., to
** avoid the need for a Threefish call during Init().
**
** The IV for any fixed hash length may be pre-computed.
** Only the most common values are included here.
**
************************************************************
**/
#define MK_64 SKEIN_MK_64
/* blkSize = 256 bits. hashSize = 128 bits */
const u64b_t SKEIN_256_IV_128[] =
{
MK_64(0xE1111906,0x964D7260),
MK_64(0x883DAAA7,0x7C8D811C),
MK_64(0x10080DF4,0x91960F7A),
MK_64(0xCCF7DDE5,0xB45BC1C2)
};
/* blkSize = 256 bits. hashSize = 160 bits */
const u64b_t SKEIN_256_IV_160[] =
{
MK_64(0x14202314,0x72825E98),
MK_64(0x2AC4E9A2,0x5A77E590),
MK_64(0xD47A5856,0x8838D63E),
MK_64(0x2DD2E496,0x8586AB7D)
};
/* blkSize = 256 bits. hashSize = 224 bits */
const u64b_t SKEIN_256_IV_224[] =
{
MK_64(0xC6098A8C,0x9AE5EA0B),
MK_64(0x876D5686,0x08C5191C),
MK_64(0x99CB88D7,0xD7F53884),
MK_64(0x384BDDB1,0xAEDDB5DE)
};
/* blkSize = 256 bits. hashSize = 256 bits */
const u64b_t SKEIN_256_IV_256[] =
{
MK_64(0xFC9DA860,0xD048B449),
MK_64(0x2FCA6647,0x9FA7D833),
MK_64(0xB33BC389,0x6656840F),
MK_64(0x6A54E920,0xFDE8DA69)
};
/* blkSize = 512 bits. hashSize = 128 bits */
const u64b_t SKEIN_512_IV_128[] =
{
MK_64(0xA8BC7BF3,0x6FBF9F52),
MK_64(0x1E9872CE,0xBD1AF0AA),
MK_64(0x309B1790,0xB32190D3),
MK_64(0xBCFBB854,0x3F94805C),
MK_64(0x0DA61BCD,0x6E31B11B),
MK_64(0x1A18EBEA,0xD46A32E3),
MK_64(0xA2CC5B18,0xCE84AA82),
MK_64(0x6982AB28,0x9D46982D)
};
/* blkSize = 512 bits. hashSize = 160 bits */
const u64b_t SKEIN_512_IV_160[] =
{
MK_64(0x28B81A2A,0xE013BD91),
MK_64(0xC2F11668,0xB5BDF78F),
MK_64(0x1760D8F3,0xF6A56F12),
MK_64(0x4FB74758,0x8239904F),
MK_64(0x21EDE07F,0x7EAF5056),
MK_64(0xD908922E,0x63ED70B8),
MK_64(0xB8EC76FF,0xECCB52FA),
MK_64(0x01A47BB8,0xA3F27A6E)
};
/* blkSize = 512 bits. hashSize = 224 bits */
const u64b_t SKEIN_512_IV_224[] =
{
MK_64(0xCCD06162,0x48677224),
MK_64(0xCBA65CF3,0xA92339EF),
MK_64(0x8CCD69D6,0x52FF4B64),
MK_64(0x398AED7B,0x3AB890B4),
MK_64(0x0F59D1B1,0x457D2BD0),
MK_64(0x6776FE65,0x75D4EB3D),
MK_64(0x99FBC70E,0x997413E9),
MK_64(0x9E2CFCCF,0xE1C41EF7)
};
/* blkSize = 512 bits. hashSize = 256 bits */
const u64b_t SKEIN_512_IV_256[] =
{
MK_64(0xCCD044A1,0x2FDB3E13),
MK_64(0xE8359030,0x1A79A9EB),
MK_64(0x55AEA061,0x4F816E6F),
MK_64(0x2A2767A4,0xAE9B94DB),
MK_64(0xEC06025E,0x74DD7683),
MK_64(0xE7A436CD,0xC4746251),
MK_64(0xC36FBAF9,0x393AD185),
MK_64(0x3EEDBA18,0x33EDFC13)
};
/* blkSize = 512 bits. hashSize = 384 bits */
const u64b_t SKEIN_512_IV_384[] =
{
MK_64(0xA3F6C6BF,0x3A75EF5F),
MK_64(0xB0FEF9CC,0xFD84FAA4),
MK_64(0x9D77DD66,0x3D770CFE),
MK_64(0xD798CBF3,0xB468FDDA),
MK_64(0x1BC4A666,0x8A0E4465),
MK_64(0x7ED7D434,0xE5807407),
MK_64(0x548FC1AC,0xD4EC44D6),
MK_64(0x266E1754,0x6AA18FF8)
};
/* blkSize = 512 bits. hashSize = 512 bits */
const u64b_t SKEIN_512_IV_512[] =
{
MK_64(0x4903ADFF,0x749C51CE),
MK_64(0x0D95DE39,0x9746DF03),
MK_64(0x8FD19341,0x27C79BCE),
MK_64(0x9A255629,0xFF352CB1),
MK_64(0x5DB62599,0xDF6CA7B0),
MK_64(0xEABE394C,0xA9D5C3F4),
MK_64(0x991112C7,0x1A75B523),
MK_64(0xAE18A40B,0x660FCC33)
};
/* blkSize = 1024 bits. hashSize = 384 bits */
const u64b_t SKEIN1024_IV_384[] =
{
MK_64(0x5102B6B8,0xC1894A35),
MK_64(0xFEEBC9E3,0xFE8AF11A),
MK_64(0x0C807F06,0xE32BED71),
MK_64(0x60C13A52,0xB41A91F6),
MK_64(0x9716D35D,0xD4917C38),
MK_64(0xE780DF12,0x6FD31D3A),
MK_64(0x797846B6,0xC898303A),
MK_64(0xB172C2A8,0xB3572A3B),
MK_64(0xC9BC8203,0xA6104A6C),
MK_64(0x65909338,0xD75624F4),
MK_64(0x94BCC568,0x4B3F81A0),
MK_64(0x3EBBF51E,0x10ECFD46),
MK_64(0x2DF50F0B,0xEEB08542),
MK_64(0x3B5A6530,0x0DBC6516),
MK_64(0x484B9CD2,0x167BBCE1),
MK_64(0x2D136947,0xD4CBAFEA)
};
/* blkSize = 1024 bits. hashSize = 512 bits */
const u64b_t SKEIN1024_IV_512[] =
{
MK_64(0xCAEC0E5D,0x7C1B1B18),
MK_64(0xA01B0E04,0x5F03E802),
MK_64(0x33840451,0xED912885),
MK_64(0x374AFB04,0xEAEC2E1C),
MK_64(0xDF25A0E2,0x813581F7),
MK_64(0xE4004093,0x8B12F9D2),
MK_64(0xA662D539,0xC2ED39B6),
MK_64(0xFA8B85CF,0x45D8C75A),
MK_64(0x8316ED8E,0x29EDE796),
MK_64(0x053289C0,0x2E9F91B8),
MK_64(0xC3F8EF1D,0x6D518B73),
MK_64(0xBDCEC3C4,0xD5EF332E),
MK_64(0x549A7E52,0x22974487),
MK_64(0x67070872,0x5B749816),
MK_64(0xB9CD28FB,0xF0581BD1),
MK_64(0x0E2940B8,0x15804974)
};
/* blkSize = 1024 bits. hashSize = 1024 bits */
const u64b_t SKEIN1024_IV_1024[] =
{
MK_64(0xD593DA07,0x41E72355),
MK_64(0x15B5E511,0xAC73E00C),
MK_64(0x5180E5AE,0xBAF2C4F0),
MK_64(0x03BD41D3,0xFCBCAFAF),
MK_64(0x1CAEC6FD,0x1983A898),
MK_64(0x6E510B8B,0xCDD0589F),
MK_64(0x77E2BDFD,0xC6394ADA),
MK_64(0xC11E1DB5,0x24DCB0A3),
MK_64(0xD6D14AF9,0xC6329AB5),
MK_64(0x6A9B0BFC,0x6EB67E0D),
MK_64(0x9243C60D,0xCCFF1332),
MK_64(0x1A1F1DDE,0x743F02D4),
MK_64(0x0996753C,0x10ED0BB8),
MK_64(0x6572DD22,0xF2B4969A),
MK_64(0x61FD3062,0xD00A579A),
MK_64(0x1DE0536E,0x8682E539)
};
#endif /* _SKEIN_IV_H_ */

124
crypto/skein/skein_port.h Normal file
View file

@ -0,0 +1,124 @@
#ifndef _SKEIN_PORT_H_
#define _SKEIN_PORT_H_
/*******************************************************************
**
** Platform-specific definitions for Skein hash function.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
** Many thanks to Brian Gladman for his portable header files.
**
** To port Skein to an "unsupported" platform, change the definitions
** in this file appropriately.
**
********************************************************************/
#include "brg_types.h" /* get integer type definitions */
typedef unsigned int uint_t; /* native unsigned integer */
typedef uint_8t u08b_t; /* 8-bit unsigned integer */
typedef uint_64t u64b_t; /* 64-bit unsigned integer */
#ifndef RotL_64
#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N))))
#endif
/*
* Skein is "natively" little-endian (unlike SHA-xxx), for optimal
* performance on x86 CPUs. The Skein code requires the following
* definitions for dealing with endianness:
*
* SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
* Skein_Put64_LSB_First
* Skein_Get64_LSB_First
* Skein_Swap64
*
* If SKEIN_NEED_SWAP is defined at compile time, it is used here
* along with the portable versions of Put64/Get64/Swap64, which
* are slow in general.
*
* Otherwise, an "auto-detect" of endianness is attempted below.
* If the default handling doesn't work well, the user may insert
* platform-specific code instead (e.g., for big-endian CPUs).
*
*/
#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
#include "brg_endian.h" /* get endianness selection */
#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
/* here for big-endian CPUs */
#define SKEIN_NEED_SWAP (1)
#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
#define SKEIN_NEED_SWAP (0)
#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */
#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
#endif
#else
#error "Skein needs endianness setting!"
#endif
#endif /* ifndef SKEIN_NEED_SWAP */
/*
******************************************************************
* Provide any definitions still needed.
******************************************************************
*/
#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
#if SKEIN_NEED_SWAP
#define Skein_Swap64(w64) \
( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
(((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
(((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
(((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
(((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
(((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
(((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
(((((u64b_t)(w64)) >>56) & 0xFF) ) )
#else
#define Skein_Swap64(w64) (w64)
#endif
#endif /* ifndef Skein_Swap64 */
#ifndef Skein_Put64_LSB_First
void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
{ /* this version is fully portable (big-endian or little-endian), but slow */
size_t n;
for (n=0;n<bCnt;n++)
dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
}
#else
; /* output only the function prototype */
#endif
#endif /* ifndef Skein_Put64_LSB_First */
#ifndef Skein_Get64_LSB_First
void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
{ /* this version is fully portable (big-endian or little-endian), but slow */
size_t n;
for (n=0;n<8*wCnt;n+=8)
dst[n/8] = (((u64b_t) src[n ]) ) +
(((u64b_t) src[n+1]) << 8) +
(((u64b_t) src[n+2]) << 16) +
(((u64b_t) src[n+3]) << 24) +
(((u64b_t) src[n+4]) << 32) +
(((u64b_t) src[n+5]) << 40) +
(((u64b_t) src[n+6]) << 48) +
(((u64b_t) src[n+7]) << 56) ;
}
#else
; /* output only the function prototype */
#endif
#endif /* ifndef Skein_Get64_LSB_First */
#endif /* ifndef _SKEIN_PORT_H_ */

View file

@ -46,17 +46,21 @@ See also the bsc and libbsc web site:
#include "lzp.h"
#define LZP_LZP_MATCH_FLAG 0xf2
#define LZP_MATCH_FLAG 0xf2
static
inline int bsc_lzp_num_blocks(ssize_t n)
{
int nb;
if (n < 256 * 1024) return 1;
if (n < 4 * 1024 * 1024) return 2;
if (n < 16 * 1024 * 1024) return 4;
if (n < LZP_MAX_BLOCK) return 8;
return (n / LZP_MAX_BLOCK);
nb = n / LZP_MAX_BLOCK;
if (n % LZP_MAX_BLOCK) nb++;
return (nb);
}
static
@ -94,7 +98,7 @@ int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inpu
{
if ((heuristic > input) && (*(unsigned int *)heuristic != *(unsigned int *)(reference + (heuristic - input))))
{
goto LZP_LZP_MATCH_NOT_FOUND;
goto LZP_MATCH_NOT_FOUND;
}
int len = 4;
@ -105,7 +109,7 @@ int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inpu
if (len < minLen)
{
if (heuristic < input + len) heuristic = input + len;
goto LZP_LZP_MATCH_NOT_FOUND;
goto LZP_MATCH_NOT_FOUND;
}
if (input[len] == reference[len]) len++;
@ -114,7 +118,7 @@ int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inpu
input += len; context = input[-1] | (input[-2] << 8) | (input[-3] << 16) | (input[-4] << 24);
*output++ = LZP_LZP_MATCH_FLAG;
*output++ = LZP_MATCH_FLAG;
len -= minLen; while (len >= 254) { len -= 254; *output++ = 254; if (output >= outputEOB) break; }
@ -123,9 +127,9 @@ int bsc_lzp_encode_block(const unsigned char * input, const unsigned char * inpu
else
{
unsigned char next;
LZP_LZP_MATCH_NOT_FOUND:
LZP_MATCH_NOT_FOUND:
next = *output++ = *input++; context = (context << 8) | next;
if (next == LZP_LZP_MATCH_FLAG) *output++ = 255;
if (next == LZP_MATCH_FLAG) *output++ = 255;
}
}
else
@ -141,7 +145,7 @@ LZP_LZP_MATCH_NOT_FOUND:
if (value > 0)
{
unsigned char next = *output++ = *input++; context = (context << 8) | next;
if (next == LZP_LZP_MATCH_FLAG) *output++ = 255;
if (next == LZP_MATCH_FLAG) *output++ = 255;
}
else
{
@ -181,7 +185,7 @@ int bsc_lzp_decode_block(const unsigned char * input, const unsigned char * inpu
{
unsigned int index = ((context >> 15) ^ context ^ (context >> 3)) & mask;
int value = lookup[index]; lookup[index] = (int)(output - outputStart);
if (*input == LZP_LZP_MATCH_FLAG && value > 0)
if (*input == LZP_MATCH_FLAG && value > 0)
{
input++;
if (*input != 255)
@ -209,7 +213,7 @@ int bsc_lzp_decode_block(const unsigned char * input, const unsigned char * inpu
}
else
{
input++; context = (context << 8) | (*output++ = LZP_LZP_MATCH_FLAG);
input++; context = (context << 8) | (*output++ = LZP_MATCH_FLAG);
}
}
else
@ -238,14 +242,19 @@ ssize_t bsc_lzp_compress_serial(const unsigned char * input, unsigned char * out
}
int nBlocks = bsc_lzp_num_blocks(n);
int chunkSize = n / nBlocks;
int chunkSize;
int blockId;
ssize_t outputPtr = 1 + 8 * nBlocks;
if (n > LZP_MAX_BLOCK)
chunkSize = LZP_MAX_BLOCK;
else
chunkSize = n / nBlocks;
output[0] = nBlocks;
for (blockId = 0; blockId < nBlocks; ++blockId)
{
int inputStart = blockId * chunkSize;
ssize_t inputStart = blockId * chunkSize;
int inputSize = blockId != nBlocks - 1 ? chunkSize : n - inputStart;
int outputSize = inputSize; if (outputSize > n - outputPtr) outputSize = n - outputPtr;
@ -407,8 +416,8 @@ ssize_t lzp_decompress(const unsigned char * input, unsigned char * output, ssiz
for (blockId = 0; blockId < nBlocks; ++blockId)
{
int inputPtr = 0; for (p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4);
int outputPtr = 0; for (p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0);
ssize_t inputPtr = 0; for (p = 0; p < blockId; ++p) inputPtr += *(int *)(input + 1 + 8 * p + 4);
ssize_t outputPtr = 0; for (p = 0; p < blockId; ++p) outputPtr += *(int *)(input + 1 + 8 * p + 0);
inputPtr += 1 + 8 * nBlocks;

View file

@ -46,7 +46,7 @@ See also the bsc and libbsc web site:
#define LZP_DEFAULT_LZPHASHSIZE 16
#define LZP_DEFAULT_LZPMINLEN 128
#define LZP_MAX_BLOCK (2147483648LL)
#define LZP_MAX_BLOCK (2000000000L)
#define ALPHABET_SIZE (256)
#ifdef __cplusplus

98
main.c
View file

@ -86,13 +86,15 @@ static const char *exec_name;
static const char *algo = NULL;
static int do_compress = 0;
static int do_uncompress = 0;
static int cksum_bytes;
static int cksum = 0;
static rabin_context_t *rctx;
static void
usage(void)
{
fprintf(stderr,
"\nPcompress Version %f\n\n"
"\nPcompress Version %s\n\n"
"Usage:\n"
"1) To compress a file:\n"
" %s -c <algorithm> [-l <compress level>] [-s <chunk size>] <file>\n"
@ -138,6 +140,9 @@ usage(void)
"7) Other flags:\n"
" '-L' - Enable LZP pre-compression. This improves compression ratio of all\n"
" algorithms with some extra CPU and very low RAM overhead.\n"
" '-S' <cksum>\n"
" - Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512\n"
" Default one is SKEIN256.\n"
" '-M' - Display memory allocator statistics\n"
" '-C' - Display compression statistics\n\n",
UTILITY_VERSION, exec_name, exec_name, exec_name, exec_name, exec_name, exec_name);
@ -258,7 +263,7 @@ perform_decompress(void *dat)
ssize_t rabin_index_sz, rabin_data_sz, rabin_index_sz_cmp, rabin_data_sz_cmp;
int type, rv;
unsigned int blknum;
typeof (tdat->crc64) crc64;
uchar_t checksum[CKSUM_MAX_BYTES];
uchar_t HDR;
uchar_t *cseg;
@ -275,20 +280,19 @@ redo:
*/
if (tdat->rbytes == 0) {
tdat->len_cmp = 0;
tdat->crc64 = 0;
goto cont;
}
cseg = tdat->compressed_chunk + sizeof (crc64);
cseg = tdat->compressed_chunk + cksum_bytes;
_chunksize = tdat->chunksize;
tdat->crc64 = htonll(*((typeof (crc64) *)(tdat->compressed_chunk)));
deserialize_checksum(tdat->checksum, tdat->compressed_chunk, cksum_bytes);
HDR = *cseg;
cseg += CHDR_SZ;
cseg += CHUNK_FLAG_SZ;
if (HDR & CHSIZE_MASK) {
uchar_t *rseg;
tdat->rbytes -= sizeof (ssize_t);
tdat->len_cmp -= sizeof (ssize_t);
tdat->rbytes -= ORIGINAL_CHUNKSZ;
tdat->len_cmp -= ORIGINAL_CHUNKSZ;
rseg = tdat->compressed_chunk + tdat->rbytes;
_chunksize = ntohll(*((ssize_t *)rseg));
}
@ -383,8 +387,8 @@ redo:
* If it does not match we set length of chunk to 0 to indicate
* exit to the writer thread.
*/
crc64 = lzma_crc64(tdat->uncompressed_chunk, _chunksize, 0);
if (crc64 != tdat->crc64) {
compute_checksum(checksum, cksum, tdat->uncompressed_chunk, _chunksize);
if (memcmp(checksum, tdat->checksum, cksum_bytes) != 0) {
tdat->len_cmp = 0;
fprintf(stderr, "ERROR: Chunk %d, checksums do not match.\n", tdat->id);
}
@ -408,7 +412,7 @@ cont:
*
* Chunk Header:
* Compressed length: 8 bytes.
* CRC64 Checksum: 8 bytes.
* Checksum: Upto 64 bytes.
* Chunk flags: 1 byte.
*
* Chunk Flags, 8 bits:
@ -512,8 +516,7 @@ start_decompress(const char *filename, const char *to_filename)
goto uncomp_done;
}
compressed_chunksize = chunksize + sizeof (chunksize) +
sizeof (uint64_t) + sizeof (chunksize) + zlib_buf_extra(chunksize);
compressed_chunksize = chunksize + CHUNK_HDR_SZ + zlib_buf_extra(chunksize);
if (_props_func) {
_props_func(&props, level, chunksize);
@ -531,6 +534,9 @@ start_decompress(const char *filename, const char *to_filename)
props.is_single_chunk = 1;
}
cksum = flags & CKSUM_MASK;
get_checksum_props(NULL, &cksum, &cksum_bytes);
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
if (nthreads > 0 && nthreads < nprocs)
nprocs = nthreads;
@ -542,7 +548,7 @@ start_decompress(const char *filename, const char *to_filename)
if (nthreads * props.nthreads > 1) fprintf(stderr, "s");
fprintf(stderr, "\n");
nprocs = nthreads;
slab_cache_add(compressed_chunksize + CHDR_SZ);
slab_cache_add(compressed_chunksize);
slab_cache_add(chunksize);
slab_cache_add(sizeof (struct cmp_data));
@ -597,7 +603,7 @@ start_decompress(const char *filename, const char *to_filename)
/*
* Now read from the compressed file in variable compressed chunk size.
* First the size is read from the chunk header and then as many bytes +
* CRC64 checksum size are read and passed to decompression thread.
* checksum size are read and passed to decompression thread.
* Chunk sequencing is ensured.
*/
chunk_num = 0;
@ -644,12 +650,13 @@ start_decompress(const char *filename, const char *to_filename)
*/
if (!tdat->compressed_chunk) {
tdat->compressed_chunk = (uchar_t *)slab_alloc(NULL,
compressed_chunksize + CHDR_SZ);
compressed_chunksize);
if (enable_rabin_scan)
tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL,
compressed_chunksize + CHDR_SZ);
compressed_chunksize);
else
tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL, chunksize);
tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL,
chunksize);
if (!tdat->compressed_chunk || !tdat->uncompressed_chunk) {
fprintf(stderr, "Out of memory\n");
UNCOMP_BAIL;
@ -664,12 +671,12 @@ start_decompress(const char *filename, const char *to_filename)
avg_chunk += tdat->len_cmp;
/*
* Now read compressed chunk including the crc64 checksum.
* Now read compressed chunk including the checksum.
*/
tdat->rbytes = Read(compfd, tdat->compressed_chunk,
tdat->len_cmp + sizeof(tdat->crc64) + CHDR_SZ);
tdat->len_cmp + cksum_bytes + CHUNK_FLAG_SZ);
if (main_cancel) break;
if (tdat->rbytes < tdat->len_cmp + sizeof(tdat->crc64) + CHDR_SZ) {
if (tdat->rbytes < tdat->len_cmp + cksum_bytes + CHUNK_FLAG_SZ) {
if (tdat->rbytes < 0) {
perror("Read: ");
UNCOMP_BAIL;
@ -752,7 +759,7 @@ redo:
return (0);
}
compressed_chunk = tdat->compressed_chunk + CHDR_SZ;
compressed_chunk = tdat->compressed_chunk + CHUNK_FLAG_SZ;
rbytes = tdat->rbytes;
/* Perform Dedup if enabled. */
if (enable_rabin_scan) {
@ -764,7 +771,7 @@ redo:
* into uncompressed_chunk so that compress transforms uncompressed_chunk
* back into cmp_seg. Avoids an extra memcpy().
*/
tdat->crc64 = lzma_crc64(tdat->cmp_seg, tdat->rbytes, 0);
compute_checksum(tdat->checksum, cksum, tdat->cmp_seg, tdat->rbytes);
rctx = tdat->rctx;
reset_rabin_context(tdat->rctx);
@ -778,7 +785,7 @@ redo:
/*
* Compute checksum of original uncompressed chunk.
*/
tdat->crc64 = lzma_crc64(tdat->uncompressed_chunk, tdat->rbytes, 0);
compute_checksum(tdat->checksum, cksum, tdat->uncompressed_chunk, tdat->rbytes);
}
/*
@ -868,16 +875,16 @@ plain_compress:
if (lzp_preprocess) {
type |= CHUNK_FLAG_PREPROC;
}
/*
* Insert compressed chunk length and CRC64 checksum into
* chunk header.
* Insert compressed chunk length and checksum into chunk header.
*/
len_cmp = tdat->len_cmp;
*((typeof (len_cmp) *)(tdat->cmp_seg)) = htonll(tdat->len_cmp);
*((typeof (tdat->crc64) *)(tdat->cmp_seg + sizeof (tdat->len_cmp))) = htonll(tdat->crc64);
tdat->len_cmp += CHDR_SZ;
serialize_checksum(tdat->checksum, tdat->cmp_seg + sizeof (tdat->len_cmp), cksum_bytes);
tdat->len_cmp += CHUNK_FLAG_SZ;
tdat->len_cmp += sizeof (len_cmp);
tdat->len_cmp += sizeof (tdat->crc64);
tdat->len_cmp += cksum_bytes;
if (adapt_mode)
type |= (rv << 4);
@ -888,8 +895,8 @@ plain_compress:
if (tdat->rbytes < tdat->chunksize) {
type |= CHSIZE_MASK;
*((typeof (tdat->rbytes) *)(tdat->cmp_seg + tdat->len_cmp)) = htonll(tdat->rbytes);
tdat->len_cmp += sizeof (tdat->rbytes);
len_cmp += sizeof (tdat->rbytes);
tdat->len_cmp += ORIGINAL_CHUNKSZ;
len_cmp += ORIGINAL_CHUNKSZ;
*((typeof (len_cmp) *)(tdat->cmp_seg)) = htonll(len_cmp);
}
/*
@ -924,6 +931,7 @@ repeat:
smallest_chunk = tdat->len_cmp;
avg_chunk += tdat->len_cmp;
}
wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp);
if (unlikely(wbytes != tdat->len_cmp)) {
int i;
@ -981,8 +989,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* See start_decompress() routine for details of chunk header.
* We also keep extra 8-byte space for the last chunk's size.
*/
compressed_chunksize = chunksize + sizeof (chunksize) +
sizeof (uint64_t) + sizeof (chunksize) + zlib_buf_extra(chunksize);
compressed_chunksize = chunksize + CHUNK_HDR_SZ + zlib_buf_extra(chunksize);
init_algo_props(&props);
if (_props_func) {
@ -1005,7 +1012,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
thread = 0;
single_chunk = 0;
slab_cache_add(chunksize);
slab_cache_add(compressed_chunksize + CHDR_SZ);
slab_cache_add(compressed_chunksize);
slab_cache_add(sizeof (struct cmp_data));
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
@ -1090,7 +1097,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
dary = (struct cmp_data **)slab_calloc(NULL, nprocs, sizeof (struct cmp_data *));
if (enable_rabin_scan)
cread_buf = (uchar_t *)slab_alloc(NULL, compressed_chunksize + CHDR_SZ);
cread_buf = (uchar_t *)slab_alloc(NULL, compressed_chunksize);
else
cread_buf = (uchar_t *)slab_alloc(NULL, chunksize);
if (!cread_buf) {
@ -1149,6 +1156,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
* Write out file header. First insert hdr elements into mem buffer
* then write out the full hdr in one shot.
*/
flags |= cksum;
memset(cread_buf, 0, ALGO_SZ);
strncpy(cread_buf, algo, ALGO_SZ);
version = htons(VERSION);
@ -1216,9 +1224,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
tdat->cmp_seg = (uchar_t *)1;
else
tdat->cmp_seg = (uchar_t *)slab_alloc(NULL,
compressed_chunksize + CHDR_SZ);
compressed_chunksize);
tdat->uncompressed_chunk = (uchar_t *)slab_alloc(NULL,
compressed_chunksize + CHDR_SZ);
compressed_chunksize);
} else {
if (single_chunk)
tdat->uncompressed_chunk = (uchar_t *)1;
@ -1226,9 +1234,9 @@ start_compress(const char *filename, uint64_t chunksize, int level)
tdat->uncompressed_chunk =
(uchar_t *)slab_alloc(NULL, chunksize);
tdat->cmp_seg = (uchar_t *)slab_alloc(NULL,
compressed_chunksize + CHDR_SZ);
compressed_chunksize);
}
tdat->compressed_chunk = tdat->cmp_seg + sizeof (chunksize) + sizeof (uint64_t);
tdat->compressed_chunk = tdat->cmp_seg + COMPRESSED_CHUNKSZ + cksum_bytes;
if (!tdat->cmp_seg || !tdat->uncompressed_chunk) {
fprintf(stderr, "Out of memory\n");
COMP_BAIL;
@ -1250,7 +1258,7 @@ start_compress(const char *filename, uint64_t chunksize, int level)
tmp = tdat->cmp_seg;
tdat->cmp_seg = cread_buf;
cread_buf = tmp;
tdat->compressed_chunk = tdat->cmp_seg + sizeof (chunksize) + sizeof (uint64_t);
tdat->compressed_chunk = tdat->cmp_seg + COMPRESSED_CHUNKSZ + cksum_bytes;
/*
* If there is data after the last rabin boundary in the chunk, then
@ -1510,7 +1518,7 @@ main(int argc, char *argv[])
level = 6;
slab_init();
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErL")) != -1) {
while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDErLS:")) != -1) {
int ovr;
switch (opt) {
@ -1579,6 +1587,12 @@ main(int argc, char *argv[])
enable_rabin_split = 0;
break;
case 'S':
if (get_checksum_props(optarg, &cksum, &cksum_bytes) == -1) {
err_exit(0, "Invalid checksum type %s", optarg);
}
break;
case '?':
default:
usage();
@ -1653,6 +1667,8 @@ main(int argc, char *argv[])
}
main_cancel = 0;
if (cksum == 0)
get_checksum_props(DEFAULT_CKSUM, &cksum, &cksum_bytes);
/*
* Start the main routines.
*/

View file

@ -34,13 +34,13 @@ extern "C" {
#include <rabin_polynomial.h>
#define CHDR_SZ 1
#define CHUNK_FLAG_SZ 1
#define ALGO_SZ 8
#define MIN_CHUNK 2048
#define VERSION 3
#define FLAG_DEDUP 1
#define FLAG_SINGLE_CHUNK 2
#define UTILITY_VERSION 0.8
#define UTILITY_VERSION "0.8"
#define COMPRESSED 1
#define UNCOMPRESSED 0
@ -55,16 +55,21 @@ extern "C" {
#define PREPROC_COMPRESSED 128
/*
* lower 3 bits in higher nibble indicate compression algorithm.
* Sizes of chunk header components.
*/
#define COMPRESSED_CHUNKSZ (sizeof (uint64_t))
#define ORIGINAL_CHUNKSZ (sizeof (uint64_t))
#define CHUNK_HDR_SZ (COMPRESSED_CHUNKSZ + cksum_bytes + ORIGINAL_CHUNKSZ + CHUNK_FLAG_SZ)
/*
* lower 3 bits in higher nibble indicate chunk compression algorithm
* in adaptive modes.
*/
#define COMPRESS_LZMA 1
#define COMPRESS_BZIP2 2
#define COMPRESS_PPMD 3
#define CHDR_ALGO_MASK 7
extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, size_t size,
uint64_t crc, uint64_t *cnt);
extern uint32_t zlib_buf_extra(ssize_t buflen);
extern int lz4_buf_extra(ssize_t buflen);
@ -155,7 +160,7 @@ struct cmp_data {
ssize_t rbytes;
ssize_t chunksize;
ssize_t len_cmp;
uint64_t crc64;
uchar_t checksum[CKSUM_MAX_BYTES];
int level;
unsigned int id;
compress_func_ptr compress;

View file

@ -65,19 +65,9 @@
#include <allocator.h>
#include <utils.h>
#include <pthread.h>
#include <arpa/nameser_compat.h>
#include <crc_macros.h>
#include "rabin_polynomial.h"
#if BYTE_ORDER == BIG_ENDIAN
# define A1(x) ((x) >> 56)
#else
# define A1 A
#endif
extern const uint64_t lzma_crc64_table[4][256];
extern int lzma_init(void **data, int *level, ssize_t chunksize);
extern int lzma_compress(void *src, size_t srclen, void *dst,
size_t *destlen, int level, uchar_t chdr, void *data);
@ -309,7 +299,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
uint64_t *fplist;
uint32_t len1, fpos[2], cur_sketch2;
uint32_t *charcounts, byts;
uint64_t crc;
if (rabin_pos == NULL) {
/*
@ -332,7 +321,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
j = 0;
cur_sketch = 0;
cur_sketch2 = 0;
crc = 0;
/*
* If rabin_pos is non-zero then we are being asked to scan for the last rabin boundary
@ -386,7 +374,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
cur_roll_checksum = (cur_roll_checksum << 1) + cur_byte;
cur_roll_checksum -= (pushed_out << RAB_POLYNOMIAL_WIN_SIZE);
cur_pos_checksum = cur_roll_checksum ^ ir[pushed_out];
crc = lzma_crc64_table[0][cur_byte ^ A1(crc)] ^ S8(crc);
/*
* Compute a super sketch value of the block. We store a sum of relative
@ -473,7 +460,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
ctx->blocks[blknum]->length = length;
ctx->blocks[blknum]->ref = 0;
ctx->blocks[blknum]->similar = 0;
ctx->blocks[blknum]->crc = crc;
ctx->blocks[blknum]->crc = lzma_crc64(buf1+last_offset, length, 0);
// Accumulate the 2 sketch values into a combined similarity checksum
ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
@ -487,7 +474,6 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
length = 0;
j = 0;
cur_sketch2 = 0;
crc = 0;
}
}
@ -518,7 +504,7 @@ rabin_dedup(rabin_context_t *ctx, uchar_t *buf, ssize_t *size, ssize_t offset, s
j = (j > 0 ? j:1);
ctx->blocks[blknum]->cksum_n_offset = (cur_sketch + cur_sketch2) / 2;
ctx->blocks[blknum]->mean_n_length = cur_sketch / j;
ctx->blocks[blknum]->crc = crc;
ctx->blocks[blknum]->crc = lzma_crc64(buf1+last_offset, ctx->blocks[blknum]->length, 0);
blknum++;
last_offset = *size;
}

View file

@ -168,5 +168,6 @@ extern void rabin_update_hdr(uchar_t *buf, ssize_t rabin_index_sz_cmp,
extern void reset_rabin_context(rabin_context_t *ctx);
extern uint32_t rabin_buf_extra(uint64_t chunksize, int rab_blk_sz, const char *algo,
int delta_flag);
extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
#endif /* _RABIN_POLY_H_ */

85
utils.c
View file

@ -32,9 +32,27 @@
#include <errno.h>
#include <link.h>
#include <rabin_polynomial.h>
#include <skein.h>
#include "utils.h"
/*
* Checksum properties
*/
static struct {
char *name;
cksum_t cksum_id;
int bytes;
} cksum_props[] = {
{"CRC64", CKSUM_CRC64, 8},
{"SKEIN256", CKSUM_SKEIN256, 32},
{"SKEIN512", CKSUM_SKEIN512, 64}
};
extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc);
extern uint64_t lzma_crc64_8bchk(const uint8_t *buf, size_t size,
uint64_t crc, uint64_t *cnt);
void
err_exit(int show_errno, const char *format, ...)
{
@ -295,3 +313,70 @@ set_threadcounts(algo_props_t *props, int *nthreads, int nprocs, algo_threads_ty
props->nthreads = nprocs;
}
}
int
compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes)
{
if (cksum == CKSUM_CRC64) {
uint64_t *ck = (uint64_t *)cksum_buf;
*ck = lzma_crc64(buf, bytes, 0);
} else if (cksum == CKSUM_SKEIN256) {
Skein_512_Ctxt_t ctx;
Skein_512_Init(&ctx, 256);
Skein_512_Update(&ctx, buf, bytes);
Skein_512_Final(&ctx, cksum_buf);
} else if (cksum == CKSUM_SKEIN512) {
Skein_512_Ctxt_t ctx;
Skein_512_Init(&ctx, 512);
Skein_512_Update(&ctx, buf, bytes);
Skein_512_Final(&ctx, cksum_buf);
} else {
fprintf(stderr, "Invalid checksum algorithm code: %d\n", cksum);
return (-1);
}
return (0);
}
int
get_checksum_props(char *name, int *cksum, int *cksum_bytes)
{
int i;
for (i=0; i<sizeof (cksum_props); i++) {
if ((name != NULL && strcmp(name, cksum_props[i].name) == 0) ||
(*cksum != 0 && *cksum == cksum_props[i].cksum_id)) {
*cksum = cksum_props[i].cksum_id;
*cksum_bytes = cksum_props[i].bytes;
return (0);
}
}
return (-1);
}
void
serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
{
int i,j;
j = 0;
for (i=cksum_bytes; i>0; i--) {
buf[j] = checksum[i-1];
j++;
}
}
void
deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
{
int i,j;
j = 0;
for (i=cksum_bytes; i>0; i--) {
checksum[i-1] = buf[j];
j++;
}
}

18
utils.h
View file

@ -101,6 +101,20 @@ typedef ssize_t bsize_t;
#define DEBUG_STAT_EN(...)
#endif
/*
* Public checksum properties. CKSUM_MAX_BYTES must be updated if a
* newer larger checksum is added to the list.
*/
typedef enum {
CKSUM_CRC64 = 0x100,
CKSUM_SKEIN256 = 0x200,
CKSUM_SKEIN512 = 0x300
} cksum_t;
#define CKSUM_MASK 0x700
#define CKSUM_MAX_BYTES 64
#define DEFAULT_CKSUM "SKEIN256"
typedef struct {
uint32_t buf_extra;
int compress_mt_capable;
@ -127,6 +141,10 @@ extern ssize_t Read_Adjusted(int fd, uchar_t *buf, size_t count,
extern ssize_t Write(int fd, const void *buf, size_t count);
extern void set_threadcounts(algo_props_t *props, int *nthreads, int nprocs,
algo_threads_type_t typ);
extern int compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes);
extern int get_checksum_props(char *name, int *cksum, int *cksum_bytes);
extern void serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
extern void deserialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes);
/* Pointer type for compress and decompress functions. */
typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst,