diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..be92153
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,92 @@
+# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#  http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+cmake_minimum_required(VERSION 3.0)
+cmake_policy(VERSION 3.0)
+# CMP0054 only exists since CMake 3.1; setting an unknown policy is a hard
+# error, so only opt in when the running CMake knows it.
+if(POLICY CMP0054)
+    cmake_policy(SET CMP0054 NEW)
+endif()
+project(libb2 VERSION 0.97 LANGUAGES C)
+
+# refuse to configure inside the source tree (operands are quoted so that they
+# are always compared as plain strings)
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
+    message(FATAL_ERROR "in-source builds are not supported!")
+endif()
+
+# add cmake script path; relative to this project so that the helper modules
+# are also found when libb2 is added via add_subdirectory()
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+
+# set pre install output directories
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+if(MSVC)
+    set(CMAKE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+endif()
+
+########################################################################
+# platform detection/compiler support
+include(TestBigEndian)
+TEST_BIG_ENDIAN(BLAKE2_BIG_ENDIAN)
+
+# SSE feature level
+set(BLAKE2_SSE_LEVELS NO_SSE SSE2 SSSE3 SSE41 AVX XOP)
+
+# BLAKE2_EIS_IDX is the index (into BLAKE2_SSE_LEVELS) of the highest level
+# that src/CMakeLists.txt generates a code path for; 0 generates none
+option(BLAKE2_FAT_BINARIES "build fat binaries with all available SSE code paths." ON)
+if(BLAKE2_FAT_BINARIES)
+    list(FIND BLAKE2_SSE_LEVELS XOP BLAKE2_EIS_IDX)
+else()
+    list(FIND BLAKE2_SSE_LEVELS NO_SSE BLAKE2_EIS_IDX)
+endif()
+
+# openmp support
+include(CheckOpenMPSupport)
+if(OpenMP_AVAILABLE)
+    option(BLAKE2_UTILIZE_OPENMP "compile libb2 with OpenMP enabled" ON)
+else()
+    set(BLAKE2_UTILIZE_OPENMP OFF)
+endif()
+
+option(BLAKE2_BUILD_TESTS "build the test drivers and register them with CTest")
+option(BLAKE2_SHARED_OBJECT "build a dynamic link library instead of a static one")
+
+if(BLAKE2_BUILD_TESTS)
+    enable_testing()
+endif()
+
+########################################################################
+# add project
+add_subdirectory(src)
+
+########################################################################
+# install target
+include(CMakePackageConfigHelpers)
+write_basic_package_version_file(
+    "${CMAKE_CURRENT_BINARY_DIR}/libb2-config-version.cmake"
+    VERSION ${libb2_VERSION}
+    COMPATIBILITY ExactVersion
+)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libb2-config-version.cmake" DESTINATION cmake)
+
+configure_file(cmake/libb2-config.cmake
+    "${CMAKE_CURRENT_BINARY_DIR}/libb2-config.cmake"
+    COPYONLY
+)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libb2-config.cmake" DESTINATION cmake)
+
+install(EXPORT libb2-targets DESTINATION cmake)
diff --git a/README.md b/README.md
index 17faa8f..6f96aa1 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,9 @@
 
 C library providing BLAKE2b, BLAKE2s, BLAKE2bp, BLAKE2sp
 
-Installation:
+## Installation
 
+### Autotools
 ```
 $ ./autogen.sh
 $ ./configure
@@ -11,4 +12,11 @@ $ make
 $ sudo make install
 ```
 
-Contact: contact@blake2.net
+### CMake
+On Windows CMake can generate make/project files for Visual Studio, MinGW and Clang.
+The install target will create and install a proper package config. The import project is called `libb2`.
+
+Please note that the CMake project is incapable of configuring OpenMP support on Clang.
+
+## Contact
+[contact@blake2.net](mailto:contact@blake2.net)
diff --git a/cmake/CheckOpenMPSupport.cmake b/cmake/CheckOpenMPSupport.cmake
new file mode 100644
index 0000000..4127728
--- /dev/null
+++ b/cmake/CheckOpenMPSupport.cmake
@@ -0,0 +1,48 @@
+# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#  http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+
+# Checks whether the C compiler can build a small OpenMP program and stores
+# the result in OpenMP_AVAILABLE. CMAKE_REQUIRED_FLAGS is only changed for the
+# duration of the check.
+
+include(CheckCSourceCompiles)
+
+# remember the caller's flags so that they can be restored after the check
+set(O_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+
+if(MSVC)
+    set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} /openmp")
+elseif(CMAKE_COMPILER_IS_GNUCC)
+    set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -fopenmp")
+elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+    # don't know how to enable clang's openmp support
+endif()
+check_c_source_compiles("
+    #include <omp.h>
+    #ifndef _OPENMP
+    #error \"_OPENMP not defined\"
+    #endif
+    int main()
+    {
+        char hash[1024];
+        omp_set_num_threads(4);
+        #pragma omp parallel shared(hash)
+        omp_get_thread_num();
+        return 0;
+    }
+" OpenMP_AVAILABLE)
+
+# include() shares the caller's variable scope: undo the temporary change so
+# that later check_* calls are not compiled with the OpenMP flag
+set(CMAKE_REQUIRED_FLAGS "${O_FLAGS}")
+unset(O_FLAGS)
diff --git a/cmake/libb2-config.cmake b/cmake/libb2-config.cmake
new file mode 100644
index 0000000..07137fe
--- /dev/null
+++ b/cmake/libb2-config.cmake
@@ -0,0 +1 @@
+include("${CMAKE_CURRENT_LIST_DIR}/libb2-targets.cmake")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..11f6a74
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,184 @@
+# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl
+#
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to the
+# public domain worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication
+# along with this software. If not, see
+#
+#  http://creativecommons.org/publicdomain/zero/1.0/
+#
+########################################################################
+
+if(BLAKE2_SHARED_OBJECT)
+    set(BLAKE2_SHARED_LIBRARY_VAL 1)
+    set(BLAKE2_SHARED_LIBRARY_DEF SHARED)
+else()
+    set(BLAKE2_SHARED_LIBRARY_VAL 0)
+    set(BLAKE2_SHARED_LIBRARY_DEF STATIC)
+endif()
+
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+
+# symbol suffix of each generated code path; entry N belongs to entry N+1 of
+# BLAKE2_SSE_LEVELS (this list has no entry for NO_SSE)
+set(LIBB2_SUFFIX_LIST _sse2 _ssse3 _sse41 _avx _xop)
+
+# Create one private copy of blake2s.c/blake2b.c per level up to BLAKE2_EIS_IDX.
+# Flags and HAVE_* definitions accumulate, i.e. every copy is compiled with the
+# options of all lower levels plus its own.
+set(BLAKE2_IMPL_SOURCES "")
+set(FEATURE_FLAGS "")
+set(FEATURE_DEFS "")
+set(COUNTER 0)
+while(COUNTER LESS BLAKE2_EIS_IDX)
+    list(GET LIBB2_SUFFIX_LIST ${COUNTER} _CURR_SUFFIX)
+    math(EXPR COUNTER "${COUNTER} + 1")
+
+    list(GET BLAKE2_SSE_LEVELS ${COUNTER} _CURR_LEVEL)
+
+    set(_CURR_B2S_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2s_${_CURR_LEVEL}.c")
+    set(_CURR_B2B_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2b_${_CURR_LEVEL}.c")
+    configure_file("blake2s.c"
+        ${_CURR_B2S_FILE}
+        COPYONLY
+    )
+    configure_file("blake2b.c"
+        ${_CURR_B2B_FILE}
+        COPYONLY
+    )
+    set(_CURR_FILES ${_CURR_B2S_FILE} ${_CURR_B2B_FILE})
+    list(APPEND BLAKE2_IMPL_SOURCES ${_CURR_FILES})
+
+    list(APPEND FEATURE_DEFS HAVE_${_CURR_LEVEL})
+    if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+        if(COUNTER GREATER 4)
+            set(FEATURE_FLAGS "${FEATURE_FLAGS} -mxop")
+        elseif(COUNTER GREATER 3)
+            set(FEATURE_FLAGS "${FEATURE_FLAGS} -mavx")
+        elseif(COUNTER GREATER 2)
+            set(FEATURE_FLAGS "${FEATURE_FLAGS} -msse4.1")
+        elseif(COUNTER GREATER 1)
+            set(FEATURE_FLAGS "${FEATURE_FLAGS} -mssse3")
+        else()
+            set(FEATURE_FLAGS -msse2)
+        endif()
+    endif()
+
+    set_source_files_properties(${_CURR_FILES} PROPERTIES
+        COMPILE_FLAGS "${FEATURE_FLAGS}"
+        COMPILE_DEFINITIONS "SUFFIX=${_CURR_SUFFIX};${FEATURE_DEFS}"
+    )
+
+    unset(_CURR_FILES)
+    unset(_CURR_B2B_FILE)
+    unset(_CURR_B2S_FILE)
+    unset(_CURR_SUFFIX)
+    unset(_CURR_LEVEL)
+endwhile()
+unset(FEATURE_FLAGS)
+unset(FEATURE_DEFS)
+unset(COUNTER)
+
+# With generated code paths the reference implementation is compiled with the
+# _ref suffix and blake2-dispatch.c is added to pick an implementation at run
+# time; otherwise the reference implementation is compiled with an empty suffix.
+if(NOT (BLAKE2_EIS_IDX EQUAL 0))
+    set(BLAKE2_DISPATCH_SOURCES blake2-dispatch.c)
+    set_source_files_properties(blake2b-ref.c blake2s-ref.c PROPERTIES
+        COMPILE_DEFINITIONS "SUFFIX=_ref"
+    )
+else()
+    set(BLAKE2_DISPATCH_SOURCES "")
+    set_source_files_properties(blake2b-ref.c blake2s-ref.c PROPERTIES
+        COMPILE_DEFINITIONS "SUFFIX="
+    )
+endif()
+
+add_library(libb2 ${BLAKE2_SHARED_LIBRARY_DEF}
+    blake2.h
+    blake2-config.h
+    blake2-impl.h
+    ${BLAKE2_DISPATCH_SOURCES}
+
+    blake2b-ref.c
+    blake2b-round.h
+    blake2b-load-sse2.h
+    blake2b-load-sse41.h
+
+    blake2bp.c
+    blake2xb.c
+
+    blake2s-ref.c
+    blake2s-round.h
+    blake2s-load-sse2.h
+    blake2s-load-sse41.h
+    blake2s-load-xop.h
+
+    blake2sp.c
+    blake2xs.c
+
+    ${BLAKE2_IMPL_SOURCES}
+)
+unset(BLAKE2_DISPATCH_SOURCES)
+
+source_group(common REGULAR_EXPRESSION .*blake2.*)
+source_group(blake2s REGULAR_EXPRESSION .*blake2s.*)
+source_group(blake2b REGULAR_EXPRESSION .*blake2b.*)
+
+########################################################################
+# config
+target_compile_definitions(libb2
+    PRIVATE
+        _UNICODE
+        NO_CONFIG
+        $<$<NOT:$<BOOL:${BLAKE2_BIG_ENDIAN}>>:NATIVE_LITTLE_ENDIAN>
+        $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL_EXPORTS>
+
+    PUBLIC
+        $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL>
+)
+target_include_directories(libb2
+    PUBLIC
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+)
+set_target_properties(libb2 PROPERTIES PREFIX "")
+
+# enable OpenMP for the library target only instead of the whole directory;
+# with gcc the flag is passed to the link step as well
+if(BLAKE2_UTILIZE_OPENMP)
+    if(MSVC)
+        target_compile_options(libb2 PRIVATE /openmp)
+    elseif(CMAKE_COMPILER_IS_GNUCC)
+        target_compile_options(libb2 PRIVATE -fopenmp)
+        target_link_libraries(libb2 PRIVATE -fopenmp)
+    endif()
+endif()
+
+########################################################################
+# install target
+install(TARGETS libb2 EXPORT libb2-targets
+    RUNTIME DESTINATION bin/$<CONFIG>
+    LIBRARY DESTINATION lib/$<CONFIG>
+    ARCHIVE DESTINATION lib/$<CONFIG>
+    INCLUDES DESTINATION include
+)
+install(FILES blake2.h DESTINATION include)
+
+
+########################################################################
+# test drivers
+
+if(BLAKE2_BUILD_TESTS)
+    # one driver and one CTest entry per variant: blake2b, blake2s, blake2bp, blake2sp
+    foreach(_VARIANT b s bp sp)
+        add_executable(blake2${_VARIANT}_test
+            blake2${_VARIANT}-test.c
+            blake2-kat.h
+        )
+        target_link_libraries(blake2${_VARIANT}_test PRIVATE libb2)
+        add_test(NAME blake2${_VARIANT} COMMAND blake2${_VARIANT}_test)
+    endforeach()
+    unset(_VARIANT)
+endif()
diff --git a/src/blake2-config.h b/src/blake2-config.h
index f5dd6fa..a524aa9 100644
--- a/src/blake2-config.h
+++ b/src/blake2-config.h
@@ -1,20 +1,22 @@
 /*
    BLAKE2 reference source code package - optimized C implementations
 
-   Written in 2012 by Samuel Neves
+   Copyright 2012, Samuel Neves . You may use this under the
+   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+   your option. The terms of these licenses can be found at:
 
-   To the extent possible under law, the author(s) have dedicated all copyright
-   and related and neighboring rights to this software to the public domain
-   worldwide. This software is distributed without any warranty.
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license : https://www.openssl.org/source/license.html
+   - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
 
-   You should have received a copy of the CC0 Public Domain Dedication along with
-   this software. If not, see .
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
*/ -#pragma once -#ifndef __BLAKE2_CONFIG_H__ -#define __BLAKE2_CONFIG_H__ +#ifndef BLAKE2_CONFIG_H +#define BLAKE2_CONFIG_H -#if defined(__SSE2__) +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) #define HAVE_SSE2 #endif @@ -23,7 +25,7 @@ #endif #if defined(__SSE4_1__) -#define HAVE_SSE4_1 +#define HAVE_SSE41 #endif #if defined(__AVX__) @@ -48,8 +50,8 @@ #endif #ifdef HAVE_AVX -#ifndef HAVE_SSE4_1 -#define HAVE_SSE4_1 +#ifndef HAVE_SSE41 +#define HAVE_SSE41 #endif #endif @@ -68,4 +70,3 @@ #endif #endif - diff --git a/src/blake2-dispatch.c b/src/blake2-dispatch.c index 2b1ccc8..f14cca7 100644 --- a/src/blake2-dispatch.c +++ b/src/blake2-dispatch.c @@ -118,7 +118,7 @@ static inline cpu_feature_t get_cpu_features( void ) } /* For future architectures */ - /* + /* eax = 7; ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); @@ -141,92 +141,98 @@ extern "C" { int blake2b_init_ref( blake2b_state *S, size_t outlen ); int blake2b_init_key_ref( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_ref( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_ref( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_ref( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_ref( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_ref( blake2b_state *S, void *out, size_t outlen ); + int blake2b_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(HAVE_X86) int blake2b_init_sse2( blake2b_state *S, size_t outlen ); int blake2b_init_key_sse2( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_sse2( blake2b_state *S, const 
blake2b_param *P ); - int blake2b_update_sse2( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_sse2( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_sse2( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_sse2( blake2b_state *S, void *out, size_t outlen ); + int blake2b_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_ssse3( blake2b_state *S, size_t outlen ); int blake2b_init_key_ssse3( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_ssse3( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_ssse3( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_ssse3( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_ssse3( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_ssse3( blake2b_state *S, void *out, size_t outlen ); + int blake2b_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_sse41( blake2b_state *S, size_t outlen ); int blake2b_init_key_sse41( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_sse41( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_sse41( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_sse41( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, 
size_t inlen, size_t keylen ); + int blake2b_update_sse41( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_sse41( blake2b_state *S, void *out, size_t outlen ); + int blake2b_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_avx( blake2b_state *S, size_t outlen ); int blake2b_init_key_avx( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_avx( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_avx( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_avx( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_avx( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_avx( blake2b_state *S, void *out, size_t outlen ); + int blake2b_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_xop( blake2b_state *S, size_t outlen ); int blake2b_init_key_xop( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_xop( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_xop( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_xop( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_xop( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_xop( blake2b_state *S, void *out, size_t outlen ); + int blake2b_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + 
int blake2_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #endif /* HAVE_X86 */ int blake2s_init_ref( blake2s_state *S, size_t outlen ); int blake2s_init_key_ref( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_ref( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_ref( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_ref( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_ref( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_ref( blake2s_state *S, void *out, size_t outlen ); + int blake2s_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(HAVE_X86) int blake2s_init_sse2( blake2s_state *S, size_t outlen ); int blake2s_init_key_sse2( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_sse2( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_sse2( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_sse2( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_sse2( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_sse2( blake2s_state *S, void *out, size_t outlen ); + int blake2s_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_ssse3( blake2s_state *S, size_t outlen ); int blake2s_init_key_ssse3( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_ssse3( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_ssse3( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_ssse3( blake2s_state *S, 
uint8_t *out, size_t outlen ); - int blake2s_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_ssse3( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_ssse3( blake2s_state *S, void *out, size_t outlen ); + int blake2s_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_sse41( blake2s_state *S, size_t outlen ); int blake2s_init_key_sse41( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_sse41( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_sse41( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_sse41( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_sse41( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_sse41( blake2s_state *S, void *out, size_t outlen ); + int blake2s_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_avx( blake2s_state *S, size_t outlen ); int blake2s_init_key_avx( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_avx( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_avx( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_avx( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_avx( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_avx( blake2s_state *S, void *out, size_t outlen ); + int blake2s_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_xop( blake2s_state *S, size_t outlen ); int blake2s_init_key_xop( 
blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_xop( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_xop( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_xop( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_xop( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_xop( blake2s_state *S, void *out, size_t outlen ); + int blake2s_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #endif /* HAVE_X86 */ @@ -237,16 +243,16 @@ extern "C" { typedef int ( *blake2b_init_fn )( blake2b_state *, size_t ); typedef int ( *blake2b_init_key_fn )( blake2b_state *, size_t, const void *, size_t ); typedef int ( *blake2b_init_param_fn )( blake2b_state *, const blake2b_param * ); -typedef int ( *blake2b_update_fn )( blake2b_state *, const uint8_t *, size_t ); -typedef int ( *blake2b_final_fn )( blake2b_state *, uint8_t *, size_t ); -typedef int ( *blake2b_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t ); +typedef int ( *blake2b_update_fn )( blake2b_state *, const void *, size_t ); +typedef int ( *blake2b_final_fn )( blake2b_state *, void *, size_t ); +typedef int ( *blake2b_fn )( void *, size_t, const void *, size_t, const void *, size_t ); typedef int ( *blake2s_init_fn )( blake2s_state *, size_t ); typedef int ( *blake2s_init_key_fn )( blake2s_state *, size_t, const void *, size_t ); typedef int ( *blake2s_init_param_fn )( blake2s_state *, const blake2s_param * ); -typedef int ( *blake2s_update_fn )( blake2s_state *, const uint8_t *, size_t ); -typedef int ( *blake2s_final_fn )( blake2s_state *, uint8_t *, size_t ); -typedef int ( *blake2s_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t ); +typedef int ( *blake2s_update_fn )( blake2s_state *, const void *, size_t ); 
+typedef int ( *blake2s_final_fn )( blake2s_state *, void *, size_t ); +typedef int ( *blake2s_fn )( void *, size_t, const void *, size_t, const void *, size_t ); static const blake2b_init_fn blake2b_init_table[] = { @@ -320,6 +326,18 @@ static const blake2b_fn blake2b_table[] = #endif }; +static const blake2b_fn blake2_table[] = +{ + blake2_ref, +#if defined(HAVE_X86) + blake2_sse2, + blake2_ssse3, + blake2_sse41, + blake2_avx, + blake2_xop +#endif +}; + static const blake2s_init_fn blake2s_init_table[] = { blake2s_init_ref, @@ -398,16 +416,17 @@ extern "C" { int blake2b_init_dispatch( blake2b_state *S, size_t outlen ); int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_dispatch( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_dispatch( blake2b_state *S, void *out, size_t outlen ); + int blake2b_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_dispatch( blake2s_state *S, size_t outlen ); int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int 
blake2s_update_dispatch( blake2s_state *S, const void *in, size_t inlen );
+  int blake2s_final_dispatch( blake2s_state *S, void *out, size_t outlen );
+  int blake2s_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
 #if defined(__cplusplus)
 }
 #endif
@@ -418,6 +437,7 @@ static blake2b_init_param_fn blake2b_init_param_ptr = blake2b_init_param_dispatc
 static blake2b_update_fn blake2b_update_ptr = blake2b_update_dispatch;
 static blake2b_final_fn blake2b_final_ptr = blake2b_final_dispatch;
 static blake2b_fn blake2b_ptr = blake2b_dispatch;
+static blake2b_fn blake2_ptr = blake2_dispatch;
 
 static blake2s_init_fn blake2s_init_ptr = blake2s_init_dispatch;
 static blake2s_init_key_fn blake2s_init_key_ptr = blake2s_init_key_dispatch;
@@ -444,22 +464,28 @@ int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P )
   return blake2b_init_param_ptr( S, P );
 }
 
-int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen )
+int blake2b_update_dispatch( blake2b_state *S, const void *in, size_t inlen )
 {
   blake2b_update_ptr = blake2b_update_table[get_cpu_features()];
   return blake2b_update_ptr( S, in, inlen );
 }
 
-int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen )
+int blake2b_final_dispatch( blake2b_state *S, void *out, size_t outlen )
 {
   blake2b_final_ptr = blake2b_final_table[get_cpu_features()];
   return blake2b_final_ptr( S, out, outlen );
 }
 
-int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
+int blake2b_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
 {
   blake2b_ptr = blake2b_table[get_cpu_features()];
-  return blake2b_ptr( out, in, key, outlen, inlen, keylen );
+  return blake2b_ptr(out, outlen, in, inlen, key, keylen);
+}
+
+int blake2_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
+{
+  blake2_ptr = blake2_table[get_cpu_features()]; /* cache in blake2_ptr (read by blake2()); blake2b_ptr belongs to blake2b() */
+  return blake2_ptr(out, outlen, in, inlen, key, keylen);
 }
 
 BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen )
@@ -477,19 +503,24 @@ BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
   return blake2b_init_param_ptr( S, P );
 }
 
-BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
+BLAKE2_API int blake2b_update( blake2b_state *S, const void *in, size_t inlen )
 {
   return blake2b_update_ptr( S, in, inlen );
 }
 
-BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
+BLAKE2_API int blake2b_final( blake2b_state *S, void *out, size_t outlen )
 {
   return blake2b_final_ptr( S, out, outlen );
 }
 
-BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
+BLAKE2_API int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
+{
+  return blake2b_ptr(out, outlen, in, inlen, key, keylen);
+}
+
+BLAKE2_API int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
 {
-  return blake2b_ptr( out, in, key, outlen, inlen, keylen );
+  return blake2_ptr(out, outlen, in, inlen, key, keylen);
 }
 
 int blake2s_init_dispatch( blake2s_state *S, size_t outlen )
@@ -510,22 +541,22 @@ int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P )
   return blake2s_init_param_ptr( S, P );
 }
 
-int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen )
+int blake2s_update_dispatch( blake2s_state *S, const void *in, size_t inlen )
 {
   blake2s_update_ptr = blake2s_update_table[get_cpu_features()];
   return blake2s_update_ptr( S, in, inlen );
 }
 
-int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen )
+int blake2s_final_dispatch( blake2s_state *S, void *out, size_t outlen )
 {
   blake2s_final_ptr = blake2s_final_table[get_cpu_features()];
   return blake2s_final_ptr( S, out, outlen );
 }
 
-int
blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2s_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_ptr = blake2s_table[get_cpu_features()]; - return blake2s_ptr( out, in, key, outlen, inlen, keylen ); + return blake2s_ptr( out, outlen, in, inlen, key, keylen ); } BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen ) @@ -543,18 +574,18 @@ BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) return blake2s_init_param_ptr( S, P ); } -BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +BLAKE2_API int blake2s_update( blake2s_state *S, const void *in, size_t inlen ) { return blake2s_update_ptr( S, in, inlen ); } -BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +BLAKE2_API int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { return blake2s_final_ptr( S, out, outlen ); } -BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +BLAKE2_API int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen) { - return blake2s_ptr( out, in, key, outlen, inlen, keylen ); + return blake2s_ptr( out, outlen, in, inlen, key, keylen ); } diff --git a/src/blake2-impl.h b/src/blake2-impl.h index c99e3de..e3f2e43 100644 --- a/src/blake2-impl.h +++ b/src/blake2-impl.h @@ -1,139 +1,164 @@ /* - BLAKE2 reference source code package - optimized C implementations + BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_IMPL_H__ -#define __BLAKE2_IMPL_H__ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H -#include #include #include -#include "config.h" + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif #define BLAKE2_IMPL_CAT(x,y) x ## y #define BLAKE2_IMPL_EVAL(x,y) BLAKE2_IMPL_CAT(x,y) #define BLAKE2_IMPL_NAME(fun) BLAKE2_IMPL_EVAL(fun, SUFFIX) -static inline uint32_t load32( const void *src ) +static BLAKE2_INLINE uint32_t load32( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - return *( uint32_t * )( src ); -#else - const uint8_t *p = ( uint8_t * )src; - uint32_t w = *p++; - w |= ( uint32_t )( *p++ ) << 8; - w |= ( uint32_t )( *p++ ) << 16; - w |= ( uint32_t )( *p++ ) << 24; +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; #endif } -static inline uint64_t load64( const void *src ) 
+static BLAKE2_INLINE uint64_t load64( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - return *( uint64_t * )( src ); -#else - const uint8_t *p = ( uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - w |= ( uint64_t )( *p++ ) << 48; - w |= ( uint64_t )( *p++ ) << 56; +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; #endif } -static inline void store32( void *dst, uint32_t w ) +static BLAKE2_INLINE uint16_t load16( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - *( uint32_t * )( dst ) = w; +#if defined(NATIVE_LITTLE_ENDIAN) + uint16_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + const uint8_t *p = ( const uint8_t * )src; + return (( uint16_t )( p[0] ) << 0) | + (( uint16_t )( p[1] ) << 8) ; #endif } -static inline void store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - *( uint64_t * )( dst ) = w; +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 
8; - *p++ = ( uint8_t )w; w >>= 8; *p++ = ( uint8_t )w; #endif } -static inline uint64_t load48( const void *src ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { - const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - return w; +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif } -static inline void store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); +#endif } -static inline uint32_t rotl32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t load48( const void *src ) { - return ( w << c ) | ( w >> ( 32 - c ) ); + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; } -static inline uint64_t rotl64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { - return ( w << c ) | ( w >> ( 64 - c ) ); + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = 
(uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); } -static inline uint32_t rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -static inline uint64_t rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -static inline void secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); } #endif - diff --git a/src/blake2.h b/src/blake2.h index 5ca17f6..18c0e3d 100644 --- a/src/blake2.h +++ b/src/blake2.h @@ -1,52 +1,58 @@ /* - BLAKE2 reference source code package - optimized C implementations + BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #pragma once -#ifndef __BLAKE2_H__ -#define __BLAKE2_H__ +#ifndef BLAKE2_H +#define BLAKE2_H #include #include #if defined(_WIN32) || defined(__CYGWIN__) - #define BLAKE2_DLL_IMPORT __declspec(dllimport) - #define BLAKE2_DLL_EXPORT __declspec(dllexport) - #define BLAKE2_DLL_PRIVATE +#define BLAKE2_DLL_IMPORT __declspec(dllimport) +#define BLAKE2_DLL_EXPORT __declspec(dllexport) +#define BLAKE2_DLL_PRIVATE #elif __GNUC__ >= 4 - #define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden"))) +#define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default"))) +#define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default"))) +#define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden"))) #else - #define BLAKE2_DLL_IMPORT - #define BLAKE2_DLL_EXPORT - #define BLAKE2_DLL_PRIVATE +#define BLAKE2_DLL_IMPORT +#define BLAKE2_DLL_EXPORT +#define BLAKE2_DLL_PRIVATE #endif #if defined(BLAKE2_DLL) - #if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL - #define BLAKE2_API BLAKE2_DLL_EXPORT - #else - #define BLAKE2_API BLAKE2_DLL_IMPORT - #endif - #define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic +#if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL +#define BLAKE2_API BLAKE2_DLL_EXPORT #else - #define BLAKE2_API - #define BLAKE2_PRIVATE +#define BLAKE2_API BLAKE2_DLL_IMPORT +#endif +#define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic +#else +#define BLAKE2_API +#define BLAKE2_PRIVATE +#endif + +#if defined(_MSC_VER) +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) #endif #if defined(__cplusplus) extern "C" { -#elif defined(_MSC_VER) && !defined(inline) -#define inline __inline #endif enum blake2s_constant @@ -67,116 +73,150 @@ extern "C" { BLAKE2B_PERSONALBYTES = 
16 }; -#pragma pack(push, 1) - typedef struct __blake2s_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint8_t node_offset[6];// 14 - uint8_t node_depth; // 15 - uint8_t inner_length; // 16 - // uint8_t reserved[0]; - uint8_t salt[BLAKE2S_SALTBYTES]; // 24 - uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32 - } blake2s_param; - - typedef struct __blake2s_state + typedef struct blake2s_state__ { uint32_t h[8]; uint32_t t[2]; uint32_t f[2]; - uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; uint8_t last_node; } blake2s_state; - typedef struct __blake2b_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint64_t node_offset; // 16 - uint8_t node_depth; // 17 - uint8_t inner_length; // 18 - uint8_t reserved[14]; // 32 - uint8_t salt[BLAKE2B_SALTBYTES]; // 48 - uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64 - } blake2b_param; - - typedef struct __blake2b_state + typedef struct blake2b_state__ { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; - uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; uint8_t last_node; } blake2b_state; - typedef struct __blake2sp_state + typedef struct blake2sp_state__ { blake2s_state S[8][1]; blake2s_state R[1]; - uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2sp_state; - typedef struct __blake2bp_state + typedef struct blake2bp_state__ { blake2b_state S[4][1]; blake2b_state R[1]; - uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2bp_state; -#pragma pack(pop) - // 
Streaming API + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint16_t xof_length; /* 14 */ + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint32_t xof_length; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + }); + + typedef struct blake2b_param__ blake2b_param; + + typedef struct blake2xs_state__ + { + blake2s_state S[1]; + blake2s_param P[1]; + } blake2xs_state; + + typedef struct blake2xb_state__ + { + blake2b_state S[1]; + blake2b_param P[1]; + } blake2xb_state; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES) + }; + + /* Streaming API */ BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen ); BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2s_final( blake2s_state *S, 
void *out, size_t outlen ); BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen ); BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2b_final( blake2b_state *S, void *out, size_t outlen ); BLAKE2_API int blake2sp_init( blake2sp_state *S, size_t outlen ); BLAKE2_API int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); BLAKE2_API int blake2bp_init( blake2bp_state *S, size_t outlen ); BLAKE2_API int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); - // Simple API - BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + /* Variable output length API */ + BLAKE2_API int blake2xs_init( blake2xs_state *S, const size_t outlen ); + BLAKE2_API int 
blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2xs_final(blake2xs_state *S, void *out, size_t outlen); - BLAKE2_API int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + BLAKE2_API int blake2xb_init( blake2xb_state *S, const size_t outlen ); + BLAKE2_API int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2xb_final(blake2xb_state *S, void *out, size_t outlen); - static inline int blake2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) - { - return blake2b( out, in, key, outlen, inlen, keylen ); - } + /* Simple API */ + BLAKE2_API int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + BLAKE2_API int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + BLAKE2_API int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + /* This is simply an alias for blake2b */ + BLAKE2_API int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif #endif - diff --git a/src/blake2b-load-sse2.h 
b/src/blake2b-load-sse2.h index 1ba153c..23a8d40 100644 --- a/src/blake2b-load-sse2.h +++ b/src/blake2b-load-sse2.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE2_H__ -#define __BLAKE2B_LOAD_SSE2_H__ +#ifndef BLAKE2B_LOAD_SSE2_H +#define BLAKE2B_LOAD_SSE2_H #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) @@ -65,4 +66,3 @@ #endif - diff --git a/src/blake2b-load-sse41.h b/src/blake2b-load-sse41.h index f6c1bc8..0eca865 100644 --- a/src/blake2b-load-sse41.h +++ b/src/blake2b-load-sse41.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE41_H__ -#define __BLAKE2B_LOAD_SSE41_H__ +#ifndef BLAKE2B_LOAD_SSE41_H +#define BLAKE2B_LOAD_SSE41_H #define LOAD_MSG_0_1(b0, b1) \ do \ @@ -399,4 +400,3 @@ b1 = _mm_unpackhi_epi64(m3, m1); \ #endif - diff --git a/src/blake2b-ref.c b/src/blake2b-ref.c index b153bcb..eae0275 100644 --- a/src/blake2b-ref.c +++ b/src/blake2b-ref.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -43,106 +45,36 @@ static const uint8_t blake2b_sigma[12][16] = }; -static inline int blake2b_set_lastnode( blake2b_state *S ) +static void blake2b_set_lastnode( blake2b_state *S ) { - S->f[1] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0ULL; - return 0; + S->f[1] = (uint64_t)-1; } /* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastblock( blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { - if( S->last_node ) blake2b_set_lastnode( S ); - - S->f[0] = ~0ULL; - return 0; + return S->f[0] != 0; } -static inline int blake2b_clear_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { - if( S->last_node ) blake2b_clear_lastnode( S ); + if( S->last_node ) blake2b_set_lastnode( S ); - S->f[0] = 0ULL; - return 0; + S->f[0] = (uint64_t)-1; } -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; -} - - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; } -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - store64( &P->node_offset, node_offset ); - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 
-{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) +static void blake2b_init0( blake2b_state *S ) { + size_t i; memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; } #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) @@ -151,16 +83,18 @@ static inline int blake2b_init0( blake2b_state *S ) #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) #define blake2b BLAKE2_IMPL_NAME(blake2b) +#define blake2 BLAKE2_IMPL_NAME(blake2) #if defined(__cplusplus) extern "C" { #endif int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2( void 
*out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -168,11 +102,13 @@ extern "C" { /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { + const uint8_t *p = ( const uint8_t * )( P ); + size_t i; + blake2b_init0( S ); - uint8_t *p = ( uint8_t * )( P ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); S->outlen = P->digest_length; @@ -187,12 +123,13 @@ int blake2b_init( blake2b_state *S, size_t outlen ) if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - P->digest_length = ( uint8_t ) outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; memset( P->reserved, 0, sizeof( P->reserved ) ); @@ -210,12 +147,13 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; memset( P->reserved, 0, sizeof( P->reserved ) ); @@ -234,48 +172,53 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k return 0; } -static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = 
rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) { uint64_t m[16]; uint64_t v[16]; size_t i; - for( i = 0; i < 16; ++i ) + for( i = 0; i < 16; ++i ) { m[i] = load64( block + i * sizeof( m[i] ) ); + } - for( i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { v[i] = S->h[i]; + } v[ 8] = blake2b_IV[0]; v[ 9] = blake2b_IV[1]; v[10] = blake2b_IV[2]; v[11] = blake2b_IV[3]; - v[12] = S->t[0] ^ blake2b_IV[4]; - v[13] = S->t[1] ^ blake2b_IV[5]; - v[14] = S->f[0] ^ blake2b_IV[6]; - v[15] = S->f[1] ^ blake2b_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -289,73 +232,67 @@ static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCK ROUND( 10 ); ROUND( 11 ); - for( i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - 
return 0; -} - -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2B_OUTBYTES]; + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; size_t i; - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } + if( blake2b_is_lastblock( S ) ) + return -1; blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - 
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -379,8 +316,11 @@ int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2b_init( S, outlen ) < 0 ) return -1; } - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0 ) return -1; - return blake2b_final( S, out, outlen ); + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; } - +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} diff --git a/src/blake2b-round.h b/src/blake2b-round.h index cebc225..6537fff 100644 --- a/src/blake2b-round.h +++ b/src/blake2b-round.h @@ -1,23 +1,21 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_ROUND_H__ -#define __BLAKE2B_ROUND_H__ +#ifndef BLAKE2B_ROUND_H +#define BLAKE2B_ROUND_H -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) #define TOF(reg) _mm_castsi128_ps((reg)) @@ -62,7 +60,7 @@ \ row2l = _mm_roti_epi64(row2l, -24); \ row2h = _mm_roti_epi64(row2h, -24); \ - + #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ @@ -81,7 +79,7 @@ \ row2l = _mm_roti_epi64(row2l, -63); \ row2h = _mm_roti_epi64(row2h, -63); \ - + #if defined(HAVE_SSSE3) #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ t0 = _mm_alignr_epi8(row2h, row2l, 8); \ @@ -138,7 +136,7 @@ #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include "blake2b-load-sse41.h" #else #include "blake2b-load-sse2.h" @@ -157,4 +155,3 @@ UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); #endif - diff --git a/src/blake2b-test.c b/src/blake2b-test.c index 9310a27..18371e4 100644 --- a/src/blake2b-test.c +++ b/src/blake2b-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2B_OUTBYTES]; - if( blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || + if( blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ) < 0 || 0 != memcmp( hash, 
blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2b.c b/src/blake2b.c index ca15046..216396c 100644 --- a/src/blake2b.c +++ b/src/blake2b.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -20,25 +22,15 @@ #include "blake2-config.h" -#if defined(_MSC_VER) -#include +#ifdef _MSC_VER +#include /* for _mm_set_epi64x */ #endif -#if defined(HAVE_SSE2) #include -// MSVC only defines _mm_set_epi64x for x86_64... 
-#if defined(_MSC_VER) && !defined(_M_X64) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - #if defined(HAVE_SSSE3) #include #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include #endif #if defined(HAVE_AVX) @@ -48,8 +40,6 @@ static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) #include #endif - - #include "blake2b-round.h" static const uint64_t blake2b_IV[8] = @@ -60,134 +50,30 @@ static const uint64_t blake2b_IV[8] = 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; -static const uint8_t blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastnode( blake2b_state *S ) +/* Some helper functions */ +static void blake2b_set_lastnode( blake2b_state *S ) { - S->f[1] = ~0ULL; - return 0; + S->f[1] = (uint64_t)-1; } -static inline int blake2b_clear_lastnode( blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { - S->f[1] = 0ULL; - return 0; + return S->f[0] != 0; } -static inline int blake2b_set_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { if( S->last_node ) blake2b_set_lastnode( S 
); - S->f[0] = ~0ULL; - return 0; + S->f[0] = (uint64_t)-1; } -static inline int blake2b_clear_lastblock( blake2b_state *S ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0ULL; - return 0; -} - - -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) -{ -#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) - // ADD/ADC chain - __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; - t += inc; - S->t[0] = ( uint64_t )( t >> 0 ); - S->t[1] = ( uint64_t )( t >> 64 ); -#else S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); -#endif - return 0; -} - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - P->node_offset = node_offset; - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; } -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t 
personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) -{ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; -} - - #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param) @@ -195,16 +81,18 @@ static inline int blake2b_init0( blake2b_state *S ) #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) #define blake2b BLAKE2_IMPL_NAME(blake2b) +#define blake2 BLAKE2_IMPL_NAME(blake2) #if defined(__cplusplus) extern "C" { #endif int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -212,15 +100,15 @@ extern "C" { /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { - uint8_t *p, *h, *v; - //blake2b_init0( S ); - v = ( uint8_t * )( blake2b_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); + size_t i; + /*blake2b_init0( S ); */ + const unsigned char * v = ( const 
unsigned char * )( blake2b_IV ); + const unsigned char * p = ( const unsigned char * )( P ); + unsigned char * h = ( unsigned char * )( S->h ); /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; S->outlen = P->digest_length; return 0; @@ -228,50 +116,50 @@ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) /* Some sort of default parameter block initialization, for sequential blake2b */ - int blake2b_init( blake2b_state *S, size_t outlen ) { + blake2b_param P[1]; + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - const blake2b_param P = - { - ( uint8_t ) outlen, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - return blake2b_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2b_init_param( S, P ); } int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) { + blake2b_param P[1]; + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; - const blake2b_param P = - { - ( uint8_t ) outlen, - ( uint8_t ) keylen, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - - if( blake2b_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( 
P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return 0; { @@ -284,7 +172,7 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k return 0; } -static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) { __m128i row1l, row1h; __m128i row2l, row2h; @@ -296,7 +184,7 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) const __m128i m0 = LOADU( block + 00 ); const __m128i m1 = LOADU( block + 16 ); const __m128i m2 = LOADU( block + 32 ); @@ -306,22 +194,22 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 const __m128i m6 = LOADU( block + 96 ); const __m128i m7 = LOADU( block + 112 ); #else - const uint64_t m0 = ( ( uint64_t * )block )[ 0]; - const uint64_t m1 = ( ( uint64_t * )block )[ 1]; - const uint64_t m2 = ( ( uint64_t * )block )[ 2]; - const uint64_t m3 = ( ( uint64_t * )block )[ 3]; - const uint64_t m4 = ( ( uint64_t * )block )[ 4]; - const uint64_t m5 = ( ( uint64_t * )block )[ 5]; - const uint64_t m6 = ( ( uint64_t * )block )[ 6]; - const uint64_t m7 = ( ( uint64_t * )block )[ 7]; - const uint64_t m8 = ( ( uint64_t * )block )[ 8]; - const uint64_t m9 = ( ( uint64_t * )block )[ 9]; - const uint64_t m10 = ( ( uint64_t * )block )[10]; - const uint64_t m11 = ( ( uint64_t * )block )[11]; - const uint64_t m12 = ( ( uint64_t * )block )[12]; - const uint64_t m13 = ( ( uint64_t * )block )[13]; - const uint64_t m14 = ( ( uint64_t * )block )[14]; - const uint64_t m15 = ( ( uint64_t * )block )[15]; + const uint64_t m0 = load64(block + 0 * sizeof(uint64_t)); + 
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t)); + const uint64_t m2 = load64(block + 2 * sizeof(uint64_t)); + const uint64_t m3 = load64(block + 3 * sizeof(uint64_t)); + const uint64_t m4 = load64(block + 4 * sizeof(uint64_t)); + const uint64_t m5 = load64(block + 5 * sizeof(uint64_t)); + const uint64_t m6 = load64(block + 6 * sizeof(uint64_t)); + const uint64_t m7 = load64(block + 7 * sizeof(uint64_t)); + const uint64_t m8 = load64(block + 8 * sizeof(uint64_t)); + const uint64_t m9 = load64(block + 9 * sizeof(uint64_t)); + const uint64_t m10 = load64(block + 10 * sizeof(uint64_t)); + const uint64_t m11 = load64(block + 11 * sizeof(uint64_t)); + const uint64_t m12 = load64(block + 12 * sizeof(uint64_t)); + const uint64_t m13 = load64(block + 13 * sizeof(uint64_t)); + const uint64_t m14 = load64(block + 14 * sizeof(uint64_t)); + const uint64_t m15 = load64(block + 15 * sizeof(uint64_t)); #endif row1l = LOADU( &S->h[0] ); row1h = LOADU( &S->h[2] ); @@ -351,63 +239,56 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 row2h = _mm_xor_si128( row4h, row2h ); STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); - return 0; } -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 
- S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } + if( blake2b_is_lastblock( S ) ) + return -1; blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); - memcpy( out, &S->h[0], outlen ); + + memcpy( out, &S->h[0], S->outlen ); return 0; } -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -431,13 +312,11 @@ int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2b_init( S, outlen ) < 0 ) return -1; } - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2b_final( S, out, outlen ); + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, 
out, outlen ); + return 0; } -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); } -#endif diff --git a/src/blake2bp-test.c b/src/blake2bp-test.c index 849666c..aad3546 100644 --- a/src/blake2bp-test.c +++ b/src/blake2bp-test.c @@ -30,7 +30,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2B_OUTBYTES]; - if( blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || + if( blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2bp.c b/src/blake2bp.c index a861c6d..3eb95d0 100644 --- a/src/blake2bp.c +++ b/src/blake2bp.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -25,89 +27,103 @@ #define PARALLELISM_DEGREE 4 -static int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +/* + blake2b_init_param defaults to setting the expecting output length + from the digest_length parameter block field. + + In some cases, however, we do not want this, as the output length + of these instances is given by inner_length instead. +*/ +static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P ) +{ + int err = blake2b_init_param(S, P); + S->outlen = P->inner_length; + return err; +} + +static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; P->node_offset = offset; + P->xof_length = 0; P->node_depth = 0; P->inner_length = BLAKE2B_OUTBYTES; memset( P->reserved, 0, sizeof( P->reserved ) ); memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2b_init_param( S, P ); - S->outlen = P->inner_length; - return 0; + return blake2bp_init_leaf_param( S, P ); } -static int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; P->node_offset = 0; + P->xof_length = 0; P->node_depth = 1; P->inner_length = BLAKE2B_OUTBYTES; memset( P->reserved, 0, sizeof( P->reserved ) ); memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2b_init_param( S, P ); - S->outlen = P->digest_length; - return 0; + return blake2b_init_param( S, P ); } int 
blake2bp_init( blake2bp_state *S, size_t outlen ) { + size_t i; if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2bp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) + if( blake2bp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; return 0; } int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2bp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2bp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; { uint8_t block[BLAKE2B_BLOCKBYTES]; memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -116,16 +132,18 @@ int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t } -int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) +int 
blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); in += fill; @@ -134,22 +152,22 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S) + #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } @@ -161,19 +179,22 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) if( inlen > 0 ) memcpy( S->buf + left, in, inlen ); - S->buflen = ( uint32_t ) left + ( uint32_t ) inlen; + S->buflen = left + inlen; return 0; } -int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ) +int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; + size_t i; - if(S->outlen != outlen) return -1; + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < 
PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2B_BLOCKBYTES ) { @@ -187,34 +208,34 @@ int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ) blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES ); - return blake2b_final( S->R, out, outlen ); + return blake2b_final( S->R, out, S->outlen ); } -int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; blake2b_state S[PARALLELISM_DEGREE][1]; blake2b_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0) return -1; + if( NULL == key && keylen > 0 ) return -1; if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( keylen > BLAKE2B_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; - S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node + S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ if( keylen > 0 ) { @@ -222,53 +243,119 @@ int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - 
#pragma omp parallel shared(S,hash) + #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2B_BLOCKBYTES ) + if( inlen__ > i * BLAKE2B_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES; const size_t len = left <= BLAKE2B_BLOCKBYTES ? 
left : BLAKE2B_BLOCKBYTES; - blake2b_update( S[id__], in__, len ); + blake2b_update( S[i], in__, len ); } - blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES ); + blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES ); } - if( blake2bp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2bp_init_root( FS, outlen, keylen ) < 0 ) return -1; - FS->last_node = 1; // Mark as last node + FS->last_node = 1; /* Mark as last node */ - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES ); return blake2b_final( FS, out, outlen ); } +#if defined(BLAKE2BP_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); + if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2bp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2bp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} 
+#endif diff --git a/src/blake2s-load-sse2.h b/src/blake2s-load-sse2.h index b24483c..d2e9a09 100644 --- a/src/blake2s-load-sse2.h +++ b/src/blake2s-load-sse2.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE2_H__ -#define __BLAKE2S_LOAD_SSE2_H__ +#ifndef BLAKE2S_LOAD_SSE2_H +#define BLAKE2S_LOAD_SSE2_H #define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) #define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) diff --git a/src/blake2s-load-sse41.h b/src/blake2s-load-sse41.h index 3ac12eb..c316fb5 100644 --- a/src/blake2s-load-sse41.h +++ b/src/blake2s-load-sse41.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE41_H__ -#define __BLAKE2S_LOAD_SSE41_H__ +#ifndef BLAKE2S_LOAD_SSE41_H +#define BLAKE2S_LOAD_SSE41_H #define LOAD_MSG_0_1(buf) \ buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); @@ -226,4 +227,3 @@ t2 = _mm_blend_epi16(t0,t1,0x0F); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3)); #endif - diff --git a/src/blake2s-load-xop.h b/src/blake2s-load-xop.h index ac591a7..a97ddcc 100644 --- a/src/blake2s-load-xop.h +++ b/src/blake2s-load-xop.h @@ -1,31 +1,34 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_XOP_H__ -#define __BLAKE2S_LOAD_XOP_H__ +#ifndef BLAKE2S_LOAD_XOP_H +#define BLAKE2S_LOAD_XOP_H -#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB +#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ +#if 0 /* Basic VPPERM emulation, for testing purposes */ -/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) +static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) { const __m128i sixteen = _mm_set1_epi8(16); const __m128i t0 = _mm_shuffle_epi8(src1, sel); const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen)); const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), - _mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00 + _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */ return _mm_blendv_epi8(t0, s1, mask); -}*/ +} +#endif #define LOAD_MSG_0_1(buf) \ buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); @@ -166,7 +169,7 @@ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); #define LOAD_MSG_8_3(buf) \ t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \ - + #define LOAD_MSG_8_4(buf) \ buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) ); @@ -186,4 +189,3 @@ t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) ); #endif - diff --git a/src/blake2s-ref.c 
b/src/blake2s-ref.c index 38bfdbe..6bbb30b 100644 --- a/src/blake2s-ref.c +++ b/src/blake2s-ref.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -38,104 +40,36 @@ static const uint8_t blake2s_sigma[10][16] = { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , }; -static inline int blake2s_set_lastnode( blake2s_state *S ) +static void blake2s_set_lastnode( blake2s_state *S ) { - S->f[1] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0U; - return 0; + S->f[1] = (uint32_t)-1; } /* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastblock( blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { - if( S->last_node ) blake2s_set_lastnode( S ); - - S->f[0] = ~0U; - return 0; + return S->f[0] != 0; } -static inline int blake2s_clear_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { - if( S->last_node ) blake2s_clear_lastnode( S ); + if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = 0U; - return 0; + S->f[0] = (uint32_t)-1; } -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; } -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int 
blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) +static void blake2s_init0( blake2s_state *S ) { + size_t i; memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; } #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) @@ -149,11 +83,11 @@ static inline int blake2s_init0( blake2s_state *S ) extern "C" { #endif int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -161,19 +95,21 @@ extern "C" { /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, 
const blake2s_param *P ) { + const unsigned char *p = ( const unsigned char * )( P ); + size_t i; + blake2s_init0( S ); - uint32_t *p = ( uint32_t * )( P ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) - S->h[i] ^= load32( &p[i] ); + for( i = 0; i < 8; ++i ) + S->h[i] ^= load32( &p[i * 4] ); S->outlen = P->digest_length; return 0; } -// Sequential blake2s initialization +/* Sequential blake2s initialization */ int blake2s_init( blake2s_state *S, size_t outlen ) { blake2s_param P[1]; @@ -181,15 +117,16 @@ int blake2s_init( blake2s_state *S, size_t outlen ) /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - P->digest_length = ( uint8_t) outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); return blake2s_init_param( S, P ); @@ -203,15 +140,16 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t k if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); @@ -227,16 +165,43 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const 
void *key, size_t k return 0; } -static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] ) { uint32_t m[16]; uint32_t v[16]; + size_t i; - for( size_t i = 0; i < 16; ++i ) - m[i] = load32( block + i * sizeof( m[i] ) ); + for( i = 0; i < 16; ++i ) { + m[i] = load32( in + i * sizeof( m[i] ) ); + } - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { v[i] = S->h[i]; + } v[ 8] = blake2s_IV[0]; v[ 9] = blake2s_IV[1]; @@ -246,28 +211,7 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK v[13] = S->t[1] ^ blake2s_IV[5]; v[14] = S->f[0] ^ blake2s_IV[6]; v[15] = S->f[1] ^ blake2s_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = rotr32(d ^ a, 16); \ - c = c + d; \ - b = rotr32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = rotr32(d ^ a, 8); \ - c = c + d; \ - b = rotr32(b ^ c, 7); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ 
-279,73 +223,66 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK ROUND( 8 ); ROUND( 9 ); - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - return 0; -} - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2S_OUTBYTES]; + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; size_t i; - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES 
); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } + if( blake2s_is_lastblock( S ) ) + return -1; blake2s_increment_counter( S, ( uint32_t )S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); memcpy( out, buffer, outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -354,7 +291,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0 ) return -1; + if ( NULL == key && keylen > 0) return -1; if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; @@ -369,7 +306,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2s_init( S, outlen ) < 0 ) return -1; } - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; } - diff --git a/src/blake2s-round.h b/src/blake2s-round.h index 1e2f2b7..44a5574 100644 --- a/src/blake2s-round.h +++ b/src/blake2s-round.h @@ -1,23 +1,21 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_ROUND_H__ -#define __BLAKE2S_ROUND_H__ +#ifndef BLAKE2S_ROUND_H +#define BLAKE2S_ROUND_H -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) #define TOF(reg) _mm_castsi128_ps((reg)) @@ -69,7 +67,7 @@ #if defined(HAVE_XOP) #include "blake2s-load-xop.h" -#elif defined(HAVE_SSE4_1) +#elif defined(HAVE_SSE41) #include "blake2s-load-sse41.h" #else #include "blake2s-load-sse2.h" @@ -86,6 +84,5 @@ LOAD_MSG_ ##r ##_4(buf4); \ G2(row1,row2,row3,row4,buf4); \ UNDIAGONALIZE(row1,row2,row3,row4); \ - -#endif +#endif diff --git a/src/blake2s-test.c b/src/blake2s-test.c index 5c3f1f1..9a75bf3 100644 --- a/src/blake2s-test.c +++ b/src/blake2s-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2S_OUTBYTES]; - if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + if( blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2s.c b/src/blake2s.c index 0c3636e..97af9f8 100644 --- a/src/blake2s.c +++ b/src/blake2s.c 
@@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -24,22 +26,11 @@ #include #endif -#if defined(HAVE_SSE2) #include -// MSVC only defines _mm_set_epi64x for x86_64... 
-#if defined(_MSC_VER) && !defined(_M_X64) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - - #if defined(HAVE_SSSE3) #include #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include #endif #if defined(HAVE_AVX) @@ -57,124 +48,33 @@ static const uint32_t blake2s_IV[8] = 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; -static const uint8_t blake2s_sigma[10][16] = +/* Some helper functions */ +static void blake2s_set_lastnode( blake2s_state *S ) { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastnode( blake2s_state *S ) -{ - S->f[1] = ~0U; - return 0; + S->f[1] = (uint32_t)-1; } -static inline int blake2s_clear_lastnode( blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { - S->f[1] = 0U; - return 0; + return S->f[0] != 0; } -static inline int blake2s_set_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = ~0U; - return 0; + S->f[0] = (uint32_t)-1; } -static inline int blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0U; - return 0; -} - -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t 
inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; t += inc; S->t[0] = ( uint32_t )( t >> 0 ); S->t[1] = ( uint32_t )( t >> 32 ); - return 0; } -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) -{ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; -} - #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param) #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key) @@ 
-186,11 +86,11 @@ static inline int blake2s_init0( blake2s_state *S ) extern "C" { #endif int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -199,15 +99,15 @@ extern "C" { /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) { - uint8_t *p, *h, *v; - //blake2s_init0( S ); - v = ( uint8_t * )( blake2s_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); + size_t i; + /*blake2s_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2s_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; S->outlen = P->digest_length; return 0; @@ -217,46 +117,51 @@ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) /* Some sort of default parameter block initialization, for sequential blake2s */ int blake2s_init( blake2s_state *S, size_t outlen ) { + blake2s_param P[1]; + + /* Move interval verification here? 
*/ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - const blake2s_param P = - { - outlen, - 0, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - return blake2s_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2s_init_param( S, P ); } int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) { + blake2s_param P[1]; + + /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; - const blake2s_param P = - { - outlen, - keylen, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - - if( blake2s_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; { @@ -270,11 +175,11 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t k } -static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) { __m128i row1, row2, row3, row4; __m128i buf1, buf2, buf3, buf4; -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) __m128i t0, t1; #if !defined(HAVE_XOP) __m128i t2; @@ -285,33 +190,33 @@ static inline 
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2 const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) const __m128i m0 = LOADU( block + 00 ); const __m128i m1 = LOADU( block + 16 ); const __m128i m2 = LOADU( block + 32 ); const __m128i m3 = LOADU( block + 48 ); #else - const uint32_t m0 = ( ( uint32_t * )block )[ 0]; - const uint32_t m1 = ( ( uint32_t * )block )[ 1]; - const uint32_t m2 = ( ( uint32_t * )block )[ 2]; - const uint32_t m3 = ( ( uint32_t * )block )[ 3]; - const uint32_t m4 = ( ( uint32_t * )block )[ 4]; - const uint32_t m5 = ( ( uint32_t * )block )[ 5]; - const uint32_t m6 = ( ( uint32_t * )block )[ 6]; - const uint32_t m7 = ( ( uint32_t * )block )[ 7]; - const uint32_t m8 = ( ( uint32_t * )block )[ 8]; - const uint32_t m9 = ( ( uint32_t * )block )[ 9]; - const uint32_t m10 = ( ( uint32_t * )block )[10]; - const uint32_t m11 = ( ( uint32_t * )block )[11]; - const uint32_t m12 = ( ( uint32_t * )block )[12]; - const uint32_t m13 = ( ( uint32_t * )block )[13]; - const uint32_t m14 = ( ( uint32_t * )block )[14]; - const uint32_t m15 = ( ( uint32_t * )block )[15]; + const uint32_t m0 = load32(block + 0 * sizeof(uint32_t)); + const uint32_t m1 = load32(block + 1 * sizeof(uint32_t)); + const uint32_t m2 = load32(block + 2 * sizeof(uint32_t)); + const uint32_t m3 = load32(block + 3 * sizeof(uint32_t)); + const uint32_t m4 = load32(block + 4 * sizeof(uint32_t)); + const uint32_t m5 = load32(block + 5 * sizeof(uint32_t)); + const uint32_t m6 = load32(block + 6 * sizeof(uint32_t)); + const uint32_t m7 = load32(block + 7 * sizeof(uint32_t)); + const uint32_t m8 = load32(block + 8 * sizeof(uint32_t)); + const uint32_t m9 = load32(block + 9 * sizeof(uint32_t)); + const uint32_t m10 = load32(block + 10 * sizeof(uint32_t)); + const uint32_t m11 = load32(block + 11 * 
sizeof(uint32_t)); + const uint32_t m12 = load32(block + 12 * sizeof(uint32_t)); + const uint32_t m13 = load32(block + 13 * sizeof(uint32_t)); + const uint32_t m14 = load32(block + 14 * sizeof(uint32_t)); + const uint32_t m15 = load32(block + 15 * sizeof(uint32_t)); #endif row1 = ff0 = LOADU( &S->h[0] ); row2 = ff1 = LOADU( &S->h[4] ); - row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); - row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -324,68 +229,61 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2 ROUND( 9 ); STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); - return 0; } - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= 
fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } - -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2S_OUTBYTES]; + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + size_t i; - if(outlen != S->outlen ) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } + if( blake2s_is_lastblock( S ) ) + return -1; - blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_increment_counter( S, (uint32_t)S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory( buffer, sizeof(buffer) ); return 0; } -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +/* inlen, at least, should be uint64_t. Others can be size_t. 
*/ +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -409,14 +307,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2s_init( S, outlen ) < 0 ) return -1; } - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); -} - -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 ); + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; } -#endif - diff --git a/src/blake2sp-test.c b/src/blake2sp-test.c index 621e350..55c6882 100644 --- a/src/blake2sp-test.c +++ b/src/blake2sp-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) for( size_t i = 0; i < KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; - if( blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + if( blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2sp.c b/src/blake2sp.c index 2f32bf3..ed0e1ad 100644 --- a/src/blake2sp.c +++ b/src/blake2sp.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -24,87 +26,102 @@ #define PARALLELISM_DEGREE 8 -static int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +/* + blake2sp_init_param defaults to setting the expecting output length + from the digest_length parameter block field. + + In some cases, however, we do not want this, as the output length + of these instances is given by inner_length instead. +*/ +static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P ) +{ + int err = blake2s_init_param(S, P); + S->outlen = P->inner_length; + return err; +} + +static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; - store48( P->node_offset, offset ); + P->node_offset = offset; + P->xof_length = 0; P->node_depth = 0; P->inner_length = BLAKE2S_OUTBYTES; memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2s_init_param( S, P ); - S->outlen = P->inner_length; - return 0; + return blake2sp_init_leaf_param( S, P ); } -static int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; 
P->depth = 2; P->leaf_length = 0; - store48( P->node_offset, 0ULL ); + P->node_offset = 0; + P->xof_length = 0; P->node_depth = 1; P->inner_length = BLAKE2S_OUTBYTES; memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2s_init_param( S, P ); - S->outlen = P->digest_length; - return 0; + return blake2s_init_param( S, P ); } int blake2sp_init( blake2sp_state *S, size_t outlen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2sp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) + if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; return 0; } int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2sp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; { uint8_t block[BLAKE2S_BLOCKBYTES]; memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < 
PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -113,16 +130,18 @@ int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t } -int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) +int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); in += fill; @@ -131,22 +150,22 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S) + #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } @@ -158,18 +177,21 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) if( inlen > 0 ) memcpy( S->buf + left, in, inlen ); - S->buflen = ( uint32_t ) left + ( 
uint32_t ) inlen; + S->buflen = left + inlen; return 0; } -int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ) +int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; + size_t i; - if(S->outlen != outlen) return -1; + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2S_BLOCKBYTES ) { @@ -183,36 +205,35 @@ int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ) blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ); - blake2s_final( S->R, out, outlen ); - return 0; + return blake2s_final( S->R, out, S->outlen ); } -int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0 ) return -1; + if ( NULL == key && keylen > 0) return -1; if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( keylen > BLAKE2S_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; - S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node + S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ if( keylen > 0 ) { @@ -220,55 +241,118 @@ int blake2sp( uint8_t *out, const void *in, const void *key, 
size_t outlen, size memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S,hash) + #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) + if( inlen__ > i * BLAKE2S_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? 
left : BLAKE2S_BLOCKBYTES; - blake2s_update( S[id__], in__, len ); + blake2s_update( S[i], in__, len ); } - blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); + blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES ); } - if( blake2sp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2sp_init_root( FS, outlen, keylen ) < 0 ) return -1; FS->last_node = 1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); return blake2s_final( FS, out, outlen ); } +#if defined(BLAKE2SP_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2sp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2sp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/blake2xb.c b/src/blake2xb.c new file mode 100644 
index 0000000..970d2e2
--- /dev/null
+++ b/src/blake2xb.c
@@ -0,0 +1,199 @@
+/*
+   BLAKE2 reference source code package - reference C implementations
+
+   Copyright 2016, JP Aumasson .
+   Copyright 2016, Samuel Neves .
+
+   You may use this under the terms of the CC0, the OpenSSL Licence, or
+   the Apache Public License 2.0, at your option.  The terms of these
+   licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/*
+  Initialize S for unkeyed BLAKE2Xb output of outlen bytes.
+  Forwards to blake2xb_init_key with no key and returns its result.
+*/
+int blake2xb_init( blake2xb_state *S, const size_t outlen ) {
+  return blake2xb_init_key(S, outlen, NULL, 0);
+}
+
+/*
+  Initialize S for BLAKE2Xb, optionally keyed.
+
+  outlen is stored in the xof_length field of the parameter block and must
+  lie in 1..0xFFFFFFFF.  key may be NULL only when keylen is 0, and keylen
+  may not exceed BLAKE2B_KEYBYTES.
+
+  Returns 0 on success, -1 on a rejected argument or when
+  blake2b_init_param fails.
+*/
+int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
+{
+  if ( outlen == 0 || outlen > 0xFFFFFFFFUL ) {
+    return -1;
+  }
+
+  if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
+    return -1;
+  }
+
+  if (NULL == key && keylen > 0) {
+    return -1;
+  }
+
+  /* Initialize parameter block */
+  S->P->digest_length = BLAKE2B_OUTBYTES;
+  S->P->key_length    = (uint8_t)keylen;
+  S->P->fanout        = 1;
+  S->P->depth         = 1;
+  store32( &S->P->leaf_length, 0 );
+  store32( &S->P->node_offset, 0 );
+  store32( &S->P->xof_length, (uint32_t)outlen );
+  S->P->node_depth    = 0;
+  S->P->inner_length  = 0;
+  memset( S->P->reserved, 0, sizeof( S->P->reserved ) );
+  memset( S->P->salt,     0, sizeof( S->P->salt ) );
+  memset( S->P->personal, 0, sizeof( S->P->personal ) );
+
+  if( blake2b_init_param( S->S, S->P ) < 0 ) {
+    return -1;
+  }
+
+  if (keylen > 0) {
+    /* The key is absorbed as one zero-padded block, then wiped from the stack */
+    uint8_t block[BLAKE2B_BLOCKBYTES];
+    memset(block, 0, BLAKE2B_BLOCKBYTES);
+    memcpy(block, key, keylen);
+    blake2b_update(S->S, block, BLAKE2B_BLOCKBYTES);
+    secure_zero_memory(block, BLAKE2B_BLOCKBYTES);
+  }
+  return 0;
+}
+
+/* Absorb inlen bytes of in; forwards to blake2b_update and returns its result. */
+int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ) {
+  return blake2b_update( S->S, in, inlen );
+}
+
+/*
+  Finish the root hash and expand it into outlen bytes written to out.
+
+  Each output block i is a BLAKE2b hash of the root hash under a parameter
+  block whose node_offset is i and whose digest_length is the size of that
+  block (BLAKE2B_OUTBYTES, or less for the last one).
+
+  Returns 0 on success, -1 if out is NULL, outlen does not agree with the
+  configured xof_length, or an underlying BLAKE2b call fails.  Once the
+  expansion loop is reached, temporaries are wiped on every exit path.
+*/
+int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {
+  blake2b_state C[1];
+  blake2b_param P[1];
+  uint32_t xof_length = load32(&S->P->xof_length);
+  uint8_t root[BLAKE2B_BLOCKBYTES];
+  size_t i;
+  int rc = 0;
+
+  if (NULL == out) {
+    return -1;
+  }
+
+  /* outlen must match the output size defined in xof_length, */
+  /* unless it was -1, in which case anything goes except 0. */
+  if(xof_length == 0xFFFFFFFFUL) {
+    if(outlen == 0) {
+      return -1;
+    }
+  } else {
+    if(outlen != xof_length) {
+      return -1;
+    }
+  }
+
+  /* Finalize the root hash */
+  if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
+    return -1;
+  }
+
+  /* Set common block structure values */
+  /* Copy values from parent instance, and only change the ones below */
+  memcpy(P, S->P, sizeof(blake2b_param));
+  P->key_length = 0;
+  P->fanout = 0;
+  P->depth = 0;
+  store32(&P->leaf_length, BLAKE2B_OUTBYTES);
+  P->inner_length = BLAKE2B_OUTBYTES;
+  P->node_depth = 0;
+
+  for (i = 0; outlen > 0; ++i) {
+    const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;
+    /* Initialize state */
+    P->digest_length = (uint8_t)block_size;
+    store32(&P->node_offset, (uint32_t)i);
+    /* Hash the root hash into this output block; stop at the first failure */
+    if (blake2b_init_param(C, P) < 0 ||
+        blake2b_update(C, root, BLAKE2B_OUTBYTES) < 0 ||
+        blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0) {
+      rc = -1;
+      break;
+    }
+    outlen -= block_size;
+  }
+  secure_zero_memory(root, sizeof(root));
+  secure_zero_memory(P, sizeof(P));
+  secure_zero_memory(C, sizeof(C));
+  /* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
+  return rc;
+}
+
+/*
+  One-shot BLAKE2Xb: hash inlen bytes of in, optionally keyed, and write
+  outlen bytes to out.  Returns 0 on success, -1 on failure.
+*/
+int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
+{
+  blake2xb_state S[1];
+
+  /* Verify parameters */
+  if (NULL == in && inlen > 0)
+    return -1;
+
+  if (NULL == out)
+    return -1;
+
+  if (NULL == key && keylen > 0)
+    return -1;
+
+  if (keylen > BLAKE2B_KEYBYTES)
+    return -1;
+
+  if (outlen == 0)
+    return -1;
+
+  /* Initialize the root block structure */
+  if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
+    return -1;
+  }
+
+  /* Absorb the input message */
+  if (blake2xb_update(S, in, inlen) < 0) {
+    return -1;
+  }
+
+  /* Compute the root node of the tree and the final hash using the counter construction */
+  return blake2xb_final(S, out, outlen);
+}
diff --git a/src/blake2xs.c b/src/blake2xs.c
new file mode 100644
index 0000000..529d36a
--- /dev/null
+++ b/src/blake2xs.c
@@ -0,0 +1,198 @@
+/*
+   BLAKE2 reference source code package - reference C implementations
+
+   Copyright 2016, JP Aumasson .
+   Copyright 2016, Samuel Neves .
+
+   You may use this under the terms of the CC0, the OpenSSL Licence, or
+   the Apache Public License 2.0, at your option.  The terms of these
+   licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/*
+  Initialize S for unkeyed BLAKE2Xs output of outlen bytes.
+  Forwards to blake2xs_init_key with no key and returns its result.
+*/
+int blake2xs_init( blake2xs_state *S, const size_t outlen ) {
+  return blake2xs_init_key(S, outlen, NULL, 0);
+}
+
+/*
+  Initialize S for BLAKE2Xs, optionally keyed.
+
+  outlen is stored in the xof_length field of the parameter block and must
+  lie in 1..0xFFFF.  key may be NULL only when keylen is 0, and keylen
+  may not exceed BLAKE2S_KEYBYTES (the same limit blake2xs enforces).
+
+  Returns 0 on success, -1 on a rejected argument or when
+  blake2s_init_param fails.
+*/
+int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
+{
+  if ( outlen == 0 || outlen > 0xFFFFUL ) {
+    return -1;
+  }
+
+  if (NULL != key && keylen > BLAKE2S_KEYBYTES) {
+    return -1;
+  }
+
+  if (NULL == key && keylen > 0) {
+    return -1;
+  }
+
+  /* Initialize parameter block */
+  S->P->digest_length = BLAKE2S_OUTBYTES;
+  S->P->key_length    = (uint8_t)keylen;
+  S->P->fanout        = 1;
+  S->P->depth         = 1;
+  store32( &S->P->leaf_length, 0 );
+  store32( &S->P->node_offset, 0 );
+  store16( &S->P->xof_length, (uint16_t)outlen );
+  S->P->node_depth    = 0;
+  S->P->inner_length  = 0;
+  memset( S->P->salt,     0, sizeof( S->P->salt ) );
+  memset( S->P->personal, 0, sizeof( S->P->personal ) );
+
+  if( blake2s_init_param( S->S, S->P ) < 0 ) {
+    return -1;
+  }
+
+  if (keylen > 0) {
+    /* The key is absorbed as one zero-padded block, then wiped from the stack */
+    uint8_t block[BLAKE2S_BLOCKBYTES];
+    memset(block, 0, BLAKE2S_BLOCKBYTES);
+    memcpy(block, key, keylen);
+    blake2s_update(S->S, block, BLAKE2S_BLOCKBYTES);
+    secure_zero_memory(block, BLAKE2S_BLOCKBYTES);
+  }
+  return 0;
+}
+
+/* Absorb inlen bytes of in; forwards to blake2s_update and returns its result. */
+int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ) {
+  return blake2s_update( S->S, in, inlen );
+}
+
+/*
+  Finish the root hash and expand it into outlen bytes written to out.
+
+  Each output block i is a BLAKE2s hash of the root hash under a parameter
+  block whose node_offset is i and whose digest_length is the size of that
+  block (BLAKE2S_OUTBYTES, or less for the last one).
+
+  Returns 0 on success, -1 if out is NULL, outlen does not agree with the
+  configured xof_length, or an underlying BLAKE2s call fails.  Once the
+  expansion loop is reached, temporaries are wiped on every exit path.
+*/
+int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {
+  blake2s_state C[1];
+  blake2s_param P[1];
+  uint16_t xof_length = load16(&S->P->xof_length);
+  uint8_t root[BLAKE2S_BLOCKBYTES];
+  size_t i;
+  int rc = 0;
+
+  if (NULL == out) {
+    return -1;
+  }
+
+  /* outlen must match the output size defined in xof_length, */
+  /* unless it was -1, in which case anything goes except 0. */
+  if(xof_length == 0xFFFFUL) {
+    if(outlen == 0) {
+      return -1;
+    }
+  } else {
+    if(outlen != xof_length) {
+      return -1;
+    }
+  }
+
+  /* Finalize the root hash */
+  if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
+    return -1;
+  }
+
+  /* Set common block structure values */
+  /* Copy values from parent instance, and only change the ones below */
+  memcpy(P, S->P, sizeof(blake2s_param));
+  P->key_length = 0;
+  P->fanout = 0;
+  P->depth = 0;
+  store32(&P->leaf_length, BLAKE2S_OUTBYTES);
+  P->inner_length = BLAKE2S_OUTBYTES;
+  P->node_depth = 0;
+
+  for (i = 0; outlen > 0; ++i) {
+    const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;
+    /* Initialize state */
+    P->digest_length = (uint8_t)block_size;
+    store32(&P->node_offset, (uint32_t)i);
+    /* Hash the root hash into this output block; stop at the first failure */
+    if (blake2s_init_param(C, P) < 0 ||
+        blake2s_update(C, root, BLAKE2S_OUTBYTES) < 0 ||
+        blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
+      rc = -1;
+      break;
+    }
+    outlen -= block_size;
+  }
+  secure_zero_memory(root, sizeof(root));
+  secure_zero_memory(P, sizeof(P));
+  secure_zero_memory(C, sizeof(C));
+  /* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
+  return rc;
+}
+
+/*
+  One-shot BLAKE2Xs: hash inlen bytes of in, optionally keyed, and write
+  outlen bytes to out.  Returns 0 on success, -1 on failure.
+*/
+int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
+{
+  blake2xs_state S[1];
+
+  /* Verify parameters */
+  if (NULL == in && inlen > 0)
+    return -1;
+
+  if (NULL == out)
+    return -1;
+
+  if (NULL == key && keylen > 0)
+    return -1;
+
+  if (keylen > BLAKE2S_KEYBYTES)
+    return -1;
+
+  if (outlen == 0)
+    return -1;
+
+  /* Initialize the root block structure */
+  if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
+    return -1;
+  }
+
+  /* Absorb the input message */
+  if (blake2xs_update(S, in, inlen) < 0) {
+    return -1;
+  }
+
+  /* Compute the root node of the tree and the final hash using the counter construction */
+  return blake2xs_final(S, out, outlen);
+}