Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ease integration of MAYO on embedded platforms #2

Merged
merged 4 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .cmake/flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ if(MSVC)
else()
set(STRICT_OPTIONS_CXX "${STRICT_OPTIONS_CXX} -std=c++14 -O2")
set(STRICT_OPTIONS_CPP "${STRICT_OPTIONS_CPP} -Wall -Wuninitialized -Wno-deprecated-declarations -Wno-missing-field-initializers")
set(STRICT_OPTIONS_C "${STRICT_OPTIONS_C} -O3 -Wstrict-prototypes -Wno-error=strict-prototypes -fvisibility=hidden -funroll-loops -Wno-error=implicit-function-declaration -Wno-error=attributes")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(STRICT_OPTIONS_C "${STRICT_OPTIONS_C} -O3")
endif()
set(STRICT_OPTIONS_C "${STRICT_OPTIONS_C} -Wstrict-prototypes -Wno-error=strict-prototypes -fvisibility=hidden -funroll-loops -Wno-error=implicit-function-declaration -Wno-error=attributes")
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(STRICT_OPTIONS_CPP "${STRICT_OPTIONS_CPP} -Wno-error=unknown-warning-option -Qunused-arguments -Wno-tautological-compare")
set(STRICT_OPTIONS_CPP "${STRICT_OPTIONS_CPP} -Wno-unused-function -Wno-pass-failed")
Expand Down
34 changes: 24 additions & 10 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,30 +92,44 @@ jobs:
rm -rf build
cmake -Bbuild -DENABLE_CT_TESTING=ON -DCMAKE_BUILD_TYPE=Debug -DMAYO_BUILD_TYPE=${{ matrix.mayo_build_type }} -DMAYO_MARCH="-march=haswell -maes"
cmake --build build
valgrind --track-origins=yes build/test/mayo_test_scheme_MAYO_1
valgrind --track-origins=yes build/test/mayo_test_scheme_MAYO_2
valgrind --track-origins=yes build/test/mayo_test_scheme_MAYO_3
valgrind --track-origins=yes build/test/mayo_test_scheme_MAYO_5
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_1
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_2
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_3
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_5
if: matrix.mayo_build_type == 'opt' || matrix.mayo_build_type == 'avx2'

- name: CT-Tests
run: |
rm -rf build
cmake -Bbuild -DENABLE_CT_TESTING=ON -DCMAKE_BUILD_TYPE=Debug -DMAYO_BUILD_TYPE=${{ matrix.mayo_build_type }} -DMAYO_MARCH="-march=haswell -maes"
cmake --build build
valgrind --track-origins=yes build/test/mayo_test_scheme MAYO_1
valgrind --track-origins=yes build/test/mayo_test_scheme MAYO_2
valgrind --track-origins=yes build/test/mayo_test_scheme MAYO_3
valgrind --track-origins=yes build/test/mayo_test_scheme MAYO_5
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_1
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_2
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_3
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_5
if: matrix.mayo_build_type == 'ref'

- name: Memcheck
run: |
rm -rf build
cmake -Bbuild -DCMAKE_BUILD_TYPE=Debug -DMAYO_BUILD_TYPE=${{ matrix.mayo_build_type }} -DMAYO_MARCH="-march=haswell -maes"
cmake --build build
ctest -T memcheck --test-dir build
# valgrind-ci unit_tests_valgrind.xml --summary
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_1
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_2
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_3
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme MAYO_5
if: matrix.mayo_build_type == 'ref'

- name: Memcheck
run: |
rm -rf build
cmake -Bbuild -DCMAKE_BUILD_TYPE=Debug -DMAYO_BUILD_TYPE=${{ matrix.mayo_build_type }} -DMAYO_MARCH="-march=haswell -maes"
cmake --build build
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_1
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_2
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_3
valgrind --error-exitcode=1 --track-origins=yes build/test/mayo_test_scheme_MAYO_5
if: matrix.mayo_build_type == 'opt' || matrix.mayo_build_type == 'avx2'

- name: Address Sanitizer ASAN
run: |
Expand Down
8 changes: 4 additions & 4 deletions apps/example.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static int example_mayo(const mayo_params_t* p) {
unsigned char *sk = calloc(p->csk_bytes, 1);

unsigned char *epk = calloc(p->epk_bytes, 1);
unsigned char *esk = calloc(p->esk_bytes, 1);
sk_t *esk = calloc(sizeof(sk_t), 1);

unsigned char *sig = calloc(p->sig_bytes + msglen, 1);

Expand Down Expand Up @@ -92,7 +92,7 @@ static int example_mayo(const mayo_params_t* p) {
}

printf("mayo_verify (with correct signature) -> ");
res = mayo_verify(p, msg, msglen, sig, p->sig_bytes, pk);
res = mayo_verify(p, msg, msglen, sig, pk);
if (res != MAYO_OK) {
printf("FAIL\n");
res = -1;
Expand All @@ -116,7 +116,7 @@ static int example_mayo(const mayo_params_t* p) {
}

printf("mayo_verify (with altered signature) -> ");
res = mayo_verify(p, msg, msglen, sig, p->sig_bytes, pk);
res = mayo_verify(p, msg, msglen, sig, pk);
if (res == MAYO_OK) {
printf("FAIL\n");
res = -1;
Expand All @@ -130,7 +130,7 @@ static int example_mayo(const mayo_params_t* p) {
free(pk);
free(epk);
mayo_secure_free(sk, p->csk_bytes);
mayo_secure_free(esk, p->esk_bytes);
free(esk);
free(sig);
return res;
}
Expand Down
13 changes: 11 additions & 2 deletions include/mayo.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#ifndef MAYO_H
#define MAYO_H

#include <stddef.h>
#include <stdint.h>

#define F_TAIL_LEN 5
#define F_TAIL_64 \
{ 8, 0, 2, 8, 0 } // f(z) = z^64 + x^3*z^3 + x*z^2 + x^3
Expand Down Expand Up @@ -124,6 +127,7 @@
#define N_MAX 133
#define M_MAX 128
#define O_MAX 18
#define V_MAX 121
#define K_MAX 12
#define Q_MAX 16
#define PK_SEED_BYTES_MAX 16
Expand Down Expand Up @@ -200,6 +204,11 @@ typedef struct {
const char *name;
} mayo_params_t;

typedef struct sk_t {
bhess marked this conversation as resolved.
Show resolved Hide resolved
uint32_t p[P1_BYTES_MAX/4 + P2_BYTES_MAX/4];
uint8_t o[O_BYTES_MAX];
} sk_t;

/**
* MAYO parameter sets
*/
Expand Down Expand Up @@ -308,7 +317,7 @@ int mayo_expand_pk(const mayo_params_t *p, const unsigned char *cpk,
* @return int return code
*/
int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk,
unsigned char *esk);
sk_t *esk);

/**
* Mayo verify signature.
Expand All @@ -326,6 +335,6 @@ int mayo_expand_sk(const mayo_params_t *p, const unsigned char *csk,
*/
int mayo_verify(const mayo_params_t *p, const unsigned char *m,
unsigned long long mlen, const unsigned char *sig,
unsigned long long siglen, const unsigned char *pk);
const unsigned char *pk);

#endif
30 changes: 19 additions & 11 deletions src/AVX2/bitsliced_arithmetic_128.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,16 +286,24 @@ inline void mayo_5_P1P1t_times_O(const uint32_t *_P1P1t, const unsigned char *O,

__m256i in0swap = _mm256_permute4x64_epi64(P1P1t[mp+0], 0b01001110);
__m256i in1swap = _mm256_permute4x64_epi64(P1P1t[mp+1], 0b01001110);

acc[2*(r * O_MAX + k) + 0] ^= (P1P1t[mp+0] & cmask1) ^ (in0swap & cmask3)
^ (P1P1t[mp+1] & cmask5) ^ (in1swap & cmask7);
acc[2*(r * O_MAX + k) + 1] ^= (P1P1t[mp+1] & cmask2) ^ (in1swap & cmask4)
^ (P1P1t[mp+0] & cmask6) ^ (in0swap & cmask8);

acc[2*(r * O_MAX + k + 1) + 0] ^= (P1P1t[mp+0] & cmask12) ^ (in0swap & cmask32)
^ (P1P1t[mp+1] & cmask52) ^ (in1swap & cmask72);
acc[2*(r * O_MAX + k + 1) + 1] ^= (P1P1t[mp+1] & cmask22) ^ (in1swap & cmask42)
^ (P1P1t[mp+0] & cmask62) ^ (in0swap & cmask82);

_mm256_storeu_si256(&acc[2*(r * O_MAX + k) + 0], _mm256_loadu_si256(&acc[2*(r * O_MAX + k) + 0]) ^ (
(P1P1t[mp+0] & cmask1) ^ (in0swap & cmask3)
^ (P1P1t[mp+1] & cmask5) ^ (in1swap & cmask7)
));
_mm256_storeu_si256(&acc[2*(r * O_MAX + k) + 1], _mm256_loadu_si256(&acc[2*(r * O_MAX + k) + 1]) ^ (
(P1P1t[mp+1] & cmask2) ^ (in1swap & cmask4)
^ (P1P1t[mp+0] & cmask6) ^ (in0swap & cmask8)
));

_mm256_storeu_si256(&acc[2*(r * O_MAX + k + 1) + 0], _mm256_loadu_si256(&acc[2*(r * O_MAX + k + 1) + 0]) ^ (
(P1P1t[mp+0] & cmask12) ^ (in0swap & cmask32)
^ (P1P1t[mp+1] & cmask52) ^ (in1swap & cmask72)
));
_mm256_storeu_si256(&acc[2*(r * O_MAX + k + 1) + 1], _mm256_loadu_si256(&acc[2*(r * O_MAX + k + 1) + 1]) ^ (
(P1P1t[mp+1] & cmask22) ^ (in1swap & cmask42)
^ (P1P1t[mp+0] & cmask62) ^ (in0swap & cmask82)
));
}
}
}
Expand Down Expand Up @@ -360,7 +368,7 @@ inline void mayo_5_P1_times_Vt(const uint32_t *_P1, const unsigned char *V, uint

__m256i in0swap = _mm256_permute4x64_epi64(P1[mp+0], 0b01001110);
__m256i in1swap = _mm256_permute4x64_epi64(P1[mp+1], 0b01001110);

acc[2*(r * K_MAX + k) + 0] ^= (P1[mp+0] & cmask1) ^ (in0swap & cmask3)
^ (P1[mp+1] & cmask5) ^ (in1swap & cmask7);
acc[2*(r * K_MAX + k) + 1] ^= (P1[mp+1] & cmask2) ^ (in1swap & cmask4)
Expand Down
84 changes: 55 additions & 29 deletions src/AVX2/bitsliced_arithmetic_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,26 +236,43 @@ inline void mayo_12_P1P1t_times_O(const uint32_t *_P1P1t, const unsigned char *O
__m256i inshuf2 = _mm256_permute4x64_epi64(P1P1t[r*V_MAX + c], 0b01001110);
__m256i inshuf1 = _mm256_shuffle_epi32(P1P1t[r*V_MAX + c], 0b01001110);
__m256i inshuf3 = _mm256_shuffle_epi32(inshuf2, 0b01001110);

acc[r * O_MAX + k] ^= P1P1t[r*V_MAX + c] & cmask1;
acc[r * O_MAX + k] ^= inshuf1 & cmask2;
acc[r * O_MAX + k] ^= inshuf2 & cmask3;
acc[r * O_MAX + k] ^= inshuf3 & cmask4;

acc[r * O_MAX + k + 1] ^= P1P1t[r*V_MAX + c] & cmask12;
acc[r * O_MAX + k + 1] ^= inshuf1 & cmask22;
acc[r * O_MAX + k + 1] ^= inshuf2 & cmask32;
acc[r * O_MAX + k + 1] ^= inshuf3 & cmask42;

acc[r * O_MAX + k + 2] ^= P1P1t[r*V_MAX + c] & cmask13;
acc[r * O_MAX + k + 2] ^= inshuf1 & cmask23;
acc[r * O_MAX + k + 2] ^= inshuf2 & cmask33;
acc[r * O_MAX + k + 2] ^= inshuf3 & cmask43;

acc[r * O_MAX + k + 3] ^= P1P1t[r*V_MAX + c] & cmask14;
acc[r * O_MAX + k + 3] ^= inshuf1 & cmask24;
acc[r * O_MAX + k + 3] ^= inshuf2 & cmask34;
acc[r * O_MAX + k + 3] ^= inshuf3 & cmask44;
__m256i acc0 = _mm256_loadu_si256(&acc[r * O_MAX + k]);
__m256i acc1 = _mm256_loadu_si256(&acc[r * O_MAX + k + 1]);
__m256i acc2 = _mm256_loadu_si256(&acc[r * O_MAX + k + 2]);
__m256i acc3 = _mm256_loadu_si256(&acc[r * O_MAX + k + 3]);

_mm256_storeu_si256(&acc[r * O_MAX + k],
acc0 ^
(P1P1t[r*V_MAX + c] & cmask1) ^
(inshuf1 & cmask2) ^
(inshuf2 & cmask3) ^
(inshuf3 & cmask4)
);

_mm256_storeu_si256(&acc[r * O_MAX + k + 1],
acc1 ^
(P1P1t[r*V_MAX + c] & cmask12) ^
(inshuf1 & cmask22) ^
(inshuf2 & cmask32) ^
(inshuf3 & cmask42)
);

_mm256_storeu_si256(&acc[r * O_MAX + k + 2],
acc2 ^
(P1P1t[r*V_MAX + c] & cmask13) ^
(inshuf1 & cmask23) ^
(inshuf2 & cmask33) ^
(inshuf3 & cmask43)
);

_mm256_storeu_si256(&acc[r * O_MAX + k + 3],
acc3 ^
(P1P1t[r*V_MAX + c] & cmask14) ^
(inshuf1 & cmask24) ^
(inshuf2 & cmask34) ^
(inshuf3 & cmask44)
);
}
}
for (; k < (O_MAX/2)*2; k += 2) {
Expand All @@ -276,16 +293,25 @@ inline void mayo_12_P1P1t_times_O(const uint32_t *_P1P1t, const unsigned char *O
__m256i inshuf2 = _mm256_permute4x64_epi64(P1P1t[r*V_MAX + c], 0b01001110);
__m256i inshuf1 = _mm256_shuffle_epi32(P1P1t[r*V_MAX + c], 0b01001110);
__m256i inshuf3 = _mm256_shuffle_epi32(inshuf2, 0b01001110);

acc[r * O_MAX + k] ^= P1P1t[r*V_MAX + c] & cmask1;
acc[r * O_MAX + k] ^= inshuf1 & cmask2;
acc[r * O_MAX + k] ^= inshuf2 & cmask3;
acc[r * O_MAX + k] ^= inshuf3 & cmask4;

acc[r * O_MAX + k + 1] ^= P1P1t[r*V_MAX + c] & cmask12;
acc[r * O_MAX + k + 1] ^= inshuf1 & cmask22;
acc[r * O_MAX + k + 1] ^= inshuf2 & cmask32;
acc[r * O_MAX + k + 1] ^= inshuf3 & cmask42;

__m256i acc0 = _mm256_loadu_si256(&acc[r * O_MAX + k]);
__m256i acc1 = _mm256_loadu_si256(&acc[r * O_MAX + k + 1]);

_mm256_storeu_si256(&acc[r * O_MAX + k],
acc0 ^
(P1P1t[r*V_MAX + c] & cmask1) ^
(inshuf1 & cmask2) ^
(inshuf2 & cmask3) ^
(inshuf3 & cmask4)
);

_mm256_storeu_si256(&acc[r * O_MAX + k + 1],
acc1 ^
(P1P1t[r*V_MAX + c] & cmask12) ^
(inshuf1 & cmask22) ^
(inshuf2 & cmask32) ^
(inshuf3 & cmask42)
);
}
}
}
Expand Down
24 changes: 12 additions & 12 deletions src/AVX2/bitsliced_arithmetic_96.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,18 +313,18 @@ inline void mayo_3_P1P1t_times_O(const uint32_t *_P1P1t, const unsigned char *O,
__m128i X8 = inrot[2] ^ (in[2]);

#define PART(x) \
acc[3 * (r * O_MAX + k + x) + 0] ^= a0##x & in[0]; \
acc[3 * (r * O_MAX + k + x) + 1] ^= a0##x & in[1]; \
acc[3 * (r * O_MAX + k + x) + 2] ^= a0##x & in[2]; \
acc[3 * (r * O_MAX + k + x) + 0] ^= a1##x & X1; \
acc[3 * (r * O_MAX + k + x) + 1] ^= a1##x & X2; \
acc[3 * (r * O_MAX + k + x) + 2] ^= a1##x & (inrot3[2]); \
acc[3 * (r * O_MAX + k + x) + 0] ^= a2##x & X3; \
acc[3 * (r * O_MAX + k + x) + 1] ^= a2##x & X4; \
acc[3 * (r * O_MAX + k + x) + 2] ^= a2##x & X5; \
acc[3 * (r * O_MAX + k + x) + 0] ^= a3##x & X6; \
acc[3 * (r * O_MAX + k + x) + 1] ^= a3##x & X7; \
acc[3 * (r * O_MAX + k + x) + 2] ^= a3##x & X8; \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 0], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 0]) ^ (a0##x & in[0])); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 1], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 1]) ^ (a0##x & in[1])); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 2], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 2]) ^ (a0##x & in[2])); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 0], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 0]) ^ (a1##x & X1)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 1], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 1]) ^ (a1##x & X2)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 2], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 2]) ^ (a1##x & (inrot3[2]))); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 0], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 0]) ^ (a2##x & X3)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 1], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 1]) ^ (a2##x & X4)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 2], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 2]) ^ (a2##x & X5)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 0], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 0]) ^ (a3##x & X6)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 1], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 1]) ^ (a3##x & X7)); \
_mm_storeu_si128(&acc[3 * (r * O_MAX + k + x) + 2], _mm_loadu_si128(&acc[3 * (r * O_MAX + k + x) + 2]) ^ (a3##x & X8)); \

PART(0)
PART(1)
Expand Down
Loading