Skip to content

Commit 76d46d4

Browse files
committed
[crypto] Let otcrypto_aes() have up to 3 blocks in flight
Previously, the hot loop inside otcrypto_aes() would only ever have 2 blocks in flight, while the underlying driver function aes_update() as well as the hardware allow up to 3 blocks to be in flight:
- Block x-1 can be retrieved from the data output registers by SW.
- Block x is being processed by HW.
- Block x+1 can already be provided via the data input registers by SW.

Keeping 3 blocks in flight is better for SCA hardening (more concurrency means more background noise) and allows achieving higher throughput.

This is related to #20308.

This is a cherry-pick of commit 8f0f500 to branch earlgrey_es_sival.

Signed-off-by: Pirmin Vogel <[email protected]>
1 parent a9bbbf8 commit 76d46d4

File tree

1 file changed

+48
-21
lines changed
  • sw/device/lib/crypto/impl

1 file changed

+48
-21
lines changed

sw/device/lib/crypto/impl/aes.c

Lines changed: 48 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -355,40 +355,67 @@ otcrypto_status_t otcrypto_aes(const otcrypto_blinded_key_t *key,
355355
return OTCRYPTO_BAD_ARGS;
356356
}
357357

358-
// Perform the cipher operation for all full blocks (excluding last block).
359-
// The input and output are offset by one, so if unrolled this loop would
360-
// look like:
358+
// Perform the cipher operation for all full blocks. The input and output are
359+
// offset by `block_offset` number of blocks, where `block_offset` can be 1
360+
// or 2. So if unrolled, these loops would look like:
361+
//
362+
// - block_offset == 1
361363
// aes_update(NULL, input[0]);
362364
// aes_update(output[0], input[1]);
363365
// aes_update(output[1], input[2]);
364-
// ...
366+
// aes_update(output[2], NULL);
367+
//
368+
// - block_offset == 2
369+
// aes_update(NULL, input[0]);
370+
// aes_update(NULL, input[1]);
371+
// aes_update(output[0], input[2]); // The HW is processing input[1].
372+
// aes_update(output[1], input[3]); // The HW is processing input[2].
373+
// aes_update(output[2], NULL);
374+
// aes_update(output[3], NULL);
375+
//
376+
// Using a `block_offset` of 2 allows having 3 blocks in flight which is
377+
// beneficial from a hardening and performance point of view:
378+
// - Software retrieves Block x-1 from the data output registers.
379+
// - Hardware processes Block x.
380+
// - Software provides Block x+1 via the data input registers.
381+
//
365382
// See the AES driver for details.
383+
const size_t block_offset = input_nblocks >= 3 ? 2 : 1;
366384
aes_block_t block_in;
367385
aes_block_t block_out;
368386
size_t i;
369-
for (i = 0; launder32(i) < input_nblocks; i++) {
370-
HARDENED_TRY(get_block(cipher_input, aes_padding, i, &block_in));
371387

372-
// Call the AES cipher and copy data to output buffer if needed.
373-
if (launder32(i) == 0) {
374-
HARDENED_CHECK_EQ(i, 0);
375-
HARDENED_TRY(aes_update(/*dest=*/NULL, &block_in));
376-
} else {
377-
HARDENED_TRY(aes_update(&block_out, &block_in));
378-
// TODO(#17711) Change to `hardened_memcpy`.
379-
memcpy(&cipher_output.data[(i - 1) * kAesBlockNumBytes], block_out.data,
380-
kAesBlockNumBytes);
381-
}
388+
// Provide the first `block_offset` number of input blocks and call the AES
389+
// cipher.
390+
for (i = 0; launder32(i) < block_offset; ++i) {
391+
HARDENED_TRY(get_block(cipher_input, aes_padding, i, &block_in));
392+
TRY(aes_update(/*dest=*/NULL, &block_in));
382393
}
394+
// Check that the loop ran for the correct number of iterations.
395+
HARDENED_CHECK_EQ(i, block_offset);
383396

397+
// Call the AES cipher while providing new input and copying data to the
398+
// output buffer.
399+
for (i = block_offset; launder32(i) < input_nblocks; ++i) {
400+
HARDENED_TRY(get_block(cipher_input, aes_padding, i, &block_in));
401+
TRY(aes_update(&block_out, &block_in));
402+
// TODO(#17711) Change to `hardened_memcpy`.
403+
memcpy(&cipher_output.data[(i - block_offset) * kAesBlockNumBytes],
404+
block_out.data, kAesBlockNumBytes);
405+
}
384406
// Check that the loop ran for the correct number of iterations.
385407
HARDENED_CHECK_EQ(i, input_nblocks);
386408

387-
// Retrieve the output from the final block (providing no input).
388-
HARDENED_TRY(aes_update(&block_out, /*src=*/NULL));
389-
// TODO(#17711) Change to `hardened_memcpy`.
390-
memcpy(&cipher_output.data[(input_nblocks - 1) * kAesBlockNumBytes],
391-
block_out.data, kAesBlockNumBytes);
409+
// Retrieve the output from the final `block_offset` blocks (providing no
410+
// input).
411+
for (i = block_offset; launder32(i) > 0; --i) {
412+
HARDENED_TRY(aes_update(&block_out, /*src=*/NULL));
413+
// TODO(#17711) Change to `hardened_memcpy`.
414+
memcpy(&cipher_output.data[(input_nblocks - i) * kAesBlockNumBytes],
415+
block_out.data, kAesBlockNumBytes);
416+
}
417+
// Check that the loop ran for the correct number of iterations.
418+
HARDENED_CHECK_EQ(i, 0);
392419

393420
// Deinitialize the AES block and update the IV (in ECB mode, skip the IV).
394421
if (aes_mode == launder32(kAesCipherModeEcb)) {

0 commit comments

Comments
 (0)