Skip to content

Commit

Permalink
Fix invalid pointer casts in arm64 implementation (#65)
Browse files Browse the repository at this point in the history
* Fix invalid pointer casts in arm64 implementation

It is UB in C and C++ to type-pun pointers in this way, for two reasons.
First, casting to a pointer is forbidden if the pointer is not aligned.
Second, this kind of type-punning is a strict aliasing violation.

Instead, the way to read 8 bytes as a time as a uint64_t is to call
memcpy. Compilers recognize this pattern and optimize it, lowering it to
the same code, but without breaking the language's abstract state
machine.

This is needed to fix some UBSan warnings in Chromium.

* Use crc32c_read_le.h instead

In doing so, I fixed up crc32c_read_le.h a bit:

1. The documentation and tests say the buffers need to be aligned, but
   they don't.

2. Add a 16-bit version.

3. Remove an unnecessary static_cast<uint8_t>. The pointer is already
   uint8_t.

4. Replace the necessary static_casts with uintN_t{x} per the Google
   style guide. https://google.github.io/styleguide/cppguide.html#Casting
  • Loading branch information
davidben authored Sep 6, 2024
1 parent 9d0fb14 commit d3d60ac
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 34 deletions.
26 changes: 12 additions & 14 deletions src/crc32c_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@

#include <cstddef>
#include <cstdint>
#include <cstring>

#include "./crc32c_internal.h"
#include "./crc32c_read_le.h"
#include "crc32c/crc32c_config.h"

#if HAVE_ARM64_CRC32C
Expand All @@ -25,16 +27,12 @@
#define SEGMENTBYTES 256

// compute 8bytes for each segment parallelly
#define CRC32C32BYTES(P, IND) \
do { \
crc1 = __crc32cd( \
crc1, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 1 + (IND))); \
crc2 = __crc32cd( \
crc2, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 2 + (IND))); \
crc3 = __crc32cd( \
crc3, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 3 + (IND))); \
crc0 = __crc32cd( \
crc0, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 0 + (IND))); \
#define CRC32C32BYTES(P, IND) \
do { \
crc1 = __crc32cd(crc1, ReadUint64LE((P) + SEGMENTBYTES * 1 + (IND)*8)); \
crc2 = __crc32cd(crc2, ReadUint64LE((P) + SEGMENTBYTES * 2 + (IND)*8)); \
crc3 = __crc32cd(crc3, ReadUint64LE((P) + SEGMENTBYTES * 3 + (IND)*8)); \
crc0 = __crc32cd(crc0, ReadUint64LE((P) + SEGMENTBYTES * 0 + (IND)*8)); \
} while (0);

// compute 8*8 bytes for each segment parallelly
Expand Down Expand Up @@ -86,7 +84,7 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
t2 = (uint64_t)vmull_p64(crc2, k2);
t1 = (uint64_t)vmull_p64(crc1, k1);
t0 = (uint64_t)vmull_p64(crc0, k0);
crc = __crc32cd(crc3, *(uint64_t *)data);
crc = __crc32cd(crc3, ReadUint64LE(data));
data += sizeof(uint64_t);
crc ^= __crc32cd(0, t2);
crc ^= __crc32cd(0, t1);
Expand All @@ -96,18 +94,18 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
}

while (length >= 8) {
crc = __crc32cd(crc, *(uint64_t *)data);
crc = __crc32cd(crc, ReadUint64LE(data));
data += 8;
length -= 8;
}

if (length & 4) {
crc = __crc32cw(crc, *(uint32_t *)data);
crc = __crc32cw(crc, ReadUint32LE(data));
data += 4;
}

if (length & 2) {
crc = __crc32ch(crc, *(uint16_t *)data);
crc = __crc32ch(crc, ReadUint16LE(data));
data += 2;
}

Expand Down
34 changes: 20 additions & 14 deletions src/crc32c_read_le.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,23 @@

namespace crc32c {

// Reads a little-endian 32-bit integer from a 32-bit-aligned buffer.
// Reads a little-endian 16-bit integer from bytes, not necessarily aligned.
inline uint16_t ReadUint16LE(const uint8_t* buffer) {
#if BYTE_ORDER_BIG_ENDIAN
return ((uint16_t{buffer[0]}) | (uint16_t{buffer[1]} << 8));
#else // !BYTE_ORDER_BIG_ENDIAN
uint16_t result;
// This should be optimized to a single instruction.
std::memcpy(&result, buffer, sizeof(result));
return result;
#endif // BYTE_ORDER_BIG_ENDIAN
}

// Reads a little-endian 32-bit integer from bytes, not necessarily aligned.
inline uint32_t ReadUint32LE(const uint8_t* buffer) {
#if BYTE_ORDER_BIG_ENDIAN
return ((static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[1])) << 8) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[2])) << 16) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[3])) << 24));
return ((uint32_t{buffer[0]}) | (uint32_t{buffer[1]} << 8) |
(uint32_t{buffer[2]} << 16) | (uint32_t{buffer[3]} << 24));
#else // !BYTE_ORDER_BIG_ENDIAN
uint32_t result;
// This should be optimized to a single instruction.
Expand All @@ -27,17 +37,13 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) {
#endif // BYTE_ORDER_BIG_ENDIAN
}

// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer.
// Reads a little-endian 64-bit integer from bytes, not necessarily aligned.
inline uint64_t ReadUint64LE(const uint8_t* buffer) {
#if BYTE_ORDER_BIG_ENDIAN
return ((static_cast<uint64_t>(static_cast<uint8_t>(buffer[0]))) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[1])) << 8) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[2])) << 16) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[3])) << 24) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[4])) << 32) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[5])) << 40) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[6])) << 48) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[7])) << 56));
return ((uint64_t{buffer[0]}) | (uint64_t{buffer[1]} << 8) |
(uint64_t{buffer[2]} << 16) | (uint64_t{buffer[3]} << 24) |
(uint64_t{buffer[4]} << 32) | (uint64_t{buffer[5]} << 40) |
(uint64_t{buffer[6]} << 48) | (uint64_t{buffer[7]} << 56));
#else // !BYTE_ORDER_BIG_ENDIAN
uint64_t result;
// This should be optimized to a single instruction.
Expand Down
17 changes: 11 additions & 6 deletions src/crc32c_read_le_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,25 @@

namespace crc32c {

TEST(Crc32CReadLETest, ReadUint16LE) {
// little-endian 0x1234
uint8_t bytes[] = {0x34, 0x12};

EXPECT_EQ(uint16_t{0x1234}, ReadUint16LE(bytes));
}

TEST(Crc32CReadLETest, ReadUint32LE) {
// little-endian 0x12345678
alignas(4) uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};

ASSERT_EQ(RoundUp<4>(bytes), bytes) << "Stack array is not aligned";
EXPECT_EQ(static_cast<uint32_t>(0x12345678), ReadUint32LE(bytes));
EXPECT_EQ(uint32_t{0x12345678}, ReadUint32LE(bytes));
}

TEST(Crc32CReadLETest, ReadUint64LE) {
// little-endian 0x123456789ABCDEF0
alignas(8) uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};
uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};

ASSERT_EQ(RoundUp<8>(bytes), bytes) << "Stack array is not aligned";
EXPECT_EQ(static_cast<uint64_t>(0x123456789ABCDEF0), ReadUint64LE(bytes));
EXPECT_EQ(uint64_t{0x123456789ABCDEF0}, ReadUint64LE(bytes));
}

} // namespace crc32c

0 comments on commit d3d60ac

Please sign in to comment.