Skip to content

Commit d3d60ac

Browse files
authored
Fix invalid pointer casts in arm64 implementation (#65)
* Fix invalid pointer casts in arm64 implementation. It is UB in C and C++ to type-pun pointers in this way, for two reasons. First, casting to a pointer type is forbidden if the resulting pointer is not correctly aligned. Second, this kind of type-punning is a strict aliasing violation. Instead, the way to read 8 bytes at a time as a uint64_t is to call memcpy. Compilers recognize this pattern and optimize it, lowering it to the same code, but without breaking the language's abstract state machine. This is needed to fix some UBSan warnings in Chromium. * Use crc32c_read_le.h instead. In doing so, I fixed up crc32c_read_le.h a bit: 1. The documentation and tests say the buffers need to be aligned, but they do not need to be. 2. Add a 16-bit version. 3. Remove an unnecessary static_cast<uint8_t>. The pointed-to type is already uint8_t. 4. Replace the necessary static_casts with uintN_t{x} per the Google style guide. https://google.github.io/styleguide/cppguide.html#Casting
1 parent 9d0fb14 commit d3d60ac

File tree

3 files changed

+43
-34
lines changed

3 files changed

+43
-34
lines changed

src/crc32c_arm64.cc

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,10 @@
1212

1313
#include <cstddef>
1414
#include <cstdint>
15+
#include <cstring>
1516

1617
#include "./crc32c_internal.h"
18+
#include "./crc32c_read_le.h"
1719
#include "crc32c/crc32c_config.h"
1820

1921
#if HAVE_ARM64_CRC32C
@@ -25,16 +27,12 @@
2527
#define SEGMENTBYTES 256
2628

2729
// compute 8bytes for each segment parallelly
28-
#define CRC32C32BYTES(P, IND) \
29-
do { \
30-
crc1 = __crc32cd( \
31-
crc1, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 1 + (IND))); \
32-
crc2 = __crc32cd( \
33-
crc2, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 2 + (IND))); \
34-
crc3 = __crc32cd( \
35-
crc3, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 3 + (IND))); \
36-
crc0 = __crc32cd( \
37-
crc0, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 0 + (IND))); \
30+
#define CRC32C32BYTES(P, IND) \
31+
do { \
32+
crc1 = __crc32cd(crc1, ReadUint64LE((P) + SEGMENTBYTES * 1 + (IND)*8)); \
33+
crc2 = __crc32cd(crc2, ReadUint64LE((P) + SEGMENTBYTES * 2 + (IND)*8)); \
34+
crc3 = __crc32cd(crc3, ReadUint64LE((P) + SEGMENTBYTES * 3 + (IND)*8)); \
35+
crc0 = __crc32cd(crc0, ReadUint64LE((P) + SEGMENTBYTES * 0 + (IND)*8)); \
3836
} while (0);
3937

4038
// compute 8*8 bytes for each segment parallelly
@@ -86,7 +84,7 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
8684
t2 = (uint64_t)vmull_p64(crc2, k2);
8785
t1 = (uint64_t)vmull_p64(crc1, k1);
8886
t0 = (uint64_t)vmull_p64(crc0, k0);
89-
crc = __crc32cd(crc3, *(uint64_t *)data);
87+
crc = __crc32cd(crc3, ReadUint64LE(data));
9088
data += sizeof(uint64_t);
9189
crc ^= __crc32cd(0, t2);
9290
crc ^= __crc32cd(0, t1);
@@ -96,18 +94,18 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
9694
}
9795

9896
while (length >= 8) {
99-
crc = __crc32cd(crc, *(uint64_t *)data);
97+
crc = __crc32cd(crc, ReadUint64LE(data));
10098
data += 8;
10199
length -= 8;
102100
}
103101

104102
if (length & 4) {
105-
crc = __crc32cw(crc, *(uint32_t *)data);
103+
crc = __crc32cw(crc, ReadUint32LE(data));
106104
data += 4;
107105
}
108106

109107
if (length & 2) {
110-
crc = __crc32ch(crc, *(uint16_t *)data);
108+
crc = __crc32ch(crc, ReadUint16LE(data));
111109
data += 2;
112110
}
113111

src/crc32c_read_le.h

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,23 @@
1212

1313
namespace crc32c {
1414

15-
// Reads a little-endian 32-bit integer from a 32-bit-aligned buffer.
15+
// Reads a little-endian 16-bit integer from bytes, not necessarily aligned.
16+
inline uint16_t ReadUint16LE(const uint8_t* buffer) {
17+
#if BYTE_ORDER_BIG_ENDIAN
18+
return ((uint16_t{buffer[0]}) | (uint16_t{buffer[1]} << 8));
19+
#else // !BYTE_ORDER_BIG_ENDIAN
20+
uint16_t result;
21+
// This should be optimized to a single instruction.
22+
std::memcpy(&result, buffer, sizeof(result));
23+
return result;
24+
#endif // BYTE_ORDER_BIG_ENDIAN
25+
}
26+
27+
// Reads a little-endian 32-bit integer from bytes, not necessarily aligned.
1628
inline uint32_t ReadUint32LE(const uint8_t* buffer) {
1729
#if BYTE_ORDER_BIG_ENDIAN
18-
return ((static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))) |
19-
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[1])) << 8) |
20-
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[2])) << 16) |
21-
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[3])) << 24));
30+
return ((uint32_t{buffer[0]}) | (uint32_t{buffer[1]} << 8) |
31+
(uint32_t{buffer[2]} << 16) | (uint32_t{buffer[3]} << 24));
2232
#else // !BYTE_ORDER_BIG_ENDIAN
2333
uint32_t result;
2434
// This should be optimized to a single instruction.
@@ -27,17 +37,13 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) {
2737
#endif // BYTE_ORDER_BIG_ENDIAN
2838
}
2939

30-
// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer.
40+
// Reads a little-endian 64-bit integer from bytes, not necessarily aligned.
3141
inline uint64_t ReadUint64LE(const uint8_t* buffer) {
3242
#if BYTE_ORDER_BIG_ENDIAN
33-
return ((static_cast<uint64_t>(static_cast<uint8_t>(buffer[0]))) |
34-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[1])) << 8) |
35-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[2])) << 16) |
36-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[3])) << 24) |
37-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[4])) << 32) |
38-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[5])) << 40) |
39-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[6])) << 48) |
40-
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[7])) << 56));
43+
return ((uint64_t{buffer[0]}) | (uint64_t{buffer[1]} << 8) |
44+
(uint64_t{buffer[2]} << 16) | (uint64_t{buffer[3]} << 24) |
45+
(uint64_t{buffer[4]} << 32) | (uint64_t{buffer[5]} << 40) |
46+
(uint64_t{buffer[6]} << 48) | (uint64_t{buffer[7]} << 56));
4147
#else // !BYTE_ORDER_BIG_ENDIAN
4248
uint64_t result;
4349
// This should be optimized to a single instruction.

src/crc32c_read_le_unittest.cc

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,20 +13,25 @@
1313

1414
namespace crc32c {
1515

16+
TEST(Crc32CReadLETest, ReadUint16LE) {
17+
// little-endian 0x1234
18+
uint8_t bytes[] = {0x34, 0x12};
19+
20+
EXPECT_EQ(uint16_t{0x1234}, ReadUint16LE(bytes));
21+
}
22+
1623
TEST(Crc32CReadLETest, ReadUint32LE) {
1724
// little-endian 0x12345678
18-
alignas(4) uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
25+
uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
1926

20-
ASSERT_EQ(RoundUp<4>(bytes), bytes) << "Stack array is not aligned";
21-
EXPECT_EQ(static_cast<uint32_t>(0x12345678), ReadUint32LE(bytes));
27+
EXPECT_EQ(uint32_t{0x12345678}, ReadUint32LE(bytes));
2228
}
2329

2430
TEST(Crc32CReadLETest, ReadUint64LE) {
2531
// little-endian 0x123456789ABCDEF0
26-
alignas(8) uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};
32+
uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};
2733

28-
ASSERT_EQ(RoundUp<8>(bytes), bytes) << "Stack array is not aligned";
29-
EXPECT_EQ(static_cast<uint64_t>(0x123456789ABCDEF0), ReadUint64LE(bytes));
34+
EXPECT_EQ(uint64_t{0x123456789ABCDEF0}, ReadUint64LE(bytes));
3035
}
3136

3237
} // namespace crc32c

0 commit comments

Comments
 (0)