diff --git a/src/crc32c_arm64.cc b/src/crc32c_arm64.cc index 2595135..701326d 100644 --- a/src/crc32c_arm64.cc +++ b/src/crc32c_arm64.cc @@ -12,8 +12,10 @@ #include #include +#include #include "./crc32c_internal.h" +#include "./crc32c_read_le.h" #include "crc32c/crc32c_config.h" #if HAVE_ARM64_CRC32C @@ -25,16 +27,12 @@ #define SEGMENTBYTES 256 // compute 8bytes for each segment parallelly -#define CRC32C32BYTES(P, IND) \ - do { \ - crc1 = __crc32cd( \ - crc1, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 1 + (IND))); \ - crc2 = __crc32cd( \ - crc2, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 2 + (IND))); \ - crc3 = __crc32cd( \ - crc3, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 3 + (IND))); \ - crc0 = __crc32cd( \ - crc0, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 0 + (IND))); \ +#define CRC32C32BYTES(P, IND) \ + do { \ + crc1 = __crc32cd(crc1, ReadUint64LE((P) + SEGMENTBYTES * 1 + (IND)*8)); \ + crc2 = __crc32cd(crc2, ReadUint64LE((P) + SEGMENTBYTES * 2 + (IND)*8)); \ + crc3 = __crc32cd(crc3, ReadUint64LE((P) + SEGMENTBYTES * 3 + (IND)*8)); \ + crc0 = __crc32cd(crc0, ReadUint64LE((P) + SEGMENTBYTES * 0 + (IND)*8)); \ } while (0); // compute 8*8 bytes for each segment parallelly @@ -86,7 +84,7 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) { t2 = (uint64_t)vmull_p64(crc2, k2); t1 = (uint64_t)vmull_p64(crc1, k1); t0 = (uint64_t)vmull_p64(crc0, k0); - crc = __crc32cd(crc3, *(uint64_t *)data); + crc = __crc32cd(crc3, ReadUint64LE(data)); data += sizeof(uint64_t); crc ^= __crc32cd(0, t2); crc ^= __crc32cd(0, t1); @@ -96,18 +94,18 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) { } while (length >= 8) { - crc = __crc32cd(crc, *(uint64_t *)data); + crc = __crc32cd(crc, ReadUint64LE(data)); data += 8; length -= 8; } if (length & 4) { - crc = __crc32cw(crc, *(uint32_t *)data); + crc = __crc32cw(crc, ReadUint32LE(data)); data += 4; } if (length & 2) { - crc = __crc32ch(crc, *(uint16_t *)data); + crc = __crc32ch(crc, ReadUint16LE(data)); data += 2; } diff --git a/src/crc32c_read_le.h b/src/crc32c_read_le.h index 1ebcf5d..bb2231e 100644 --- a/src/crc32c_read_le.h +++ b/src/crc32c_read_le.h @@ -12,13 +12,23 @@ namespace crc32c { -// Reads a little-endian 32-bit integer from a 32-bit-aligned buffer. +// Reads a little-endian 16-bit integer from bytes, not necessarily aligned. +inline uint16_t ReadUint16LE(const uint8_t* buffer) { +#if BYTE_ORDER_BIG_ENDIAN + return ((uint16_t{buffer[0]}) | (uint16_t{buffer[1]} << 8)); +#else // !BYTE_ORDER_BIG_ENDIAN + uint16_t result; + // This should be optimized to a single instruction. + std::memcpy(&result, buffer, sizeof(result)); + return result; +#endif // BYTE_ORDER_BIG_ENDIAN +} + +// Reads a little-endian 32-bit integer from bytes, not necessarily aligned. inline uint32_t ReadUint32LE(const uint8_t* buffer) { #if BYTE_ORDER_BIG_ENDIAN - return ((static_cast(static_cast(buffer[0]))) | - (static_cast(static_cast(buffer[1])) << 8) | - (static_cast(static_cast(buffer[2])) << 16) | - (static_cast(static_cast(buffer[3])) << 24)); + return ((uint32_t{buffer[0]}) | (uint32_t{buffer[1]} << 8) | + (uint32_t{buffer[2]} << 16) | (uint32_t{buffer[3]} << 24)); #else // !BYTE_ORDER_BIG_ENDIAN uint32_t result; // This should be optimized to a single instruction. @@ -27,17 +37,13 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) { #endif // BYTE_ORDER_BIG_ENDIAN } -// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer. +// Reads a little-endian 64-bit integer from bytes, not necessarily aligned. inline uint64_t ReadUint64LE(const uint8_t* buffer) { #if BYTE_ORDER_BIG_ENDIAN - return ((static_cast(static_cast(buffer[0]))) | - (static_cast(static_cast(buffer[1])) << 8) | - (static_cast(static_cast(buffer[2])) << 16) | - (static_cast(static_cast(buffer[3])) << 24) | - (static_cast(static_cast(buffer[4])) << 32) | - (static_cast(static_cast(buffer[5])) << 40) | - (static_cast(static_cast(buffer[6])) << 48) | - (static_cast(static_cast(buffer[7])) << 56)); + return ((uint64_t{buffer[0]}) | (uint64_t{buffer[1]} << 8) | + (uint64_t{buffer[2]} << 16) | (uint64_t{buffer[3]} << 24) | + (uint64_t{buffer[4]} << 32) | (uint64_t{buffer[5]} << 40) | + (uint64_t{buffer[6]} << 48) | (uint64_t{buffer[7]} << 56)); #else // !BYTE_ORDER_BIG_ENDIAN uint64_t result; // This should be optimized to a single instruction. diff --git a/src/crc32c_read_le_unittest.cc b/src/crc32c_read_le_unittest.cc index 2a30302..8fbfc41 100644 --- a/src/crc32c_read_le_unittest.cc +++ b/src/crc32c_read_le_unittest.cc @@ -13,20 +13,25 @@ namespace crc32c { +TEST(Crc32CReadLETest, ReadUint16LE) { + // little-endian 0x1234 + uint8_t bytes[] = {0x34, 0x12}; + + EXPECT_EQ(uint16_t{0x1234}, ReadUint16LE(bytes)); +} + TEST(Crc32CReadLETest, ReadUint32LE) { // little-endian 0x12345678 - alignas(4) uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12}; + uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12}; - ASSERT_EQ(RoundUp<4>(bytes), bytes) << "Stack array is not aligned"; - EXPECT_EQ(static_cast(0x12345678), ReadUint32LE(bytes)); + EXPECT_EQ(uint32_t{0x12345678}, ReadUint32LE(bytes)); } TEST(Crc32CReadLETest, ReadUint64LE) { // little-endian 0x123456789ABCDEF0 - alignas(8) uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12}; + uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12}; - ASSERT_EQ(RoundUp<8>(bytes), bytes) << "Stack array is not aligned"; - EXPECT_EQ(static_cast(0x123456789ABCDEF0), ReadUint64LE(bytes)); + EXPECT_EQ(uint64_t{0x123456789ABCDEF0}, ReadUint64LE(bytes)); } } // namespace crc32c