diff --git a/src/crc32c_arm64.cc b/src/crc32c_arm64.cc
index 2595135..701326d 100644
--- a/src/crc32c_arm64.cc
+++ b/src/crc32c_arm64.cc
@@ -12,8 +12,10 @@
 
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 
 #include "./crc32c_internal.h"
+#include "./crc32c_read_le.h"
 #include "crc32c/crc32c_config.h"
 
 #if HAVE_ARM64_CRC32C
@@ -25,16 +27,12 @@
 #define SEGMENTBYTES 256
 
 // compute 8bytes for each segment parallelly
-#define CRC32C32BYTES(P, IND)                                             \
-  do {                                                                    \
-    crc1 = __crc32cd(                                                     \
-        crc1, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 1 + (IND))); \
-    crc2 = __crc32cd(                                                     \
-        crc2, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 2 + (IND))); \
-    crc3 = __crc32cd(                                                     \
-        crc3, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 3 + (IND))); \
-    crc0 = __crc32cd(                                                     \
-        crc0, *((const uint64_t *)(P) + (SEGMENTBYTES / 8) * 0 + (IND))); \
+#define CRC32C32BYTES(P, IND)                                               \
+  do {                                                                      \
+    crc1 = __crc32cd(crc1, ReadUint64LE((P) + SEGMENTBYTES * 1 + (IND)*8)); \
+    crc2 = __crc32cd(crc2, ReadUint64LE((P) + SEGMENTBYTES * 2 + (IND)*8)); \
+    crc3 = __crc32cd(crc3, ReadUint64LE((P) + SEGMENTBYTES * 3 + (IND)*8)); \
+    crc0 = __crc32cd(crc0, ReadUint64LE((P) + SEGMENTBYTES * 0 + (IND)*8)); \
   } while (0);
 
 // compute 8*8 bytes for each segment parallelly
@@ -86,7 +84,7 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
     t2 = (uint64_t)vmull_p64(crc2, k2);
     t1 = (uint64_t)vmull_p64(crc1, k1);
     t0 = (uint64_t)vmull_p64(crc0, k0);
-    crc = __crc32cd(crc3, *(uint64_t *)data);
+    crc = __crc32cd(crc3, ReadUint64LE(data));
     data += sizeof(uint64_t);
     crc ^= __crc32cd(0, t2);
     crc ^= __crc32cd(0, t1);
@@ -96,18 +94,18 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
   }
 
   while (length >= 8) {
-    crc = __crc32cd(crc, *(uint64_t *)data);
+    crc = __crc32cd(crc, ReadUint64LE(data));
     data += 8;
     length -= 8;
   }
 
   if (length & 4) {
-    crc = __crc32cw(crc, *(uint32_t *)data);
+    crc = __crc32cw(crc, ReadUint32LE(data));
     data += 4;
   }
 
   if (length & 2) {
-    crc = __crc32ch(crc, *(uint16_t *)data);
+    crc = __crc32ch(crc, ReadUint16LE(data));
     data += 2;
   }
 
diff --git a/src/crc32c_read_le.h b/src/crc32c_read_le.h
index 1ebcf5d..bb2231e 100644
--- a/src/crc32c_read_le.h
+++ b/src/crc32c_read_le.h
@@ -12,13 +12,23 @@
 
 namespace crc32c {
 
-// Reads a little-endian 32-bit integer from a 32-bit-aligned buffer.
+// Reads a little-endian 16-bit integer from bytes, not necessarily aligned.
+inline uint16_t ReadUint16LE(const uint8_t* buffer) {
+#if BYTE_ORDER_BIG_ENDIAN
+  return ((uint16_t{buffer[0]}) | (uint16_t{buffer[1]} << 8));
+#else   // !BYTE_ORDER_BIG_ENDIAN
+  uint16_t result;
+  // This should be optimized to a single instruction.
+  std::memcpy(&result, buffer, sizeof(result));
+  return result;
+#endif  // BYTE_ORDER_BIG_ENDIAN
+}
+
+// Reads a little-endian 32-bit integer from bytes, not necessarily aligned.
 inline uint32_t ReadUint32LE(const uint8_t* buffer) {
 #if BYTE_ORDER_BIG_ENDIAN
-  return ((static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))) |
-          (static_cast<uint32_t>(static_cast<uint8_t>(buffer[1])) << 8) |
-          (static_cast<uint32_t>(static_cast<uint8_t>(buffer[2])) << 16) |
-          (static_cast<uint32_t>(static_cast<uint8_t>(buffer[3])) << 24));
+  return ((uint32_t{buffer[0]}) | (uint32_t{buffer[1]} << 8) |
+          (uint32_t{buffer[2]} << 16) | (uint32_t{buffer[3]} << 24));
 #else   // !BYTE_ORDER_BIG_ENDIAN
   uint32_t result;
   // This should be optimized to a single instruction.
@@ -27,17 +37,13 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) {
 #endif  // BYTE_ORDER_BIG_ENDIAN
 }
 
-// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer.
+// Reads a little-endian 64-bit integer from bytes, not necessarily aligned.
 inline uint64_t ReadUint64LE(const uint8_t* buffer) {
 #if BYTE_ORDER_BIG_ENDIAN
-  return ((static_cast<uint64_t>(static_cast<uint8_t>(buffer[0]))) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[1])) << 8) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[2])) << 16) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[3])) << 24) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[4])) << 32) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[5])) << 40) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[6])) << 48) |
-          (static_cast<uint64_t>(static_cast<uint8_t>(buffer[7])) << 56));
+  return ((uint64_t{buffer[0]}) | (uint64_t{buffer[1]} << 8) |
+          (uint64_t{buffer[2]} << 16) | (uint64_t{buffer[3]} << 24) |
+          (uint64_t{buffer[4]} << 32) | (uint64_t{buffer[5]} << 40) |
+          (uint64_t{buffer[6]} << 48) | (uint64_t{buffer[7]} << 56));
 #else   // !BYTE_ORDER_BIG_ENDIAN
   uint64_t result;
   // This should be optimized to a single instruction.
diff --git a/src/crc32c_read_le_unittest.cc b/src/crc32c_read_le_unittest.cc
index 2a30302..8fbfc41 100644
--- a/src/crc32c_read_le_unittest.cc
+++ b/src/crc32c_read_le_unittest.cc
@@ -13,20 +13,25 @@
 
 namespace crc32c {
 
+TEST(Crc32CReadLETest, ReadUint16LE) {
+  // little-endian 0x1234
+  uint8_t bytes[] = {0x34, 0x12};
+
+  EXPECT_EQ(uint16_t{0x1234}, ReadUint16LE(bytes));
+}
+
 TEST(Crc32CReadLETest, ReadUint32LE) {
   // little-endian 0x12345678
-  alignas(4) uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
+  uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
 
-  ASSERT_EQ(RoundUp<4>(bytes), bytes) << "Stack array is not aligned";
-  EXPECT_EQ(static_cast<uint32_t>(0x12345678), ReadUint32LE(bytes));
+  EXPECT_EQ(uint32_t{0x12345678}, ReadUint32LE(bytes));
 }
 
 TEST(Crc32CReadLETest, ReadUint64LE) {
   // little-endian 0x123456789ABCDEF0
-  alignas(8) uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};
+  uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12};
 
-  ASSERT_EQ(RoundUp<8>(bytes), bytes) << "Stack array is not aligned";
-  EXPECT_EQ(static_cast<uint64_t>(0x123456789ABCDEF0), ReadUint64LE(bytes));
+  EXPECT_EQ(uint64_t{0x123456789ABCDEF0}, ReadUint64LE(bytes));
 }
 
 }  // namespace crc32c