diff --git a/md5/Cargo.toml b/md5/Cargo.toml index 7defc824..e8f89ad1 100644 --- a/md5/Cargo.toml +++ b/md5/Cargo.toml @@ -29,5 +29,8 @@ hex-literal = "0.2.2" default = ["std"] std = ["digest/std"] asm = ["md5-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates +# Use assembly backend for LoongArch64 targets +# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates +loongarch64_asm = [] oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57 force-soft = [] # Force software implementation diff --git a/md5/src/compress.rs b/md5/src/compress.rs index 2f3a10b3..73106c2a 100644 --- a/md5/src/compress.rs +++ b/md5/src/compress.rs @@ -5,6 +5,9 @@ cfg_if::cfg_if! { } else if #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] { extern crate md5_asm; pub use md5_asm::compress; + } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] { + mod loongarch64_asm; + pub use loongarch64_asm::compress; } else { mod soft; pub use soft::compress; diff --git a/md5/src/compress/loongarch64_asm.rs b/md5/src/compress/loongarch64_asm.rs new file mode 100644 index 00000000..9dd20942 --- /dev/null +++ b/md5/src/compress/loongarch64_asm.rs @@ -0,0 +1,206 @@ +//! LoongArch64 assembly backend + +use core::arch::asm; + +const RC: [u32; 64] = [ + // round 1 + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + // round 2 + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + // round 3 + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + // round 4 + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +]; + +macro_rules! c { + ($($l:expr)*) => { + concat!($($l ,)*) + }; +} + +macro_rules! round0 { + ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => { + c!( + "xor $t4," $c "," $d ";" + "and $t4, $t4," $b ";" + "xor $t4, $t4," $d ";" + roundtail!($a, $b, $k, $s, $i) + ) + } +} + +macro_rules! round1 { + ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => { + c!( + "andn $t4," $c "," $d ";" + "and $t5," $d "," $b ";" + "or $t4, $t4, $t5;" + roundtail!($a, $b, $k, $s, $i) + ) + } +} + +macro_rules! round2 { + ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => { + c!( + "xor $t4," $c "," $d ";" + "xor $t4, $t4," $b ";" + roundtail!($a, $b, $k, $s, $i) + ) + } +} + +macro_rules! round3 { + ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => { + c!( + "orn $t4," $b "," $d ";" + "xor $t4, $t4," $c ";" + roundtail!($a, $b, $k, $s, $i) + ) + } +} + +macro_rules! roundtail { + ($a:literal, $b:literal, $k:literal, $s:literal, $i:literal) => { + c!( + "ld.w $t5, $a3," $i " * 4;" + "ld.w $t6, $a1," $k " * 4;" + "add.w " $a "," $a ", $t5;" + "add.w " $a "," $a ", $t6;" + "add.w " $a "," $a ", $t4;" + "rotri.w " $a "," $a ", 32 -" $s ";" + "add.w " $a "," $a "," $b ";" + ) + } +} + +pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) { + if blocks.is_empty() { + return; + } + + unsafe { + asm!( + // Load state + "ld.w $t0, $a0, 0", + "ld.w $t1, $a0, 4", + "ld.w $t2, $a0, 8", + "ld.w $t3, $a0, 12", + + "42:", + + "move $a4, $t0", + "move $a5, $t1", + "move $a6, $t2", + "move $a7, $t3", + + /* 64 rounds of hashing */ + round0!("$t0", "$t1", "$t2", "$t3", 0, 7, 0), + round0!("$t3", "$t0", "$t1", "$t2", 1, 12, 1), + round0!("$t2", "$t3", "$t0", "$t1", 2, 17, 2), + round0!("$t1", "$t2", "$t3", "$t0", 3, 22, 3), + round0!("$t0", "$t1", "$t2", "$t3", 4, 7, 4), + round0!("$t3", "$t0", "$t1", "$t2", 5, 12, 5), + round0!("$t2", "$t3", "$t0", "$t1", 6, 17, 6), + round0!("$t1", "$t2", "$t3", "$t0", 7, 22, 7), + round0!("$t0", "$t1", "$t2", "$t3", 8, 7, 8), + round0!("$t3", "$t0", "$t1", "$t2", 9, 12, 9), + round0!("$t2", "$t3", "$t0", "$t1", 10, 17, 10), + round0!("$t1", "$t2", "$t3", "$t0", 11, 22, 11), + round0!("$t0", "$t1", "$t2", "$t3", 12, 7, 12), + round0!("$t3", "$t0", "$t1", "$t2", 13, 12, 13), + round0!("$t2", "$t3", "$t0", "$t1", 14, 17, 14), + round0!("$t1", "$t2", "$t3", "$t0", 15, 22, 15), + round1!("$t0", "$t1", "$t2", "$t3", 1, 5, 16), + round1!("$t3", "$t0", "$t1", "$t2", 6, 9, 17), + round1!("$t2", "$t3", "$t0", "$t1", 11, 14, 18), + round1!("$t1", "$t2", "$t3", "$t0", 0, 20, 19), + round1!("$t0", "$t1", "$t2", "$t3", 5, 5, 20), + round1!("$t3", "$t0", "$t1", "$t2", 10, 9, 21), + round1!("$t2", "$t3", "$t0", "$t1", 15, 14, 22), + round1!("$t1", "$t2", "$t3", "$t0", 4, 20, 23), + round1!("$t0", "$t1", "$t2", "$t3", 9, 5, 24), + round1!("$t3", "$t0", "$t1", "$t2", 14, 9, 25), + round1!("$t2", "$t3", "$t0", "$t1", 3, 14, 26), + round1!("$t1", "$t2", "$t3", "$t0", 8, 20, 27), + round1!("$t0", "$t1", "$t2", "$t3", 13, 5, 28), + round1!("$t3", "$t0", "$t1", "$t2", 2, 9, 29), + round1!("$t2", "$t3", "$t0", "$t1", 7, 14, 30), + round1!("$t1", "$t2", "$t3", "$t0", 12, 20, 31), + round2!("$t0", "$t1", "$t2", "$t3", 5, 4, 32), + round2!("$t3", "$t0", "$t1", "$t2", 8, 11, 33), + round2!("$t2", "$t3", "$t0", "$t1", 11, 16, 34), + round2!("$t1", "$t2", "$t3", "$t0", 14, 23, 35), + round2!("$t0", "$t1", "$t2", "$t3", 1, 4, 36), + round2!("$t3", "$t0", "$t1", "$t2", 4, 11, 37), + round2!("$t2", "$t3", "$t0", "$t1", 7, 16, 38), + round2!("$t1", "$t2", "$t3", "$t0", 10, 23, 39), + round2!("$t0", "$t1", "$t2", "$t3", 13, 4, 40), + round2!("$t3", "$t0", "$t1", "$t2", 0, 11, 41), + round2!("$t2", "$t3", "$t0", "$t1", 3, 16, 42), + round2!("$t1", "$t2", "$t3", "$t0", 6, 23, 43), + round2!("$t0", "$t1", "$t2", "$t3", 9, 4, 44), + round2!("$t3", "$t0", "$t1", "$t2", 12, 11, 45), + round2!("$t2", "$t3", "$t0", "$t1", 15, 16, 46), + round2!("$t1", "$t2", "$t3", "$t0", 2, 23, 47), + round3!("$t0", "$t1", "$t2", "$t3", 0, 6, 48), + round3!("$t3", "$t0", "$t1", "$t2", 7, 10, 49), + round3!("$t2", "$t3", "$t0", "$t1", 14, 15, 50), + round3!("$t1", "$t2", "$t3", "$t0", 5, 21, 51), + round3!("$t0", "$t1", "$t2", "$t3", 12, 6, 52), + round3!("$t3", "$t0", "$t1", "$t2", 3, 10, 53), + round3!("$t2", "$t3", "$t0", "$t1", 10, 15, 54), + round3!("$t1", "$t2", "$t3", "$t0", 1, 21, 55), + round3!("$t0", "$t1", "$t2", "$t3", 8, 6, 56), + round3!("$t3", "$t0", "$t1", "$t2", 15, 10, 57), + round3!("$t2", "$t3", "$t0", "$t1", 6, 15, 58), + round3!("$t1", "$t2", "$t3", "$t0", 13, 21, 59), + round3!("$t0", "$t1", "$t2", "$t3", 4, 6, 60), + round3!("$t3", "$t0", "$t1", "$t2", 11, 10, 61), + round3!("$t2", "$t3", "$t0", "$t1", 2, 15, 62), + round3!("$t1", "$t2", "$t3", "$t0", 9, 21, 63), + + "add.w $t0, $t0, $a4", + "add.w $t1, $t1, $a5", + "add.w $t2, $t2, $a6", + "add.w $t3, $t3, $a7", + + // Looping over blocks + "addi.d $a1, $a1, 64", + "addi.d $a2, $a2, -1", + "bnez $a2, 42b", + + // Save updated state + "st.w $t0, $a0, 0", + "st.w $t1, $a0, 4", + "st.w $t2, $a0, 8", + "st.w $t3, $a0, 12", + + in("$a0") state, + inout("$a1") blocks.as_ptr() => _, + inout("$a2") blocks.len() => _, + + in("$a3") RC.as_ptr(), + + // Clobbers + out("$a4") _, + out("$a5") _, + out("$a6") _, + out("$a7") _, + out("$t0") _, + out("$t1") _, + out("$t2") _, + out("$t3") _, + out("$t4") _, + out("$t5") _, + out("$t6") _, + + options(preserves_flags), + ); + } +}