Skip to content

Commit

Permalink
md5: Add inline-asm backend for LoongArch64 targets
Browse files Browse the repository at this point in the history
  • Loading branch information
heiher committed Sep 21, 2023
1 parent 70d304f commit f75cfdb
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 0 deletions.
3 changes: 3 additions & 0 deletions md5/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,8 @@ hex-literal = "0.2.2"
default = ["std"]
std = ["digest/std"]
asm = ["md5-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates
# Use assembly backend for LoongArch64 targets
# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates
loongarch64_asm = []
oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57
force-soft = [] # Force software implementation
3 changes: 3 additions & 0 deletions md5/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cfg_if::cfg_if! {
} else if #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] {
extern crate md5_asm;
pub use md5_asm::compress;
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
mod loongarch64_asm;
pub use loongarch64_asm::compress;
} else {
mod soft;
pub use soft::compress;
Expand Down
206 changes: 206 additions & 0 deletions md5/src/compress/loongarch64_asm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
//! LoongArch64 assembly backend

use core::arch::asm;

/// Per-step additive constants for the 64 MD5 steps, grouped by round.
///
/// `compress` passes the address of this table to the assembly in `$a3`, and
/// each step loads entry `i` (its step number, 0..=63) with `ld.w $t5, $a3, i * 4`.
const RC: [u32; 64] = [
// round 1 (steps 0..=15)
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
// round 2 (steps 16..=31)
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
// round 3 (steps 32..=47)
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
// round 4 (steps 48..=63)
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
];

// Glues a whitespace-separated run of expressions (string literals, numeric
// literals, or nested macro calls that expand to literals) into a single
// `&'static str` by handing them, comma-separated, to `concat!`.
macro_rules! c {
    ($($piece:expr)*) => {
        concat!($($piece),*)
    };
}

// One step of round 1.
//
// Arguments: `$a` is the register being updated, `$b`/`$c`/`$d` are the other
// three state registers, `$k` is the message word index, `$s` the left-rotate
// amount and `$i` the index into `RC`.
//
// Leaves `((c ^ d) & b) ^ d` in `$t4`, which is equivalent to
// `(b & c) | (!b & d)`, then hands over to `roundtail!` to finish the step.
macro_rules! round0 {
($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
c!(
// t4 = c ^ d
"xor $t4," $c "," $d ";"
// t4 = (c ^ d) & b
"and $t4, $t4," $b ";"
// t4 = ((c ^ d) & b) ^ d
"xor $t4, $t4," $d ";"
roundtail!($a, $b, $k, $s, $i)
)
}
}

// One step of round 2.
//
// Arguments have the same meaning as for `round0!`.
//
// Leaves `(c & !d) | (d & b)` in `$t4`, then hands over to `roundtail!`.
// `$t5` is used as a scratch register here; that is fine because `roundtail!`
// reloads `$t5` before reading it.
macro_rules! round1 {
($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
c!(
// t4 = c & !d
"andn $t4," $c "," $d ";"
// t5 = d & b
"and $t5," $d "," $b ";"
// t4 = (c & !d) | (d & b)
"or $t4, $t4, $t5;"
roundtail!($a, $b, $k, $s, $i)
)
}
}

// One step of round 3.
//
// Arguments have the same meaning as for `round0!`.
//
// Leaves `b ^ c ^ d` in `$t4`, then hands over to `roundtail!`.
macro_rules! round2 {
($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
c!(
// t4 = c ^ d
"xor $t4," $c "," $d ";"
// t4 = c ^ d ^ b
"xor $t4, $t4," $b ";"
roundtail!($a, $b, $k, $s, $i)
)
}
}

// One step of round 4.
//
// Arguments have the same meaning as for `round0!`.
//
// Leaves `(b | !d) ^ c` in `$t4`, then hands over to `roundtail!`.
macro_rules! round3 {
($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
c!(
// t4 = b | !d
"orn $t4," $b "," $d ";"
// t4 = (b | !d) ^ c
"xor $t4, $t4," $c ";"
roundtail!($a, $b, $k, $s, $i)
)
}
}

// Common tail shared by all four round macros.
//
// Expects the round's boolean function result in `$t4`, the current block
// pointer in `$a1` and the `RC` table pointer in `$a3`. Computes
//
//     a = b + rotate_left(a + RC[i] + block_word[k] + t4, s)
//
// using 32-bit (`.w`) arithmetic. Overwrites `$t5` and `$t6`.
macro_rules! roundtail {
($a:literal, $b:literal, $k:literal, $s:literal, $i:literal) => {
c!(
// t5 = RC[i]
"ld.w $t5, $a3," $i " * 4;"
// t6 = 32-bit word k of the current block
"ld.w $t6, $a1," $k " * 4;"
"add.w " $a "," $a ", $t5;"
"add.w " $a "," $a ", $t6;"
"add.w " $a "," $a ", $t4;"
// Rotating right by (32 - s) is a left rotation by s.
"rotri.w " $a "," $a ", 32 -" $s ";"
"add.w " $a "," $a "," $b ";"
)
}
}

/// Runs the MD5 compression function over `blocks`, updating `state` in place.
///
/// The four state words are loaded into registers once, every 64-byte block
/// is mixed in (in slice order) by a hand-written LoongArch64 assembly loop,
/// and the resulting words are stored back to `state` at the end.
///
/// Returns immediately, leaving `state` untouched, when `blocks` is empty.
pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) {
// The assembly loop below executes its body before testing the block
// counter, so an empty slice has to be rejected up front.
if blocks.is_empty() {
return;
}

// SAFETY:
// - `$a0` is derived from `state: &mut [u32; 4]`, so the four `ld.w`/`st.w`
//   accesses at offsets 0, 4, 8 and 12 stay inside the array.
// - `$a1` starts at `blocks.as_ptr()` and advances by 64 once per iteration;
//   the loop runs exactly `blocks.len()` times (non-zero, checked above) and
//   each step reads at offset `k * 4` with `k` in 0..=15, i.e. inside the
//   current 64-byte block.
// - `$a3` points at `RC` and is read at offset `i * 4` with `i` in 0..=63.
// - Every register the assembly writes is declared as an output or clobber.
// NOTE(review): the block data is only byte-aligned while it is read with
// `ld.w`; this presumably relies on the target permitting unaligned word
// loads — confirm for the supported LoongArch64 targets.
unsafe {
asm!(
// Load state
"ld.w $t0, $a0, 0",
"ld.w $t1, $a0, 4",
"ld.w $t2, $a0, 8",
"ld.w $t3, $a0, 12",

// Top of the per-block loop.
"42:",

// Keep a copy of the incoming state; it is added back after the 64 steps.
"move $a4, $t0",
"move $a5, $t1",
"move $a6, $t2",
"move $a7, $t3",

/* 64 rounds of hashing */
// Macro arguments: (a, b, c, d, message word index, left-rotate amount, RC index).
// The register roles rotate by one position on every step.
round0!("$t0", "$t1", "$t2", "$t3", 0, 7, 0),
round0!("$t3", "$t0", "$t1", "$t2", 1, 12, 1),
round0!("$t2", "$t3", "$t0", "$t1", 2, 17, 2),
round0!("$t1", "$t2", "$t3", "$t0", 3, 22, 3),
round0!("$t0", "$t1", "$t2", "$t3", 4, 7, 4),
round0!("$t3", "$t0", "$t1", "$t2", 5, 12, 5),
round0!("$t2", "$t3", "$t0", "$t1", 6, 17, 6),
round0!("$t1", "$t2", "$t3", "$t0", 7, 22, 7),
round0!("$t0", "$t1", "$t2", "$t3", 8, 7, 8),
round0!("$t3", "$t0", "$t1", "$t2", 9, 12, 9),
round0!("$t2", "$t3", "$t0", "$t1", 10, 17, 10),
round0!("$t1", "$t2", "$t3", "$t0", 11, 22, 11),
round0!("$t0", "$t1", "$t2", "$t3", 12, 7, 12),
round0!("$t3", "$t0", "$t1", "$t2", 13, 12, 13),
round0!("$t2", "$t3", "$t0", "$t1", 14, 17, 14),
round0!("$t1", "$t2", "$t3", "$t0", 15, 22, 15),
round1!("$t0", "$t1", "$t2", "$t3", 1, 5, 16),
round1!("$t3", "$t0", "$t1", "$t2", 6, 9, 17),
round1!("$t2", "$t3", "$t0", "$t1", 11, 14, 18),
round1!("$t1", "$t2", "$t3", "$t0", 0, 20, 19),
round1!("$t0", "$t1", "$t2", "$t3", 5, 5, 20),
round1!("$t3", "$t0", "$t1", "$t2", 10, 9, 21),
round1!("$t2", "$t3", "$t0", "$t1", 15, 14, 22),
round1!("$t1", "$t2", "$t3", "$t0", 4, 20, 23),
round1!("$t0", "$t1", "$t2", "$t3", 9, 5, 24),
round1!("$t3", "$t0", "$t1", "$t2", 14, 9, 25),
round1!("$t2", "$t3", "$t0", "$t1", 3, 14, 26),
round1!("$t1", "$t2", "$t3", "$t0", 8, 20, 27),
round1!("$t0", "$t1", "$t2", "$t3", 13, 5, 28),
round1!("$t3", "$t0", "$t1", "$t2", 2, 9, 29),
round1!("$t2", "$t3", "$t0", "$t1", 7, 14, 30),
round1!("$t1", "$t2", "$t3", "$t0", 12, 20, 31),
round2!("$t0", "$t1", "$t2", "$t3", 5, 4, 32),
round2!("$t3", "$t0", "$t1", "$t2", 8, 11, 33),
round2!("$t2", "$t3", "$t0", "$t1", 11, 16, 34),
round2!("$t1", "$t2", "$t3", "$t0", 14, 23, 35),
round2!("$t0", "$t1", "$t2", "$t3", 1, 4, 36),
round2!("$t3", "$t0", "$t1", "$t2", 4, 11, 37),
round2!("$t2", "$t3", "$t0", "$t1", 7, 16, 38),
round2!("$t1", "$t2", "$t3", "$t0", 10, 23, 39),
round2!("$t0", "$t1", "$t2", "$t3", 13, 4, 40),
round2!("$t3", "$t0", "$t1", "$t2", 0, 11, 41),
round2!("$t2", "$t3", "$t0", "$t1", 3, 16, 42),
round2!("$t1", "$t2", "$t3", "$t0", 6, 23, 43),
round2!("$t0", "$t1", "$t2", "$t3", 9, 4, 44),
round2!("$t3", "$t0", "$t1", "$t2", 12, 11, 45),
round2!("$t2", "$t3", "$t0", "$t1", 15, 16, 46),
round2!("$t1", "$t2", "$t3", "$t0", 2, 23, 47),
round3!("$t0", "$t1", "$t2", "$t3", 0, 6, 48),
round3!("$t3", "$t0", "$t1", "$t2", 7, 10, 49),
round3!("$t2", "$t3", "$t0", "$t1", 14, 15, 50),
round3!("$t1", "$t2", "$t3", "$t0", 5, 21, 51),
round3!("$t0", "$t1", "$t2", "$t3", 12, 6, 52),
round3!("$t3", "$t0", "$t1", "$t2", 3, 10, 53),
round3!("$t2", "$t3", "$t0", "$t1", 10, 15, 54),
round3!("$t1", "$t2", "$t3", "$t0", 1, 21, 55),
round3!("$t0", "$t1", "$t2", "$t3", 8, 6, 56),
round3!("$t3", "$t0", "$t1", "$t2", 15, 10, 57),
round3!("$t2", "$t3", "$t0", "$t1", 6, 15, 58),
round3!("$t1", "$t2", "$t3", "$t0", 13, 21, 59),
round3!("$t0", "$t1", "$t2", "$t3", 4, 6, 60),
round3!("$t3", "$t0", "$t1", "$t2", 11, 10, 61),
round3!("$t2", "$t3", "$t0", "$t1", 2, 15, 62),
round3!("$t1", "$t2", "$t3", "$t0", 9, 21, 63),

// Add the state saved at the top of the loop back into the working registers.
"add.w $t0, $t0, $a4",
"add.w $t1, $t1, $a5",
"add.w $t2, $t2, $a6",
"add.w $t3, $t3, $a7",

// Looping over blocks
// Advance to the next 64-byte block and count one block down.
"addi.d $a1, $a1, 64",
"addi.d $a2, $a2, -1",
"bnez $a2, 42b",

// Save updated state
"st.w $t0, $a0, 0",
"st.w $t1, $a0, 4",
"st.w $t2, $a0, 8",
"st.w $t3, $a0, 12",

// $a0: pointer to the four state words (never modified by the assembly).
in("$a0") state,
// $a1: pointer to the current block; modified by the loop, final value discarded.
inout("$a1") blocks.as_ptr() => _,
// $a2: number of blocks still to process; counted down to zero.
inout("$a2") blocks.len() => _,

// $a3: pointer to the constant table (never modified by the assembly).
in("$a3") RC.as_ptr(),

// Clobbers
// $a4-$a7: state saved at the top of each iteration.
out("$a4") _,
out("$a5") _,
out("$a6") _,
out("$a7") _,
// $t0-$t3: working state words.
out("$t0") _,
out("$t1") _,
out("$t2") _,
out("$t3") _,
// $t4-$t6: per-step scratch (boolean function result, constant, message word).
out("$t4") _,
out("$t5") _,
out("$t6") _,

options(preserves_flags),
);
}
}

0 comments on commit f75cfdb

Please sign in to comment.