odin-lang
diff --git a/‎core/crypto/_fiat/field_p256r1/field.odin‎
Lines changed: 345 additions & 0 deletions b/‎core/crypto/_fiat/field_p256r1/field.odin‎
Lines changed: 345 additions & 0 deletions
@@ -0,0 +1,345 @@
+package field_p256r1
+
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+// fe_clear wipes the field element pointed to by `arg1` by overwriting
+// it with zeroes via `mem.zero_explicit`.
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+// fe_clear_vec wipes every field element referenced by the slice `arg1`,
+// in order, by calling `fe_clear` on each pointer.
+fe_clear_vec :: proc "contextless" (
+	arg1: []^Montgomery_Domain_Field_Element,
+) {
+	for i in 0 ..< len(arg1) {
+		fe_clear(arg1[i])
+	}
+}
+
+// fe_from_bytes decodes the 32-byte big-endian encoding `arg1` into
+// `out1` (converted into the Montgomery domain), and returns true iff
+// the decode succeeded.
+//
+// Unless `unsafe_assume_canonical` is set, values outside of the range
+// [0, ELL) are rejected: false is returned and `out1` is left untouched.
+//
+// The length of `arg1` MUST be exactly 32 bytes.
+fe_from_bytes :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: []byte,
+	unsafe_assume_canonical := false,
+) -> bool {
+	ensure_contextless(len(arg1) == 32, "p256r1: invalid fe input buffer")
+
+	// Note: We assume the input is in big-endian.  Limb 0 is the least
+	// significant limb, so it is loaded from the trailing 8 bytes, and
+	// the bytes within each limb are likewise big-endian.
+	tmp := Non_Montgomery_Domain_Field_Element {
+		endian.unchecked_get_u64be(arg1[24:]),
+		endian.unchecked_get_u64be(arg1[16:]),
+		endian.unchecked_get_u64be(arg1[8:]),
+		endian.unchecked_get_u64be(arg1[0:]),
+	}
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	// Check that tmp is in the range [0, ELL), by computing
+	// `(ELL - 1) - tmp` limb-wise, which borrows iff `tmp >= ELL`.
+	if !unsafe_assume_canonical {
+		_, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0)
+		_, borrow = bits.sub_u64(ELL[1], tmp[1], borrow)
+		_, borrow = bits.sub_u64(ELL[2], tmp[2], borrow)
+		_, borrow = bits.sub_u64(ELL[3], tmp[3], borrow)
+		if borrow != 0 {
+			return false
+		}
+	}
+
+	fe_to_montgomery(out1, &tmp)
+
+	return true
+}
+
+// fe_to_bytes converts `arg1` out of the Montgomery domain and writes
+// its 32-byte big-endian encoding to `out1`.
+//
+// The length of `out1` MUST be exactly 32 bytes.
+fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
+	ensure_contextless(len(out1) == 32, "p256r1: invalid fe output buffer")
+
+	tmp: Non_Montgomery_Domain_Field_Element
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	fe_from_montgomery(&tmp, arg1)
+
+	// Note: Likewise, output in big-endian.  The least significant limb
+	// goes into the trailing 8 bytes, with big-endian bytes per limb.
+	endian.unchecked_put_u64be(out1[24:], tmp[0])
+	endian.unchecked_put_u64be(out1[16:], tmp[1])
+	endian.unchecked_put_u64be(out1[8:], tmp[2])
+	endian.unchecked_put_u64be(out1[0:], tmp[3])
+}
+
+// fe_equal returns 1 iff `arg1 == arg2`, and 0 otherwise.
+@(require_results)
+fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
+	diff: Montgomery_Domain_Field_Element
+	defer fe_clear(&diff)
+
+	fe_sub(&diff, arg1, arg2)
+	nz := fe_non_zero(&diff)
+
+	// `nz - 1` underflows iff `nz == 0`, which is the case iff the
+	// difference is zero (arg1 == arg2), so the borrow out of the
+	// subtraction is exactly the result we want.
+	_, underflow := bits.sub_u64(nz, 1, 0)
+
+	return int(underflow)
+}
+
+// fe_is_odd returns 1 iff `arg1` is odd once converted out of the
+// Montgomery domain, and 0 otherwise.
+@(require_results)
+fe_is_odd :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> int {
+	canonical: Non_Montgomery_Domain_Field_Element
+	fe_from_montgomery(&canonical, arg1)
+
+	// The parity is the low bit of the least significant limb.
+	parity := int(canonical[0] & 1)
+
+	mem.zero_explicit(&canonical, size_of(canonical))
+
+	return parity
+}
+
+// fe_pow2k sets `out1 = arg1^(2^arg2)`, by squaring `arg1` a total of
+// `arg2` times.  Callers in this file pass the same element as both
+// `out1` and `arg1`.
+fe_pow2k :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^Montgomery_Domain_Field_Element,
+	arg2: uint,
+) {
+	// Special case: `arg1^(2^0) = arg1^1 = arg1`, though this should
+	// never happen.  Zero squarings must leave the value unchanged, not
+	// produce 1.
+	if arg2 == 0 {
+		fe_set(out1, arg1)
+		return
+	}
+
+	// The first squaring moves the value into `out1`, the remaining
+	// `arg2 - 1` squarings are done in place.
+	fe_square(out1, arg1)
+	for _ in 1 ..< arg2 {
+		fe_square(out1, out1)
+	}
+}
+
+// fe_inv sets `out1 = arg1^e`, where `e` is
+// 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd,
+// using a fixed sequence of squarings and multiplications.
+// NOTE(review): `e` is presumably `p - 2`, i.e. this is inversion via
+// Fermat's little theorem -- confirm against the field's prime.
+//
+// `out1` and `arg1` may alias, as `arg1` is copied before `out1` is
+// written to.  All temporaries are cleared before returning.
+fe_inv :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	// Inversion computation is derived from the addition chain:
+	//
+	//	_10     = 2*1
+	//	_11     = 1 + _10
+	//	_110    = 2*_11
+	//	_111    = 1 + _110
+	//	_111000 = _111 << 3
+	//	_111111 = _111 + _111000
+	//	x12     = _111111 << 6 + _111111
+	//	x15     = x12 << 3 + _111
+	//	x16     = 2*x15 + 1
+	//	x32     = x16 << 16 + x16
+	//	i53     = x32 << 15
+	//	x47     = x15 + i53
+	//	i263    = ((i53 << 17 + 1) << 143 + x47) << 47
+	//	return    (x47 + i263) << 2
+	//
+	// Operations: 255 squares 11 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+	//
+	// Note: The chain as written above ends at an exponent of `...fffc`.
+	// The code below performs one additional multiplication by `x` after
+	// the final shift (12 multiplies in total), to reach `...fffd`.
+
+	// Note: Need to stash `arg1` (`xx`) in the case that `out1`/`arg1` alias,
+	// as `arg1` is used after `out1` has been altered.
+	t0, t1, xx: Montgomery_Domain_Field_Element = ---, ---, arg1^
+
+	// Step 1: z = x^0x2
+	fe_square(out1, arg1)
+
+	// Step 2: z = x^0x3
+	fe_mul(out1, &xx, out1)
+
+	// Step 3: z = x^0x6
+	fe_square(out1, out1)
+
+	// Step 4: z = x^0x7
+	fe_mul(out1, &xx, out1)
+
+	// Step 7: t0 = x^0x38
+	fe_pow2k(&t0, out1, 3)
+
+	// Step 8: t0 = x^0x3f
+	fe_mul(&t0, out1, &t0)
+
+	// Step 14: t1 = x^0xfc0
+	fe_pow2k(&t1, &t0, 6)
+
+	// Step 15: t0 = x^0xfff
+	fe_mul(&t0, &t0, &t1)
+
+	// Step 18: t0 = x^0x7ff8
+	fe_pow2k(&t0, &t0, 3)
+
+	// Step 19: z = x^0x7fff
+	fe_mul(out1, out1, &t0)
+
+	// Step 20: t0 = x^0xfffe
+	fe_square(&t0, out1)
+
+	// Step 21: t0 = x^0xffff
+	fe_mul(&t0, &xx, &t0)
+
+	// Step 37: t1 = x^0xffff0000
+	fe_pow2k(&t1, &t0, 16)
+
+	// Step 38: t0 = x^0xffffffff
+	fe_mul(&t0, &t0, &t1)
+
+	// Step 53: t0 = x^0x7fffffff8000
+	fe_pow2k(&t0, &t0, 15)
+
+	// Step 54: z = x^0x7fffffffffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 71: t0 = x^0xffffffff00000000
+	fe_pow2k(&t0, &t0, 17)
+
+	// Step 72: t0 = x^0xffffffff00000001
+	fe_mul(&t0, &xx, &t0)
+
+	// Step 215: t0 = x^0x7fffffff80000000800000000000000000000000000000000000
+	fe_pow2k(&t0, &t0, 143)
+
+	// Step 216: t0 = x^0x7fffffff800000008000000000000000000000007fffffffffff
+	fe_mul(&t0, out1, &t0)
+
+	// Step 263: t0 = x^0x3fffffffc00000004000000000000000000000003fffffffffff800000000000
+	fe_pow2k(&t0, &t0, 47)
+
+	// Step 264: z = x^0x3fffffffc00000004000000000000000000000003fffffffffffffffffffffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 266: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc
+	fe_pow2k(out1, out1, 2)
+
+	// Step 267: z = x^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd
+	fe_mul(out1, out1, &xx)
+
+	// Wipe the temporaries, including the stashed copy of the input.
+	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &t1, &xx})
+}
+
+// fe_sqrt attempts to compute a square root of `arg1`.
+//
+// If `arg1` is a square, `out1` is set to a square root of `arg1` and 1
+// is returned.  Otherwise `out1` is set to zero and 0 is returned.
+//
+// `out1` and `arg1` may alias, as `arg1` is copied before `out1` is
+// written to.  The temporaries are cleared before returning.
+@(require_results)
+fe_sqrt :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) -> int {
+	// Square root candidate can be derived via exponentiation by `(p + 1) / 4`
+	// From sage: 28948022302589062190674361737351893382521535853822578548883407827216774463488
+	//
+	// The exponentiation is derived from the addition chain:
+	//
+	//	_10       = 2*1
+	//	_11       = 1 + _10
+	//	_1100     = _11 << 2
+	//	_1111     = _11 + _1100
+	//	_11110000 = _1111 << 4
+	//	_11111111 = _1111 + _11110000
+	//	x16       = _11111111 << 8 + _11111111
+	//	x32       = x16 << 16 + x16
+	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
+	//
+	// Operations: 253 squares 7 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+
+	// Likewise this tramples over arg1, so stash another copy.
+	t0, xx: Montgomery_Domain_Field_Element = ---, arg1^
+
+	// Step 1: z = x^0x2
+	fe_square(out1, arg1)
+
+	// Step 2: z = x^0x3
+	fe_mul(out1, &xx, out1)
+
+	// Step 4: t0 = x^0xc
+	//
+	// Note: This is `_11 << 2`, so the value being squared MUST be
+	// `z = x^0x3`, and not `x` (which would yield `x^0x4`).
+	fe_pow2k(&t0, out1, 2)
+
+	// Step 5: z = x^0xf
+	fe_mul(out1, out1, &t0)
+
+	// Step 9: t0 = x^0xf0
+	fe_pow2k(&t0, out1, 4)
+
+	// Step 10: z = x^0xff
+	fe_mul(out1, out1, &t0)
+
+	// Step 18: t0 = x^0xff00
+	fe_pow2k(&t0, out1, 8)
+
+	// Step 19: z = x^0xffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 35: t0 = x^0xffff0000
+	fe_pow2k(&t0, out1, 16)
+
+	// Step 36: z = x^0xffffffff
+	fe_mul(out1, out1, &t0)
+
+	// Step 68: z = x^0xffffffff00000000
+	fe_pow2k(out1, out1, 32)
+
+	// Step 69: z = x^0xffffffff00000001
+	fe_mul(out1, &xx, out1)
+
+	// Step 165: z = x^0xffffffff00000001000000000000000000000000
+	fe_pow2k(out1, out1, 96)
+
+	// Step 166: z = x^0xffffffff00000001000000000000000000000001
+	fe_mul(out1, &xx, out1)
+
+	// Step 260: z = x^0x3fffffffc0000000400000000000000000000000400000000000000000000000
+	fe_pow2k(out1, out1, 94)
+
+	// Ensure that our candidate is actually the square root, by squaring
+	// it and comparing against the (stashed) input.
+	check, zero: Montgomery_Domain_Field_Element
+	fe_square(&check, out1)
+
+	// Keep the candidate iff it is valid, otherwise replace it with zero.
+	is_valid := fe_equal(&check, &xx)
+	fe_cond_select(out1, &zero, out1, is_valid)
+
+	fe_clear_vec([]^Montgomery_Domain_Field_Element{&t0, &xx, &check})
+
+	return is_valid
+}
+
+// fe_zero sets all four limbs of `out1` to 0.
+fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0], out1[1], out1[2], out1[3] = 0, 0, 0, 0
+}
+
+// fe_set copies the four limbs of `arg1` into `out1`.  All limbs are
+// read before any limb is written.
+fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	l0, l1, l2, l3 := arg1[0], arg1[1], arg1[2], arg1[3]
+	out1[0], out1[1], out1[2], out1[3] = l0, l1, l2, l3
+}
+
+// fe_cond_swap exchanges the contents of `out1` and `out2` when `arg1`
+// is 1, and leaves both unchanged when `arg1` is 0, without branching
+// on `arg1`.
+//
+// NOTE(review): `arg1` is presumably always 0 or 1; any other value
+// does not produce an all-zero/all-one mask -- confirm against callers.
+// NOTE(review): optimizations and inlining are disabled, presumably so
+// that the compiler can not turn the masking into a branch -- confirm.
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Montgomery_Domain_Field_Element, arg1: int) {
+	// mask is all ones when arg1 == 1, and all zeroes when arg1 == 0.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	// Per limb: x is `a ~ b` if swapping, 0 otherwise, so `a ~ x` and
+	// `b ~ x` yield either (b, a) or (a, b).
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	// Write back only after every limb has been computed.
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+}
+
+// fe_cond_select sets `out1` to `arg2` when `arg3` is 1, and to `arg1`
+// when `arg3` is 0, without branching on `arg3`.  All input limbs are
+// read before `out1` is written, and `fe_sqrt` in this file passes
+// `out1` as one of the inputs.
+//
+// NOTE(review): `arg3` is presumably always 0 or 1; any other value
+// does not produce an all-zero/all-one mask -- confirm against callers.
+// NOTE(review): optimizations and inlining are disabled, presumably so
+// that the compiler can not turn the masking into a branch -- confirm.
+@(optimization_mode = "none")
+fe_cond_select :: #force_no_inline proc "contextless" (
+	out1, arg1, arg2: ^Montgomery_Domain_Field_Element,
+	arg3: int,
+) {
+	// mask is all ones when arg3 == 1, and all zeroes when arg3 == 0.
+	mask := (u64(arg3) * 0xffffffffffffffff)
+	// Per limb: keep arg2 where mask is set, arg1 where it is clear.
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
+
+// fe_cond_negate sets `out1` to the result of `fe_opp` applied to `arg1`
+// when `ctrl` is 1, and to `arg1` when `ctrl` is 0.  The selection is
+// delegated to `fe_cond_select`, and the temporary is cleared on exit.
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element, ctrl: int) {
+	negated: Montgomery_Domain_Field_Element = ---
+	defer fe_clear(&negated)
+
+	fe_opp(&negated, arg1)
+	fe_cond_select(out1, arg1, &negated, ctrl)
+}