Skip to content

Commit 7b0ab68

Browse files
committed
s390x: Emit instructions for bitwise FP ops
Cranelift requires that bitwise operations work across all data types, including floating point. The prior s390x implementation of bitwise operations (xor, in this example) would cause a panic with the message "no rule matched for term aluop_xor". This patch adds lowerings for bitwise operations on floating-point registers, using the vector instructions and the vector register overlay property of the s390x register file.
1 parent 5730c76 commit 7b0ab68

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,10 @@
997997
(rule (lower (has_type (vr128_ty ty) (bnot x)))
998998
(vec_not ty x))
999999

1000+
;; Scalar float version: vector NOR of the value with itself, using the vector register overlay.
1001+
(rule 5 (lower (has_type (ty_scalar_float _) (bnot x)))
1002+
(vec_not $F64X2 x))
1003+
10001004
;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the
10011005
;; (bxor _ (bnot _)) lowering.
10021006
(rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y))))
@@ -1033,6 +1037,10 @@
10331037
(rule 0 (lower (has_type (vr128_ty ty) (band x y)))
10341038
(vec_and ty x y))
10351039

1040+
;; AND two scalar float registers, using the vector register overlay.
1041+
(rule 11 (lower (has_type (ty_scalar_float _) (band x y)))
1042+
(vec_and $F64X2 x y))
1043+
10361044
;; Specialized lowerings for `(band x (bnot y))` which is additionally produced
10371045
;; by Cranelift's `band_not` instruction that is legalized into the simpler
10381046
;; forms early on.
@@ -1075,6 +1083,10 @@
10751083
(rule 0 (lower (has_type (vr128_ty ty) (bor x y)))
10761084
(vec_or ty x y))
10771085

1086+
;; OR two scalar float registers, using the vector register overlay.
1087+
(rule 11 (lower (has_type (ty_scalar_float _) (bor x y)))
1088+
(vec_or $F64X2 x y))
1089+
10781090
;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced
10791091
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
10801092
;; forms early on.
@@ -1114,6 +1126,10 @@
11141126
(rule 0 (lower (has_type (vr128_ty ty) (bxor x y)))
11151127
(vec_xor ty x y))
11161128

1129+
;; XOR two scalar float registers, using the vector register overlay.
1130+
(rule 9 (lower (has_type (ty_scalar_float _) (bxor x y)))
1131+
(vec_xor $F64X2 x y))
1132+
11171133
;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced
11181134
;; by Cranelift's `bxor_not` instruction that is legalized into the simpler
11191135
;; forms early on.

cranelift/filetests/filetests/isa/s390x/bitwise.clif

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,3 +1076,67 @@ block0(v0: i32x4, v1: i32x4):
10761076
; vnx %v24, %v24, %v25
10771077
; br %r14
10781078

1079+
function %bnot_f64(f64) -> f64 {
1080+
block0(v0: f64):
1081+
v1 = bnot v0
1082+
return v1
1083+
}
1084+
1085+
; VCode:
1086+
; block0:
1087+
; vno %v0, %v0, %v0
1088+
; br %r14
1089+
;
1090+
; Disassembled:
1091+
; block0: ; offset 0x0
1092+
; vno %v0, %v0, %v0
1093+
; br %r14
1094+
1095+
function %band_f64(f64, f64) -> f64 {
1096+
block0(v0: f64, v1: f64):
1097+
v2 = band v0, v1
1098+
return v2
1099+
}
1100+
1101+
; VCode:
1102+
; block0:
1103+
; vn %v0, %v0, %v2
1104+
; br %r14
1105+
;
1106+
; Disassembled:
1107+
; block0: ; offset 0x0
1108+
; vn %v0, %v0, %v2
1109+
; br %r14
1110+
1111+
function %bor_f64(f64, f64) -> f64 {
1112+
block0(v0: f64, v1: f64):
1113+
v2 = bor v0, v1
1114+
return v2
1115+
}
1116+
1117+
; VCode:
1118+
; block0:
1119+
; vo %v0, %v0, %v2
1120+
; br %r14
1121+
;
1122+
; Disassembled:
1123+
; block0: ; offset 0x0
1124+
; vo %v0, %v0, %v2
1125+
; br %r14
1126+
1127+
function %bxor_f64(f64, f64) -> f64 {
1128+
block0(v0: f64, v1: f64):
1129+
v2 = bxor v0, v1
1130+
return v2
1131+
}
1132+
1133+
; VCode:
1134+
; block0:
1135+
; vx %v0, %v0, %v2
1136+
; br %r14
1137+
;
1138+
; Disassembled:
1139+
; block0: ; offset 0x0
1140+
; vx %v0, %v0, %v2
1141+
; br %r14
1142+

0 commit comments

Comments
 (0)