Skip to content

Commit 3d58db8

Browse files
committed
Armv7-M: Default to .w for better alignment
Currently, we keep the instruction-width modifiers as they appear in the input code (with a few exceptions). However, this can negatively impact performance, as SLOTHY may break code alignment. This commit changes the Armv7-M instruction writer to output .w for all instructions, which results in the best performance (modulo the size of the instruction cache). Unfortunately, LLVM (in the selftest) stumbles over some of these .w modifiers in places where they have no effect. To work around that, we remove the modifiers for the selftest.
1 parent 6142139 commit 3d58db8

File tree

2 files changed

+26
-6
lines changed

2 files changed

+26
-6
lines changed

slothy/helper.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1212,12 +1212,28 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa
12121212
include=include_paths)
12131213
except subprocess.CalledProcessError as exc:
12141214
log.error("CPreprocessor failed on the following input")
1215-
log.error(SouceLine.write_multiline(source))
1215+
log.error(SourceLine.write_multiline(source))
12161216
raise LLVM_Mc_Error from exc
12171217

12181218
if platform.system() == "Darwin":
12191219
source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source))
12201220

1221+
1222+
# Remove all width information - LLVM cannot handle .w for
1223+
# some instructions that only have a 32-bit encoding,
1224+
# e.g., uadd16.w works in gcc, but not LLVM.
1225+
# Unfortunately, for some instructions this depends
1226+
# on the registers used and, hence, adjusting the input to
1227+
# SLOTHY is not sufficient.
1228+
# Since we currently don't have a model of the instruction encodings,
1229+
# there is no principled way to reason about it.
1230+
if thumb:
1231+
for line in source:
1232+
instruction = line.text
1233+
instruction = instruction.replace(".w ", " ")
1234+
instruction = instruction.replace(".n ", " ")
1235+
line.set_text(instruction)
1236+
12211237
code = SourceLine.write_multiline(source)
12221238

12231239
log.debug(f"Calling LLVM MC assmelber on the following code")
@@ -1585,7 +1601,7 @@ def extract(source, lbl, forced_loop_type=None):
15851601
"""
15861602
Find a loop with start label `lbl` in `source` and return it together
15871603
with its type.
1588-
1604+
15891605
Args:
15901606
source: list of SourceLine objects
15911607
lbl: label of the loop to extract

slothy/targets/arm_v7m/arch_v7m.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
277277
# if new_fixup != 0:
278278
# yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}"
279279
if fixup != 0:
280-
yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
280+
yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
281281
#if new_fixup != 0 or fixup != 0:
282282
if fixup != 0:
283283
yield f"{indent}vmov {self.additional_data['endf']}, {self.additional_data['end']}"
@@ -383,7 +383,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
383383
yield f"{indent}vmov {loop_end_reg}, {loop_end_reg_fpr}"
384384

385385
if fixup != 0:
386-
yield f"{indent}sub {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}"
386+
yield f"{indent}sub.w {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}"
387387

388388
if fixup != 0 and loop_end_reg_fpr is not None:
389389
yield f"{indent}vmov {loop_end_reg_fpr}, {loop_end_reg}"
@@ -457,7 +457,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
457457
# yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}"
458458

459459
if fixup != 0:
460-
yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
460+
yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
461461

462462
if jump_if_empty is not None:
463463
yield f"cbz {loop_cnt}, {jump_if_empty}"
@@ -499,7 +499,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
499499
assert unroll in [1,2,4,8,16,32]
500500
yield f"{indent}lsr {loop_cnt}, {loop_cnt}, #{int(math.log2(unroll))}"
501501
if fixup != 0:
502-
yield f"{indent}sub {loop_cnt}, {loop_cnt}, #{fixup}"
502+
yield f"{indent}sub.w {loop_cnt}, {loop_cnt}, #{fixup}"
503503
if jump_if_empty is not None:
504504
yield f"cbz {loop_cnt}, {jump_if_empty}"
505505
yield f"{self.lbl_start}:"
@@ -1079,6 +1079,10 @@ def make(cls, src):
10791079
return Armv7mInstruction.build(cls, src)
10801080

10811081
def write(self):
1082+
# Default to .w for all instructions for better performance
1083+
# TODO: find a more principled way to do this
1084+
self.width = ".w"
1085+
10821086
out = self.pattern
10831087
l = list(zip(self.args_in, self.pattern_inputs)) + \
10841088
list(zip(self.args_out, self.pattern_outputs)) + \

0 commit comments

Comments
 (0)