Skip to content

Commit efdf778

Browse files
authored
Merge pull request #1373 from Xilinx/fix/dyn_mvau_swidth
Update dynamic MVAU stream width
2 parents eae226d + 1b826a2 commit efdf778

File tree

3 files changed

+29
-5
lines changed

3 files changed

+29
-5
lines changed

src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,8 @@ def read_npy_data(self):
267267
wdt = self.get_input_datatype(1)
268268
elem_bits = wdt.bitwidth()
269269
packed_bits = self.get_instream_width(1)
270+
if self.get_nodeattr("dynamic_input"):
271+
packed_bits = packed_bits * self.get_nodeattr("SIMD")
270272
packed_hls_type = "ap_uint<%d>" % packed_bits
271273
elem_hls_type = wdt.get_hls_datatype_str()
272274
npy_type = "float"
@@ -294,8 +296,11 @@ def strm_decl(self):
294296
)
295297

296298
if mem_mode == "internal_decoupled" or mem_mode == "external":
299+
iwidth = self.get_instream_width(1)
300+
if self.get_nodeattr("dynamic_input"):
301+
iwidth = iwidth * self.get_nodeattr("SIMD")
297302
self.code_gen_dict["$STREAMDECLARATIONS$"].append(
298-
'hls::stream<ap_uint<{}>> in1_V ("in1_V");'.format(self.get_instream_width(1))
303+
'hls::stream<ap_uint<{}>> in1_V ("in1_V");'.format(iwidth)
299304
)
300305

301306
def docompute(self):
@@ -391,6 +396,9 @@ def blackboxfunction(self):
391396
)
392397
]
393398
elif mem_mode == "internal_decoupled" or mem_mode == "external":
399+
wwidth = self.get_instream_width(1)
400+
if self.get_nodeattr("dynamic_input"):
401+
wwidth = wwidth * self.get_nodeattr("SIMD")
394402
self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
395403
"""void {}(
396404
hls::stream<ap_uint<{}>> &in0_V,
@@ -399,7 +407,7 @@ def blackboxfunction(self):
399407
)""".format(
400408
self.onnx_node.name,
401409
self.get_instream_width(0),
402-
self.get_instream_width(1),
410+
wwidth,
403411
self.get_outstream_width(),
404412
)
405413
]
@@ -464,9 +472,11 @@ def get_ap_int_max_w(self):
464472
max_of_io = super().get_ap_int_max_w()
465473
# internal_decoupled mode weight stream
466474
weightstream = self.get_instream_width(1)
475+
simd = self.get_nodeattr("SIMD")
476+
if self.get_nodeattr("dynamic_input"):
477+
weightstream = weightstream * simd
467478
# single PE weight entry
468479
weight_bits = self.get_input_datatype(1).bitwidth()
469-
simd = self.get_nodeattr("SIMD")
470480
single_pe_w = simd * weight_bits
471481
return max([weightstream, max_of_io, single_pe_w])
472482

@@ -543,6 +553,8 @@ def execute_node(self, context, graph):
543553

544554
if dynamic_input or mem_mode in ["external", "internal_decoupled"]:
545555
wnbits = self.get_instream_width(1)
556+
if self.get_nodeattr("dynamic_input"):
557+
wnbits = wnbits * self.get_nodeattr("SIMD")
546558
export_wdt = self.get_input_datatype(1)
547559

548560
# we have converted bipolar weights to binary for export,

src/finn/custom_op/fpgadataflow/matrixvectoractivation.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,11 @@ def get_instream_width(self, ind=0):
266266
i_bits = self.get_input_datatype(0).bitwidth()
267267
width = i_bits * self.get_nodeattr("SIMD")
268268
elif ind == 1:
269-
if (
269+
if self.get_nodeattr("dynamic_input"):
270+
width = (
271+
self.get_folded_input_shape(ind)[-1] * self.get_input_datatype(ind).bitwidth()
272+
)
273+
elif (
270274
self.get_nodeattr("mem_mode") == "internal_decoupled"
271275
or self.get_nodeattr("mem_mode") == "external"
272276
):
@@ -726,6 +730,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
726730
elif weight_file_mode == "decoupled_verilog_dat":
727731
# convert weight values into hexstring
728732
weight_width = self.get_instream_width(1)
733+
if self.get_nodeattr("dynamic_input"):
734+
weight_width = weight_width * simd
729735
# pad to nearest 4 bits to get hex strings
730736
weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
731737
weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string(
@@ -756,6 +762,8 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
756762
# memstream axi-lite interface will map each mem line to
757763
# one or multiple 32-bit words
758764
weight_width = self.get_instream_width(1)
765+
if self.get_nodeattr("dynamic_input"):
766+
weight_width = weight_width * simd
759767
words_per_memwidth = 2 ** math.ceil(math.log2(weight_width / 32))
760768
if words_per_memwidth < 1:
761769
words_per_memwidth = 1
@@ -906,7 +914,9 @@ def get_verilog_top_module_intf_names(self):
906914
dynamic_input = self.get_nodeattr("dynamic_input")
907915
mem_mode = self.get_nodeattr("mem_mode")
908916
if dynamic_input:
909-
intf_names["s_axis"].append(("in1_V", self.get_instream_width_padded(1)))
917+
weight_width = self.get_instream_width(1)
918+
weight_width = weight_width * self.get_nodeattr("SIMD")
919+
intf_names["s_axis"].append(("in1_V", roundup_to_integer_multiple(weight_width, 8)))
910920
else:
911921
if mem_mode == "external":
912922
intf_names["s_axis"].append(("in1_V", self.get_instream_width_padded(1)))

src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,8 @@ def execute_node(self, context, graph):
104104

105105
if dynamic_input or mem_mode in ["external", "internal_decoupled"]:
106106
wnbits = self.get_instream_width(1)
107+
if dynamic_input:
108+
wnbits = wnbits * self.get_nodeattr("SIMD")
107109
export_wdt = self.get_input_datatype(1)
108110

109111
wei = npy_to_rtlsim_input("{}/input_1.npy".format(code_gen_dir), export_wdt, wnbits)

0 commit comments

Comments
 (0)