Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions src/finn/qnn-data/templates/driver/driver_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,13 @@ def load_external_weights(self):
# weight_buf.sync_to_device()
weight_buf.flush()

self.external_weights += [(iwdma, weight_buf, idma_name)]
input_shape = self._io_shape_dict["external_weights_input_shapes"][idma_name]
# A 4-D shape is assumed to be NHWC: the weights are then streamed once per spatial position (H * W)
if len(input_shape) == 4:
num_repeats = input_shape[1] * input_shape[2]
else:
num_repeats = 1
self.external_weights += [(iwdma, weight_buf, idma_name, num_repeats)]

if "number_of_external_weights" in self._io_shape_dict:
hw_ext_weights = self._io_shape_dict["number_of_external_weights"]
Expand Down Expand Up @@ -351,9 +357,9 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
for o in range(self.num_outputs):
assert self.odma[o].read(0x00) & 0x4 != 0, "Output DMA %d is not idle" % (o)
# manually launch IODMAs since signatures are missing
for iwdma, iwbuf, iwdma_name in self.external_weights:
for iwdma, iwbuf, iwdma_name, num_repeats in self.external_weights:
iwdma.write(0x10, iwbuf.device_address)
iwdma.write(0x1C, batch_size)
iwdma.write(0x1C, batch_size * num_repeats)
iwdma.write(0x00, 1)
for o in range(self.num_outputs):
self.odma[o].write(0x10, self.obuf_packed_device[o].device_address)
Expand All @@ -368,8 +374,8 @@ def execute_on_buffers(self, asynch=False, batch_size=None):
assert self.odma_handle[o] is None, "Output DMA %d is already running" % o
for i in range(self.num_inputs):
self.idma[i].start(self.ibuf_packed_device[i], batch_size)
for iwdma, iwbuf, iwdma_name in self.external_weights:
iwdma.start(iwbuf, batch_size)
for iwdma, iwbuf, iwdma_name, num_repeats in self.external_weights:
iwdma.start(iwbuf, batch_size * num_repeats)
for o in range(self.num_outputs):
self.odma_handle[o] = self.odma[o].start(self.obuf_packed_device[o], batch_size)
else:
Expand Down Expand Up @@ -437,9 +443,9 @@ def throughput_test(self):
for o in range(self.num_outputs):
total_out += np.prod(self.oshape_packed(o))
res["DRAM_out_bandwidth[MB/s]"] = total_out * 0.000001 / runtime
for iwdma, iwbuf, iwdma_name in self.external_weights:
for iwdma, iwbuf, iwdma_name, num_repeats in self.external_weights:
res["DRAM_extw_%s_bandwidth[MB/s]" % iwdma_name] = (
self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime
self.batch_size * np.prod(iwbuf.shape) * num_repeats * 0.000001 / runtime
)
if self.platform == "zynq-iodma":
res["fclk[mhz]"] = Clocks.fclk0_mhz
Expand Down
6 changes: 3 additions & 3 deletions src/finn/qnn-data/templates/driver/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@
driver.execute_on_buffers()
obuf_normal = np.empty_like(driver.obuf_packed_device[0])
driver.copy_output_data_from_device(obuf_normal)
ret = np.bincount(obuf_normal.flatten() == exp.flatten())
nok += ret[0]
ok += ret[1]
batch_ok = (obuf_normal.flatten() == exp.flatten()).sum()
ok += batch_ok
nok += bsize - batch_ok
print("batch %d / %d : total OK %d NOK %d" % (i + 1, n_batches, ok, nok))

acc = 100.0 * ok / (total)
Expand Down
116 changes: 116 additions & 0 deletions src/finn/qnn-data/test_ext_weights/cnv-w2a2-extw.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"Defaults": {},
"Thresholding_rtl_0": {
"PE": 3
},
"ConvolutionInputGenerator_rtl_0": {
"SIMD": 3,
"ram_style": "distributed"
},
"MVAU_hls_0": {
"PE": 8,
"SIMD": 3,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_1": {
"PE": 1
},
"ConvolutionInputGenerator_rtl_1": {
"SIMD": 16,
"ram_style": "distributed"
},
"MVAU_hls_1": {
"PE": 16,
"SIMD": 16,
"mem_mode": "external"
},
"Thresholding_rtl_2": {
"PE": 1
},
"StreamingMaxPool_hls_0": {
"PE": 1
},
"ConvolutionInputGenerator_rtl_2": {
"SIMD": 16,
"ram_style": "distributed"
},
"MVAU_hls_2": {
"PE": 8,
"SIMD": 16,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_3": {
"PE": 1
},
"ConvolutionInputGenerator_rtl_3": {
"SIMD": 16,
"ram_style": "distributed"
},
"MVAU_hls_3": {
"PE": 8,
"SIMD": 16,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_4": {
"PE": 1
},
"StreamingMaxPool_hls_1": {
"PE": 1
},
"ConvolutionInputGenerator_rtl_4": {
"SIMD": 8,
"ram_style": "distributed"
},
"MVAU_hls_4": {
"PE": 4,
"SIMD": 8,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_5": {
"PE": 1
},
"ConvolutionInputGenerator_rtl_5": {
"SIMD": 8,
"ram_style": "distributed"
},
"MVAU_hls_5": {
"PE": 1,
"SIMD": 8,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_6": {
"PE": 1
},
"MVAU_hls_6": {
"PE": 1,
"SIMD": 2,
"ram_style": "block",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_7": {
"PE": 1
},
"MVAU_hls_7": {
"PE": 2,
"SIMD": 2,
"ram_style": "auto",
"mem_mode": "internal_decoupled"
},
"Thresholding_rtl_8": {
"PE": 1
},
"MVAU_hls_8": {
"PE": 5,
"SIMD": 1,
"ram_style": "distributed",
"mem_mode": "internal_decoupled"
},
"LabelSelect_hls_0": {
"PE": 1
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"Defaults": {},
"Thresholding_0": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_0": {
"preferred_impl_style": ""
},
"MVAU_0": {
"preferred_impl_style": "hls"
},
"Thresholding_1": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_1": {
"preferred_impl_style": ""
},
"MVAU_1": {
"preferred_impl_style": "hls"
},
"Thresholding_2": {
"preferred_impl_style": ""
},
"StreamingMaxPool_0": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_2": {
"preferred_impl_style": ""
},
"MVAU_2": {
"preferred_impl_style": "hls"
},
"Thresholding_3": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_3": {
"preferred_impl_style": ""
},
"MVAU_3": {
"preferred_impl_style": "hls"
},
"Thresholding_4": {
"preferred_impl_style": ""
},
"StreamingMaxPool_1": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_4": {
"preferred_impl_style": ""
},
"MVAU_4": {
"preferred_impl_style": "hls"
},
"Thresholding_5": {
"preferred_impl_style": ""
},
"ConvolutionInputGenerator_5": {
"preferred_impl_style": ""
},
"MVAU_5": {
"preferred_impl_style": "hls"
},
"Thresholding_6": {
"preferred_impl_style": ""
},
"MVAU_6": {
"preferred_impl_style": "hls"
},
"Thresholding_7": {
"preferred_impl_style": ""
},
"MVAU_7": {
"preferred_impl_style": "hls"
},
"Thresholding_8": {
"preferred_impl_style": ""
},
"MVAU_8": {
"preferred_impl_style": "hls"
},
"LabelSelect_0": {
"preferred_impl_style": ""
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"preferred_impl_style": "rtl"
},
"MVAU_0": {
"preferred_impl_style": "rtl"
"preferred_impl_style": "hls"
},
"Thresholding_1": {
"preferred_impl_style": "rtl"
Expand All @@ -16,13 +16,13 @@
"preferred_impl_style": "rtl"
},
"MVAU_2": {
"preferred_impl_style": "rtl"
"preferred_impl_style": "hls"
},
"Thresholding_3": {
"preferred_impl_style": "rtl"
},
"MVAU_3": {
"preferred_impl_style": "rtl"
"preferred_impl_style": "hls"
},
"LabelSelect_0": {
"preferred_impl_style": "hls"
Expand Down
8 changes: 4 additions & 4 deletions src/finn/qnn-data/test_ext_weights/tfc-w2a2-extw.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,31 @@
"Thresholding_rtl_0": {
"PE": 49
},
"MVAU_rtl_0": {
"MVAU_hls_0": {
"PE": 16,
"SIMD": 49,
"ram_style": "block"
},
"Thresholding_rtl_1": {
"PE": 16
},
"MVAU_hls_0": {
"MVAU_hls_1": {
"PE": 8,
"SIMD": 8,
"mem_mode": "external"
},
"Thresholding_rtl_2": {
"PE": 8
},
"MVAU_rtl_1": {
"MVAU_hls_2": {
"PE": 8,
"SIMD": 8,
"mem_mode": "external"
},
"Thresholding_rtl_3": {
"PE": 8
},
"MVAU_rtl_2": {
"MVAU_hls_3": {
"PE": 10,
"SIMD": 8,
"ram_style": "distributed"
Expand Down
11 changes: 11 additions & 0 deletions src/finn/transformation/fpgadataflow/make_pynq_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def apply(self, model):
os.makedirs(weights_dir)
idma_idx = 0
ext_weight_dma_cnt = 0
ext_weight_shapes_dict = {}

for node in model.graph.node:
assert (
Expand All @@ -236,6 +237,15 @@ def apply(self, model):
assert df_model.graph.node[0].op_type == "IODMA_hls"
iodma_node = getCustomOp(df_model.graph.node[0])
if iodma_node.get_nodeattr("burstMode") == "wrap": # input weights dma?
dma_sdp_output = sdp_inst.onnx_node.output[0]
dma_target_sdp = getCustomOp(model.find_consumer(dma_sdp_output))
dma_target_model = ModelWrapper(dma_target_sdp.get_nodeattr("model"))
iodma_output_tensor = iodma_node.onnx_node.output[0]
dma_consumer = dma_target_model.find_consumer(iodma_output_tensor)
ext_weight_shapes_dict[idma_name] = dma_target_model.get_tensor_shape(
dma_consumer.output[0]
)

init_tensor = df_model.get_initializer(iodma_node.onnx_node.input[0])
ext_weight_dma_cnt += 1
w_dtype = df_model.get_tensor_datatype(iodma_node.onnx_node.input[0])
Expand All @@ -261,6 +271,7 @@ def apply(self, model):
driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))
driver = driver.replace("$EXT_WEIGHT_INPUT_SHAPES$", str(ext_weight_shapes_dict))

with open(driver_py, "w") as f:
f.write(driver)
Expand Down
1 change: 1 addition & 0 deletions src/finn/transformation/fpgadataflow/template_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"input_dma_name" : $INPUT_DMA_NAME$,
"output_dma_name" : $OUTPUT_DMA_NAME$,
"number_of_external_weights": $EXT_WEIGHT_NUM$,
"external_weights_input_shapes": $EXT_WEIGHT_INPUT_SHAPES$,
"num_inputs" : $NUM_INPUTS$,
"num_outputs" : $NUM_OUTPUTS$,
}
Expand Down
Loading