Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New upsampler integration #1202

Draft
wants to merge 8 commits into
base: dev
Choose a base branch
from
4 changes: 2 additions & 2 deletions fetch-repos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851"
BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4"
PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1"
CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3"
HLSLIB_COMMIT="2e4adf1f34d71cc76a87da189e7eefcff94ebd9f"
OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a"
AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e"
Expand All @@ -45,7 +45,7 @@ FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git"
BREVITAS_URL="https://github.com/Xilinx/brevitas.git"
PYVERILATOR_URL="https://github.com/maltanar/pyverilator.git"
CNPY_URL="https://github.com/rogersce/cnpy.git"
HLSLIB_URL="https://github.com/Xilinx/finn-hlslib.git"
HLSLIB_URL="https://github.com/mdanilow/finn-hlslib.git"
OMX_URL="https://github.com/maltanar/oh-my-xilinx.git"
AVNET_BDF_URL="https://github.com/Avnet/bdf.git"
XIL_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git"
Expand Down
151 changes: 119 additions & 32 deletions src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import numpy as np

from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend
from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour
from finn.custom_op.fpgadataflow import templates
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy


Expand Down Expand Up @@ -58,48 +60,40 @@ def global_includes(self):
def defines(self, var):
self.code_gen_dict["$DEFINES$"] = []

ifm_ch = self.get_nodeattr("NumChannels")
self.code_gen_dict["$DEFINES$"] += ["#define IFMChannels {}".format(ifm_ch)]
HI = self.get_nodeattr("HI")
self.code_gen_dict["$DEFINES$"] += ["#define HI {}".format(HI)]

ibits = self.get_input_datatype().bitwidth()
self.code_gen_dict["$DEFINES$"] += ["#define Input_precision {}".format(ibits)]
WI = self.get_nodeattr("WI")
self.code_gen_dict["$DEFINES$"] += ["#define WI {}".format(WI)]

idim = self.get_nodeattr("IFMDim")
self.code_gen_dict["$DEFINES$"] += ["#define IFMDim {}".format(idim)]
HO = self.get_nodeattr("HO")
self.code_gen_dict["$DEFINES$"] += ["#define HO {}".format(HO)]

odim = self.get_nodeattr("OFMDim")
self.code_gen_dict["$DEFINES$"] += ["#define OFMDim {}".format(odim)]
WO = self.get_nodeattr("WO")
self.code_gen_dict["$DEFINES$"] += ["#define WO {}".format(WO)]

batch_size = self.get_nodeattr("numInputVectors")
self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(batch_size)]
SIMD = self.get_nodeattr("SIMD")
self.code_gen_dict["$DEFINES$"] += ["#define SIMD {}".format(SIMD)]

CF = self.get_nodeattr("NumChannels") // SIMD
self.code_gen_dict["$DEFINES$"] += ["#define CF {}".format(CF)]

def docompute(self):
is_2d = self.get_nodeattr("DimMode") == 0
batch = self.get_nodeattr("numInputVectors")
if is_2d:
self.code_gen_dict["$DOCOMPUTE$"] = [
"""UpsampleNearestNeighbour_Batch<OFMDim, IFMDim, IFMChannels,
ap_uint<Input_precision> > (in0_%s, out_%s, numReps);"""
% (self.hls_sname(), self.hls_sname())
]
else:
assert batch == 1, "1D upsampler currently needs numReps=1"
self.code_gen_dict["$DOCOMPUTE$"] = [
"""UpsampleNearestNeighbour_1D<OFMDim, IFMDim, IFMChannels,
ap_uint<Input_precision> > (in0_%s, out_%s);"""
% (self.hls_sname(), self.hls_sname())
]
self.code_gen_dict["$DOCOMPUTE$"] = [
"""upsample_nn<HI, WI, HO, WO, CF>(in0_%s, out_%s);"""
% (self.hls_sname(), self.hls_sname())
]

def blackboxfunction(self):
packed_bits = self.get_instream_width()
packed_hls_type = "ap_uint<%d>" % packed_bits
input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str()
output_elem_hls_type = self.get_output_datatype().get_hls_datatype_str()
self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
"void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)"
"void %s(hls::stream<hls::vector<%s, SIMD>> &in0_%s, hls::stream<hls::vector<%s, SIMD>> &out_%s)"
% (
self.onnx_node.name,
packed_hls_type,
input_elem_hls_type,
self.hls_sname(),
packed_hls_type,
output_elem_hls_type,
self.hls_sname(),
)
]
Expand All @@ -109,7 +103,6 @@ def execute_node(self, context, graph):
node = self.onnx_node
exp_ishape = self.get_normal_input_shape()
exp_oshape = self.get_normal_output_shape()
folded_oshape = self.get_folded_output_shape()

if mode == "cppsim":
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
Expand Down Expand Up @@ -138,7 +131,7 @@ def execute_node(self, context, graph):
# load output npy file
super().npy_to_dynamic_output(context)
assert (
context[node.output[0]].shape == folded_oshape
context[node.output[0]].shape == exp_oshape
), "cppsim did not produce expected folded output shape"
context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
elif mode == "rtlsim":
Expand Down Expand Up @@ -173,3 +166,97 @@ def execute_node(self, context, graph):
context[node.output[0]].shape == exp_oshape
), """Output shape doesn't match expected shape
(1, OutputDim, OutputDim, NumChannels)."""

# def code_generation_cppsim(self, model):
# """Generates c++ code for simulation (cppsim)."""
# node = self.onnx_node
# path = self.get_nodeattr("code_gen_dir_cppsim")
# self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())]
# self.generate_params(model, path)
# self.global_includes()
# self.defines("cppsim")
# self.read_npy_data()
# self.strm_decl()
# self.pragmas()
# self.docompute()
# self.dataoutstrm()
# self.save_as_npy()
# self.timeout_value()
# self.timeout_condition()
# self.timeout_read_stream()

# template = templates.docompute_template_timeout

# for key in self.code_gen_dict:
# # transform list into long string separated by '\n'
# code_gen_line = "\n".join(self.code_gen_dict[key])
# template = template.replace(key, code_gen_line)
# code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
# f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w")
# f.write(template)
# f.close()
# self.code_gen_dict.clear()

def read_npy_data(self):
    """Generate the statement that loads the input .npy file into the input stream.

    Stores a single ``npy2vectorstream`` call under the ``$READNPYDATA$`` key
    of ``self.code_gen_dict``. The call reads ``input_0.npy`` from the
    directory given by the ``code_gen_dir_cppsim`` node attribute and targets
    the stream named ``in0_<hls_sname>``.
    """
    sim_dir = self.get_nodeattr("code_gen_dir_cppsim")
    statements = self.code_gen_dict["$READNPYDATA$"] = []
    elem_hls_type = self.get_input_datatype().get_hls_datatype_str()
    input_file = "%s/input_0.npy" % (sim_dir)
    # Template arguments: HLS element type, then "float" (presumably the
    # element type stored in the .npy file -- TODO confirm), then SIMD.
    read_call = 'npy2vectorstream<%s, %s, SIMD>("%s", in0_%s);' % (
        elem_hls_type,
        "float",
        input_file,
        self.hls_sname(),
    )
    statements.append(read_call)

def dataoutstrm(self):
    """Generate the statement that writes the output stream to a .npy file.

    Stores a single ``vectorstream2npy`` call under the ``$DATAOUTSTREAM$``
    key of ``self.code_gen_dict``. The call drains ``out_<hls_sname>`` into
    ``output.npy`` inside the ``code_gen_dir_cppsim`` directory, passing the
    folded output shape as a C++ brace initializer.
    """
    sim_dir = self.get_nodeattr("code_gen_dir_cppsim")
    folded_shape = self.get_folded_output_shape()
    # Render the Python shape as a C++ initializer list: "(a, b)" -> "{a, b}".
    shape_initializer = str(folded_shape).replace("(", "{").replace(")", "}")
    output_file = "%s/output.npy" % sim_dir
    write_call = 'vectorstream2npy<%s, %s, SIMD>(out_%s, %s, "%s");' % (
        self.get_output_datatype().get_hls_datatype_str(),
        "float",
        self.hls_sname(),
        shape_initializer,
        output_file,
    )
    self.code_gen_dict["$DATAOUTSTREAM$"] = [write_call]

def strm_decl(self):
    """Generate the C++ declarations of the streams used by the generated code.

    Fills the ``$STREAMDECLARATIONS$`` key of ``self.code_gen_dict`` with
    three ``hls::stream<hls::vector<T, SIMD>>`` declarations, in this order:
    ``in0_<hls_sname>`` (input element type), then ``out_<hls_sname>`` and
    ``debug_out_<hls_sname>`` (both with the output element type).
    """
    declarations = self.code_gen_dict["$STREAMDECLARATIONS$"] = []
    # Each entry pairs a declaration format with the accessor that provides
    # the element datatype for that stream.
    stream_specs = (
        (
            'hls::stream<hls::vector<{}, SIMD>> in0_{} ("in0_{}");',
            self.get_input_datatype,
        ),
        (
            'hls::stream<hls::vector<{}, SIMD>> out_{} ("out_{}");',
            self.get_output_datatype,
        ),
        (
            'hls::stream<hls::vector<{}, SIMD>> debug_out_{} ("debug_out_{}");',
            self.get_output_datatype,
        ),
    )
    for decl_format, datatype_getter in stream_specs:
        declarations.append(
            decl_format.format(
                datatype_getter().get_hls_datatype_str(),
                self.hls_sname(),
                self.hls_sname(),
            )
        )

def pragmas(self):
    """Extend the parent class pragmas with stream and dataflow pragmas.

    After the parent implementation has run, three lines are appended to the
    ``$PRAGMAS$`` entry of ``self.code_gen_dict``: an ``aggregate ...
    compact=bit`` pragma for ``in0_<hls_sname>``, the same for
    ``out_<hls_sname>``, and ``dataflow disable_start_propagation``.
    """
    super().pragmas()
    # Assumes the parent call created the "$PRAGMAS$" list; otherwise this
    # lookup raises KeyError, exactly like a direct append would.
    pragma_lines = self.code_gen_dict["$PRAGMAS$"]
    pragma_lines.extend(
        [
            "#pragma HLS aggregate variable=in0_%s compact=bit" % self.hls_sname(),
            "#pragma HLS aggregate variable=out_%s compact=bit" % self.hls_sname(),
            "#pragma HLS dataflow disable_start_propagation",
        ]
    )

14 changes: 14 additions & 0 deletions src/finn/custom_op/fpgadataflow/hlsbackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,3 +474,17 @@ def get_ap_int_max_w(self):
ret = max([instream, outstream])
assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret
return ret

def timeout_value(self):
    """Set the value substituted for the ``$TIMEOUT_VALUE$`` placeholder.

    The value is the fixed string "100". It is intended for HLS functions
    defined for one clock cycle.
    """
    timeout_limit = "100"
    self.code_gen_dict["$TIMEOUT_VALUE$"] = [timeout_limit]

def timeout_condition(self):
    """Set the expression substituted for the ``$TIMEOUT_CONDITION$`` placeholder.

    The generated condition is ``out_<hls_sname>.empty()``. It is intended
    for HLS functions defined for one clock cycle.
    """
    stream_suffix = self.hls_sname()
    empty_check = "out_{}.empty()".format(stream_suffix)
    self.code_gen_dict["$TIMEOUT_CONDITION$"] = [empty_check]

def timeout_read_stream(self):
    """Set the statement substituted for the ``$TIMEOUT_READ_STREAM$`` placeholder.

    The generated statement reads one element from ``out_<hls_sname>`` and
    pushes it into ``debug_out_<hls_sname>``. It is intended for HLS
    functions defined for one clock cycle.
    """
    read_statement = "debug_out_{} << out_{}.read();".format(
        self.hls_sname(), self.hls_sname()
    )
    self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [read_statement]
46 changes: 46 additions & 0 deletions src/finn/custom_op/fpgadataflow/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#define AP_INT_MAX_W $AP_INT_MAX_W$
#include "cnpy.h"
#include "npy2apintstream.hpp"
#include "npy2vectorstream.hpp"
#include <vector>
#include "bnn-library.h"

Expand All @@ -58,6 +59,51 @@

"""

# Template for single node execution with a timeout (for single clock hls
# operations). Every $NAME$ token is a placeholder to be substituted before
# the text is compiled as C++.
# Structure of the generated main():
#   - $DOCOMPUTE$ is invoked repeatedly inside a while loop.
#   - While $TIMEOUT_CONDITION$ holds, the `timeout` counter is incremented.
#   - Otherwise $TIMEOUT_READ_STREAM$ runs and the counter is reset to 0.
#   - The loop ends once the counter reaches $TIMEOUT_VALUE$, after which
#     $DATAOUTSTREAM$ and $SAVEASCNPY$ are emitted.
docompute_template_timeout = """
#define AP_INT_MAX_W $AP_INT_MAX_W$
#include "cnpy.h"
#include "npy2apintstream.hpp"
#include "npy2vectorstream.hpp"
#include <vector>
#include "bnn-library.h"

// includes for network parameters
$GLOBALS$

// defines for network parameters
$DEFINES$

int main(){
$PRAGMAS$

$STREAMDECLARATIONS$

$READNPYDATA$

unsigned timeout = 0;
while(timeout < $TIMEOUT_VALUE$){

$DOCOMPUTE$

if($TIMEOUT_CONDITION$){
timeout++;
}

else{
$TIMEOUT_READ_STREAM$
timeout = 0;
}
}

$DATAOUTSTREAM$

$SAVEASCNPY$

}

"""

# templates for single node ip generation

# cpp file
Expand Down
Loading