
Commit 35e94d0

Merge branch 'main' into keras-v3
Parents: 8284757 + 3d4c8f3


66 files changed: +1700, -295 lines

.pre-commit-config.yaml (+1, -1)

@@ -41,7 +41,7 @@ repos:
         args: ["--py310-plus"]
 
   - repo: https://github.com/pycqa/flake8
-    rev: 7.1.2
+    rev: 7.2.0
     hooks:
     - id: flake8
       exclude: docs/conf.py

CITATION.cff (+2, -1)

@@ -4,7 +4,8 @@ type: software
 authors:
   - given-names: "FastML Team"
 title: "hls4ml"
-version: "v1.0.0"
+version: "v1.1.0"
+date-released: "2025-03-17"
 doi: 10.5281/zenodo.1201549
 repository-code: "https://github.com/fastmachinelearning/hls4ml"
 url: "https://fastmachinelearning.org/hls4ml"

README.md (+2, -2)

@@ -73,9 +73,9 @@ If you use this software in a publication, please cite the software
 @software{fastml_hls4ml,
   author       = {{FastML Team}},
   title        = {fastmachinelearning/hls4ml},
-  year         = 2024,
+  year         = 2025,
   publisher    = {Zenodo},
-  version      = {v1.0.0},
+  version      = {v1.1.0},
   doi          = {10.5281/zenodo.1201549},
   url          = {https://github.com/fastmachinelearning/hls4ml}
 }

docs/advanced/extension.rst (+2, -2)

@@ -5,9 +5,9 @@ Extension API
 ``hls4ml`` natively supports a large number of neural network layers.
 But what if a desired layer is not supported?
 If it is standard enough and its implementation would benefit the community as a whole, we would welcome a contribution to add it to the standard set of supported layers.
-However, if it is a somewhat niche custom layer, there is another approach we can take to extend hls4ml through the *extension API*.
+However, if it is a somewhat niche custom layer, there is another approach we can take to extend hls4ml through the *extension API*. This feature is supported for both Keras and PyTorch layers.
 
-This documentation will walk through a complete `complete end-to-end example <https://github.com/fastmachinelearning/hls4ml/blob/main/test/pytest/test_extensions.py>`_, which is part of our testing suite.
+Complete end-to-end examples are available for both `Keras <https://github.com/fastmachinelearning/hls4ml/blob/main/test/pytest/test_extensions.py>`_ and `PyTorch <https://github.com/fastmachinelearning/hls4ml/blob/main/test/pytest/test_extensions_pytorch.py>`_ as part of our testing suite. The description here uses the Keras example.
 To implement a custom layer in ``hls4ml`` with the extension API, the required components are:
 
 * Your custom layer class

docs/intro/setup.rst (+38, -18)

@@ -20,14 +20,8 @@ If you want to use our :doc:`profiling <../advanced/profiling>` toolbox, you mig
 
    pip install hls4ml[profiling]
 
-``hls4ml`` is also available as a ``conda`` package in the ``conda-forge`` repository. To install, run:
-
 .. warning::
-   Version of hls4ml available on ``conda-forge`` is outdated, we recommend installing with ``pip`` to get the latest version.
-
-.. code-block::
-
-      conda install -c conda-forge hls4ml
+   Previously, versions of hls4ml were made available on ``conda-forge``. These are outdated and should NOT be used. Installing with ``pip`` is currently the only supported method.
 
 Development version
 -------------------

@@ -90,29 +84,55 @@ Here we give line-by-line instructions to demonstrate the general workflow.
 .. code-block:: python
 
    import hls4ml
+   import tensorflow as tf
+   from tensorflow.keras.layers import Activation, Dense
+
+   # Construct a basic keras model
+   model = tf.keras.models.Sequential()
+   model.add(Dense(64, input_shape=(16,), name='Dense', kernel_initializer='lecun_uniform', kernel_regularizer=None))
+   model.add(Activation(activation='elu', name='Activation'))
+   model.add(Dense(32, name='Dense2', kernel_initializer='lecun_uniform', kernel_regularizer=None))
+   model.add(Activation(activation='elu', name='Activation2'))
+
+   # This is where you would train the model in a real-world scenario
 
-   # Fetch a keras model from our example repository
-   # This will download our example model to your working directory and return an example configuration file
-   config = hls4ml.utils.fetch_example_model('KERAS_3layer.json')
+   # Generate an hls configuration from the keras model
+   config = hls4ml.utils.config_from_keras_model(model)
 
-   # You can print it to see some default parameters
+   # You can print the config to see some default parameters
    print(config)
 
-   # Convert it to a hls project
-   hls_model = hls4ml.converters.keras_to_hls(config)
+   # Convert the model to an hls project using the config
+   hls_model = hls4ml.converters.convert_from_keras_model(
+       model=model,
+       hls_config=config,
+       backend='Vitis'
+   )
+
+Once converted to an HLS project, you can connect the project into the Python runtime and use it to run predictions on a numpy array:
+
+.. code-block:: python
+
+   import numpy as np
+
+   # Compile the hls project and link it into the Python runtime
+   hls_model.compile()
+
+   # Generate random input data
+   X_input = np.random.rand(100, 16)
 
-   # Print full list of example model if you want to explore more
-   hls4ml.utils.fetch_example_list()
+   # Run the model on the input data
+   hls_prediction = hls_model.predict(X_input)
 
-After that, you can use :code:`Vivado HLS` to synthesize the model:
+After that, you can use :code:`Vitis HLS` to synthesize the model:
 
 .. code-block:: python
 
-   # Use Vivado HLS to synthesize the model
+   # Use Vitis HLS to synthesize the model
    # This might take several minutes
    hls_model.build()
 
-   # Print out the report if you want
+   # Optional: print out the report
    hls4ml.report.read_vivado_report('my-hls-test')
 
 Done! You've built your first project using ``hls4ml``! To learn more about our various API functionalities, check out our tutorials `here <https://github.com/fastmachinelearning/hls4ml-tutorial>`__.

docs/intro/status.rst (+5, -4)

@@ -89,14 +89,15 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below.
 
 Other feature notes:
 
-* ``hls4ml`` is tested on Linux, and supports
+* ``hls4ml`` is tested on the following platforms. Newer versions might work just fine, but try at your own risk.
 
   * Vivado HLS versions 2018.2 to 2020.1
-  * Intel HLS versions 20.1 to 21.4
-  * Vitis HLS versions 2022.2 to 2024.1
+  * Intel HLS versions 20.1 to 21.4; versions \> 21.4 have not been tested.
+  * Vitis HLS versions 2022.2 to 2024.1. Versions \<= 2022.1 are known not to work.
   * Catapult HLS versions 2024.1_1 to 2024.2
   * oneAPI versions 2024.1 to 2025.0
 
-* Windows and macOS are not supported
+* ``hls4ml`` supports Linux and requires Python \>= 3.10. hls4ml does not require a specific Linux distribution version, and we recommend following the requirements of the HLS tool you are using.
+* Windows and macOS are not supported. Setting up ``hls4ml`` on these platforms, for example using the Windows Subsystem for Linux (WSL), should be possible, but we do not provide support for such use cases.
 * BDT support has moved to the `Conifer <https://github.com/thesps/conifer>`__ package
 
 Example Models

hls4ml/backends/catapult/passes/bn_quant.py (+1, -1)

@@ -96,7 +96,7 @@ def transform(self, model, node):
             bn_layer.get_weights('scale').data, bn_layer.get_weights('bias').data, node.get_attr('threshold', 0.5)
         )
         # Remove the BatchNormalization layer
-        model.remove_node(bn_layer, rewire=True)
+        model.remove_node(bn_layer)
         # Replace the old Activation layer with this one
         model.replace_node(node, bnbt_layer)

hls4ml/backends/fpga/passes/clone.py (+5, -5)

@@ -61,8 +61,8 @@ def match(self, node):
 
         # Check if the output is used more than once
         output_map = node.get_output_use_map()
-        in_output = node.name in node.model.outputs
         for output in node.outputs:
+            in_output = output in node.model.outputs
             if len(output_map[output]) + in_output > 1:
                 # model output also need a stream
                 return True

@@ -72,10 +72,10 @@ def match(self, node):
     def transform(self, model, node):
 
         output_map = node.get_output_use_map()
-        in_output = node.name in node.model.outputs
 
         transformed = False
         for output in node.outputs:
+            in_output = output in node.model.outputs
             n_outputs = len(output_map[output]) + in_output
             if n_outputs == 1:
                 continue

@@ -90,8 +90,8 @@ def transform(self, model, node):
             init_stream_idx = 1
             if in_output:
                 # If the value is used as output, add one extra stream
-                idx = node.model.outputs.index(node.name)
-                node.model.outputs[idx] = node.name + '_cpy1'
+                idx = node.model.outputs.index(output)
+                node.model.outputs[idx] = output + '_cpy1'
                 init_stream_idx = 2
             for i, layer in enumerate(output_map[output], init_stream_idx):
                 idx = layer.inputs.index(output)

@@ -102,7 +102,7 @@ def transform(self, model, node):
                 'clone_' + node.name,
                 attrs,
                 [output],
-                [output + '_cpy' + str(i + 1) for i in range(n_outputs)],
+                [f'{output}_cpy{i + 1}' for i in range(n_outputs)],
             )
             for i in range(n_outputs):
                 key = output + '_cpy' + str(i + 1)
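The clone.py change above fixes a per-output bookkeeping bug: the old code checked once per node whether `node.name` was a model output, so a multi-output node whose output *tensor* names (rather than the node name) appear in `model.outputs` was miscounted. A plain-Python sketch, with hypothetical names (`split1`, `branch_a`/`branch_b`) standing in for real graph objects, shows why the membership test must move inside the loop:

```python
# Hypothetical multi-output node: 'branch_b' is consumed by one layer
# AND is a model-level output, so it needs a clone (2 readers total).
node_name = 'split1'
node_outputs = ['branch_a', 'branch_b']
model_outputs = ['branch_b']
output_map = {'branch_a': ['dense1'], 'branch_b': ['dense2']}

# Old (buggy) logic: one flag for the whole node, keyed on node.name
in_output_old = node_name in model_outputs  # False: 'split1' is not an output name
needs_clone_old = [o for o in node_outputs
                   if len(output_map[o]) + in_output_old > 1]

# New logic: check each output tensor individually
needs_clone_new = []
for output in node_outputs:
    in_output = output in model_outputs
    if len(output_map[output]) + in_output > 1:
        needs_clone_new.append(output)

print(needs_clone_old, needs_clone_new)  # [] ['branch_b']
```

With the old flag, `branch_b` counted only its one downstream consumer and was never cloned, starving the model-output stream.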

hls4ml/backends/fpga/passes/final_reshape.py (+1, -2)

@@ -12,8 +12,7 @@ def match(self, node):
     def transform(self, model, node):
         if model.config.get_config_value('IOType') == 'io_parallel':
             print('WARNING: Final layer is a Reshape, which does not affect the output for io_parallel; removing it')
-            # remove, but don't rewire because it's the output layer
-            model.remove_node(node, rewire=False)
+            model.remove_node(node)
             return True
         elif model.config.get_config_value('IOType') == 'io_stream':
             print(

hls4ml/backends/fpga/passes/hgq_proxy_model.py (+1, -1)

@@ -53,7 +53,7 @@ def match(self, node: Layer):
 
     def transform(self, model, node: FixedPointQuantizer):
         if node.fusible:
-            model.remove_node(node, rewire=True)
+            model.remove_node(node)
             return True
 
         if model.config.config['IOType'] != 'io_parallel':

hls4ml/backends/fpga/passes/remove_softmax.py (+1, -1)

@@ -9,5 +9,5 @@ def match(self, node):
         return is_softmax and remove_softmax
 
     def transform(self, model, node):
-        model.remove_node(node, rewire=True)
+        model.remove_node(node)
         return True
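A recurring change across these passes is that `model.remove_node(...)` loses its `rewire` argument, suggesting the method now reconnects the surviving graph itself. As a rough illustration only (plain Python, not the actual hls4ml `ModelGraph` API), removing a node with rewiring means pointing every consumer of the removed node's output at its input instead:

```python
class Node:
    """Toy graph node: named, with input and output tensor names."""

    def __init__(self, name, inputs, outputs):
        self.name, self.inputs, self.outputs = name, inputs, outputs


def remove_node(nodes, target):
    """Remove target from the list, rewiring its consumers to its input."""
    assert len(target.inputs) == 1, 'sketch handles single-input nodes only'
    for n in nodes:
        if n is target:
            continue
        # Any reference to one of target's outputs now reads target's input
        n.inputs = [target.inputs[0] if i in target.outputs else i
                    for i in n.inputs]
    nodes.remove(target)


# dense -> softmax -> sink; drop the softmax (as remove_softmax.py does)
dense = Node('dense', ['x'], ['dense_out'])
softmax = Node('softmax', ['dense_out'], ['softmax_out'])
sink = Node('sink', ['softmax_out'], ['y'])
graph = [dense, softmax, sink]

remove_node(graph, softmax)
print([n.name for n in graph], sink.inputs)  # ['dense', 'sink'] ['dense_out']
```

The toy `remove_node` here is not the library's implementation, just the general shape of "remove with rewire" that the dropped keyword implies.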

hls4ml/backends/oneapi/oneapi_backend.py (+3)

@@ -10,6 +10,7 @@
 from hls4ml.model.layers import GRU, LSTM, Activation, Conv1D, Conv2D, Dense, Embedding, Layer, SimpleRNN, Softmax
 from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
 from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType
+from hls4ml.report import parse_oneapi_report
 from hls4ml.utils import attribute_descriptions as descriptions
 
 # from hls4ml.report import parse_oneapi_report

@@ -207,6 +208,8 @@ def build(self, model, build_type='fpga_emu', run=False):
         executable = builddir / f'{model.config.get_project_name()}.{build_type}'
         subprocess.run(f'{str(executable)}', shell=True, cwd=builddir, check=True)
 
+        return parse_oneapi_report(model.config.get_output_dir())
+
     @layer_optimizer(Layer)
     def init_base_layer(self, layer):
         reuse_factor = layer.model.config.get_reuse_factor(layer)

hls4ml/backends/oneapi/passes/bn_quant.py (+1, -1)

@@ -149,7 +149,7 @@ def transform(self, model, node):
             bn_layer.get_weights('scale').data, bn_layer.get_weights('bias').data, node.get_attr('threshold', 0.5)
         )
         # Remove the BatchNormalization layer
-        model.remove_node(bn_layer, rewire=True)
+        model.remove_node(bn_layer)
         # Replace the old Activation layer with this one
         model.replace_node(node, bnbt_layer)

hls4ml/backends/oneapi/passes/transform_types.py (+1, -1)

@@ -33,7 +33,7 @@ def transform(self, model, node):
             new_var = self.interface_var_converter.convert(var, pragma='stream')
         elif out_name in node.model.outputs:
             new_var = self.interface_var_converter.convert(var, pragma='stream')
-        if isinstance(var, InplaceTensorVariable):
+        elif isinstance(var, InplaceTensorVariable):
             new_var = self.inplace_stream_var_converter.convert(var, pragma='stream')
         else:
             new_var = self.stream_var_converter.convert(var, pragma='stream')
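This one-word fix matters because in an `if`/`elif` chain only one branch fires. With the old plain `if`, the final `if`/`else` pair always ran, so a variable already matched as a model output had its interface converter silently overwritten by the inplace-stream or plain-stream choice. A reduced sketch (converter names are just illustrative strings, not the real converter classes):

```python
def pick_var_old(out_name, model_outputs, is_inplace):
    new_var = None
    if out_name in model_outputs:
        new_var = 'interface_stream'
    if is_inplace:  # old bug: this if/else always runs and clobbers new_var
        new_var = 'inplace_stream'
    else:
        new_var = 'plain_stream'
    return new_var


def pick_var_new(out_name, model_outputs, is_inplace):
    if out_name in model_outputs:
        new_var = 'interface_stream'
    elif is_inplace:  # fixed: only one branch of the chain fires
        new_var = 'inplace_stream'
    else:
        new_var = 'plain_stream'
    return new_var


# A model output used to lose its interface converter:
print(pick_var_old('y', ['y'], False))  # plain_stream (wrong)
print(pick_var_new('y', ['y'], False))  # interface_stream (correct)
```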

hls4ml/backends/quartus/passes/bn_quant.py (+1, -1)

@@ -96,7 +96,7 @@ def transform(self, model, node):
             bn_layer.get_weights('scale').data, bn_layer.get_weights('bias').data, node.get_attr('threshold', 0.5)
         )
         # Remove the BatchNormalization layer
-        model.remove_node(bn_layer, rewire=True)
+        model.remove_node(bn_layer)
         # Replace the old Activation layer with this one
         model.replace_node(node, bnbt_layer)

hls4ml/backends/vitis/vitis_backend.py (+4)

@@ -50,6 +50,7 @@ def create_initial_config(
         namespace=None,
         write_weights_txt=True,
         write_tar=False,
+        tb_output_stream='both',
         **_,
     ):
         """Create initial configuration of the Vitis backend.

@@ -64,6 +65,8 @@ def create_initial_config(
             write_weights_txt (bool, optional): If True, writes weights to .txt files which speeds up compilation.
                 Defaults to True.
             write_tar (bool, optional): If True, compresses the output directory into a .tar.gz file. Defaults to False.
+            tb_output_stream (str, optional): Controls where to write the output. Options are 'stdout', 'file' and 'both'.
+                Defaults to 'both'.
 
         Returns:
             dict: initial configuration.

@@ -79,6 +82,7 @@ def create_initial_config(
             'Namespace': namespace,
             'WriteWeightsTxt': write_weights_txt,
             'WriteTar': write_tar,
+            'TBOutputStream': tb_output_stream,
         }
 
         return config
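The new `tb_output_stream` keyword flows straight through to the backend config dictionary under the `TBOutputStream` key. As a minimal mock of that mapping (this helper and its validation are illustrative only, not part of the hls4ml API):

```python
def make_tb_config(tb_output_stream='both'):
    """Mirror the keyword-to-config-key mapping added in this diff.

    The explicit validation here is a hypothetical addition; the diff
    itself documents the allowed values but does not enforce them.
    """
    allowed = {'stdout', 'file', 'both'}
    if tb_output_stream not in allowed:
        raise ValueError(f'tb_output_stream must be one of {sorted(allowed)}')
    return {'TBOutputStream': tb_output_stream}


print(make_tb_config())        # {'TBOutputStream': 'both'}
print(make_tb_config('file'))  # {'TBOutputStream': 'file'}
```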

hls4ml/backends/vivado/passes/bn_quant.py (+1, -1)

@@ -96,7 +96,7 @@ def transform(self, model, node):
             bn_layer.get_weights('scale').data, bn_layer.get_weights('bias').data, node.get_attr('threshold', 0.5)
         )
         # Remove the BatchNormalization layer
-        model.remove_node(bn_layer, rewire=True)
+        model.remove_node(bn_layer)
         # Replace the old Activation layer with this one
         model.replace_node(node, bnbt_layer)

hls4ml/backends/vivado/passes/recurrent_templates.py (+67, -1)

@@ -1,6 +1,6 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
-from hls4ml.model.layers import GRU, LSTM
+from hls4ml.model.layers import GRU, LSTM, TimeDistributed
 
 # recurrent multiplication template
 

@@ -237,3 +237,69 @@ def format(self, node):
             template = recr_function_template
 
         return template.format(**params)
+
+
+time_distributed_config_template = """struct config{index} : nnet::time_distributed_config {{
+    static const unsigned dim = {dim};
+
+    static const unsigned n_time_steps = {n_time_steps};
+    static const unsigned in_height = {in_height};
+    static const unsigned in_width = {in_width};
+    static const unsigned n_chan = {n_chan};
+}};\n"""
+
+time_distributed_loop_start_template = """for (int ts = 0; ts < config{index}::n_time_steps; ts++) {{
+    {loop_mode}
+    nnet::read_time_step_{dim}d<{input_t}, {config}>(ts, {input}, {output});"""
+
+time_distributed_loop_end_template = """    nnet::write_time_step_{dim}d<{output_t}, {config}>(ts, {input}, {output});
+}}"""
+
+time_distributed_include_list = ['nnet_utils/nnet_time_distributed.h']
+
+
+class TimeDistributedConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(TimeDistributed)
+        self.template = time_distributed_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)
+
+        input_shape = node.get_input_variable().shape
+        params['dim'] = len(input_shape)
+        if node.name.endswith('_end'):
+            params['dim'] += 1  # The input variable will be from the wrapped layer, without time dimension
+        params['in_height'] = input_shape[-3] if params['dim'] == 4 else 1
+        params['in_width'] = input_shape[-2] if params['dim'] >= 3 else 1
+        params['n_chan'] = input_shape[-1]
+
+        return self.template.format(**params)
+
+
+class TimeDistributedFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__((TimeDistributed), include_header=time_distributed_include_list)
+        self.template_start = time_distributed_loop_start_template
+        self.template_end = time_distributed_loop_end_template
+
+    def format(self, node):
+        params = self._default_function_params(node)
+
+        input_shape = node.get_input_variable().shape
+        params['dim'] = len(input_shape)
+        if node.name.endswith('_end'):
+            params['dim'] += 1  # The input variable will be from the wrapped layer, without time dimension
+
+        loop_mode = node.get_attr('time_step_loop_parallelism')
+        if loop_mode == 'unroll':
+            params['loop_mode'] = '#pragma HLS UNROLL'
+        elif loop_mode == 'pipeline':
+            params['loop_mode'] = '#pragma HLS PIPELINE'
+        else:
+            params['loop_mode'] = ''
+
+        if node.attributes['wrapped_layer'].name == node.name + '_end':
+            return self.template_start.format(**params)
+        else:
+            return self.template_end.format(**params)