|
| 1 | +# Multi-file Workflow Example |
| 2 | + |
| 3 | +!!! tip |
| 4 | + |
| 5 | + This example uses the [`echo` app](../tutorials/echo.md); make sure to complete |
| 6 | + that tutorial first. |
| 7 | + |
| 8 | +This example showcases how to use **multi-file** applications _within_ a |
| 9 | +Nextpipe workflow. A multi-file application differs from a JSON-based |
| 10 | +application in that it accepts a directory of files as input and produces a |
| 11 | +directory of files as output. Note that the workflow itself is also a multi-file |
| 12 | +application; however, this is a user choice (i.e., you could also create a |
| 13 | +JSON-based workflow that uses multi-file sub-applications). |
| 14 | + |
| 15 | +For demonstration purposes, we will use the simple [echo-multi application] as |
| 16 | +the sub-application, which echoes the input files as output files. |
| 17 | + |
| 18 | +Find the workflow code below (mind the comments explaining each step): |
| 19 | + |
| 20 | +```python |
| 21 | +import os |
| 22 | +import shutil |
| 23 | + |
| 24 | +import nextmv |
| 25 | +import nextmv.cloud |
| 26 | + |
| 27 | +from nextpipe import FlowSpec, app, log, needs, step |
| 28 | + |
| 29 | +options = nextmv.Options( |
| 30 | + nextmv.Option("input", str, "inputs/", "Path to input file.", False), |
| 31 | + nextmv.Option("output", str, "outputs/", "Path to output file.", False), |
| 32 | +) |
| 33 | + |
| 34 | + |
| 35 | +# >>> Workflow definition |
| 36 | +class Flow(FlowSpec): |
| 37 | + # The first step receives the path to the input files directly (see main()) and |
| 38 | + # automatically zips the directory and passes it to the 'echo-multi' sub-app. |
| 39 | + @app(app_id="echo-multi") |
| 40 | + @step |
| 41 | + def solve1(): |
| 42 | + """Runs a multi-file model.""" |
| 43 | + pass |
| 44 | + |
| 45 | + # The second step receives the path to the output files from the first step. This path |
| 46 | + # will point to a temporary directory containing the output files from the first step. |
| 47 | + @needs(predecessors=[solve1]) |
| 48 | + @step |
| 49 | + def transform(result_path: str): |
| 50 | + """Transforms the result for the next step.""" |
| 51 | + # Just list the content of the result directory. |
| 52 | + log(f"Contents of result directory {result_path}:") |
| 53 | + for file_name in os.listdir(result_path): |
| 54 | + full_file_name = os.path.join(result_path, file_name) |
| 55 | + if os.path.isfile(full_file_name): |
| 56 | + log(f"- {file_name}") |
| 57 | + |
| 58 | + # Add a new file to the result for demonstration purposes. |
| 59 | + new_file_path = os.path.join(result_path, "additional_file.txt") |
| 60 | + with open(new_file_path, "w") as f: |
| 61 | + f.write("This is an additional file added in the transform step.\n") |
| 62 | + log(f"Added new file: {new_file_path}") |
| 63 | + |
| 64 | + return result_path |
| 65 | + |
| 66 | + # The third step receives the (modified) directory from the transform step and runs |
| 67 | + # another multi-file app on it. |
| 68 | + @app( |
| 69 | + app_id="echo-multi", |
| 70 | + # We specify the content type explicitly here. This is normally done via the app's |
| 71 | + # manifest, but we can do it explicitly like this too. |
| 72 | + run_configuration=nextmv.RunConfiguration( |
| 73 | + format=nextmv.Format( |
| 74 | + format_input=nextmv.FormatInput(input_type=nextmv.InputFormat.MULTI_FILE), |
| 75 | + format_output=nextmv.FormatOutput(output_type=nextmv.OutputFormat.MULTI_FILE), |
| 76 | + ) |
| 77 | + ), |
| 78 | + full_result=True, |
| 79 | + ) |
| 80 | + @needs(predecessors=[transform]) |
| 81 | + @step |
| 82 | + def solve2(result: nextmv.cloud.RunResult): |
| 83 | + """Runs another multi-file model.""" |
| 84 | + pass |
| 85 | + |
| 86 | + # The final step receives the output from 'solve2' as a full result object (see |
| 87 | + # 'full_result=True' above). In this case, the path to the output files is available |
| 88 | + # via 'result.output'. |
| 89 | + @needs(predecessors=[solve2]) |
| 90 | + @step |
| 91 | + def prepare_output(result: nextmv.cloud.RunResult): |
| 92 | + """Transforms the result for the next step.""" |
| 93 | + # Extract the path to the output files. |
| 94 | + result_path = result.output |
| 95 | + # Simply copy the files from the given directory to the expected output directory. |
| 96 | + os.makedirs(options.output, exist_ok=True) |
| 97 | + for file_name in os.listdir(result_path): |
| 98 | + full_file_name = os.path.join(result_path, file_name) |
| 99 | + if os.path.isfile(full_file_name): |
| 100 | + shutil.copy(full_file_name, options.output) |
| 101 | + |
| 102 | + |
| 103 | +def main(): |
| 104 | + # Run workflow (simply provide the path to the multi-file input) |
| 105 | + flow = Flow("DecisionFlow", options.input) |
| 106 | + flow.run() |
| 107 | + # The last step of the flow already prepares the output in the requested directory, |
| 108 | + # so no need to do anything here anymore. |
| 109 | + |
| 110 | + |
| 111 | +if __name__ == "__main__": |
| 112 | + main() |
| 113 | +``` |
| 114 | + |
| 115 | +Run the example: |
| 116 | + |
| 117 | +```bash |
| 118 | +$ python main.py |
| 119 | +[nextpipe] No application ID or run ID found, uplink is inactive. |
| 120 | +[nextpipe] Flow: Flow |
| 121 | +[nextpipe] nextpipe: v0.3.5 |
| 122 | +[nextpipe] nextmv: 0.33.0 |
| 123 | +[nextpipe] Flow graph steps: |
| 124 | +[nextpipe] Step: |
| 125 | +[nextpipe] Definition: Step(solve1, StepRun(echo-multi, , {}, False)) |
| 126 | +[nextpipe] Docstring: Runs a multi-file model. |
| 127 | +[nextpipe] Step: |
| 128 | +[nextpipe] Definition: Step(transform, StepNeeds(solve1)) |
| 129 | +[nextpipe] Docstring: Transforms the result for the next step. |
| 130 | +[nextpipe] Step: |
| 131 | +[nextpipe] Definition: Step(solve2, StepNeeds(transform), StepRun(echo-multi, , {}, True)) |
| 132 | +[nextpipe] Docstring: Runs another multi-file model. |
| 133 | +[nextpipe] Step: |
| 134 | +[nextpipe] Definition: Step(prepare_output, StepNeeds(solve2)) |
| 135 | +[nextpipe] Docstring: Transforms the result for the next step. |
| 136 | +[nextpipe] Mermaid diagram: |
| 137 | +[nextpipe] graph LR |
| 138 | + solve1(solve1) |
| 139 | + solve1 --> transform |
| 140 | + transform(transform) |
| 141 | + transform --> solve2 |
| 142 | + solve2(solve2) |
| 143 | + solve2 --> prepare_output |
| 144 | + prepare_output(prepare_output) |
| 145 | + |
| 146 | +[nextpipe] Mermaid URL: https://mermaid.ink/svg/Z3JhcGggTFIKICBzb2x2ZTEoc29sdmUxKQogIHNvbHZlMSAtLT4gdHJhbnNmb3JtCiAgdHJhbnNmb3JtKHRyYW5zZm9ybSkKICB0cmFuc2Zvcm0gLS0+IHNvbHZlMgogIHNvbHZlMihzb2x2ZTIpCiAgc29sdmUyIC0tPiBwcmVwYXJlX291dHB1dAogIHByZXBhcmVfb3V0cHV0KHByZXBhcmVfb3V0cHV0KQo=?theme=dark |
| 147 | +[nextpipe] Running node solve1_0 |
| 148 | +[nextpipe] Started app step solve1_0 run, find it at https://cloud.nextmv.io/app/echo-multi/run/latest-a-JAvuFgDR?view=details |
| 149 | +/home/marius/.asdf/installs/python/3.13.7/lib/python3.13/shutil.py:1281: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior. |
| 150 | + tarobj.extractall(extract_dir, filter=filter) |
| 151 | +[nextpipe] Running node transform_0 |
| 152 | +[transform_0] Contents of result directory /tmp/nextpipe_output_igqsibzm: |
| 153 | +[transform_0] - input.xlsx |
| 154 | +[transform_0] - data.csv |
| 155 | +[transform_0] Added new file: /tmp/nextpipe_output_igqsibzm/additional_file.txt |
| 156 | +[nextpipe] Running node solve2_0 |
| 157 | +[nextpipe] Started app step solve2_0 run, find it at https://cloud.nextmv.io/app/echo-multi/run/latest-HIwvuFgDg?view=details |
| 158 | +[nextpipe] Running node prepare_output_0 |
| 159 | +``` |
| 160 | + |
| 161 | +Content of the output directory: |
| 162 | + |
| 163 | +```bash |
| 164 | +$ tree outputs/ |
| 165 | +outputs/ |
| 166 | +├── additional_file.txt |
| 167 | +├── data.csv |
| 168 | +└── input.xlsx |
| 169 | + |
| 170 | +1 directory, 3 files |
| 171 | +``` |
| 172 | + |
| 173 | +The resulting Mermaid diagram for this flow looks like this: |
| 174 | + |
| 175 | +```mermaid |
| 176 | +graph LR |
| 177 | + solve1(solve1) |
| 178 | + solve1 --> transform |
| 179 | + transform(transform) |
| 180 | + transform --> solve2 |
| 181 | + solve2(solve2) |
| 182 | + solve2 --> prepare_output |
| 183 | + prepare_output(prepare_output) |
| 184 | +``` |
| 185 | + |
| 186 | +[echo-multi application]: ../tutorials/echo-multi.md |
0 commit comments