Merged
Changes from 12 commits
58 commits
4139d34
changes
MM0hsin Jul 22, 2025
480cdef
Combine Geomopt and descriptors task into one workgraph
MM0hsin Jul 22, 2025
9439af6
Add sample_split.py and refactor to work with geometry.opti.ipynb
MM0hsin Jul 28, 2025
639b6b2
Rename descriptors_filter.ipynb
MM0hsin Jul 28, 2025
66d3f6e
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Jul 28, 2025
500376e
changes
MM0hsin Jul 22, 2025
2f5ba0a
Combine Geomopt and descriptors task into one workgraph
MM0hsin Jul 22, 2025
e9f6489
Add sample_split.py and refactor to work with geometry.opti.ipynb
MM0hsin Jul 28, 2025
50ee7af
Rename descriptors_filter.ipynb
MM0hsin Jul 28, 2025
03a9fe1
Add descriptors_outputs to sample_split.py
MM0hsin Aug 4, 2025
0b9c336
Merge branch 'add_geomopt_fine_tuning' of https://github.com/MM0hsin/…
MM0hsin Aug 4, 2025
69d536e
Refactor descriptors_outputs in sample_spit.py
MM0hsin Aug 4, 2025
9ed07fb
Refactor process_and_split_data to accept singlefiledata outputs from…
MM0hsin Aug 5, 2025
0a45562
Refector descriptors_filter.ipynb workgraph
MM0hsin Aug 5, 2025
bb0b78e
Update sample_split.py to include WG function to run Geomopt and Desc…
MM0hsin Aug 12, 2025
278af75
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Aug 12, 2025
408c111
Add descriptors_filter_qe.ipynb for QE calculations
MM0hsin Aug 20, 2025
5e893f8
Update descriptors_filter_qe.ipynb to QE task remotely
MM0hsin Sep 4, 2025
4a113c1
Minor tweak for QE task
MM0hsin Sep 4, 2025
7e68ff9
Add training task to descriptors_filter_qe.ipynb
MM0hsin Sep 8, 2025
85a3eaf
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Sep 11, 2025
d77a5b7
Add tutorial for setting up an external computer (#214)
MM0hsin Sep 15, 2025
05e24dc
Update aiida-workgraph (#232)
ElliottKasoar Sep 16, 2025
061082d
Apply suggestions from code review
MM0hsin Sep 17, 2025
a94cfd0
Replace opt_kwargs with minimize_kwargs (#180)
MM0hsin Sep 16, 2025
cc510f0
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Sep 17, 2025
e27dc23
Address PR comments
MM0hsin Sep 18, 2025
dd437f1
Refactor sample_split.py
MM0hsin Sep 24, 2025
95a94a7
Merge main
MM0hsin Sep 24, 2025
27cbc0f
Update workgraphs to work with new script
MM0hsin Sep 25, 2025
8fb3c92
Address PR comments
MM0hsin Oct 1, 2025
1d64033
Fix UV aiida-pseudo install fail
MM0hsin Oct 2, 2025
5442716
Address PR comments
MM0hsin Oct 20, 2025
cf53576
Address PR comments
MM0hsin Oct 23, 2025
00b6b1d
Address PR comment
MM0hsin Oct 23, 2025
0f1ab70
Remove unnessary notebook outputs
MM0hsin Oct 23, 2025
30b1719
Add aiida_qe optional dependancy
MM0hsin Oct 23, 2025
6b2939a
Add aiida_qe optional dependancy
MM0hsin Oct 23, 2025
c02f4d0
Address PR comments
MM0hsin Oct 23, 2025
355f8b3
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Oct 23, 2025
ddfd655
Update examples/tutorials/workgraphs/descriptors_filter.ipynb
alinelena Oct 23, 2025
5919f92
Pushing QE workgraph changes for testing
MM0hsin Oct 24, 2025
374d449
Merge remote-tracking branch 'refs/remotes/upstream/add_geomopt_fine_…
MM0hsin Oct 24, 2025
13e0221
refactor qe workgraph with working inputs
MM0hsin Oct 24, 2025
3752dda
Address PR comments
MM0hsin Oct 27, 2025
887d647
Address wg bug
MM0hsin Oct 31, 2025
10a197d
Fix QE task inputs and address PR comments
MM0hsin Nov 3, 2025
adaea1f
Address PR comments
MM0hsin Nov 5, 2025
843a6fe
Address PR comments
MM0hsin Nov 6, 2025
912b27d
Fix markdown explanation
MM0hsin Nov 7, 2025
0bdcc98
Add link to notebook
MM0hsin Nov 7, 2025
4aea09d
Rename task [skip actions]
MM0hsin Nov 7, 2025
b01659a
Update examples/tutorials/workgraphs/descriptors_filter_qe.ipynb
alinelena Nov 11, 2025
a071a6b
Address PR comments
MM0hsin Nov 11, 2025
042842d
Merge remote-tracking branch 'refs/remotes/upstream/add_geomopt_fine_…
MM0hsin Nov 11, 2025
392850d
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Nov 11, 2025
64d3147
Address PR comments
MM0hsin Nov 12, 2025
5f307e9
Address PR comments
MM0hsin Nov 12, 2025
395 changes: 395 additions & 0 deletions examples/tutorials/descriptors_filter.ipynb
{
"cells": [
{
"cell_type": "markdown",
"id": "c13c1dd7",
"metadata": {},
"source": [
"# Running multiple calculations on a given model\n"
]
},
{
"cell_type": "markdown",
"id": "ca81fa1b",
"metadata": {},
"source": [
"## Aim"
]
},
{
"cell_type": "markdown",
"id": "35ecbb39",
"metadata": {},
"source": [
"This notebook shows how we can run multiple calculations on a given structure."
]
},
{
"cell_type": "markdown",
"id": "5acf0b46",
"metadata": {},
"source": [
"### Setup\n",
"\n",
"The initial setup is very similar to that of the other tutorials, such as `singlepoint.ipynb`, which goes into more detail about what each step does."
]
},
{
"cell_type": "markdown",
"id": "73bfb65e",
"metadata": {},
"source": [
"Load the AiiDA profile and code:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79098139",
"metadata": {},
"outputs": [],
"source": [
"from aiida import load_profile\n",
"load_profile()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bc2cf3b",
"metadata": {},
"outputs": [],
"source": [
"from aiida_mlip.data.model import ModelData\n",
"uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n",
"model = ModelData.from_uri(uri, architecture=\"mace_mp\", cache_dir=\"mlips\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae62816f",
"metadata": {},
"outputs": [],
"source": [
"from aiida.orm import load_code\n",
"code = load_code(\"janus@localhost\")"
]
},
{
"cell_type": "markdown",
"id": "65fbe939",
"metadata": {},
"source": [
"Inputs should include the model, code, metadata, and any other keyword arguments expected by the calculation we are running:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1795dd41",
"metadata": {},
"outputs": [],
"source": [
    "from aiida.orm import Bool, Float, Str\n",
    "\n",
    "inputs = {\n",
    "    \"code\": code,\n",
    "    \"model\": model,\n",
    "    \"arch\": Str(model.architecture),\n",
    "    \"precision\": Str(\"float64\"),\n",
    "    \"device\": Str(\"cpu\"),\n",
    "    \"fmax\": Float(0.1),\n",
    "    \"opt_cell_lengths\": Bool(False),\n",
    "    \"opt_cell_fully\": Bool(True),\n",
    "    \"metadata\": {\"options\": {\"resources\": {\"num_machines\": 1}}},\n",
    "}"
]
},
{
"cell_type": "markdown",
"id": "1d661593",
"metadata": {},
"source": [
"We must now choose the calculations to perform:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e4c78c9",
"metadata": {},
"outputs": [],
"source": [
"from aiida.plugins import CalculationFactory\n",
"geomoptCalc = CalculationFactory(\"mlip.opt\")\n",
"descriptorsCalc = CalculationFactory(\"mlip.descriptors\")"
]
},
{
"cell_type": "markdown",
"id": "37df1f81",
"metadata": {},
"source": [
"Now we can create our WorkGraph. This involves passing in the inputs, counting the number of structures we have, and iterating through them. Note that for this notebook we have reduced the number of structures to two for simplicity. In the loop, we take each structure, run the geometry optimisation calculation, pass its output structure into the descriptors calculation, and collect the final outputs of all the structures."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "176f1ea0",
"metadata": {},
"outputs": [],
"source": [
    "from aiida_workgraph import WorkGraph\n",
    "from aiida.orm import StructureData\n",
    "from ase.io import read\n",
    "from sample_split import descriptors_outputs, process_and_split_data\n",
    "\n",
    "traj_path = \"../../examples/tutorials/structures/lj-traj.xyz\"\n",
    "num_structs = len(read(traj_path, index=\":\"))\n",
    "\n",
    "with WorkGraph(\"Calculation Workgraph\") as wg:\n",
    "    wg.inputs = inputs\n",
    "    final_structures = {}\n",
    "\n",
    "    # Limit the loop to the first two structures for simplicity\n",
    "    for i in range(num_structs - 9):\n",
    "        structure = StructureData(ase=read(traj_path, index=f\"{i}\"))\n",
    "\n",
    "        # Optimise the geometry of the structure\n",
    "        geomopt_calc = wg.add_task(\n",
    "            geomoptCalc,\n",
    "            code=wg.inputs.code,\n",
    "            model=wg.inputs.model,\n",
    "            arch=wg.inputs.arch,\n",
    "            precision=wg.inputs.precision,\n",
    "            device=wg.inputs.device,\n",
    "            metadata=wg.inputs.metadata,\n",
    "            fmax=wg.inputs.fmax,\n",
    "            opt_cell_lengths=wg.inputs.opt_cell_lengths,\n",
    "            opt_cell_fully=wg.inputs.opt_cell_fully,\n",
    "            struct=structure,\n",
    "        )\n",
    "\n",
    "        # Calculate descriptors for the optimised structure\n",
    "        descriptors_calc = wg.add_task(\n",
    "            descriptorsCalc,\n",
    "            code=wg.inputs.code,\n",
    "            model=wg.inputs.model,\n",
    "            arch=wg.inputs.arch,\n",
    "            precision=wg.inputs.precision,\n",
    "            device=wg.inputs.device,\n",
    "            metadata=wg.inputs.metadata,\n",
    "            struct=geomopt_calc.outputs.final_structure,\n",
    "            calc_per_element=True,\n",
    "        )\n",
    "\n",
    "        final_structures[f\"structs{i}\"] = descriptors_calc.outputs.xyz_output\n",
    "\n",
    "    wg.outputs.final_structures = final_structures\n",
    "\n",
    "    # Aggregate the descriptors outputs from all structures\n",
    "    collect_result = wg.add_task(\n",
    "        descriptors_outputs,\n",
    "        structures=final_structures,\n",
    "    )\n",
    "\n",
    "    # Optionally, the split could also be run as a task in the WorkGraph:\n",
    "    # split_task = wg.add_task(\n",
    "    #     process_and_split_data,\n",
    "    #     trajectory_path=collect_result.outputs.result,\n",
    "    #     config_types=\"\",\n",
    "    #     n_samples=2,\n",
    "    #     prefix=\"\",\n",
    "    #     scale=1.0e5,\n",
    "    #     append_mode=False,\n",
    "    # )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50a51bfc",
"metadata": {},
"outputs": [],
"source": [
"wg"
]
},
{
"cell_type": "markdown",
"id": "bb77dab0",
"metadata": {},
"source": [
"Now we can run the calculations:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f2c1fcd",
"metadata": {},
"outputs": [],
"source": [
"wg.run()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29175d94",
"metadata": {},
"outputs": [],
"source": [
"print(collect_result.outputs.result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "531b2e36",
"metadata": {},
"outputs": [],
"source": [
"for _, data in collect_result.outputs.result.value.items():\n",
" print(type(data))"
]
},
{
"cell_type": "markdown",
"id": "9d3f9ddd",
"metadata": {},
"source": [
"## Testing the aggregation task in isolation\n",
    "\n",
    "The following cells test the `descriptors_outputs` task on its own, loading previously stored `SinglefileData` nodes by primary key (PK). The PKs below are examples, and should be replaced with values from your own profile."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b14fe356",
"metadata": {},
"outputs": [],
"source": [
"from aiida.orm import load_node\n",
"from sample_split import descriptors_outputs, process_and_split_data\n",
"from aiida import load_profile\n",
"from aiida_workgraph import WorkGraph, Zone\n",
"\n",
"load_profile()\n",
"\n",
"sfdata = load_node(pk= 15657)\n",
"sfdata1 = load_node(pk= 15661)\n",
"\n",
"structs_out = {\n",
" \"structs0\" : sfdata,\n",
" \"structs1\" : sfdata1,\n",
"}\n",
"\n",
"wg = WorkGraph(\"test\")\n",
"aggr = wg.add_task(descriptors_outputs)\n",
"aggr.set(structs_out)\n",
"\n",
"# split = wg.add_task(\n",
"# process_and_split_data,\n",
"# trajectory_path = aggr.outputs.result,\n",
"# config_types = \"\",\n",
"# n_samples = 2,\n",
"# prefix = \"\",\n",
"# scale = 1.0e5,\n",
"# append_mode = False\n",
"# )\n",
"\n",
"wg.run()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a418d4c",
"metadata": {},
"outputs": [],
"source": [
"wg.tasks.descriptors_outputs.outputs.result.value.get_dict()"
]
},
{
"cell_type": "markdown",
"id": "c80ae1d3",
"metadata": {},
"source": [
"In order to pass the outputs from the WorkGraph into `sample_split.py`, we create a list of structures:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78a87e6f",
"metadata": {},
"outputs": [],
"source": [
    "# Collect the SinglefileData outputs from the WorkGraph into a list\n",
    "descriptors_output = list(wg.outputs.final_structures)\n",
    "traj_structs = []\n",
    "\n",
    "for i in range(len(descriptors_output)):\n",
    "    singlefiledata = wg.outputs.final_structures[f\"structs{i}\"]\n",
    "    traj_structs.append(singlefiledata.value)\n",
    "\n",
    "print(traj_structs)"
]
},
{
"cell_type": "markdown",
"id": "5b666c73",
"metadata": {},
"source": [
"Now we can run the split script:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d4c948c",
"metadata": {},
"outputs": [],
"source": [
    "from sample_split import process_and_split_data\n",
    "\n",
    "process_and_split_data(\n",
    "    trajectory_path=traj_structs,\n",
    "    config_types=\"\",\n",
    "    n_samples=2,\n",
    "    prefix=\"\",\n",
    "    scale=1.0e5,\n",
    "    append_mode=False,\n",
    ")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aiida-mlip (3.12.2)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}