Merged
58 commits
4139d34
changes
MM0hsin Jul 22, 2025
480cdef
Combine Geomopt and descriptors task into one workgraph
MM0hsin Jul 22, 2025
9439af6
Add sample_split.py and refactor to work with geometry.opti.ipynb
MM0hsin Jul 28, 2025
639b6b2
Rename descriptors_filter.ipynb
MM0hsin Jul 28, 2025
66d3f6e
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Jul 28, 2025
500376e
changes
MM0hsin Jul 22, 2025
2f5ba0a
Combine Geomopt and descriptors task into one workgraph
MM0hsin Jul 22, 2025
e9f6489
Add sample_split.py and refactor to work with geometry.opti.ipynb
MM0hsin Jul 28, 2025
50ee7af
Rename descriptors_filter.ipynb
MM0hsin Jul 28, 2025
03a9fe1
Add descriptors_outputs to sample_split.py
MM0hsin Aug 4, 2025
0b9c336
Merge branch 'add_geomopt_fine_tuning' of https://github.com/MM0hsin/…
MM0hsin Aug 4, 2025
69d536e
Refactor descriptors_outputs in sample_spit.py
MM0hsin Aug 4, 2025
9ed07fb
Refactor process_and_split_data to accept singlefiledata outputs from…
MM0hsin Aug 5, 2025
0a45562
Refector descriptors_filter.ipynb workgraph
MM0hsin Aug 5, 2025
bb0b78e
Update sample_split.py to include WG function to run Geomopt and Desc…
MM0hsin Aug 12, 2025
278af75
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Aug 12, 2025
408c111
Add descriptors_filter_qe.ipynb for QE calculations
MM0hsin Aug 20, 2025
5e893f8
Update descriptors_filter_qe.ipynb to QE task remotely
MM0hsin Sep 4, 2025
4a113c1
Minor tweak for QE task
MM0hsin Sep 4, 2025
7e68ff9
Add training task to descriptors_filter_qe.ipynb
MM0hsin Sep 8, 2025
85a3eaf
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Sep 11, 2025
d77a5b7
Add tutorial for setting up an external computer (#214)
MM0hsin Sep 15, 2025
05e24dc
Update aiida-workgraph (#232)
ElliottKasoar Sep 16, 2025
061082d
Apply suggestions from code review
MM0hsin Sep 17, 2025
a94cfd0
Replace opt_kwargs with minimize_kwargs (#180)
MM0hsin Sep 16, 2025
cc510f0
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Sep 17, 2025
e27dc23
Address PR comments
MM0hsin Sep 18, 2025
dd437f1
Refactor sample_split.py
MM0hsin Sep 24, 2025
95a94a7
Merge main
MM0hsin Sep 24, 2025
27cbc0f
Update workgraphs to work with new script
MM0hsin Sep 25, 2025
8fb3c92
Address PR comments
MM0hsin Oct 1, 2025
1d64033
Fix UV aiida-pseudo install fail
MM0hsin Oct 2, 2025
5442716
Address PR comments
MM0hsin Oct 20, 2025
cf53576
Address PR comments
MM0hsin Oct 23, 2025
00b6b1d
Address PR comment
MM0hsin Oct 23, 2025
0f1ab70
Remove unnessary notebook outputs
MM0hsin Oct 23, 2025
30b1719
Add aiida_qe optional dependancy
MM0hsin Oct 23, 2025
6b2939a
Add aiida_qe optional dependancy
MM0hsin Oct 23, 2025
c02f4d0
Address PR comments
MM0hsin Oct 23, 2025
355f8b3
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Oct 23, 2025
ddfd655
Update examples/tutorials/workgraphs/descriptors_filter.ipynb
alinelena Oct 23, 2025
5919f92
Pushing QE workgraph changes for testing
MM0hsin Oct 24, 2025
374d449
Merge remote-tracking branch 'refs/remotes/upstream/add_geomopt_fine_…
MM0hsin Oct 24, 2025
13e0221
refactor qe workgraph with working inputs
MM0hsin Oct 24, 2025
3752dda
Address PR comments
MM0hsin Oct 27, 2025
887d647
Address wg bug
MM0hsin Oct 31, 2025
10a197d
Fix QE task inputs and address PR comments
MM0hsin Nov 3, 2025
adaea1f
Address PR comments
MM0hsin Nov 5, 2025
843a6fe
Address PR comments
MM0hsin Nov 6, 2025
912b27d
Fix markdown explanation
MM0hsin Nov 7, 2025
0bdcc98
Add link to notebook
MM0hsin Nov 7, 2025
4aea09d
Rename task [skip actions]
MM0hsin Nov 7, 2025
b01659a
Update examples/tutorials/workgraphs/descriptors_filter_qe.ipynb
alinelena Nov 11, 2025
a071a6b
Address PR comments
MM0hsin Nov 11, 2025
042842d
Merge remote-tracking branch 'refs/remotes/upstream/add_geomopt_fine_…
MM0hsin Nov 11, 2025
392850d
Merge branch 'main' into add_geomopt_fine_tuning
MM0hsin Nov 11, 2025
64d3147
Address PR comments
MM0hsin Nov 12, 2025
5f307e9
Address PR comments
MM0hsin Nov 12, 2025
48 changes: 48 additions & 0 deletions examples/tutorials/structures/NaCl-traj.xyz

Large diffs are not rendered by default.

1,122 changes: 0 additions & 1,122 deletions examples/tutorials/structures/lj-traj.xyz

This file was deleted.

40 changes: 40 additions & 0 deletions examples/tutorials/workgraphs/JanusConfigfile.yml
@@ -0,0 +1,40 @@
name: "test"
E0s: 'average'
max_num_epochs: 1
model: 'MACE'
energy_key: 'qe_energy'
forces_key: 'qe_forces'
stress_key: 'qe_stress'
loss: 'universal'
energy_weight: 1
forces_weight: 10
stress_weight: 100
compute_stress: True
eval_interval: 2
error_table: 'PerAtomRMSE'
interaction_first: 'RealAgnosticResidualInteractionBlock'
interaction: 'RealAgnosticResidualInteractionBlock'
num_interactions: 2
correlation: 3
max_ell: 3
r_max: 4.0
max_L: 0
num_channels: 16
num_radial_basis: 6
MLP_irreps: '16x0e'
scaling: 'rms_forces_scaling'
lr: 0.005
weight_decay: 1e-8
ema: True
ema_decay: 0.995
scheduler_patience: 5
batch_size: 2
valid_batch_size: 2
patience: 50
amsgrad: True
device: 'cpu'
distributed: False
clip_grad: 100
keep_checkpoints: False
keep_isolated_atoms: True
save_cpu: True
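The options above are plain YAML, so they can be loaded and sanity-checked before launching fine-tuning. A minimal sketch with PyYAML, inlining a few of the keys for illustration (in practice you would read JanusConfigfile.yml itself); note that the `energy_key`/`forces_key`/`stress_key` values must match the labels attached to structures by the QE workgraph:

```python
# Sketch: parse a subset of the JanusConfigfile.yml options and check
# the training-label keys. Assumes PyYAML is installed.
import yaml

config_text = """
name: "test"
model: 'MACE'
energy_key: 'qe_energy'
forces_key: 'qe_forces'
stress_key: 'qe_stress'
r_max: 4.0
max_num_epochs: 1
"""
config = yaml.safe_load(config_text)

# If these keys do not match the labels in the training file, MACE will
# silently fall back to its defaults, so check them up front.
assert config["energy_key"] == "qe_energy"
print(config["model"], config["r_max"])
```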
71 changes: 43 additions & 28 deletions examples/tutorials/workgraphs/descriptors_filter.ipynb
@@ -145,9 +145,32 @@
" The descriptors job reads the structure and computes numerical features\n",
" (fingerprints) for each structure.\n",
"3. Collect the descriptor outputs, as StructureData, for all structures\n",
" and pass them to `process_and_split_data` (a calcfunction).\n",
"4. `process_and_split_data` writes the structures to `train.xyz`, `test.xyz`,\n",
" and `valid.xyz` files, and returns a Dict node with the file paths."
" and pass them to `create_qe_files` (a calcfunction task).\n",
"4. This calls `process_and_split_data` (a plain Python function), which writes the structures to `train.xyz`, `test.xyz`,\n",
" and `valid.xyz`. The task returns `SinglefileData` nodes, which is why we define a `calcfunction` task rather than a plain `task`: the returned files are then stored with provenance and available as outputs of the workgraph."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eec0b5ca",
"metadata": {},
"outputs": [],
"source": [
"from aiida_workgraph import task\n",
"from sample_split import process_and_split_data\n",
"from aiida.orm import SinglefileData\n",
"\n",
"@task.calcfunction(outputs=[\"test_file\", \"train_file\", \"valid_file\"])\n",
"def create_qe_files(**inputs):\n",
" files = process_and_split_data(**inputs)\n",
"\n",
" return {\n",
" \"train_file\": SinglefileData(files[\"train_file\"]),\n",
" \"test_file\": SinglefileData(files[\"test_file\"]),\n",
" \"valid_file\": SinglefileData(files[\"valid_file\"])\n",
" }"
]
},
{
@@ -158,19 +181,18 @@
"outputs": [],
"source": [
"from aiida.orm import Str, Float, Bool, Int\n",
"from ase.io import read\n",
"from ase.io import iread\n",
"from aiida_workgraph import WorkGraph\n",
"from aiida.orm import StructureData\n",
"from sample_split import process_and_split_data\n",
"\n",
"initail_structure = \"../structures/lj-traj.xyz\"\n",
"num_structs = len(read(initail_structure, index=\":\"))\n",
"initial_structure = \"../structures/NaCl-traj.xyz\"\n",
"\n",
"with WorkGraph(\"Calculation Workgraph\") as wg:\n",
" final_structures = {}\n",
"\n",
" for i in range(num_structs):\n",
" structure = StructureData(ase=read(initail_structure, index=i))\n",
" for i, struct in enumerate(iread(initial_structure)):\n",
" structure = StructureData(ase=struct)\n",
"\n",
" geomopt_calc = wg.add_task(\n",
" geomoptCalc,\n",
@@ -199,9 +221,9 @@
" final_structures[f\"structs{i}\"] = descriptors_calc.outputs.xyz_output\n",
"\n",
" split_task = wg.add_task(\n",
" process_and_split_data,\n",
" create_qe_files,\n",
" config_types= Str(\"\"),\n",
" n_samples=Int(num_structs),\n",
" n_samples=Int(len(final_structures)),\n",
" prefix= Str(\"\"),\n",
" scale= Float(1.0e5),\n",
" append_mode= Bool(False),\n",
@@ -256,33 +278,23 @@
{
"cell_type": "code",
"execution_count": null,
"id": "fe7291b7",
"id": "d2f463f6",
"metadata": {},
"outputs": [],
"source": [
"wg.tasks.process_and_split_data.outputs.result.value.get_dict()"
"test_file = wg.tasks.create_qe_files.outputs.test_file.value\n",
"train_file = wg.tasks.create_qe_files.outputs.train_file.value\n",
"valid_file = wg.tasks.create_qe_files.outputs.valid_file.value"
]
},
{
"cell_type": "markdown",
"id": "514aeb77",
"id": "8dedf8c5",
"metadata": {},
"source": [
"We can use the outputs to visualise the data. For example, below we will plot a histogram of `mace_mp_descriptor`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2f463f6",
"metadata": {},
"outputs": [],
"source": [
"test_file = wg.tasks.process_and_split_data.outputs.result.value.get_dict()[\"test_file\"]\n",
"train_file = wg.tasks.process_and_split_data.outputs.result.value.get_dict()[\"train_file\"]\n",
"valid_file = wg.tasks.process_and_split_data.outputs.result.value.get_dict()[\"valid_file\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -294,9 +306,12 @@
"from ase.io import iread\n",
"import matplotlib.pyplot as plt\n",
"\n",
"test_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(test_file, index=':')])\n",
"train_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(train_file, index=':')])\n",
"valid_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(valid_file, index=':')])\n",
"with test_file.as_path() as path:\n",
" test_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
"with train_file.as_path() as path:\n",
" train_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
"with valid_file.as_path() as path:\n",
" valid_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
"\n",
"all_values = np.concatenate([train_mace_desc, valid_mace_desc, test_mace_desc])\n",
"bins = np.linspace(all_values.min(), all_values.max(), len(all_values))\n",
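The three-way split that `process_and_split_data` performs can be sketched in isolation. The helper below is a hypothetical simplification (the real function in sample_split.py also writes the `.xyz` files and handles `config_types`, `prefix`, `scale`, and `append_mode`); the fractions and seed are illustrative assumptions:

```python
# Hypothetical sketch of a shuffled train/valid/test index split, in the
# spirit of process_and_split_data from sample_split.py.
import random

def split_indices(n_samples, train_frac=0.8, valid_frac=0.1, seed=2025):
    """Shuffle structure indices and partition them into three lists."""
    indices = list(range(n_samples))
    random.Random(seed).shuffle(indices)  # fixed seed for reproducibility
    n_train = int(n_samples * train_frac)
    n_valid = int(n_samples * valid_frac)
    return (
        indices[:n_train],
        indices[n_train : n_train + n_valid],
        indices[n_train + n_valid :],  # remainder becomes the test set
    )

train, valid, test = split_indices(48)
print(len(train), len(valid), len(test))  # → 38 4 6
```

Every structure lands in exactly one of the three partitions, which is what allows the notebook to plot the `mace_mp_descriptor` histograms of the splits side by side.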