Skip to content

Commit 843a6fe

Browse files
committed
Address PR comments
1 parent adaea1f commit 843a6fe

File tree

3 files changed

+49
-35
lines changed

3 files changed

+49
-35
lines changed

examples/tutorials/workgraphs/descriptors_filter.ipynb

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,19 +181,19 @@
181181
"outputs": [],
182182
"source": [
183183
"from aiida.orm import Str, Float, Bool, Int\n",
184-
"from ase.io import read\n",
184+
"from ase.io import iread\n",
185185
"from aiida_workgraph import WorkGraph\n",
186186
"from aiida.orm import StructureData\n",
187187
"from sample_split import process_and_split_data\n",
188188
"\n",
189189
"initial_structure = \"../structures/NaCl-traj.xyz\"\n",
190-
"num_structs = len(read(initial_structure, index=\":\"))\n",
190+
"# num_structs = len(read(initial_structure, index=\":\"))\n",
191191
"\n",
192192
"with WorkGraph(\"Calculation Workgraph\") as wg:\n",
193193
" final_structures = {}\n",
194194
"\n",
195-
" for i in range(num_structs):\n",
196-
" structure = StructureData(ase=read(initial_structure, index=i))\n",
195+
" for i, struct in enumerate(iread(initial_structure)):\n",
196+
" structure = StructureData(ase=struct)\n",
197197
"\n",
198198
" geomopt_calc = wg.add_task(\n",
199199
" geomoptCalc,\n",
@@ -224,7 +224,7 @@
224224
" split_task = wg.add_task(\n",
225225
" create_aiida_files,\n",
226226
" config_types= Str(\"\"),\n",
227-
" n_samples=Int(num_structs),\n",
227+
" n_samples=Int(len(final_structures)),\n",
228228
" prefix= Str(\"\"),\n",
229229
" scale= Float(1.0e5),\n",
230230
" append_mode= Bool(False),\n",
@@ -308,11 +308,11 @@
308308
"import matplotlib.pyplot as plt\n",
309309
"\n",
310310
"with test_file.as_path() as path:\n",
311-
" test_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(path, index=':')])\n",
311+
" test_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
312312
"with train_file.as_path() as path:\n",
313-
" train_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(path, index=':')])\n",
313+
" train_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
314314
"with valid_file.as_path() as path:\n",
315-
" valid_mace_desc = np.array([i.info['mace_mp_descriptor'] for i in iread(path, index=':')])\n",
315+
" valid_mace_desc = np.array([struct.info['mace_mp_descriptor'] for struct in iread(path, index=':')])\n",
316316
"\n",
317317
"all_values = np.concatenate([train_mace_desc, valid_mace_desc, test_mace_desc])\n",
318318
"bins = np.linspace(all_values.min(), all_values.max(), len(all_values))\n",

examples/tutorials/workgraphs/descriptors_filter_qe.ipynb

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@
139139
"source": [
140140
"from aiida_workgraph import task\n",
141141
"from aiida_workgraph.manager import get_current_graph\n",
142-
"from aiida.orm import StructureData, load_group, KpointsData, SinglefileData\n",
142+
"from aiida.orm import StructureData, load_group, KpointsData, SinglefileData, InstalledCode, List, Dict\n",
143143
"from ase.io import iread\n",
144144
"from pathlib import Path\n",
145145
"import yaml\n",
@@ -148,18 +148,23 @@
148148
"\n",
149149
"\n",
150150
"@task.graph(outputs = [\"test_file\", \"train_file\", \"valid_file\"])\n",
151-
"def qe(**inputs):\n",
151+
"def qe(\n",
152+
" code: InstalledCode,\n",
153+
" kpoints_mesh: List,\n",
154+
" task_metadata: Dict,\n",
155+
" test_file: SinglefileData,\n",
156+
" train_file: SinglefileData,\n",
157+
" valid_file: SinglefileData\n",
158+
" ):\n",
152159
"\n",
153160
" wg = get_current_graph()\n",
154161
"\n",
155-
" task_inputs = inputs[\"task_params\"]['task_inputs']\n",
156-
" code =inputs[\"task_params\"][\"code\"]\n",
157-
"\n",
158162
" kpoints = KpointsData()\n",
159-
" kpoints.set_kpoints_mesh(task_inputs['kpoint_mesh'])\n",
163+
" kpoints.set_kpoints_mesh(kpoints_mesh)\n",
160164
"\n",
161165
" pseudo_family = load_group('SSSP/1.3/PBE/efficiency')\n",
162-
" files = {\"test_file\": inputs['test_file'],\"train_file\":inputs['train_file'],\"valid_file\":inputs['valid_file']}\n",
166+
" \n",
167+
" files = {\"test_file\": test_file, \"train_file\": train_file, \"valid_file\": valid_file}\n",
163168
"\n",
164169
" for file_name, file in files.items():\n",
165170
" with file.as_path() as path:\n",
@@ -187,12 +192,12 @@
187192
" \n",
188193
" qe_task = wg.add_task(\n",
189194
" PwCalculation,\n",
190-
" code = code,\n",
191-
" parameters= pw_params,\n",
192-
" kpoints= kpoints,\n",
193-
" pseudos= pseudos,\n",
194-
" metadata= task_inputs[\"metadata\"],\n",
195-
" structure= structure,\n",
195+
" code=code,\n",
196+
" parameters=pw_params,\n",
197+
" kpoints=kpoints,\n",
198+
" pseudos=pseudos,\n",
199+
" metadata=task_metadata.value,\n",
200+
" structure=structure,\n",
196201
" )\n",
197202
" \n",
198203
" structfile = f\"{file_name}.struct{i}\"\n",
@@ -239,9 +244,9 @@
239244
" for file_name, structs in inputs.items():\n",
240245
" path = Path(f\"mlip_{file_name}.extxyz\")\n",
241246
"\n",
242-
" for stuct_out_params in structs.values():\n",
247+
" for struct_out_params in structs.values():\n",
243248
" \n",
244-
" trajectory = stuct_out_params[\"trajectory\"]\n",
249+
" trajectory = struct_out_params[\"trajectory\"]\n",
245250
"\n",
246251
" fileStructure = trajectory.get_structure(index=0)\n",
247252
" fileAtoms = fileStructure.get_ase()\n",
@@ -253,7 +258,7 @@
253258
" fileAtoms.info[\"units\"] = {\"energy\": \"eV\",\"forces\": \"ev/Ang\",\"stress\": \"ev/Ang^3\"}\n",
254259
" fileAtoms.set_array(\"qe_forces\", trajectory.arrays[\"forces\"][0])\n",
255260
"\n",
256-
" parameters = stuct_out_params[\"parameters\"]\n",
261+
" parameters = struct_out_params[\"parameters\"]\n",
257262
" fileParams = parameters.get_dict()\n",
258263
" fileAtoms.info[\"qe_energy\"] = fileParams[\"energy\"]\n",
259264
" write(path, fileAtoms, append=True)\n",
@@ -318,7 +323,7 @@
318323
" \"metadata\": {\"options\": {\"resources\": {\"num_machines\": 1}}},\n",
319324
"}\n",
320325
"\n",
321-
"goemopt_inputs = {\n",
326+
"geomopt_inputs = {\n",
322327
" \"fmax\": Float(0.1),\n",
323328
" \"opt_cell_lengths\": Bool(False),\n",
324329
" \"opt_cell_fully\": Bool(True),\n",
@@ -332,8 +337,7 @@
332337
"}\n",
333338
"\n",
334339
"qe_inputs = {\n",
335-
" \"task_inputs\": Dict({\n",
336-
" \"metadata\": {\n",
340+
" \"task_metadata\": Dict({\n",
337341
" \"options\": {\n",
338342
" \"resources\": {\n",
339343
" \"num_machines\": 1,\n",
@@ -352,9 +356,8 @@
352356
" \"\"\",\n",
353357
" \"append_text\": \"\",\n",
354358
" },\n",
355-
" },\n",
356-
" \"kpoint_mesh\": List([1, 1, 1]),\n",
357359
" }),\n",
360+
" \"kpoints_mesh\": List([1, 1, 1]),\n",
358361
" \"code\": qe_code,\n",
359362
"}"
360363
]
@@ -364,7 +367,7 @@
364367
"id": "06ee80fd",
365368
"metadata": {},
366369
"source": [
367-
"Now we can build the `Workgraph`. First we iterate through each structure in the initail structure file, and run `Geomopt` and `Descriptors` on them these give a `SinglefileData` instance of the structure outputs. These structures can then be passed to the `split_task`, which splits these structures up into training files. Then we run `QE` task, getting the outputs and passing them into the `training_files` task which, as the name suggests, it creates the training file from the `QE` task outputs. Finally we can run the training script. Ideally, if any of the inputs need to changed, they should be done in the cell above."
370+
"Now we can build the `Workgraph`. First we iterate through each structure in the initial structure file, and run `Geomopt` and `Descriptors` on them; these give a `SinglefileData` instance of the structure outputs. These structures can then be passed to the `split_task`, which splits these structures up into train, test and validation files. Then we run the `QE` task, getting the outputs and passing them into the `training_files` task which, as the name suggests, creates the training file from the `QE` task outputs. Finally we can run the training script. Ideally, if any of the inputs need to be changed, they should be done in the cell above."
368371
]
369372
},
370373
{
@@ -390,7 +393,7 @@
390393
" geomopt_calc = wg.add_task(\n",
391394
" geomoptCalc,\n",
392395
" **calc_inputs,\n",
393-
" **goemopt_inputs,\n",
396+
" **geomopt_inputs,\n",
394397
" struct=structure,\n",
395398
" )\n",
396399
" \n",
@@ -416,7 +419,7 @@
416419
" test_file= split_task.outputs.test_file,\n",
417420
" train_file= split_task.outputs.train_file,\n",
418421
" valid_file= split_task.outputs.valid_file,\n",
419-
" task_params = qe_inputs\n",
422+
" **qe_inputs\n",
420423
" )\n",
421424
"\n",
422425
" training_files = wg.add_task(\n",
@@ -441,7 +444,7 @@
441444
"id": "7f3c72ca",
442445
"metadata": {},
443446
"source": [
444-
"Run and visualise the workgraph"
447+
"Visualise and run the workgraph"
445448
]
446449
},
447450
{
@@ -464,6 +467,16 @@
464467
"wg.run()"
465468
]
466469
},
470+
{
471+
"cell_type": "code",
472+
"execution_count": null,
473+
"id": "13e509e3",
474+
"metadata": {},
475+
"outputs": [],
476+
"source": [
477+
"wg.tasks.create_aiida_files.outputs.test_file"
478+
]
479+
},
467480
{
468481
"cell_type": "markdown",
469482
"id": "2e36396f",
@@ -484,7 +497,8 @@
484497
"import matplotlib.image as mpimg\n",
485498
"\n",
486499
"folder = wg.tasks.Train.outputs.remote_folder.value\n",
487-
"picturePath = f\"{os.getcwd()}/traingraph.png\"\n",
500+
"picturePath = Path.cwd() / \"traingraph.png\"\n",
501+
"\n",
488502
"folder.getfile(relpath='results/test_run-123_train_Default_stage_one.png',destpath=picturePath)\n",
489503
"\n",
490504
"img = mpimg.imread(picturePath)\n",

examples/tutorials/workgraphs/sample_split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def process_and_split_data(
4747
scale: float,
4848
append_mode: bool,
4949
**trajectory_data,
50-
) -> dict:
50+
) -> dict[str, Path]:
5151
"""
5252
Split a trajectory into training, validation, and test sets.
5353

0 commit comments

Comments
 (0)