From 6ddf56e54ba8ddef517546961246254bb8dcf70e Mon Sep 17 00:00:00 2001
From: "markus.kuehbach" <markus.kuehbach@hu-berlin.de>
Date: Fri, 18 Aug 2023 11:38:08 +0200
Subject: [PATCH] Implemented nxs_mtex subparser, default plot annotator, and
 NeXus root decorator

---
 .../readers/em/concepts/concept_mapper.py     | 116 ++++++++++++++++++
 .../readers/em/concepts/nexus_concepts.py     |  58 +++++++++
 pynxtools/dataconverter/readers/em/reader.py  |  36 ++++--
 .../readers/em/subparsers/nxs_mtex.py         |  54 ++++++++
 .../readers/em/utils/default_plots.py         |  56 +++++++++
 5 files changed, 312 insertions(+), 8 deletions(-)
 create mode 100644 pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
 create mode 100644 pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py
 create mode 100644 pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py
 create mode 100644 pynxtools/dataconverter/readers/em/utils/default_plots.py

diff --git a/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py b/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
new file mode 100644
index 000000000..6ee855b84
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/concepts/concept_mapper.py
@@ -0,0 +1,116 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Utilities for working with NeXus concepts encoded as Python dicts in the concepts dir."""
+
+# pylint: disable=no-member
+
+import pytz
+
+from datetime import datetime
+
+
+def load_from_modifier(terms, fd_dct):
+    """Return fd_dct[terms] for a str key or a list of values for a list of str keys.
+
+    Yields None for an unknown str key and for any other type of terms (None included),
+    a list that names a key missing in fd_dct raises KeyError.
+    """
+    if isinstance(terms, str):
+        return fd_dct.get(terms)
+    # check the container type first so that scalars are never iterated
+    if isinstance(terms, list) and all(isinstance(entry, str) for entry in terms):
+        return [fd_dct[entry] for entry in terms]
+    return None
+
+
+def convert_iso8601_modifier(terms, dct: dict):
+    """Implement modifier which makes nionswift local time stamps time-zone-aware.
+
+    terms has to be a list of two str keys of dct: the first one points to a local
+    time formatted like "%Y-%m-%dT%H:%M:%S.%f" (a blank instead of the T works too),
+    the second one points to a time zone name that pytz knows.
+
+    Returns a datetime object which carries that pytz time zone (the time is not
+    shifted to UTC) or None when terms has any other form or when one of the
+    two keys is missing in dct.
+    Raises ValueError when the time zone name is not in pytz.all_timezones.
+    """
+    # a single str term carries no time zone so there is nothing to convert
+    if not isinstance(terms, list) or len(terms) != 2:
+        return None
+    if not all(isinstance(entry, str) for entry in terms):
+        return None
+    # assume the first argument is a local time
+    # assume the second argument is a timezone string
+    if terms[0] not in dct or terms[1] not in dct:
+        return None
+    # these times can be arbitrarily formatted, for now we let ourselves be guided
+    # by how time stamps are returned in Christoph Koch's nionswift instances
+    date_time_str = dct[terms[0]].replace("T", " ")
+    time_zone_str = dct[terms[1]]
+    if time_zone_str not in pytz.all_timezones:
+        raise ValueError('Invalid timezone string!')
+    date_time_obj = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M:%S.%f')
+    return pytz.timezone(time_zone_str).localize(date_time_obj)
+
+
+def apply_modifier(modifier, dct: dict):
+    """Evaluate modifier, either a literal str or a dict encoding a functional mapping.
+
+    A str is returned as is, {"fun": ..., "terms": ...} is dispatched to the
+    modifier function named by "fun" with data from dct, everything else yields None.
+    """
+    if isinstance(modifier, str):
+        return modifier
+    if not isinstance(modifier, dict):
+        return None
+    # {"link": ...} (jsonmap reader, Sherjeel) is CURRENTLY NOT IMPLEMENTED
+    if modifier.keys() != {"fun", "terms"}:
+        return None
+    if modifier["fun"] == "load_from":
+        return load_from_modifier(modifier["terms"], dct)
+    if modifier["fun"] == "convert_iso8601":
+        return convert_iso8601_modifier(modifier["terms"], dct)
+    return None
+
+
+# examples/tests showing how to use modifiers with apply_modifier
+# modd = "µs"
+# modd = {"link": "some_link_to_somewhere"}
+# modd = {"fun": "load_from", "terms": "metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 11"}
+# modd = {"fun": "load_from", "terms": ["metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 11",
+#     "metadata/scan/scan_device_properties/mag_boards/MagBoard 1 Relay"]}
+# modd = {"fun": "convert_iso8601", "terms": ["data_modified", "timezone"]}
+# print(apply_modifier(modd, yml))
+
+def variadic_path_to_specific_path(path: str, instance_identifier: list):
+    """Transforms a variadic path to an actual path with instances.
+
+    The i-th "*" in path is replaced by the i-th entry of instance_identifier,
+    e.g. "/ENTRY[entry*]/@url" and [1] yield "/ENTRY[entry1]/@url", surplus entries
+    are ignored. Returns None for a None/empty path or too few identifiers.
+    """
+    if (path is None) or (path == ""):
+        return None
+    fragments = path.split("*")  # n "*" always split a path into n + 1 fragments
+    if len(fragments) == 1:  # path is not variadic
+        return path
+    if len(instance_identifier) < len(fragments) - 1:
+        return None
+    pairs = zip(fragments[:-1], instance_identifier)  # zip drops surplus identifiers
+    return "".join(f"{frg}{idx}" for frg, idx in pairs) + fragments[-1]
diff --git a/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py
new file mode 100644
index 000000000..2f12ab5de
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/concepts/nexus_concepts.py
@@ -0,0 +1,58 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Implement NeXus-specific groups and fields to document software and versions used."""
+
+# pylint: disable=no-member
+
+from pynxtools.dataconverter.readers.em.concepts.concept_mapper \
+    import variadic_path_to_specific_path, apply_modifier
+
+
+PYNXTOOLS_VERSION = "n/a"  # placeholder, no version is looked up here
+PYNXTOOLS_URL = "https://www.github.com/FAIRmat-NFDI/pynxtools"
+
+NXEM_NAME = "NXem"
+NXEM_VERSION = "n/a"  # placeholder, no version is looked up here
+NXEM_URL = "https://www.github.com/FAIRmat-NFDI/nexus_definitions"
+# template path to modifier pairs, NxEmAppDef.parse replaces "*" by the entry_id
+NxEmRoot = {"/PROGRAM[program1]/program": "pynxtools/dataconverter/readers/em",
+            "/PROGRAM[program1]/program/@version": PYNXTOOLS_VERSION,
+            "/PROGRAM[program1]/program/@url": PYNXTOOLS_URL,
+            "/ENTRY[entry*]/@version": NXEM_VERSION,
+            "/ENTRY[entry*]/@url": NXEM_URL,
+            "/ENTRY[entry*]/definition": NXEM_NAME}
+
+
+class NxEmAppDef():
+    """Add NeXus NXem appdef specific contextualization."""
+
+    def parse(self, template: dict, entry_id: int = 1, cmd_line_args=None) -> dict:
+        """Add NxEmRoot concepts of entry_id and the command line call to template."""
+        for nx_path, modifier in NxEmRoot.items():
+            if nx_path in ("IGNORE", "UNCLEAR"):
+                continue
+            trg = variadic_path_to_specific_path(nx_path, [entry_id])
+            # NxEmRoot holds literals only, so no data source has to be passed
+            res = apply_modifier(modifier, {})
+            if (trg is not None) and (res is not None):
+                template[trg] = res
+        # None replaces the former mutable [] default, empty input adds nothing
+        if cmd_line_args and all(isinstance(item, str) for item in cmd_line_args):
+            template["/cs_profiling/@NX_class"] = "NXcs_profiling"
+            template["/cs_profiling/command_line_call"] = cmd_line_args
+        return template
diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
index 7349cee2a..e424bbf9d 100644
--- a/pynxtools/dataconverter/readers/em/reader.py
+++ b/pynxtools/dataconverter/readers/em/reader.py
@@ -23,10 +23,13 @@
 
 from pynxtools.dataconverter.readers.base.reader import BaseReader
 
-"""
-from pynxtools.dataconverter.readers.em_om.utils.use_case_selector \
-    import EmOmUseCaseSelector
+from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef
+
+from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser
+
+from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver
 
+"""
 from pynxtools.dataconverter.readers.em_om.utils.generic_eln_io \
     import NxEmOmGenericElnSchemaParser
 
@@ -52,7 +55,7 @@ class EmReader(BaseReader):
     # pylint: disable=too-few-public-methods
 
     # Whitelist for the NXDLs that the reader supports and can process
-    supported_nxdls = ["NXem_refactoring"]
+    supported_nxdls = ["NXem"]  # ["NXem_refactoring"]
 
     # pylint: disable=duplicate-code
     def read(self,
@@ -63,8 +66,8 @@ def read(self,
         # pylint: disable=duplicate-code
         template.clear()
 
-        debug_id = 3
-        template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}"
+        # debug_id = 3
+        # template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}"
         # this em_om parser combines multiple sub-parsers
         # so we need the following input:
         # logical analysis which use case
@@ -100,11 +103,20 @@ def read(self,
         # else:
         #     print("No interpretable configuration file offered")
 
+        input_file_names = []
+        for file_path in file_paths:
+            if file_path != "":
+                input_file_names.append(file_path)
         print("Parse NeXus appdef-specific content...")
-        # nxs = NxEmAppDefContent()
-        # nxs.parse(template)
+        nxs = NxEmAppDef()
+        nxs.parse(template, entry_id, input_file_names)
 
         print("Parse and map pieces of information within files from tech partners...")
+        sub_parser = "nxs_mtex"
+        subparser = NxEmNxsMTexSubParser()
+        subparser.parse(template, entry_id)
+        # add further with resolving cases
+
         # for dat_instance in case.dat_parser_type:
         #     print(f"Process pieces of information in {dat_instance} tech partner file...")
         #    continue
@@ -133,6 +145,14 @@ def read(self,
         # we only need to decorate the template to point to the mandatory ROI overview
         # print("Create NeXus default plottable data...")
         # em_default_plot_generator(template, 1)
+        nxs_plt = NxEmDefaultPlotResolver()
+        # if nxs_mtex is the sub-parser
+        resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot(
+            entry_id, file_paths[0])
+        print(f"DEFAULT PLOT IS {resolved_path}")
+        if resolved_path != "":
+            nxs_plt.annotate_default_plot(template, resolved_path)
+
         debugging = True
         if debugging is True:
             print("Reporting state of template before passing to HDF5 writing...")
diff --git a/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py
new file mode 100644
index 000000000..4ebd685ca
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/subparsers/nxs_mtex.py
@@ -0,0 +1,54 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""(Sub-)parser mapping concepts and content from *.nxs.mtex files on NXem."""
+
+"""
+README.md
+*.nxs.mtex is a specific HDF5-based data processing report format for users of
+the MTex/Matlab texture toolbox to export results from MTex to other software.
+
+The format uses several concepts from the NXem appdef.
+Instances of *.nxs.mtex files thus contain several but not necessarily
+all pieces of information which the NXem application definition demands
+as required.
+
+Therefore, pynxtools can be used to append these missing pieces of information.
+
+Currently implemented I/O support for this format:
+The current implementation of the *.nxs.mtex sub-parser in the em reader
+copies an existing *.nxs.mtex file and annotates
+this copy with the missing pieces of information.
+
+The nxs_mtex sub-parser is the only sub-parser of the em parser
+with this copying-the-input-file design. For all other file formats
+the em parser uses the template to instantiate the complete file
+including all numerical data eventually generated by one or several
+of the sub-parsers.
+"""
+
+
+class NxEmNxsMTexSubParser():
+    """Sub-parser which maps content from *.nxs.mtex files on an instance of NXem."""
+
+    def __init__(self):
+        """Nothing to set up, the sub-parser keeps no state."""
+
+    def parse(self, template: dict, entry_id: int = 1) -> dict:
+        """Return template as is because for *.nxs.mtex
+        all data are already in the copy of the output."""
+        return template
diff --git a/pynxtools/dataconverter/readers/em/utils/default_plots.py b/pynxtools/dataconverter/readers/em/utils/default_plots.py
new file mode 100644
index 000000000..4b1781c34
--- /dev/null
+++ b/pynxtools/dataconverter/readers/em/utils/default_plots.py
@@ -0,0 +1,56 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Logics and functionality to identify and annotate a default plot NXem."""
+
+import h5py
+import numpy as np
+
+
+class NxEmDefaultPlotResolver():
+    """Annotate the default plot in an instance of NXem."""
+
+    def __init__(self):
+        """Nothing to set up, the resolver keeps no state."""
+
+    def annotate_default_plot(self, template: dict, plot_nxpath: str = "") -> dict:
+        """Write path to the default plot from root to plot_nxpath.
+
+        Each group along plot_nxpath gets an @default entry in template which
+        names its child on that path, e.g. "/entry1/roi" sets
+        template["/@default"] = "entry1" and template["/entry1/@default"] = "roi".
+        An empty plot_nxpath leaves template as is, template is modified in place.
+        """
+        tmp = plot_nxpath.split("/")
+        for idx, name in enumerate(tmp):
+            # the first fragment has no parent, empty fragments name no group
+            if (idx != 0) and (name != ""):
+                template[f'{"/".join(tmp[0:idx])}/@default'] = name
+        return template
+
+    def nxs_mtex_get_nxpath_to_default_plot(self,
+                                            entry_id: int = 1,
+                                            nxs_mtex_file_name: str = "") -> str:
+        """Find a path to a default plot (i.e. NXdata instance), else an empty str."""
+        nxpath = f"/entry{entry_id}/roi1/ebsd/indexing/roi"
+        # the context manager closes the file also when reading it raises
+        with h5py.File(nxs_mtex_file_name, "r") as h5r:
+            return nxpath if nxpath in h5r else ""
+
+    def parse(self, template: dict, entry_id: int = 1) -> dict:
+        """Return template unchanged, entry_id is not used."""
+        return template