From 55f21bbd29801e6d8e905509417b5d9497f033da Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 14 Jun 2024 17:28:18 -0400
Subject: [PATCH 1/5] Work on the BIDS part entity in reproin.

---
 CHANGELOG.md                    |  2 +-
 heudiconv/heuristics/reproin.py | 47 +++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db236137..87d26126 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -556,7 +556,7 @@ target output directory during conversion.
   name later on), which avoids hitting file size limits of /tmp ([#481][]) and
   helped to avoid a regression in dcm2nixx 1.0.20201102
 - [#477][] replaced `rec-<magnitude|phase>` with `part-<mag|phase>` now
-  hat BIDSsupports the part entity
+  that BIDS supports the part entity
 - [#473][] made default for CogAtlasID to be a TODO URL
 - [#459][] made AcquisitionTime used for acq_time scans file field
 - [#451][] retained sub-second resolution in scans files
diff --git a/heudiconv/heuristics/reproin.py b/heudiconv/heuristics/reproin.py
index 0d919aa5..aa38c7ca 100644
--- a/heudiconv/heuristics/reproin.py
+++ b/heudiconv/heuristics/reproin.py
@@ -61,6 +61,7 @@
             (e.g. _task-memory_run-01, _task-oddball_run-02)
      fmap - field maps
      dwi  - diffusion weighted imaging (also can as well have runs)
+     perf - perfusion imaging
 
    The other BIDS modalities are not known ATM and their data will not be
    converted and will be just skipped (with a warning). Full list of datatypes
@@ -217,7 +218,7 @@
 }
 
 
-KNOWN_DATATYPES = {"anat", "func", "dwi", "behav", "fmap"}
+KNOWN_DATATYPES = {"anat", "func", "dwi", "behav", "fmap", "perf"}
 
 
 def _delete_chars(from_str: str, deletechars: str) -> str:
@@ -402,7 +403,7 @@ def infotodict(
     run_label: Optional[str] = None  # run-
     dcm_image_iod_spec: Optional[str] = None
     skip_derived = False
-    for s in seqinfo:
+    for i_acq, s in enumerate(seqinfo):
         # XXX: skip derived sequences, we don't store them to avoid polluting
         # the directory, unless it is the motion corrected ones
         # (will get _rec-moco suffix)
@@ -411,6 +412,38 @@ def infotodict(
             lgr.debug("Ignoring derived data %s", s.series_id)
             continue
 
+        if i_acq == 0:
+            prev_dcm_image_iod_spec = None
+            prev_image_type_datatype = None
+        else:
+            prev_dcm_image_iod_spec = seqinfo[i_acq - 1].image_type[2]
+            prev_image_type_datatype = {
+                # Note: P and M are too generic to make a decision here, could be
+                #  for different datatypes (bold, fmap, etc)
+                "FMRI": "func",
+                "MPR": "anat",
+                "DIFFUSION": "dwi",
+                "MIP_SAG": "anat",  # angiography
+                "MIP_COR": "anat",  # angiography
+                "MIP_TRA": "anat",  # angiography
+            }.get(prev_dcm_image_iod_spec, None)
+
+        if i_acq == (len(seqinfo) - 1):
+            next_dcm_image_iod_spec = None
+            next_image_type_datatype = None
+        else:
+            next_dcm_image_iod_spec = seqinfo[i_acq + 1].image_type[2]
+            next_image_type_datatype = {
+                # Note: P and M are too generic to make a decision here, could be
+                #  for different datatypes (bold, fmap, etc)
+                "FMRI": "func",
+                "MPR": "anat",
+                "DIFFUSION": "dwi",
+                "MIP_SAG": "anat",  # angiography
+                "MIP_COR": "anat",  # angiography
+                "MIP_TRA": "anat",  # angiography
+            }.get(next_dcm_image_iod_spec, None)
+
         # possibly apply present formatting in the series_description or protocol name
         for f in "series_description", "protocol_name":
             s = s._replace(**{f: getattr(s, f).format(**s._asdict())})
@@ -499,9 +532,11 @@ def infotodict(
                 if "_pace_" in series_spec:
                     datatype_suffix = "pace"  # or should it be part of seq-
                 elif "P" in s.image_type:
-                    datatype_suffix = "phase"
+                    datatype_suffix = "bold"
+                    series_info["part"] = "phase"
                 elif "M" in s.image_type:
                     datatype_suffix = "bold"
+                    series_info["part"] = "mag"
                 else:
                     # assume bold by default
                     datatype_suffix = "bold"
@@ -615,8 +650,10 @@ def from_series_info(name: str) -> Optional[str]:
             from_series_info("dir"),
             series_info.get("bids"),
             run_label,
+            from_series_info("part"),
             datatype_suffix,
         ]
+
         # filter those which are None, and join with _
         suffix = "_".join(filter(bool, filename_suffix_parts))  # type: ignore[arg-type]
 
@@ -641,8 +678,8 @@ def from_series_info(name: str) -> Optional[str]:
             "_Scout" in s.series_description
             or (
                 datatype == "anat"
-                and datatype_suffix
-                and datatype_suffix.startswith("scout")
+                and filename_suffix_parts[-1]
+                and filename_suffix_parts[-1].startswith("scout")
             )
             or (s.series_description.lower() == s.protocol_name.lower() + "_setter")
         ):

From b00e41fa6709b4524b1d4144f1d9e92f46f9da7b Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 21 Jun 2024 12:03:08 -0400
Subject: [PATCH 2/5] Get part entity detection working.

---
 heudiconv/heuristics/reproin.py | 120 ++++++++++++++++++--------------
 1 file changed, 68 insertions(+), 52 deletions(-)

diff --git a/heudiconv/heuristics/reproin.py b/heudiconv/heuristics/reproin.py
index aa38c7ca..c234cac0 100644
--- a/heudiconv/heuristics/reproin.py
+++ b/heudiconv/heuristics/reproin.py
@@ -403,64 +403,61 @@ def infotodict(
     run_label: Optional[str] = None  # run-
     dcm_image_iod_spec: Optional[str] = None
     skip_derived = False
-    for i_acq, s in enumerate(seqinfo):
+    for i_acq, curr_seqinfo in enumerate(seqinfo):
         # XXX: skip derived sequences, we don't store them to avoid polluting
         # the directory, unless it is the motion corrected ones
         # (will get _rec-moco suffix)
-        if skip_derived and s.is_derived and not s.is_motion_corrected:
-            skipped.append(s.series_id)
-            lgr.debug("Ignoring derived data %s", s.series_id)
+        if (
+            skip_derived
+            and curr_seqinfo.is_derived
+            and not curr_seqinfo.is_motion_corrected
+        ):
+            skipped.append(curr_seqinfo.series_id)
+            lgr.debug("Ignoring derived data %s", curr_seqinfo.series_id)
             continue
 
         if i_acq == 0:
+            prev_seqinfo = None
             prev_dcm_image_iod_spec = None
-            prev_image_type_datatype = None
         else:
-            prev_dcm_image_iod_spec = seqinfo[i_acq - 1].image_type[2]
-            prev_image_type_datatype = {
-                # Note: P and M are too generic to make a decision here, could be
-                #  for different datatypes (bold, fmap, etc)
-                "FMRI": "func",
-                "MPR": "anat",
-                "DIFFUSION": "dwi",
-                "MIP_SAG": "anat",  # angiography
-                "MIP_COR": "anat",  # angiography
-                "MIP_TRA": "anat",  # angiography
-            }.get(prev_dcm_image_iod_spec, None)
+            prev_seqinfo = seqinfo[i_acq - 1]
+            for f in "series_description", "protocol_name":
+                prev_seqinfo = prev_seqinfo._replace(
+                    **{f: getattr(prev_seqinfo, f).format(**prev_seqinfo._asdict())}
+                )
+
+            prev_dcm_image_iod_spec = prev_seqinfo.image_type[2]
 
         if i_acq == (len(seqinfo) - 1):
+            next_seqinfo = None
             next_dcm_image_iod_spec = None
-            next_image_type_datatype = None
         else:
-            next_dcm_image_iod_spec = seqinfo[i_acq + 1].image_type[2]
-            next_image_type_datatype = {
-                # Note: P and M are too generic to make a decision here, could be
-                #  for different datatypes (bold, fmap, etc)
-                "FMRI": "func",
-                "MPR": "anat",
-                "DIFFUSION": "dwi",
-                "MIP_SAG": "anat",  # angiography
-                "MIP_COR": "anat",  # angiography
-                "MIP_TRA": "anat",  # angiography
-            }.get(next_dcm_image_iod_spec, None)
+            next_seqinfo = seqinfo[i_acq + 1]
+            for f in "series_description", "protocol_name":
+                next_seqinfo = next_seqinfo._replace(
+                    **{f: getattr(next_seqinfo, f).format(**next_seqinfo._asdict())}
+                )
+
+            next_dcm_image_iod_spec = next_seqinfo.image_type[2]
 
         # possibly apply present formatting in the series_description or protocol name
         for f in "series_description", "protocol_name":
-            s = s._replace(**{f: getattr(s, f).format(**s._asdict())})
+            curr_seqinfo = curr_seqinfo._replace(
+                **{f: getattr(curr_seqinfo, f).format(**curr_seqinfo._asdict())}
+            )
 
         template = None
         suffix = ""
         # seq = []
 
-        # figure out type of image from s.image_info -- just for checking ATM
+        # figure out type of image from curr_seqinfo.image_type -- just for checking ATM
         # since we primarily rely on encoded in the protocol name information
-        prev_dcm_image_iod_spec = dcm_image_iod_spec
-        if len(s.image_type) > 2:
+        if len(curr_seqinfo.image_type) > 2:
             # https://dicom.innolitics.com/ciods/cr-image/general-image/00080008
             # 0 - ORIGINAL/DERIVED
             # 1 - PRIMARY/SECONDARY
             # 3 - Image IOD specific specialization (optional)
-            dcm_image_iod_spec = s.image_type[2]
+            dcm_image_iod_spec = curr_seqinfo.image_type[2]
             image_type_datatype = {
                 # Note: P and M are too generic to make a decision here, could be
                 #  for different datatypes (bold, fmap, etc)
@@ -476,7 +473,7 @@ def infotodict(
 
         series_info = {}  # For please lintian and its friends
         for sfield in series_spec_fields:
-            svalue = getattr(s, sfield)
+            svalue = getattr(curr_seqinfo, sfield)
             series_info = parse_series_spec(svalue)
             if series_info:  # looks like a valid spec - we are done
                 series_spec = svalue
@@ -487,10 +484,10 @@ def infotodict(
         if not series_info:
             series_spec = None  # we cannot know better
             lgr.warning(
-                "Could not determine the series name by looking at " "%s fields",
+                "Could not determine the series name by looking at %s fields",
                 ", ".join(series_spec_fields),
             )
-            skipped_unknown.append(s.series_id)
+            skipped_unknown.append(curr_seqinfo.series_id)
             continue
 
         if dcm_image_iod_spec and dcm_image_iod_spec.startswith("MIP"):
@@ -509,14 +506,14 @@ def infotodict(
                 series_spec,
             )
 
-        # if s.is_derived:
+        # if curr_seqinfo.is_derived:
         #     # Let's for now stash those close to original images
         #     # TODO: we might want a separate tree for all of this!?
         #     # so more of a parameter to the create_key
         #     #datatype += '/derivative'
         #     # just keep it lower case and without special characters
         #     # XXXX what for???
-        #     #seq.append(s.series_description.lower())
+        #     #seq.append(curr_seqinfo.series_description.lower())
         #     prefix = os.path.join('derivatives', 'scanner')
         # else:
         #     prefix = ''
@@ -526,17 +523,30 @@ def infotodict(
         # Figure out the datatype_suffix (BIDS _suffix)
         #
         # If none was provided -- let's deduce it from the information we find:
-        # analyze s.protocol_name (series_id is based on it) for full name mapping etc
+        # analyze curr_seqinfo.protocol_name (series_id is based on it) for full name mapping etc
         if not datatype_suffix:
             if datatype == "func":
                 if "_pace_" in series_spec:
                     datatype_suffix = "pace"  # or should it be part of seq-
-                elif "P" in s.image_type:
+                elif (
+                    "P" in curr_seqinfo.image_type
+                    and not curr_seqinfo.series_description.endswith("_SBRef")
+                ):
                     datatype_suffix = "bold"
                     series_info["part"] = "phase"
-                elif "M" in s.image_type:
+                elif "M" in curr_seqinfo.image_type:
                     datatype_suffix = "bold"
-                    series_info["part"] = "mag"
+
+                    # if next one is phase fMRI, we should set part to mag
+                    if (
+                        (
+                            next_seqinfo.series_description
+                            == curr_seqinfo.series_description
+                        )
+                        and (next_dcm_image_iod_spec == "P")
+                        and not curr_seqinfo.series_description.endswith("_SBRef")
+                    ):
+                        series_info["part"] = "mag"
                 else:
                     # assume bold by default
                     datatype_suffix = "bold"
@@ -561,7 +571,7 @@ def infotodict(
         # since they are complementary files produced along-side with original
         # ones.
         #
-        if s.series_description.endswith("_SBRef"):
+        if curr_seqinfo.series_description.endswith("_SBRef"):
             datatype_suffix = "sbref"
 
         if not datatype_suffix:
@@ -585,7 +595,10 @@ def infotodict(
                         # XXX if we have a known earlier study, we need to always
                         # increase the run counter for phasediff because magnitudes
                         # were not acquired
-                        if get_study_hash([s]) == "9d148e2a05f782273f6343507733309d":
+                        if (
+                            get_study_hash([curr_seqinfo])
+                            == "9d148e2a05f782273f6343507733309d"
+                        ):
                             current_run += 1
                         else:
                             raise RuntimeError(
@@ -618,10 +631,10 @@ def infotodict(
             run_label = None
 
         # yoh: had a wrong assumption
-        # if s.is_motion_corrected:
-        #     assert s.is_derived, "Motion corrected images must be 'derived'"
+        # if curr_seqinfo.is_motion_corrected:
+        #     assert curr_seqinfo.is_derived, "Motion corrected images must be 'derived'"
 
-        if s.is_motion_corrected and "rec-" in series_info.get("bids", ""):
+        if curr_seqinfo.is_motion_corrected and "rec-" in series_info.get("bids", ""):
             raise NotImplementedError(
                 "want to add _rec-moco but there is _rec- already"
             )
@@ -646,7 +659,7 @@ def from_series_info(name: str) -> Optional[str]:
             from_series_info("acq"),
             # But we want to add an indicator in case it was motion corrected
             # in the magnet. ref sample  /2017/01/03/qa
-            None if not s.is_motion_corrected else "rec-moco",
+            None if not curr_seqinfo.is_motion_corrected else "rec-moco",
             from_series_info("dir"),
             series_info.get("bids"),
             run_label,
@@ -658,7 +671,7 @@ def from_series_info(name: str) -> Optional[str]:
         suffix = "_".join(filter(bool, filename_suffix_parts))  # type: ignore[arg-type]
 
         # # .series_description in case of
-        # sdesc = s.study_description
+        # sdesc = curr_seqinfo.study_description
         # # temporary aliases for those phantoms which we already collected
         # # so we rename them into this
         # #MAPPING
@@ -675,13 +688,16 @@ def from_series_info(name: str) -> Optional[str]:
         # https://github.com/nipy/heudiconv/issues/145
         outtype: tuple[str, ...]
         if (
-            "_Scout" in s.series_description
+            "_Scout" in curr_seqinfo.series_description
             or (
                 datatype == "anat"
                 and filename_suffix_parts[-1]
                 and filename_suffix_parts[-1].startswith("scout")
             )
-            or (s.series_description.lower() == s.protocol_name.lower() + "_setter")
+            or (
+                curr_seqinfo.series_description.lower()
+                == curr_seqinfo.protocol_name.lower() + "_setter"
+            )
         ):
             outtype = ("dicom",)
         else:
@@ -691,7 +707,7 @@ def from_series_info(name: str) -> Optional[str]:
         # we wanted ordered dict for consistent demarcation of dups
         if template not in info:
             info[template] = []
-        info[template].append(s.series_id)
+        info[template].append(curr_seqinfo.series_id)
 
     if skipped:
         lgr.info("Skipped %d sequences: %s" % (len(skipped), skipped))

From 168e078f703691c0814986482a9bd1012dc93032 Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 21 Jun 2024 12:05:51 -0400
Subject: [PATCH 3/5] Remove perfusion datatype.

---
 heudiconv/heuristics/reproin.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/heudiconv/heuristics/reproin.py b/heudiconv/heuristics/reproin.py
index c234cac0..280f6f17 100644
--- a/heudiconv/heuristics/reproin.py
+++ b/heudiconv/heuristics/reproin.py
@@ -61,7 +61,6 @@
             (e.g. _task-memory_run-01, _task-oddball_run-02)
      fmap - field maps
      dwi  - diffusion weighted imaging (also can as well have runs)
-     perf - perfusion imaging
 
    The other BIDS modalities are not known ATM and their data will not be
    converted and will be just skipped (with a warning). Full list of datatypes
@@ -218,7 +217,7 @@
 }
 
 
-KNOWN_DATATYPES = {"anat", "func", "dwi", "behav", "fmap", "perf"}
+KNOWN_DATATYPES = {"anat", "func", "dwi", "behav", "fmap"}
 
 
 def _delete_chars(from_str: str, deletechars: str) -> str:

From 2c4835e3d3c4a96efeee45a7b3b396b18393a64f Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Fri, 21 Jun 2024 12:20:38 -0400
Subject: [PATCH 4/5] Generalize change to other suffixes.

---
 heudiconv/heuristics/reproin.py | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/heudiconv/heuristics/reproin.py b/heudiconv/heuristics/reproin.py
index 280f6f17..d03c1a6e 100644
--- a/heudiconv/heuristics/reproin.py
+++ b/heudiconv/heuristics/reproin.py
@@ -527,25 +527,6 @@ def infotodict(
             if datatype == "func":
                 if "_pace_" in series_spec:
                     datatype_suffix = "pace"  # or should it be part of seq-
-                elif (
-                    "P" in curr_seqinfo.image_type
-                    and not curr_seqinfo.series_description.endswith("_SBRef")
-                ):
-                    datatype_suffix = "bold"
-                    series_info["part"] = "phase"
-                elif "M" in curr_seqinfo.image_type:
-                    datatype_suffix = "bold"
-
-                    # if next one is phase fMRI, we should set part to mag
-                    if (
-                        (
-                            next_seqinfo.series_description
-                            == curr_seqinfo.series_description
-                        )
-                        and (next_dcm_image_iod_spec == "P")
-                        and not curr_seqinfo.series_description.endswith("_SBRef")
-                    ):
-                        series_info["part"] = "mag"
                 else:
                     # assume bold by default
                     datatype_suffix = "bold"
@@ -565,6 +546,20 @@ def infotodict(
                 # label for dwi as well
                 datatype_suffix = "dwi"
 
+            # Add "part" entity as needed
+            if datatype != "fmap" and not curr_seqinfo.series_description.endswith(
+                "_SBRef"
+            ):
+                if "P" in curr_seqinfo.image_type:
+                    series_info["part"] = "phase"
+                elif "M" in curr_seqinfo.image_type:
+                    # if next one is phase from same scan, we should set part to mag
+                    if (
+                        next_seqinfo.series_description
+                        == curr_seqinfo.series_description
+                    ) and (next_dcm_image_iod_spec == "P"):
+                        series_info["part"] = "mag"
+
         #
         # Even if datatype_suffix was provided, for some data we might need to override,
         # since they are complementary files produced along-side with original

From 0ca30211dccfbaf28ba4c26c7c1b0e7d3503ce2b Mon Sep 17 00:00:00 2001
From: Taylor Salo <tsalo006@fiu.edu>
Date: Mon, 24 Jun 2024 10:27:56 -0400
Subject: [PATCH 5/5] Fix next series description lookup for the last series.

---
 heudiconv/heuristics/reproin.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/heudiconv/heuristics/reproin.py b/heudiconv/heuristics/reproin.py
index d03c1a6e..03085b33 100644
--- a/heudiconv/heuristics/reproin.py
+++ b/heudiconv/heuristics/reproin.py
@@ -416,7 +416,6 @@ def infotodict(
             continue
 
         if i_acq == 0:
-            prev_seqinfo = None
             prev_dcm_image_iod_spec = None
         else:
             prev_seqinfo = seqinfo[i_acq - 1]
@@ -428,7 +427,7 @@ def infotodict(
             prev_dcm_image_iod_spec = prev_seqinfo.image_type[2]
 
         if i_acq == (len(seqinfo) - 1):
-            next_seqinfo = None
+            next_series_description = None
             next_dcm_image_iod_spec = None
         else:
             next_seqinfo = seqinfo[i_acq + 1]
@@ -437,6 +436,7 @@ def infotodict(
                     **{f: getattr(next_seqinfo, f).format(**next_seqinfo._asdict())}
                 )
 
+            next_series_description = next_seqinfo.series_description
             next_dcm_image_iod_spec = next_seqinfo.image_type[2]
 
         # possibly apply present formatting in the series_description or protocol name
@@ -555,8 +555,7 @@ def infotodict(
                 elif "M" in curr_seqinfo.image_type:
                     # if next one is phase from same scan, we should set part to mag
                     if (
-                        next_seqinfo.series_description
-                        == curr_seqinfo.series_description
+                        next_series_description == curr_seqinfo.series_description
                     ) and (next_dcm_image_iod_spec == "P"):
                         series_info["part"] = "mag"