Skip to content

Commit 62caa42

Browse files
committed
Merge branch 'main' into NPI-3945-fix-gpsdate-to-datetime
2 parents b2a6d9b + c19a173 commit 62caa42

File tree

5 files changed

+239
-24
lines changed

5 files changed

+239
-24
lines changed

gnssanalysis/gn_frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def get_frame_of_day(
3535
):
3636
"""Main function to propagate frame into datetime of interest"""
3737

38-
if isinstance(date_or_j2000, (int, _np.int64)):
38+
if isinstance(date_or_j2000, (int, _np.int64)): # TODO check: np.int64 is meant to be a class not a type
3939
date_J2000 = date_or_j2000
4040
else:
4141
date_J2000 = _gn_datetime.datetime2j2000(_np.datetime64(date_or_j2000))

gnssanalysis/gn_io/sinex.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ def _get_valid_stypes(stypes: Union[list[str], set[str]]) -> _List[str]:
327327
"""Returns only stypes in allowed list
328328
Fastest if stypes size is small"""
329329
allowed_stypes = ["EST", "APR", "NEQ"]
330-
stypes = set(stypes) if not isinstance(stypes, set) else stypes
330+
stypes = set(stypes) if not isinstance(stypes, set) else stypes # Convert to set if not one.
331331
ok_stypes = sorted(stypes.intersection(allowed_stypes), key=allowed_stypes.index) # need EST to always be first
332332
if len(ok_stypes) != len(stypes):
333333
not_ok_stypes = stypes.difference(allowed_stypes)
@@ -544,12 +544,17 @@ def _get_snx_vector(
544544
if isinstance(path_or_bytes, str):
545545
path = path_or_bytes
546546
snx_bytes = _gn_io.common.path2bytes(path)
547-
# TODO Removed this very broken code path, not sure what happened
548-
# elif isinstance(path_or_bytes, list):
549-
# path, stypes, format, verbose = path_or_bytes
550-
# snx_bytes = _gn_io.common.path2bytes(path)
551-
else:
547+
# Very weird code path, should be removed if possible
548+
elif isinstance(path_or_bytes, list):
549+
_logging.error(
550+
f"path_or_bytes was a list! Using legacy code path. Please update this! Input values: {path_or_bytes}"
551+
)
552+
path, stypes, format, verbose = path_or_bytes
553+
snx_bytes = _gn_io.common.path2bytes(path)
554+
elif isinstance(path_or_bytes, bytes):
552555
snx_bytes = path_or_bytes
556+
else:
557+
raise ValueError(f"Unexpected type for path_or_bytes: {type(path_or_bytes)}. Value: {path_or_bytes}")
553558

554559
if snx_header == {}:
555560
snx_header = _get_snx_header(
@@ -560,7 +565,9 @@ def _get_snx_vector(
560565
"Indices are likely inconsistent between ESTIMATE and APRIORI in the EMR AC files hence files might be parsed incorrectly"
561566
)
562567

568+
_logging.info(f"Passing stypes through SType validator: {stypes}. Input path if available: {path}")
563569
stypes = _get_valid_stypes(stypes) # EST is always first as APR may have skips
570+
_logging.info(f"STypes after validator: {stypes}. Input path if available: {path}")
564571

565572
extracted = _snx_extract(snx_bytes=snx_bytes, stypes=stypes, obj_type="VECTOR", verbose=verbose)
566573
if extracted is None:
@@ -764,7 +771,7 @@ def _get_snx_vector_gzchunks(filename: str, block_name="SOLUTION/ESTIMATE", size
764771
stop = True
765772
i += 1
766773

767-
return _get_snx_vector(path_or_bytes=block_bytes, stypes=set("EST"), format=format)
774+
return _get_snx_vector(path_or_bytes=block_bytes, stypes=set(["EST"]), format=format)
768775

769776

770777
def _get_snx_id(path):

gnssanalysis/gn_io/sp3.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,35 +1133,30 @@ def parse_sp3_header(header: bytes, warn_on_negative_sv_acc_values: bool = True)
11331133

11341134
def clean_sp3_orb(sp3_df: _pd.DataFrame, use_offline_sat_removal: bool) -> _pd.DataFrame:
11351135
"""
1136-
Clean SP3 orbit data in order to remove duplicates, leading and ending, and/or any satellites with nodata values
1137-
elsewhere in the DataFrame.
1136+
Clean SP3 orbit data: remove duplicates, remove leading or trailing rows of NA values, optionally remove satellites
1137+
with *any* missing position values.
11381138
11391139
:param _pd.DataFrame sp3_df: The input SP3 DataFrame.
11401140
:param bool use_offline_sat_removal: Flag indicating whether to remove satellites which are offline / have some
11411141
nodata position values.
1142-
:return _pd.Series: A pandas Series containing the parsed information from the SP3 header.
1142+
:return _pd.DataFrame: A cleaned version of the SP3 DataFrame
11431143
"""
1144-
sp3_df = sp3_df.filter(items=[("EST", "X"), ("EST", "Y"), ("EST", "Z")])
1144+
# Trim DataFrame to position estimate columns
1145+
sp3_df_updated: _pd.DataFrame = sp3_df.filter(items=[("EST", "X"), ("EST", "Y"), ("EST", "Z")])
11451146

11461147
# Drop any duplicates in the index
1147-
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")]
1148+
sp3_df_updated = sp3_df_updated[~sp3_df_updated.index.duplicated(keep="first")]
11481149

11491150
# Trim the leading and ending epochs that are empty (i.e. all values are NaN) to avoid dropping all data
1150-
valid_rows = sp3_df.dropna(how="all")
1151+
valid_rows = sp3_df_updated.dropna(how="all")
11511152
first_valid_epoch = valid_rows.index[0][0]
11521153
last_valid_epoch = valid_rows.index[-1][0]
1153-
sp3_df = sp3_df.loc[first_valid_epoch:last_valid_epoch]
1154-
sp3_df_cleaned = sp3_df
1154+
sp3_df_updated = sp3_df_updated.loc[first_valid_epoch:last_valid_epoch]
11551155

1156-
# Drop any satellites (SVs) which are offline or partially offline.
1157-
# Note: this currently removes SVs with ANY nodata values for position, so a single glitch will remove
1158-
# the SV from the whole file.
1159-
# This step was added after velocity interpolation failures due to non-finite (NaN) values from offline SVs.
11601156
if use_offline_sat_removal:
1161-
sp3_baseline = remove_offline_sats(sp3_baseline, df_friendly_name="baseline")
1162-
sp3_test = remove_offline_sats(sp3_test, df_friendly_name="test")
1157+
sp3_df_updated = remove_offline_sats(sp3_df_updated)
11631158

1164-
return sp3_df_cleaned
1159+
return sp3_df_updated
11651160

11661161

11671162
def getVelSpline(sp3Df: _pd.DataFrame) -> _pd.DataFrame:

tests/test_datasets/sp3_test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
EOF
5959
"""
6060

61-
# Based on SP3c.txt example 2. SP3d is PDF formatted so alignment is hard to preserve.
61+
# Based on SP3c.txt example 2. SP3d is PDF formatted so alignment is hard to preserve. #TODO check this is actually right
6262
# https://files.igs.org/pub/data/format/sp3c.txt
6363
# Truncated and manually modified to reflect:
6464
# Epochs: 1

tests/test_sp3.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,186 @@ def test_read_sp3_correct_svs_read_when_ev_ep_present(self, mock_file):
197197
# TODO Add test(s) for correctly reading header fundamentals (ACC, ORB_TYPE, etc.)
198198
# TODO add tests for correctly reading the actual content of the SP3 in addition to the header.
199199

200+
@staticmethod
201+
def get_example_dataframe(template_name: str = "normal", include_simple_header: bool = True) -> pd.DataFrame:
202+
203+
dataframe_templates = {
204+
# "normal": { # TODO fill in
205+
# "data_vals": [],
206+
# "index_vals": [],
207+
# },
208+
"dupe_epoch_offline_sat_empty_epoch": {
209+
"data_vals": [
210+
# Epoch 1 ---------------------------------
211+
# EST, X EST, Y EST, Z
212+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
213+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
214+
[0.000000, 0.000000, 0.000000], # ---------------- < G03 (offline)
215+
# Epoch 2 ---------------------------------
216+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
217+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
218+
[0.000000, 0.000000, 0.000000], # ---------------- < G03 (offline)
219+
# Epoch 3 --------------------------------- Effectively missing epoch, to test trimming.
220+
[np.nan, np.nan, np.nan],
221+
[np.nan, np.nan, np.nan],
222+
[np.nan, np.nan, np.nan],
223+
],
224+
"index_vals": [[774619200, 774619200, 774619201], ["G01", "G02", "G03"]],
225+
},
226+
"offline_sat_nan": {
227+
"data_vals": [
228+
# Epoch 1 ---------------------------------
229+
# EST, X EST, Y EST, Z
230+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
231+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
232+
[np.nan, np.nan, np.nan], # ---------------- < G03 (offline)
233+
# Epoch 2 ---------------------------------
234+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
235+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
236+
[np.nan, np.nan, np.nan], # ---------------- < G03 (offline)
237+
# Epoch 3 ---------------------------------
238+
[4510.358405, -23377.282442, -11792.723580],
239+
[4510.358405, -23377.282442, -11792.723580],
240+
[np.nan, np.nan, np.nan],
241+
],
242+
"index_vals": [[774619200, 774619200, 774619201], ["G01", "G02", "G03"]],
243+
},
244+
"offline_sat_zero": {
245+
"data_vals": [
246+
# Epoch 1 ---------------------------------
247+
# EST, X EST, Y EST, Z
248+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
249+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
250+
[0.000000, 0.000000, 0.000000], # ---------------- < G03 (offline)
251+
# Epoch 2 ---------------------------------
252+
[4510.358405, -23377.282442, -11792.723580], # --- < G01
253+
[4510.358405, -23377.282442, -11792.723580], # --- < G02
254+
[0.000000, 0.000000, 0.000000], # ---------------- < G03 (offline)
255+
# Epoch 3 ---------------------------------
256+
[4510.358405, -23377.282442, -11792.723580],
257+
[4510.358405, -23377.282442, -11792.723580],
258+
[0.000000, 0.000000, 0.000000],
259+
],
260+
"index_vals": [[774619200, 774619200, 774619201], ["G01", "G02", "G03"]],
261+
},
262+
}
263+
264+
if template_name not in dataframe_templates:
265+
raise ValueError(f"Unsupported template name: {template_name}")
266+
267+
# Worked example for defining MultiIndex
268+
# # Build a MultiIndex of J2000 then PRN values
269+
# # ----------------------------- Epochs: ---------- | PRNs within each of those Epochs:
270+
# # ------------------ Epoch 1 -- Epoch 2 -- Epoch 3 - PRN 1 PRN 2 PRN 3
271+
# index_elements = [[774619200, 774619200, 774619201], ["G01", "G02", "G03"]]
272+
273+
# Define columns: top level 'EST' and nested under that, 'X', 'Y', 'Z'
274+
frame_columns = [["EST", "EST", "EST"], ["X", "Y", "Z"]]
275+
276+
# Load template
277+
template = dataframe_templates[template_name]
278+
frame_data = template["data_vals"]
279+
index_elements = template["index_vals"]
280+
281+
index_names = ["J2000", "PRN"]
282+
multi_index = pd.MultiIndex.from_product(index_elements, names=index_names)
283+
284+
# Compose it all into a DataFrame
285+
df = pd.DataFrame(frame_data, index=multi_index, columns=frame_columns)
286+
287+
if include_simple_header:
288+
# Build SV table
289+
head_svs = ["G01", "G02", "G03"] # SV header entries
290+
head_svs_std = [0, 0, 0] # Accuracy codes for those SVs
291+
sv_tbl = pd.Series(head_svs_std, index=head_svs)
292+
293+
# Build header
294+
header_array = np.asarray(
295+
[
296+
"d",
297+
"P",
298+
"Time TODO",
299+
"3", # Num epochs
300+
"Data TODO",
301+
"coords TODO",
302+
"orb type TODO",
303+
"GAA",
304+
"SP3", # Probably
305+
"Time sys TODO",
306+
"3", # Stated SVs
307+
]
308+
).astype(str)
309+
sp3_heading = pd.Series(
310+
data=header_array,
311+
index=[
312+
"VERSION",
313+
"PV_FLAG",
314+
"DATETIME",
315+
"N_EPOCHS",
316+
"DATA_USED",
317+
"COORD_SYS",
318+
"ORB_TYPE",
319+
"AC",
320+
"FILE_TYPE",
321+
"TIME_SYS",
322+
"SV_COUNT_STATED",
323+
],
324+
)
325+
326+
# Merge SV table and header, and store as 'HEADER'
327+
df.attrs["HEADER"] = pd.concat([sp3_heading, sv_tbl], keys=["HEAD", "SV_INFO"], axis=0)
328+
return df
329+
330+
def test_clean_sp3_orb(self):
331+
"""
332+
Tests cleaning an SP3 DataFrame of duplicates, leading or trailing nodata values, and offline sats
333+
"""
334+
335+
# Create dataframe manually, as read function does deduplication itself. This also makes the test more self-contained
336+
sp3_df = TestSP3.get_example_dataframe("dupe_epoch_offline_sat_empty_epoch")
337+
338+
self.assertTrue(
339+
# Alternatively you can use all(array == array) to do an elementwise equality check
340+
np.array_equal(sp3_df.index.get_level_values(0).unique(), [774619200, 774619201]),
341+
"Sample data should have 2 unique epochs (one of which is empty)",
342+
)
343+
self.assertTrue(
344+
np.array_equal(sp3_df.index.get_level_values(1).unique(), ["G01", "G02", "G03"]),
345+
"Sample data should have 3 sats",
346+
)
347+
348+
# There should be duplicates of each sat in the first epoch
349+
# Note: syntax of loc here uses a tuple describing levels within the row MultiIndex, then column MultiIndex,
350+
# i.e. (row, row), (column, column).
351+
self.assertTrue(
352+
np.array_equal(sp3_df.loc[(774619200, "G01"), ("EST", "X")].values, [4510.358405, 4510.358405]),
353+
"Expect dupe in first epoch",
354+
)
355+
356+
# Test cleaning function without offline sat removal
357+
sp3_df_no_offline_removal = sp3.clean_sp3_orb(sp3_df, False)
358+
359+
self.assertTrue(
360+
np.array_equal(sp3_df_no_offline_removal.index.get_level_values(0).unique(), [774619200]),
361+
"After cleaning there should be a single unique epoch",
362+
)
363+
364+
# This checks both (indirectly) that there is only one epoch (as the multi-index will repeat second level
365+
# values, and the input doesn't change sats in successive epochs), and that those second level values
366+
# aren't duplicated.
367+
self.assertTrue(
368+
np.array_equal(sp3_df_no_offline_removal.index.get_level_values(1), ["G01", "G02", "G03"]),
369+
"After cleaning there should be no dupe PRNs. As offline sat removal is off, offline sat should remain",
370+
)
371+
372+
# Now check with offline sat removal enabled too
373+
sp3_df_with_offline_removal = sp3.clean_sp3_orb(sp3_df, True)
374+
# Check that we still seem to have one epoch with no dupe sats, and now with the offline sat removed
375+
self.assertTrue(
376+
np.array_equal(sp3_df_with_offline_removal.index.get_level_values(1), ["G01", "G02"]),
377+
"After cleaning there should be no dupe PRNs (and with offline removal, offline sat should be gone)",
378+
)
379+
200380
def test_gen_sp3_fundamentals(self):
201381
"""
202382
Tests that the SP3 header and content generation functions produce output that (apart from trailing
@@ -737,6 +917,39 @@ def test_velinterpolation(self, mock_file):
737917
self.assertIsNotNone(r)
738918
self.assertIsNotNone(r2)
739919

920+
def test_sp3_offline_sat_removal_standalone(self):
921+
"""
922+
Standalone test for remove_offline_sats() using manually constructed DataFrame to
923+
avoid dependency on read_sp3()
924+
"""
925+
sp3_df_nans = TestSP3.get_example_dataframe("offline_sat_nan")
926+
sp3_df_zeros = TestSP3.get_example_dataframe("offline_sat_zero")
927+
928+
self.assertEqual(
929+
sp3_df_zeros.index.get_level_values(1).unique().array.tolist(),
930+
["G01", "G02", "G03"],
931+
"Should start with 3 SVs",
932+
)
933+
self.assertEqual(
934+
sp3_df_nans.index.get_level_values(1).unique().array.tolist(),
935+
["G01", "G02", "G03"],
936+
"Should start with 3 SVs",
937+
)
938+
939+
sp3_df_zeros_removed = sp3.remove_offline_sats(sp3_df_zeros)
940+
sp3_df_nans_removed = sp3.remove_offline_sats(sp3_df_nans)
941+
942+
self.assertEqual(
943+
sp3_df_zeros_removed.index.get_level_values(1).unique().array.tolist(),
944+
["G01", "G02"],
945+
"Should be two SVs after removing offline ones",
946+
)
947+
self.assertEqual(
948+
sp3_df_nans_removed.index.get_level_values(1).unique().array.tolist(),
949+
["G01", "G02"],
950+
"Should be two SVs after removing offline ones",
951+
)
952+
740953
@patch("builtins.open", new_callable=mock_open, read_data=offline_sat_test_data)
741954
def test_sp3_offline_sat_removal(self, mock_file):
742955
sp3_df = sp3.read_sp3("mock_path", pOnly=False)

0 commit comments

Comments
 (0)