feat: add --csv-txyz-idxs and rename --txyz to --csv-txyz

chanshing · chanshing · commit c710e6d81846 · 2025-12-10T11:03:00.000Z
**src/stepcount/stepcount.py**
- Rename --txyz to --csv-txyz for consistency with other CSV options
- Add --csv-txyz-idxs for specifying columns by 0-based index (e.g., '0,1,2,3'); overrides --csv-txyz

**src/stepcount/utils.py**
- Add csv_txyz_idxs param to read()
- Parse and validate indices (must be 4 non-negative integers)
- Read CSV header to map indices to column names
- Raise ValueError for out-of-range or invalid indices
- Simplify the non-CSV warning to a generic "--csv-* options" message and also trigger it when csv_txyz_idxs is set
diff --git a/src/stepcount/stepcount.py b/src/stepcount/stepcount.py
@@ -42,10 +42,9 @@ def main():
                         type=str, default='cpu')
     parser.add_argument("--sample-rate", "-r", help="Sample rate for measurement, otherwise inferred.",
                         type=int, default=None)
-    parser.add_argument("--txyz",
-                        help=("Use this option to specify the column names for time, x, y, z "
-                              "in the input file, in that order. Use a comma-separated string. "
-                              "Default: 'time,x,y,z'"),
+    parser.add_argument("--csv-txyz",
+                        help=("Column names for time, x, y, z in CSV files. "
+                              "Comma-separated string. Default: 'time,x,y,z'"),
                         type=str, default="time,x,y,z")
     parser.add_argument("--exclude-wear-below", "-w",
                         help="Exclude days with wear time below threshold. Pass values as strings, e.g.: '12H', '30min'. "
@@ -88,6 +87,9 @@ def main():
     parser.add_argument("--csv-time-format",
                         help="Format string for parsing the time column (e.g., '%%Y-%%m-%%d %%H:%%M:%%S.%%f').",
                         type=str, default=None)
+    parser.add_argument("--csv-txyz-idxs",
+                        help="Column indices for time,x,y,z (0-indexed, e.g., '0,1,2,3'). Overrides --csv-txyz.",
+                        type=str, default=None)
     parser.add_argument('--quiet', '-q', action='store_true', help='Suppress output')
     args = parser.parse_args()
 
@@ -108,7 +110,7 @@ def main():
     # Load file
     data, info_read = utils.read(
         args.filepath,
-        usecols=args.txyz,
+        usecols=args.csv_txyz,
         start_time=args.start,
         end_time=args.end,
         calibration_stdtol_min=args.calibration_stdtol_min,
@@ -118,6 +120,7 @@ def main():
         csv_start_row=args.csv_start_row,
         csv_end_row=args.csv_end_row,
         csv_time_format=args.csv_time_format,
+        csv_txyz_idxs=args.csv_txyz_idxs,
         verbose=verbose
     )
     info.update(info_read)
diff --git a/src/stepcount/utils.py b/src/stepcount/utils.py
@@ -21,6 +21,7 @@ def read(
     csv_start_row: int = None,
     csv_end_row: int = None,
     csv_time_format: str = None,
+    csv_txyz_idxs: str = None,
     verbose: bool = True
 ):
     """
@@ -46,6 +47,8 @@ def read(
       Only applies to CSV files. Default is None (read to the end).
     - csv_time_format (str, optional): Format string for parsing the time column (e.g., '%Y-%m-%d %H:%M:%S.%f').
       Only applies to CSV files. Default is None (auto-detect).
+    - csv_txyz_idxs (str, optional): Column indices for time,x,y,z as comma-separated string (0-indexed, e.g., '0,1,2,3').
+      Overrides usecols for CSV files. Default is None (use usecols/csv_txyz).
     - verbose (bool, optional): If True, enables verbose output during processing. Default is True.
 
     Returns:
@@ -69,7 +72,23 @@ def read(
     if ftype in (".csv", ".pkl"):
 
         if ftype == ".csv":
-            tcol, xcol, ycol, zcol = usecols.split(',')
+            # Determine column names: either from indices or from usecols
+            if csv_txyz_idxs is not None:
+                # Parse and validate indices
+                try:
+                    tidx, xidx, yidx, zidx = map(int, csv_txyz_idxs.split(','))
+                except ValueError:
+                    raise ValueError(f"csv_txyz_idxs must be 4 comma-separated integers, got: '{csv_txyz_idxs}'")
+                if any(i < 0 for i in [tidx, xidx, yidx, zidx]):
+                    raise ValueError(f"csv_txyz_idxs must be non-negative integers, got: '{csv_txyz_idxs}'")
+                # Read header to get column names at those indices
+                header = pd.read_csv(filepath, nrows=0).columns.tolist()
+                max_idx = max(tidx, xidx, yidx, zidx)
+                if max_idx >= len(header):
+                    raise ValueError(f"Column index {max_idx} out of range. CSV has {len(header)} columns.")
+                tcol, xcol, ycol, zcol = header[tidx], header[xidx], header[yidx], header[zidx]
+            else:
+                tcol, xcol, ycol, zcol = usecols.split(',')
 
             # Validate csv_start_row and csv_end_row
             if csv_start_row is not None and csv_end_row is not None:
@@ -145,8 +164,8 @@ def read(
 
     elif ftype in (".cwa", ".gt3x", ".bin"):
 
-        if csv_start_row is not None or csv_end_row is not None or csv_time_format is not None:
-            warnings.warn("--csv-start-row, --csv-end-row, and --csv-time-format are only supported for CSV files. Ignoring.")
+        if csv_start_row is not None or csv_end_row is not None or csv_time_format is not None or csv_txyz_idxs is not None:
+            warnings.warn("--csv-* options are only supported for CSV files. Ignoring.")
 
         data, info = actipy.read_device(
             filepath,