@@ -67,6 +67,7 @@ def __init__( # noqa: PLR0913
67
67
self : CytoDataFrame_type ,
68
68
data : Union [CytoDataFrame_type , pd .DataFrame , str , pathlib .Path ],
69
69
data_context_dir : Optional [str ] = None ,
70
+ data_image_paths : Optional [pd .DataFrame ] = None ,
70
71
data_bounding_box : Optional [pd .DataFrame ] = None ,
71
72
data_mask_context_dir : Optional [str ] = None ,
72
73
data_outline_context_dir : Optional [str ] = None ,
@@ -82,6 +83,8 @@ def __init__( # noqa: PLR0913
82
83
The data source, either a pandas DataFrame or a file path.
83
84
data_context_dir (Optional[str]):
84
85
Directory context for the image data within the DataFrame.
86
+ data_image_paths (Optional[pd.DataFrame]):
87
+ Image path data for the image files.
85
88
data_bounding_box (Optional[pd.DataFrame]):
86
89
Bounding box data for the DataFrame images.
87
90
data_mask_context_dir: Optional[str]:
@@ -108,6 +111,7 @@ def __init__( # noqa: PLR0913
108
111
"data_context_dir" : (
109
112
data_context_dir if data_context_dir is not None else None
110
113
),
114
+ "data_image_paths" : None ,
111
115
"data_bounding_box" : None ,
112
116
"data_mask_context_dir" : (
113
117
data_mask_context_dir if data_mask_context_dir is not None else None
@@ -168,11 +172,17 @@ def __init__( # noqa: PLR0913
168
172
else :
169
173
super ().__init__ (data )
170
174
171
- if data_bounding_box is None :
172
- self ._custom_attrs ["data_bounding_box" ] = self .get_bounding_box_from_data ()
175
+ self ._custom_attrs ["data_bounding_box" ] = (
176
+ self .get_bounding_box_from_data ()
177
+ if data_bounding_box is None
178
+ else data_bounding_box
179
+ )
173
180
174
- else :
175
- self ._custom_attrs ["data_bounding_box" ] = data_bounding_box
181
+ self ._custom_attrs ["data_image_paths" ] = (
182
+ self .get_image_paths_from_data (image_cols = self .find_image_columns ())
183
+ if data_image_paths is None
184
+ else data_image_paths
185
+ )
176
186
177
187
def __getitem__ (self : CytoDataFrame_type , key : Union [int , str ]) -> Any : # noqa: ANN401
178
188
"""
@@ -196,6 +206,7 @@ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # noqa:
196
206
return CytoDataFrame (
197
207
super ().__getitem__ (key ),
198
208
data_context_dir = self ._custom_attrs ["data_context_dir" ],
209
+ data_image_paths = self ._custom_attrs ["data_image_paths" ],
199
210
data_bounding_box = self ._custom_attrs ["data_bounding_box" ],
200
211
data_mask_context_dir = self ._custom_attrs ["data_mask_context_dir" ],
201
212
data_outline_context_dir = self ._custom_attrs ["data_outline_context_dir" ],
@@ -233,6 +244,7 @@ def _wrap_method(
233
244
result = CytoDataFrame (
234
245
result ,
235
246
data_context_dir = self ._custom_attrs ["data_context_dir" ],
247
+ data_image_paths = self ._custom_attrs ["data_image_paths" ],
236
248
data_bounding_box = self ._custom_attrs ["data_bounding_box" ],
237
249
data_mask_context_dir = self ._custom_attrs ["data_mask_context_dir" ],
238
250
data_outline_context_dir = self ._custom_attrs ["data_outline_context_dir" ],
@@ -381,8 +393,25 @@ def is_notebook_or_lab() -> bool:
381
393
except NameError :
382
394
return False
383
395
384
- def find_image_columns (self : CytoDataFrame_type ) -> bool :
396
+ def find_image_columns (self : CytoDataFrame_type ) -> List [str ]:
397
+ """
398
+ Find columns containing image file names.
399
+
400
+ This method searches for columns in the DataFrame
401
+ that contain image file names with extensions .tif
402
+ or .tiff (case insensitive).
403
+
404
+ Returns:
405
+ List[str]:
406
+ A list of column names that contain
407
+ image file names.
408
+
409
+ """
410
+ # build a pattern to match image file names
385
411
pattern = r".*\.(tif|tiff)$"
412
+
413
+ # search for columns containing image file names
414
+ # based on pattern above.
386
415
return [
387
416
column
388
417
for column in self .columns
@@ -394,6 +423,64 @@ def find_image_columns(self: CytoDataFrame_type) -> bool:
394
423
.any ()
395
424
]
396
425
426
def get_image_paths_from_data(
    self: CytoDataFrame_type, image_cols: List[str]
) -> Optional[pd.DataFrame]:
    """
    Gather data containing image path names
    (the directory storing the images but not the file
    names). We do this by seeking the pattern:
    Image_FileName_X --> Image_PathName_X.

    Args:
        image_cols: List[str]:
            A list of column names that contain
            image file names.

    Returns:
        Optional[pd.DataFrame]:
            The result of filtering this DataFrame down to
            the image path name columns which correspond to
            the provided image file name columns, or None
            when no such columns are present.

    """

    # Derive the candidate path column name for each image column.
    # A column name without "FileName" is left unchanged by
    # str.replace; such a column must not be treated as its own
    # path column, so we require the derived name to differ.
    image_path_columns = [
        path_col
        for col in image_cols
        if (path_col := col.replace("FileName", "PathName")) != col
        and path_col in self.columns
    ]

    # Only filter when at least one path column was found.
    return self.filter(items=image_path_columns) if image_path_columns else None
454
+
455
def find_image_path_columns(
    self: CytoDataFrame_type, image_cols: List[str], all_cols: List[str]
) -> Dict[str, str]:
    """
    Find columns containing image path names
    (the directory storing the images but not the file
    names). We do this by seeking the pattern:
    Image_FileName_X --> Image_PathName_X.

    Args:
        image_cols: List[str]:
            A list of column names that contain
            image file names.
        all_cols: List[str]:
            A list of all column names.

    Returns:
        Dict[str, str]:
            A mapping of image file name column to its
            corresponding image path name column. Image
            columns without a matching path name column in
            all_cols are omitted, so the mapping may be empty.

    """

    # A column name without "FileName" is left unchanged by
    # str.replace; skip it so an image column is never mapped
    # to itself as though it were a path column.
    return {
        col: path_col
        for col in image_cols
        if (path_col := col.replace("FileName", "PathName")) != col
        and path_col in all_cols
    }
483
+
397
484
def search_for_mask_or_outline ( # noqa: PLR0913, PLR0911
398
485
self : CytoDataFrame_type ,
399
486
data_value : str ,
@@ -471,6 +558,7 @@ def process_image_data_as_html_display(
471
558
self : CytoDataFrame_type ,
472
559
data_value : Any , # noqa: ANN401
473
560
bounding_box : Tuple [int , int , int , int ],
561
+ image_path : Optional [str ] = None ,
474
562
) -> str :
475
563
"""
476
564
Process the image data based on the provided data value
@@ -489,38 +577,55 @@ def process_image_data_as_html_display(
489
577
The HTML image display string, or the unmodified data
490
578
value if the image cannot be processed.
491
579
"""
580
+
492
581
candidate_path = None
493
582
# Get the pattern map for segmentation file regex
494
583
pattern_map = self ._custom_attrs .get ("segmentation_file_regex" )
495
584
496
585
# Step 1: Find the candidate file if the data value is not already a file
497
586
if not pathlib .Path (data_value ).is_file ():
587
+ # determine if we have a file from the path (dir) + filename
588
+ if (
589
+ self ._custom_attrs ["data_context_dir" ] is None
590
+ and image_path is not None
591
+ and (
592
+ existing_image_from_path := pathlib .Path (
593
+ f"{ image_path } /{ data_value } "
594
+ )
595
+ ).is_file ()
596
+ ):
597
+ candidate_path = existing_image_from_path
598
+
498
599
# Search for the data value in the data context directory
499
- if candidate_paths := list (
500
- pathlib .Path (self ._custom_attrs ["data_context_dir" ]).rglob (data_value )
600
+ elif self ._custom_attrs ["data_context_dir" ] is not None and (
601
+ candidate_paths := list (
602
+ pathlib .Path (self ._custom_attrs ["data_context_dir" ]).rglob (
603
+ data_value
604
+ )
605
+ )
501
606
):
502
607
# If a candidate file is found, use the first one
503
608
candidate_path = candidate_paths [0 ]
504
- orig_image_array = skimage .io .imread (candidate_path )
505
-
506
- # Adjust the image with image adjustment callable
507
- # or adaptive histogram equalization
508
- if self ._custom_attrs ["image_adjustment" ] is not None :
509
- orig_image_array = self ._custom_attrs ["image_adjustment" ](
510
- orig_image_array
511
- )
512
- else :
513
- orig_image_array = adjust_with_adaptive_histogram_equalization (
514
- orig_image_array
515
- )
516
-
517
- # Normalize to 0-255 for image saving
518
- orig_image_array = img_as_ubyte (orig_image_array )
519
609
520
610
else :
521
611
# If no candidate file is found, return the original data value
522
612
return data_value
523
613
614
+ # read the image as an array
615
+ orig_image_array = skimage .io .imread (candidate_path )
616
+
617
+ # Adjust the image with image adjustment callable
618
+ # or adaptive histogram equalization
619
+ if self ._custom_attrs ["image_adjustment" ] is not None :
620
+ orig_image_array = self ._custom_attrs ["image_adjustment" ](orig_image_array )
621
+ else :
622
+ orig_image_array = adjust_with_adaptive_histogram_equalization (
623
+ orig_image_array
624
+ )
625
+
626
+ # Normalize to 0-255 for image saving
627
+ orig_image_array = img_as_ubyte (orig_image_array )
628
+
524
629
prepared_image = None
525
630
# Step 2: Search for a mask
526
631
prepared_image = self .search_for_mask_or_outline (
@@ -632,8 +737,6 @@ def _repr_html_(
632
737
max_cols = get_option ("display.max_columns" )
633
738
show_dimensions = get_option ("display.show_dimensions" )
634
739
635
- # determine if we have image_cols to display
636
- if image_cols := self .find_image_columns ():
637
740
# re-add bounding box cols if they are no longer available as in cases
638
741
# of masking or accessing various pandas attr's
639
742
bounding_box_externally_joined = False
@@ -647,6 +750,25 @@ def _repr_html_(
647
750
else :
648
751
data = self .copy ()
649
752
753
+ # re-add image path (dirs for images) cols if they are no
754
+ # longer available as in cases of masking or accessing
755
+ # various pandas attr's
756
+ image_paths_externally_joined = False
757
+
758
+ if self ._custom_attrs ["data_image_paths" ] is not None and not all (
759
+ col in self .columns .tolist ()
760
+ for col in self ._custom_attrs ["data_image_paths" ].columns .tolist ()
761
+ ):
762
+ data = data .join (other = self ._custom_attrs ["data_image_paths" ])
763
+ image_paths_externally_joined = True
764
+
765
+ # determine if we have image_cols to display
766
+ if image_cols := self .find_image_columns ():
767
+ # attempt to find the image path columns
768
+ image_path_cols = self .find_image_path_columns (
769
+ image_cols = image_cols , all_cols = data .columns
770
+ )
771
+
650
772
# gather indices which will be displayed based on pandas configuration
651
773
display_indices = self .get_displayed_rows ()
652
774
@@ -691,6 +813,12 @@ def _repr_html_(
691
813
)
692
814
],
693
815
),
816
+ # set the image path based on the image_path cols.
817
+ image_path = (
818
+ row [image_path_cols [image_col ]]
819
+ if image_path_cols is not None and image_path_cols != {}
820
+ else None
821
+ ),
694
822
),
695
823
axis = 1 ,
696
824
)
@@ -700,6 +828,11 @@ def _repr_html_(
700
828
self ._custom_attrs ["data_bounding_box" ].columns .tolist (), axis = 1
701
829
)
702
830
831
+ if image_paths_externally_joined :
832
+ data = data .drop (
833
+ self ._custom_attrs ["data_image_paths" ].columns .tolist (), axis = 1
834
+ )
835
+
703
836
formatter = fmt .DataFrameFormatter (
704
837
data ,
705
838
columns = None ,
0 commit comments