@@ -190,53 +190,90 @@ def download_RADIO(path, samples=None):
190190 download_from_manifest (df , save_dir , samples )
191191
192192
193- def process_series_dir (series_dir ):
193+ def process_series_dir (series_dir : Path ):
194194 """
195195 Process the series directory and extract relevant information.
196196
197197 Args:
198198 series_dir (Path): The path to the series directory.
199199
200200 Returns:
201- dict: A dictionary containing the extracted information, including the image path, patient ID, and coordinates.
201+ dict: A dictionary containing the extracted information, including the
202+ image path, patient ID, and centroid coordinates.
203+ None: If there's no RTSTRUCT or SEG file, or any step fails.
202204
203205 Raises:
204206 None
205207 """
206208 # Check if RTSTRUCT file exists
207- rtstuct_files = list (series_dir .glob ("*RTSTRUCT*" ))
209+ rt_struct_files = list (series_dir .glob ("*RTSTRUCT*" ))
208210 seg_files = list (series_dir .glob ("*SEG*" ))
209211
210- if len (rtstuct_files ) != 0 :
211- dcmrtstruct2nii (str (rtstuct_files [0 ]), str (series_dir ), str (series_dir ))
212+ # Convert DICOM to NIfTI based on whether it's RTSTRUCT or SEG
213+ if len (rt_struct_files ) != 0 :
214+ dcmrtstruct2nii (str (rt_struct_files [0 ]), str (series_dir ), str (series_dir ))
212215
213216 elif len (seg_files ) != 0 :
214217 dcmseg2nii (str (seg_files [0 ]), str (series_dir ), tag = "GTV-" )
215-
216- series_id = str (list (series_dir .glob ("CT*.dcm" ))[0 ]).split ("_" )[- 2 ]
218+
219+ # Build the main image NIfTI
220+ try :
221+ series_id = str (list (series_dir .glob ("CT*.dcm" ))[0 ]).split ("_" )[- 2 ]
222+ except IndexError :
223+ logger .warning (f"No 'CT*.dcm' file found under { series_dir } . Skipping." )
224+ return None
225+
217226 dicom_image = DcmInputAdapter ().ingest (str (series_dir ), series_id = series_id )
218227 nii_output_adapter = NiiOutputAdapter ()
219228 nii_output_adapter .write (dicom_image , f"{ series_dir } /image" , gzip = True )
229+
220230 else :
221- logger .warning ("Skipped file without any RTSTRUCT or SEG file" )
231+ logger .warning (f"No RTSTRUCT or SEG file found in { series_dir } . Skipping." )
232+ return None
233+
234+ # Read the image (generated above)
235+ image_path = series_dir / "image.nii.gz"
236+ if not image_path .exists ():
237+ logger .warning (f"No image file found at { image_path } . Skipping." )
222238 return None
223239
224- image = sitk .ReadImage (str (series_dir / "image.nii.gz" ))
225- mask = sitk .ReadImage (str (list (series_dir .glob ("*GTV-1*" ))[0 ]))
240+ try :
241+ image = sitk .ReadImage (str (image_path ))
242+ except Exception as e :
243+ logger .error (f"Failed to read image { image_path } : { e } " )
244+ return None
245+
246+ # Find the GTV-1 mask files
247+ gtv1_masks = list (series_dir .glob ("*GTV-1*.nii.gz" ))
248+ if not gtv1_masks :
249+ logger .warning (f"No GTV-1 mask found in { series_dir } . Skipping." )
250+ return None
251+
252+ mask_path = gtv1_masks [0 ]
253+ try :
254+ mask = sitk .ReadImage (str (mask_path ))
255+ except Exception as e :
256+ logger .error (f"Failed to read mask { mask_path } : { e } " )
257+ return None
226258
227- # Get centroid from label shape filter
259+ # Extract centroid from the mask
228260 label_shape_filter = sitk .LabelShapeStatisticsImageFilter ()
229261 label_shape_filter .Execute (mask )
230262
263+ # Some masks use label 1, others use 255; try 255 first, then fall back to 1
231264 try :
232265 centroid = label_shape_filter .GetCentroid (255 )
233266 except :
234- centroid = label_shape_filter .GetCentroid (1 )
267+ try :
268+ centroid = label_shape_filter .GetCentroid (1 )
269+ except Exception as e :
270+ logger .warning (f"Could not extract centroid from mask { mask_path } : { e } " )
271+ return None
235272
236273 x , y , z = centroid
237274
238275 row = {
239- "image_path" : str (series_dir / "image.nii.gz" ),
276+ "image_path" : str (image_path ),
240277 "PatientID" : series_dir .parent .name ,
241278 "coordX" : x ,
242279 "coordY" : y ,
0 commit comments