@@ -190,53 +190,90 @@ def download_RADIO(path, samples=None):
190
190
download_from_manifest (df , save_dir , samples )
191
191
192
192
193
- def process_series_dir (series_dir ):
193
+ def process_series_dir (series_dir : Path ):
194
194
"""
195
195
Process the series directory and extract relevant information.
196
196
197
197
Args:
198
198
series_dir (Path): The path to the series directory.
199
199
200
200
Returns:
201
- dict: A dictionary containing the extracted information, including the image path, patient ID, and coordinates.
201
+ dict: A dictionary containing the extracted information, including the
202
+ image path, patient ID, and centroid coordinates.
203
+ None: If there's no RTSTRUCT or SEG file, or any step fails.
202
204
203
205
Raises:
204
206
None
205
207
"""
206
208
# Check if RTSTRUCT file exists
207
- rtstuct_files = list (series_dir .glob ("*RTSTRUCT*" ))
209
+ rt_struct_files = list (series_dir .glob ("*RTSTRUCT*" ))
208
210
seg_files = list (series_dir .glob ("*SEG*" ))
209
211
210
- if len (rtstuct_files ) != 0 :
211
- dcmrtstruct2nii (str (rtstuct_files [0 ]), str (series_dir ), str (series_dir ))
212
+ # Convert DICOM to NIfTI based on whether it's RTSTRUCT or SEG
213
+ if len (rt_struct_files ) != 0 :
214
+ dcmrtstruct2nii (str (rt_struct_files [0 ]), str (series_dir ), str (series_dir ))
212
215
213
216
elif len (seg_files ) != 0 :
214
217
dcmseg2nii (str (seg_files [0 ]), str (series_dir ), tag = "GTV-" )
215
-
216
- series_id = str (list (series_dir .glob ("CT*.dcm" ))[0 ]).split ("_" )[- 2 ]
218
+
219
+ # Build the main image NIfTI
220
+ try :
221
+ series_id = str (list (series_dir .glob ("CT*.dcm" ))[0 ]).split ("_" )[- 2 ]
222
+ except IndexError :
223
+ logger .warning (f"No 'CT*.dcm' file found under { series_dir } . Skipping." )
224
+ return None
225
+
217
226
dicom_image = DcmInputAdapter ().ingest (str (series_dir ), series_id = series_id )
218
227
nii_output_adapter = NiiOutputAdapter ()
219
228
nii_output_adapter .write (dicom_image , f"{ series_dir } /image" , gzip = True )
229
+
220
230
else :
221
- logger .warning ("Skipped file without any RTSTRUCT or SEG file" )
231
+ logger .warning (f"No RTSTRUCT or SEG file found in { series_dir } . Skipping." )
232
+ return None
233
+
234
+ # Read the image (generated above)
235
+ image_path = series_dir / "image.nii.gz"
236
+ if not image_path .exists ():
237
+ logger .warning (f"No image file found at { image_path } . Skipping." )
222
238
return None
223
239
224
- image = sitk .ReadImage (str (series_dir / "image.nii.gz" ))
225
- mask = sitk .ReadImage (str (list (series_dir .glob ("*GTV-1*" ))[0 ]))
240
+ try :
241
+ image = sitk .ReadImage (str (image_path ))
242
+ except Exception as e :
243
+ logger .error (f"Failed to read image { image_path } : { e } " )
244
+ return None
245
+
246
+ # Find the GTV-1 mask files
247
+ gtv1_masks = list (series_dir .glob ("*GTV-1*.nii.gz" ))
248
+ if not gtv1_masks :
249
+ logger .warning (f"No GTV-1 mask found in { series_dir } . Skipping." )
250
+ return None
251
+
252
+ mask_path = gtv1_masks [0 ]
253
+ try :
254
+ mask = sitk .ReadImage (str (mask_path ))
255
+ except Exception as e :
256
+ logger .error (f"Failed to read mask { mask_path } : { e } " )
257
+ return None
226
258
227
- # Get centroid from label shape filter
259
+ # Extract centroid from the mask
228
260
label_shape_filter = sitk .LabelShapeStatisticsImageFilter ()
229
261
label_shape_filter .Execute (mask )
230
262
263
+ # Mask label values vary: some masks use 255, others use 1; try 255 first, then fall back to 1
231
264
try :
232
265
centroid = label_shape_filter .GetCentroid (255 )
233
266
except :
234
- centroid = label_shape_filter .GetCentroid (1 )
267
+ try :
268
+ centroid = label_shape_filter .GetCentroid (1 )
269
+ except Exception as e :
270
+ logger .warning (f"Could not extract centroid from mask { mask_path } : { e } " )
271
+ return None
235
272
236
273
x , y , z = centroid
237
274
238
275
row = {
239
- "image_path" : str (series_dir / "image.nii.gz" ),
276
+ "image_path" : str (image_path ),
240
277
"PatientID" : series_dir .parent .name ,
241
278
"coordX" : x ,
242
279
"coordY" : y ,
0 commit comments