Skip to content

Commit 860b90b

Browse files
authored
Merge pull request #316 from earlydx-cloud/master
Fixed issue #314 (https://github.com/AIM-Harvard/foundation-cancer-im…
2 parents 2a2bb59 + e7f0fce commit 860b90b

File tree

1 file changed

+50
-13
lines changed

1 file changed

+50
-13
lines changed

fmcib/utils/idc_helper.py

+50-13
Original file line number | Diff line number | Diff line change
@@ -190,53 +190,90 @@ def download_RADIO(path, samples=None):
190190
download_from_manifest(df, save_dir, samples)
191191

192192

193-
def process_series_dir(series_dir):
193+
def process_series_dir(series_dir: Path):
194194
"""
195195
Process the series directory and extract relevant information.
196196
197197
Args:
198198
series_dir (Path): The path to the series directory.
199199
200200
Returns:
201-
dict: A dictionary containing the extracted information, including the image path, patient ID, and coordinates.
201+
dict: A dictionary containing the extracted information, including the
202+
image path, patient ID, and centroid coordinates.
203+
None: If there's no RTSTRUCT or SEG file, or any step fails.
202204
203205
Raises:
204206
None
205207
"""
206208
# Check if RTSTRUCT file exists
207-
rtstuct_files = list(series_dir.glob("*RTSTRUCT*"))
209+
rt_struct_files = list(series_dir.glob("*RTSTRUCT*"))
208210
seg_files = list(series_dir.glob("*SEG*"))
209211

210-
if len(rtstuct_files) != 0:
211-
dcmrtstruct2nii(str(rtstuct_files[0]), str(series_dir), str(series_dir))
212+
# Convert DICOM to NIfTI based on whether it's RTSTRUCT or SEG
213+
if len(rt_struct_files) != 0:
214+
dcmrtstruct2nii(str(rt_struct_files[0]), str(series_dir), str(series_dir))
212215

213216
elif len(seg_files) != 0:
214217
dcmseg2nii(str(seg_files[0]), str(series_dir), tag="GTV-")
215-
216-
series_id = str(list(series_dir.glob("CT*.dcm"))[0]).split("_")[-2]
218+
219+
# Build the main image NIfTI
220+
try:
221+
series_id = str(list(series_dir.glob("CT*.dcm"))[0]).split("_")[-2]
222+
except IndexError:
223+
logger.warning(f"No 'CT*.dcm' file found under {series_dir}. Skipping.")
224+
return None
225+
217226
dicom_image = DcmInputAdapter().ingest(str(series_dir), series_id=series_id)
218227
nii_output_adapter = NiiOutputAdapter()
219228
nii_output_adapter.write(dicom_image, f"{series_dir}/image", gzip=True)
229+
220230
else:
221-
logger.warning("Skipped file without any RTSTRUCT or SEG file")
231+
logger.warning(f"No RTSTRUCT or SEG file found in {series_dir}. Skipping.")
232+
return None
233+
234+
# Read the image (generated above)
235+
image_path = series_dir / "image.nii.gz"
236+
if not image_path.exists():
237+
logger.warning(f"No image file found at {image_path}. Skipping.")
222238
return None
223239

224-
image = sitk.ReadImage(str(series_dir / "image.nii.gz"))
225-
mask = sitk.ReadImage(str(list(series_dir.glob("*GTV-1*"))[0]))
240+
try:
241+
image = sitk.ReadImage(str(image_path))
242+
except Exception as e:
243+
logger.error(f"Failed to read image {image_path}: {e}")
244+
return None
245+
246+
# Find the GTV-1 mask files
247+
gtv1_masks = list(series_dir.glob("*GTV-1*.nii.gz"))
248+
if not gtv1_masks:
249+
logger.warning(f"No GTV-1 mask found in {series_dir}. Skipping.")
250+
return None
251+
252+
mask_path = gtv1_masks[0]
253+
try:
254+
mask = sitk.ReadImage(str(mask_path))
255+
except Exception as e:
256+
logger.error(f"Failed to read mask {mask_path}: {e}")
257+
return None
226258

227-
# Get centroid from label shape filter
259+
# Extract centroid from the mask
228260
label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
229261
label_shape_filter.Execute(mask)
230262

263+
# Some masks label is 1, others are 255; try 255 first, else 1
231264
try:
232265
centroid = label_shape_filter.GetCentroid(255)
233266
except:
234-
centroid = label_shape_filter.GetCentroid(1)
267+
try:
268+
centroid = label_shape_filter.GetCentroid(1)
269+
except Exception as e:
270+
logger.warning(f"Could not extract centroid from mask {mask_path}: {e}")
271+
return None
235272

236273
x, y, z = centroid
237274

238275
row = {
239-
"image_path": str(series_dir / "image.nii.gz"),
276+
"image_path": str(image_path),
240277
"PatientID": series_dir.parent.name,
241278
"coordX": x,
242279
"coordY": y,

0 commit comments

Comments
 (0)