Skip to content

Commit

Permalink
fix: 🐛 Improve the way to handle AcquisitionDateTime
Browse files Browse the repository at this point in the history
Fix some typos; put files whose AcquisitionDateTime is missing or malformed into group_UNK and report a warning; handle AcquisitionDateTime values without fractional seconds.
  • Loading branch information
alanwilter committed Nov 13, 2024
1 parent ecc4c6e commit 65afd80
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: check-shebang-scripts-are-executable
- id: check-added-large-files
args:
- "--maxkb=12000"
- "--maxkb=5000"
# - id: debug-statements # not for process_dcm
- id: check-yaml
exclude: mkdocs.yml
Expand Down
8 changes: 5 additions & 3 deletions .talismanrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
fileignoreconfig:
- filename: tests/test_main.py
checksum: cc564a13bd73b557d0ef095252a2f1a9fd166a4a70d68d805ee9fd706fc28b32
- filename: tests/test_utils.py
checksum: 27c2987e02ca5255d6711a82e4b8921ea6585bc4ac06a893f250fe12b4b236a5
- filename: poetry.lock
checksum: e8dbedc758eb55f4bd5e6d5e8fdac611ad139df6d4f769603fa6671649032f5c
- filename: README.md
checksum: b842faad8156171b1388e3b3083a5c2bc120c45b25fa2412ca0f12ae1b5d5899
checksum: 4fe8810d9b7b0faa6e9aabea51871fde73b004dc93ebdba8462ec31cac3e22a8
version: ""
18 changes: 9 additions & 9 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion process_dcm/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""app to procces DCM files."""
"""app to process DCM files."""

import csv
import os
Expand Down
70 changes: 52 additions & 18 deletions process_dcm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def set_output_dir(ref_path: str | Path, a_path: str | Path) -> str:
Args:
ref_path: A reference path used when the given path is relative or broken.
a_path: The path to be analyzed and potentially resolved.
a_path: The path to be analysed and potentially resolved.
Returns:
The determined output directory as a POSIX string (using forward slashes).
Expand Down Expand Up @@ -123,7 +123,7 @@ def meta_images(dcm_obj: FileDataset) -> dict:
{"photo_locations": [{"start": {"x": cc[1], "y": cc[0]}, "end": {"x": cc[3], "y": cc[2]}}]}
)
else:
typer.secho("WARN: empty photo_locations", fg=typer.colors.RED)
typer.secho("\nWARN: empty photo_locations", fg=typer.colors.RED)
meta["contents"].append({"photo_locations": []})

return meta
Expand Down Expand Up @@ -275,23 +275,44 @@ def update_modality(dcm: FileDataset) -> bool:
return True # Modality updated successfully


def group_dcms_by_acquisition_time(dcms: list[FileDataset], tolerance_seconds: int = 2) -> dict[str, list[FileDataset]]:
"""Group DICOM files by AcquisitionDateTime within the specified tolerance."""
def group_dcms_by_acquisition_time(dcms: list[FileDataset], tol: int = 2) -> dict[str, list[FileDataset]]:
"""Group DICOM files by AcquisitionDateTime within the specified tolerance.
Args:
dcms: List of DICOM FileDataset objects.
tol: Time tolerance for grouping in seconds.
Returns:
Dictionary of grouped DICOM files, keyed by acquisition datetime string.
"""
grouped_dcms: dict[str, list[FileDataset]] = defaultdict(list)

def parse_datetime(dt_str: str) -> datetime:
try:
return datetime.strptime(dt_str, "%Y%m%d%H%M%S.%f")
except ValueError:
return datetime.strptime(dt_str, "%Y%m%d%H%M%S")

for dcm in dcms:
acquisition_datetime_str = dcm.get("AcquisitionDateTime", "unknown")
if acquisition_datetime_str != "unknown":
acquisition_datetime = datetime.strptime(acquisition_datetime_str, "%Y%m%d%H%M%S.%f")
# Find the closest group within the tolerance
for group_time_str, group in grouped_dcms.items():
if group_time_str != "unknown":
group_time = datetime.strptime(group_time_str, "%Y%m%d%H%M%S.%f")
if abs(acquisition_datetime - group_time) <= timedelta(seconds=tolerance_seconds):
grouped_dcms[group_time_str].append(dcm)
break
else:
# If no close group found, create a new one
grouped_dcms[acquisition_datetime_str].append(dcm)
try:
acquisition_datetime = parse_datetime(acquisition_datetime_str)
# Find the closest group within the tolerance
for group_time_str, group in grouped_dcms.items():
if group_time_str != "unknown":
group_time = parse_datetime(group_time_str)
if abs(acquisition_datetime - group_time) <= timedelta(seconds=tol):
grouped_dcms[group_time_str].append(dcm)
break
else:
# If no close group found, create a new one
grouped_dcms[acquisition_datetime_str].append(dcm)
except ValueError:
typer.secho(
f"\nWARN: Unexpected AcquisitionDateTime format: {acquisition_datetime_str}", fg=typer.colors.RED
)
grouped_dcms["unknown"].append(dcm)
else:
grouped_dcms["unknown"].append(dcm)

Expand Down Expand Up @@ -382,13 +403,20 @@ def process_dcm(

if group:
# Group DICOM files by AcquisitionDateTime
grouped_dcms = group_dcms_by_acquisition_time(dcms, tolerance_seconds=tol)
grouped_dcms = group_dcms_by_acquisition_time(dcms, tol=tol)

# Sort groups by AcquisitionDateTime
sorted_groups = sorted(grouped_dcms.items())

for gid, (_, group_dcms) in enumerate(sorted_groups):
for gid, (acquisition_time, group_dcms) in enumerate(sorted_groups):
group_dir = os.path.join(output_dir, f"group_{gid}")

if acquisition_time == "unknown":
group_dir = os.path.join(output_dir, "group_UNK")
typer.secho(
f"\nWARN: unknown AcquisitionDateTime, results in {group_dir} are not reliable", fg=typer.colors.RED
)

os.makedirs(group_dir, exist_ok=True)

for dcm in group_dcms:
Expand All @@ -400,6 +428,12 @@ def process_dcm(

for i in range(dcm.NumberOfFrames):
out_img = os.path.join(group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}")
if os.path.exists(out_img):
dcm.AccessionNumber += 1 # increase group_id
out_img = os.path.join(
group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}"
)

array = cv2.normalize(arr[i], None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) # type: ignore #AWSS
image = Image.fromarray(array)
image.save
Expand Down Expand Up @@ -551,7 +585,7 @@ def process_and_save_csv(unique_sorted_results: list, reserved_csv: str, quiet:
unique_sorted_results (list): The data to be written to the CSV file. Each sublist
represents a row with 'study_id' and 'patient_id'.
reserved_csv (str): The path to the reserved CSV file.
quiet (bool, optional): Silene verbosity. Defaults to False.
quiet (bool, optional): Silence verbosity. Defaults to False.
"""
temp_filename = save_to_temp_file(unique_sorted_results)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ python-gdcm = "^3.0.24.1"
process-dcm = "process_dcm.main:cli"

[tool.poetry.group.dev.dependencies]
boto3-stubs = { extras = ["lambda", "s3"], version = "1.35.58" }
boto3-stubs = { extras = ["lambda", "s3"], version = "1.35.59" }
ipdb = "0.13.13"
ipython = "8.29.0"
jupyterlab = "4.3.0"
Expand Down
Binary file added tests/cataract/eye.dcm
Binary file not shown.
Binary file added tests/dummy_ex/wrong_acqui_time.dcm
Binary file not shown.
21 changes: 18 additions & 3 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ def test_main_dummy(janitor, runner):
assert result.exit_code == 0
tof = sorted(glob("dummy_dir/**/*"))
of = [x for x in tof if "metadata.json" not in x]
assert len(tof) == 2
assert get_md5("dummy_dir/dummy_ex/metadata.json", bottom) == "0fbf0de5556e77447925ff3bfaf8168e"
assert get_md5(of) in ["377b5a17c284226518ccef9bddff25af"]
assert len(tof) == 3
assert get_md5("dummy_dir/dummy_ex/metadata.json", bottom) == "0693469a3fcf388d89627eb212ace2bc"
assert get_md5(of) in ["30b70623445f7c12d8ad773c9738c7ce"]


def test_main_mapping(janitor, runner):
Expand Down Expand Up @@ -207,6 +207,21 @@ def test_process_task_optos():
assert result == ("0570586923", "BEH002")


def test_process_acquisition_datetime():
    """Run ``process_task`` on ``tests/cataract/`` with grouping enabled and check the returned pair.

    The output goes to a throw-away temporary directory. The inline labels
    below repeat the local names originally given to each positional argument.
    """
    expected = ("0558756784", "20241113-093410")
    with TemporaryDirectory() as scratch:
        outcome = process_task(
            ("tests/cataract/", str(Path(scratch))),  # task_data: (input dir, output dir)
            "png",  # image_format
            True,  # overwrite
            True,  # verbose
            "",  # keep
            "",  # mapping
            True,  # group
            2,  # tol
        )
        assert outcome == expected


# def test_process_taskL():
# with TemporaryDirectory() as tmpdirname:
# output_dir = Path(tmpdirname)
Expand Down
9 changes: 5 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_meta_images_optopol(dicom_opotopol: FileDataset, mocker: MockerFixture)

assert meta["modality"] == "OCT", "Modality extraction failed"
assert len(meta["contents"]) == 1 # Should correspond to NumberOfFrames
mock_secho.assert_called_with("WARN: empty photo_locations", fg=typer.colors.RED)
mock_secho.assert_called_with("\nWARN: empty photo_locations", fg=typer.colors.RED)


def test_meta_images_attribute_error(dicom_attribute_error: FileDataset) -> None:
Expand Down Expand Up @@ -277,7 +277,7 @@ def test_process_dcm_dummy(temp_output_dir):
input_dir="tests/dummy_ex", output_dir=temp_output_dir, overwrite=True
)
assert new_patient_key, original_patient_key == ("2375458543", "123456")
assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1"
assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "b1fb22938cd95348cbcb44a63ed34fcf"


def test_process_dcm_dummy_group(temp_output_dir):
Expand All @@ -286,7 +286,8 @@ def test_process_dcm_dummy_group(temp_output_dir):
)
assert new_patient_key, original_patient_key == ("2375458543", "123456")
assert (
get_md5(os.path.join(temp_output_dir, "group_0", "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1"
get_md5(os.path.join(temp_output_dir, "group_UNK", "metadata.json"), bottom)
== "b1fb22938cd95348cbcb44a63ed34fcf"
)


Expand All @@ -295,7 +296,7 @@ def test_process_dcm_dummy_mapping(temp_output_dir):
input_dir="tests/dummy_ex", output_dir=temp_output_dir, overwrite=True, mapping="tests/map.csv"
)
assert new_patient_key, original_patient_key == ("2375458543", "123456")
assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1"
assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "b1fb22938cd95348cbcb44a63ed34fcf"


def test_delete_empty_folder(temp_directory):
Expand Down

0 comments on commit 65afd80

Please sign in to comment.