diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 86b76e5..1ce9be4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: check-shebang-scripts-are-executable - id: check-added-large-files args: - - "--maxkb=12000" + - "--maxkb=5000" # - id: debug-statements # not for process_dcm - id: check-yaml exclude: mkdocs.yml diff --git a/.talismanrc b/.talismanrc index b6b8266..8e2e2fd 100644 --- a/.talismanrc +++ b/.talismanrc @@ -1,6 +1,8 @@ fileignoreconfig: + - filename: tests/test_main.py + checksum: cc564a13bd73b557d0ef095252a2f1a9fd166a4a70d68d805ee9fd706fc28b32 + - filename: tests/test_utils.py + checksum: 27c2987e02ca5255d6711a82e4b8921ea6585bc4ac06a893f250fe12b4b236a5 - filename: poetry.lock - checksum: e8dbedc758eb55f4bd5e6d5e8fdac611ad139df6d4f769603fa6671649032f5c - - filename: README.md - checksum: b842faad8156171b1388e3b3083a5c2bc120c45b25fa2412ca0f12ae1b5d5899 + checksum: 4fe8810d9b7b0faa6e9aabea51871fde73b004dc93ebdba8462ec31cac3e22a8 version: "" diff --git a/poetry.lock b/poetry.lock index 9b3d068..5c1e608 100644 --- a/poetry.lock +++ b/poetry.lock @@ -214,13 +214,13 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "boto3-stubs" -version = "1.35.58" -description = "Type annotations for boto3 1.35.58 generated with mypy-boto3-builder 8.2.1" +version = "1.35.59" +description = "Type annotations for boto3 1.35.59 generated with mypy-boto3-builder 8.2.1" optional = false python-versions = ">=3.8" files = [ - {file = "boto3_stubs-1.35.58-py3-none-any.whl", hash = "sha256:221dbe6aec614a12bd2c7426e3f879a0f4c2d90a8ed283a248af9d7b3071eed3"}, - {file = "boto3_stubs-1.35.58.tar.gz", hash = "sha256:a818cb8d1d5d9b51db2631f485d3de8d227b3607b2014a9089b55735772dd0ff"}, + {file = "boto3_stubs-1.35.59-py3-none-any.whl", hash = "sha256:65b52800dc7ff1579c1d9f46d1176f4e6e4a883483a4f5b338bde114f24c8a5c"}, + {file = "boto3_stubs-1.35.59.tar.gz", hash = "sha256:984e705d354cb969645b8f6384a4f167620afc239e52f998a7287fd7c9bb0b68"}, ] [package.dependencies] @@ -273,7 +273,7 @@ bedrock-agent = ["mypy-boto3-bedrock-agent (>=1.35.0,<1.36.0)"] bedrock-agent-runtime = ["mypy-boto3-bedrock-agent-runtime (>=1.35.0,<1.36.0)"] bedrock-runtime = ["mypy-boto3-bedrock-runtime (>=1.35.0,<1.36.0)"] billingconductor = ["mypy-boto3-billingconductor (>=1.35.0,<1.36.0)"] -boto3 = ["boto3 (==1.35.58)", "botocore (==1.35.58)"] +boto3 = ["boto3 (==1.35.59)", "botocore (==1.35.59)"] braket = ["mypy-boto3-braket (>=1.35.0,<1.36.0)"] budgets = ["mypy-boto3-budgets (>=1.35.0,<1.36.0)"] ce = ["mypy-boto3-ce (>=1.35.0,<1.36.0)"] @@ -628,13 +628,13 @@ xray = ["mypy-boto3-xray (>=1.35.0,<1.36.0)"] [[package]] name = "botocore-stubs" -version = "1.35.58" +version = "1.35.59" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.8" files = [ - {file = "botocore_stubs-1.35.58-py3-none-any.whl", hash = "sha256:cd10f24916177c3a77e4b5a5d4443e19c907e1bf0a5db25da21449a50aebad70"}, - {file = "botocore_stubs-1.35.58.tar.gz", hash = "sha256:9b695d6309e7d5eed08fbaf30f9526669b847f65b0f01006afd23ad977187d1c"}, + {file = "botocore_stubs-1.35.59-py3-none-any.whl", hash = "sha256:dd83003963ca957a6e4835d192d7f163fb55312ce3d3f798f625ac9438616e4f"}, + {file = "botocore_stubs-1.35.59.tar.gz", hash = "sha256:1456af3358be1a0e49dd8428bfb81863406659d9fad871362bf18a098eeac90a"}, ] [package.dependencies] @@ -3527,4 +3527,4 @@ test = ["pytest"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "3dc382a0649acbdb0099c06026ee24d02258d2ef7e523352b251e15c603142f7" +content-hash = "b43c093bbf42bbf3f7a02c23af585cf503c22a8d9623e1a895cd2ba072f6b578" diff --git a/process_dcm/main.py b/process_dcm/main.py index 6647d15..79034e2 100644 --- a/process_dcm/main.py +++ b/process_dcm/main.py @@ -1,4 +1,4 @@ -"""app to procces DCM files.""" +"""app to process DCM files.""" import csv import os diff --git a/process_dcm/utils.py b/process_dcm/utils.py index f5d6735..6941d78 100644 --- a/process_dcm/utils.py +++ b/process_dcm/utils.py @@ -47,7 +47,7 @@ def set_output_dir(ref_path: str | Path, a_path: str | Path) -> str: Args: ref_path: A reference path used when the given path is relative or broken. - a_path: The path to be analyzed and potentially resolved. + a_path: The path to be analysed and potentially resolved. Returns: The determined output directory as a POSIX string (using forward slashes). @@ -123,7 +123,7 @@ def meta_images(dcm_obj: FileDataset) -> dict: {"photo_locations": [{"start": {"x": cc[1], "y": cc[0]}, "end": {"x": cc[3], "y": cc[2]}}]} ) else: - typer.secho("WARN: empty photo_locations", fg=typer.colors.RED) + typer.secho("\nWARN: empty photo_locations", fg=typer.colors.RED) meta["contents"].append({"photo_locations": []}) return meta @@ -275,23 +275,44 @@ def update_modality(dcm: FileDataset) -> bool: return True # Modality updated successfully -def group_dcms_by_acquisition_time(dcms: list[FileDataset], tolerance_seconds: int = 2) -> dict[str, list[FileDataset]]: - """Group DICOM files by AcquisitionDateTime within the specified tolerance.""" +def group_dcms_by_acquisition_time(dcms: list[FileDataset], tol: int = 2) -> dict[str, list[FileDataset]]: + """Group DICOM files by AcquisitionDateTime within the specified tolerance. + + Args: + dcms: List of DICOM FileDataset objects. + tol: Time tolerance for grouping in seconds. + + Returns: + Dictionary of grouped DICOM files, keyed by acquisition datetime string. + """ grouped_dcms: dict[str, list[FileDataset]] = defaultdict(list) + + def parse_datetime(dt_str: str) -> datetime: + try: + return datetime.strptime(dt_str, "%Y%m%d%H%M%S.%f") + except ValueError: + return datetime.strptime(dt_str, "%Y%m%d%H%M%S") + for dcm in dcms: acquisition_datetime_str = dcm.get("AcquisitionDateTime", "unknown") if acquisition_datetime_str != "unknown": - acquisition_datetime = datetime.strptime(acquisition_datetime_str, "%Y%m%d%H%M%S.%f") - # Find the closest group within the tolerance - for group_time_str, group in grouped_dcms.items(): - if group_time_str != "unknown": - group_time = datetime.strptime(group_time_str, "%Y%m%d%H%M%S.%f") - if abs(acquisition_datetime - group_time) <= timedelta(seconds=tolerance_seconds): - grouped_dcms[group_time_str].append(dcm) - break - else: - # If no close group found, create a new one - grouped_dcms[acquisition_datetime_str].append(dcm) + try: + acquisition_datetime = parse_datetime(acquisition_datetime_str) + # Find the closest group within the tolerance + for group_time_str, group in grouped_dcms.items(): + if group_time_str != "unknown": + group_time = parse_datetime(group_time_str) + if abs(acquisition_datetime - group_time) <= timedelta(seconds=tol): + grouped_dcms[group_time_str].append(dcm) + break + else: + # If no close group found, create a new one + grouped_dcms[acquisition_datetime_str].append(dcm) + except ValueError: + typer.secho( + f"\nWARN: Unexpected AcquisitionDateTime format: {acquisition_datetime_str}", fg=typer.colors.RED + ) + grouped_dcms["unknown"].append(dcm) else: grouped_dcms["unknown"].append(dcm) @@ -382,13 +403,20 @@ def process_dcm( if group: # Group DICOM files by AcquisitionDateTime - grouped_dcms = group_dcms_by_acquisition_time(dcms, tolerance_seconds=tol) + grouped_dcms = group_dcms_by_acquisition_time(dcms, tol=tol) # Sort groups by AcquisitionDateTime sorted_groups = sorted(grouped_dcms.items()) - for gid, (_, group_dcms) in enumerate(sorted_groups): + for gid, (acquisition_time, group_dcms) in enumerate(sorted_groups): group_dir = os.path.join(output_dir, f"group_{gid}") + + if acquisition_time == "unknown": + group_dir = os.path.join(output_dir, "group_UNK") + typer.secho( + f"\nWARN: unknown AcquisitionDateTime, results in {group_dir} are not reliable", fg=typer.colors.RED + ) + os.makedirs(group_dir, exist_ok=True) for dcm in group_dcms: @@ -400,6 +428,12 @@ def process_dcm( for i in range(dcm.NumberOfFrames): out_img = os.path.join(group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}") + if os.path.exists(out_img): + dcm.AccessionNumber += 1 # increase group_id + out_img = os.path.join( + group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}" + ) + array = cv2.normalize(arr[i], None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) # type: ignore #AWSS image = Image.fromarray(array) image.save @@ -551,7 +585,7 @@ def process_and_save_csv(unique_sorted_results: list, reserved_csv: str, quiet: unique_sorted_results (list): The data to be written to the CSV file. Each sublist represents a row with 'study_id' and 'patient_id'. reserved_csv (str): The path to the reserved CSV file. - quiet (bool, optional): Silene verbosity. Defaults to False. + quiet (bool, optional): Silence verbosity. Defaults to False. """ temp_filename = save_to_temp_file(unique_sorted_results) diff --git a/pyproject.toml b/pyproject.toml index d7e1aa0..bc5cd12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ python-gdcm = "^3.0.24.1" process-dcm = "process_dcm.main:cli" [tool.poetry.group.dev.dependencies] -boto3-stubs = { extras = ["lambda", "s3"], version = "1.35.58" } +boto3-stubs = { extras = ["lambda", "s3"], version = "1.35.59" } ipdb = "0.13.13" ipython = "8.29.0" jupyterlab = "4.3.0" diff --git a/tests/cataract/eye.dcm b/tests/cataract/eye.dcm new file mode 100644 index 0000000..b700bc6 Binary files /dev/null and b/tests/cataract/eye.dcm differ diff --git a/tests/dummy_ex/wrong_acqui_time.dcm b/tests/dummy_ex/wrong_acqui_time.dcm new file mode 100644 index 0000000..727160d Binary files /dev/null and b/tests/dummy_ex/wrong_acqui_time.dcm differ diff --git a/tests/test_main.py b/tests/test_main.py index 8ee317d..c16629e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -97,9 +97,9 @@ def test_main_dummy(janitor, runner): assert result.exit_code == 0 tof = sorted(glob("dummy_dir/**/*")) of = [x for x in tof if "metadata.json" not in x] - assert len(tof) == 2 - assert get_md5("dummy_dir/dummy_ex/metadata.json", bottom) == "0fbf0de5556e77447925ff3bfaf8168e" - assert get_md5(of) in ["377b5a17c284226518ccef9bddff25af"] + assert len(tof) == 3 + assert get_md5("dummy_dir/dummy_ex/metadata.json", bottom) == "0693469a3fcf388d89627eb212ace2bc" + assert get_md5(of) in ["30b70623445f7c12d8ad773c9738c7ce"] def test_main_mapping(janitor, runner): @@ -207,6 +207,21 @@ def test_process_task_optos(): assert result == ("0570586923", "BEH002") +def test_process_acquisition_datetime(): + with TemporaryDirectory() as tmpdirname: + output_dir = Path(tmpdirname) + task_data = ("tests/cataract/", str(output_dir)) + image_format = "png" + overwrite = True + verbose = True + keep = "" + mapping = "" + group = True + tol = 2 + result = process_task(task_data, image_format, overwrite, verbose, keep, mapping, group, tol) + assert result == ("0558756784", "20241113-093410") + + # def test_process_taskL(): # with TemporaryDirectory() as tmpdirname: # output_dir = Path(tmpdirname) diff --git a/tests/test_utils.py b/tests/test_utils.py index c6301c5..9fc7215 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -36,7 +36,7 @@ def test_meta_images_optopol(dicom_opotopol: FileDataset, mocker: MockerFixture) assert meta["modality"] == "OCT", "Modality extraction failed" assert len(meta["contents"]) == 1 # Should correspond to NumberOfFrames - mock_secho.assert_called_with("WARN: empty photo_locations", fg=typer.colors.RED) + mock_secho.assert_called_with("\nWARN: empty photo_locations", fg=typer.colors.RED) def test_meta_images_attribute_error(dicom_attribute_error: FileDataset) -> None: @@ -277,7 +277,7 @@ def test_process_dcm_dummy(temp_output_dir): input_dir="tests/dummy_ex", output_dir=temp_output_dir, overwrite=True ) assert new_patient_key, original_patient_key == ("2375458543", "123456") - assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1" + assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "b1fb22938cd95348cbcb44a63ed34fcf" def test_process_dcm_dummy_group(temp_output_dir): @@ -286,7 +286,8 @@ def test_process_dcm_dummy_group(temp_output_dir): ) assert new_patient_key, original_patient_key == ("2375458543", "123456") assert ( - get_md5(os.path.join(temp_output_dir, "group_0", "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1" + get_md5(os.path.join(temp_output_dir, "group_UNK", "metadata.json"), bottom) + == "b1fb22938cd95348cbcb44a63ed34fcf" ) @@ -295,7 +296,7 @@ def test_process_dcm_dummy_mapping(temp_output_dir): input_dir="tests/dummy_ex", output_dir=temp_output_dir, overwrite=True, mapping="tests/map.csv" ) assert new_patient_key, original_patient_key == ("2375458543", "123456") - assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "75a81f29397d3919603a8d55b305fdb1" + assert get_md5(os.path.join(temp_output_dir, "metadata.json"), bottom) == "b1fb22938cd95348cbcb44a63ed34fcf" def test_delete_empty_folder(temp_directory):