From 1be893b84473ac558b4b7a1ff850f6e37f3b7256 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Fri, 24 May 2024 12:57:24 -0400 Subject: [PATCH] feat: clean up --- .gitignore | 2 + .pre-commit-config.yaml | 9 +-- pixi.lock | 36 ++++----- src/pydicomsorter/cli.py | 139 +++++++++++++++++++++------------ src/pydicomsorter/file_list.py | 2 + src/pydicomsorter/io.py | 43 +++++----- 6 files changed, 135 insertions(+), 96 deletions(-) diff --git a/.gitignore b/.gitignore index 83b2b41..37bdbad 100644 --- a/.gitignore +++ b/.gitignore @@ -171,6 +171,8 @@ src/testmypixipkg/__pycache__/__init__.cpython-312.pyc data nbia-data +test-data **/.vscode/ +test_fileset.ipynb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2fc1f81..cc590bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,9 @@ repos: - id: pretty-format-json - id: detect-private-key - id: debug-statements + # - id: no-commit-to-branch + # always_run: true + # args: ['--branch', 'main'] - repo: https://github.com/timothycrosley/isort rev: 5.13.2 hooks: @@ -22,13 +25,9 @@ repos: - id: pixi-install name: Run pixi install entry: bash -c 'pixi install --all && git add pixi.lock' + always_run: true language: system files: .+ - # - id: pixi-run-tests - # name: Run pixi tests - # entry: bash -c 'pixi run test' - # language: system - # files: ^src/.*\.py$|^pyproject\.toml$ - id: pixi-run-lint name: Run pixi lint entry: bash -c 'pixi run lint' diff --git a/pixi.lock b/pixi.lock index dadc554..05c1f25 100644 --- a/pixi.lock +++ b/pixi.lock @@ -5627,16 +5627,16 @@ packages: - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/15/b1/e6edfe46402a5b415fc3de86aa64fb10009b323907f8d513175bfb839aa9/pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl - sha256: fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242 + url: https://files.pythonhosted.org/packages/e4/49/f29028068b5cb364ad066a58490dd26fd1d4ba2943d829eb0f85dbc8ab06/pydantic_core-2.18.2-cp312-none-win_amd64.whl + sha256: b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/30/49/397da3f6910d62f092684a50bcaba2566825c6eee27a743846583a01fadf/pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl - sha256: 6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043 + url: https://files.pythonhosted.org/packages/15/b1/e6edfe46402a5b415fc3de86aa64fb10009b323907f8d513175bfb839aa9/pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl + sha256: fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' @@ -5651,48 +5651,48 @@ packages: - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/e4/49/f29028068b5cb364ad066a58490dd26fd1d4ba2943d829eb0f85dbc8ab06/pydantic_core-2.18.2-cp312-none-win_amd64.whl - sha256: b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438 + url: https://files.pythonhosted.org/packages/30/49/397da3f6910d62f092684a50bcaba2566825c6eee27a743846583a01fadf/pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl + sha256: 6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/a6/a5/c351d83454267964d24b79e9116d716157071df4682f865d1274235a0cac/pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: 553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af + url: https://files.pythonhosted.org/packages/79/b8/8be6e21881344ab91df49dcd6f7ef34729c2868019f503699b2724f4195a/pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl + sha256: f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/79/b8/8be6e21881344ab91df49dcd6f7ef34729c2868019f503699b2724f4195a/pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl - sha256: f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2 + url: https://files.pythonhosted.org/packages/a6/a5/c351d83454267964d24b79e9116d716157071df4682f865d1274235a0cac/pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl + sha256: 553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/c0/94/55f5b643992a57a244f7f7119b5eabebe9e592c3c8282a132ac21c965812/pydantic_core-2.18.2-cp310-none-win_amd64.whl - sha256: e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543 + url: https://files.pythonhosted.org/packages/d8/3d/ae9491c1f071f7f49c8bc7b161325e658ea53f72a12ec5fd7a4ea4fa01ee/pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl + sha256: 9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/d8/3d/ae9491c1f071f7f49c8bc7b161325e658ea53f72a12ec5fd7a4ea4fa01ee/pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl - sha256: 9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81 + url: https://files.pythonhosted.org/packages/c0/94/55f5b643992a57a244f7f7119b5eabebe9e592c3c8282a132ac21c965812/pydantic_core-2.18.2-cp310-none-win_amd64.whl + sha256: e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/5d/61/bfc32484eac102051ef85f5e648c9777f57398c83e5f87e3c0a420a6550b/pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl - sha256: cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4 + url: https://files.pythonhosted.org/packages/9d/b0/e8bebe8fd08ea6ec027b7304c84f4652f2933514caf9f6a418d259d2a950/pydantic_core-2.18.2-cp311-none-win_amd64.whl + sha256: 800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' @@ -5715,8 +5715,8 @@ packages: - kind: pypi name: pydantic-core version: 2.18.2 - url: https://files.pythonhosted.org/packages/9d/b0/e8bebe8fd08ea6ec027b7304c84f4652f2933514caf9f6a418d259d2a950/pydantic_core-2.18.2-cp311-none-win_amd64.whl - sha256: 800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3 + url: https://files.pythonhosted.org/packages/5d/61/bfc32484eac102051ef85f5e648c9777f57398c83e5f87e3c0a420a6550b/pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl + sha256: cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4 requires_dist: - typing-extensions>=4.6.0,!=4.7.0 requires_python: '>=3.8' diff --git a/src/pydicomsorter/cli.py b/src/pydicomsorter/cli.py index e4d5b00..7ae2630 100644 --- a/src/pydicomsorter/cli.py +++ b/src/pydicomsorter/cli.py @@ -14,18 +14,22 @@ click.rich_click.STYLE_COMMANDS_TABLE_COLUMN_WIDTH_RATIO = (1, 2) click.rich_click.OPTION_GROUPS = { - 'dicomsort': [ + "dicomsort": [ { - 'name': 'Required options', - 'options': ['--method'], + "name": "Required options", + "options": ["--method"], }, { - 'name': 'Advanced options', - 'options': ['--dry_run', '--overwrite', '--keep_going'], + "name": "Advanced options", + "options": [ + "--overwrite", + "--keep-going", + "--dry-run", + ], }, { - 'name': 'Basic options', - 'options': ['--version', '--verbose', '--debug', '--help'], + "name": "Basic options", + "options": ["--version", "--verbose", "--debug", "--help"], }, ] } @@ -39,15 +43,15 @@ def generate_destination_paths( return {k: pathlib.Path(fmt % v).resolve() for k, v in dicom_data.items()} -@click.command(context_settings={'help_option_names': ['-h', '--help']}) +@click.command(context_settings={"help_option_names": ["-h", "--help"]}) @click.option( - '--method', - '-m', - type=click.Choice(['move', 'copy', 'link'], case_sensitive=False), + "--method", + "-m", + type=click.Choice(["move", "copy", "link"], case_sensitive=False), required=True, ) @click.argument( - 'sourcedir', + "sourcedir", type=click.Path( exists=True, path_type=pathlib.Path, @@ -56,41 +60,41 @@ def generate_destination_paths( ), ) @click.argument( - 'destination_dir', + "destination_dir", type=str, ) @click.option( - '-n', - '--dry_run', + "-o", + "--overwrite", is_flag=True, - help='Do not move or copy files, just print what would be done.', + default=False, + show_default=True, + help="Overwrite files if they already exist.", ) @click.option( - '-o', - '--overwrite', + "-k", + "--keep-going", is_flag=True, - default=False, - help='Overwrite files if they already exist.', - show_default=True, + help="Keep going when an error occurs.", ) @click.option( - '-k', - '--keep_going', + "-n", + "--dry-run", is_flag=True, - help='Keep going when an error occurs.', + help="Do not move or copy files, just print what would be done.", ) @click.option( - '--verbose', + "--verbose", is_flag=True, - help='Print verbose output.', + help="Print verbose output.", ) @click.option( - '--debug', + "--debug", is_flag=True, - help='Print debug output.', + help="Print debug output.", ) @click.version_option() -@rich_config(help_config={'style_option': 'bold cyan'}) +@rich_config(help_config={"style_option": "bold cyan"}) def cli( sourcedir: pathlib.Path, destination_dir: str, @@ -102,61 +106,92 @@ def cli( debug: bool, ) -> None: """Main function of the package.""" - import timeit - - start = timeit.default_timer() - - # run find_dicom_files asynchronously files: list[pathlib.Path] = find_dicom_files(source_dir=sourcedir) - print(f'Found {len(files)} DICOM files.') + print(f"Found {len(files)} DICOM files.") # # other code here try: sorter = DICOMSorter(destination_dir).validate_keys() except ValueError as ve: - print(f'Error: {ve}') + print(f"Error: {ve}") return - print(f'Keys to use: {sorter.keys}') file_list: DICOMFileList = DICOMFileList(files).read_tags(sorter.keys) if dry_run: + print(f"Keys to use: {sorter.keys}") file_list.summarize(sorter.keys) return destination_paths = generate_destination_paths(file_list.dicom_data, sorter.format) - print(destination_paths.__len__()) - - execute_method(method, destination_paths) - - print(f'Time: {timeit.default_timer() - start}') - + try: + execute_method( + method, + destination_paths, + overwrite, + keep_going, + ) + except FileExistsError as fee: + print(f"Error: {fee}") + return -def execute_method(method: str, destination_paths: dict[pathlib.Path, pathlib.Path]) -> None: +def execute_method( + method: str, + destination_paths: dict[pathlib.Path, pathlib.Path], + overwrite: bool, + keep_going: bool, +) -> None: """Execute the method on the destination paths.""" with progress.Progress( - '[progress.description]{task.description}', + "[progress.description]{task.description}", progress.BarColumn(), - '[progress.percentage]{task.percentage:>3.0f}%', + "[progress.percentage]{task.percentage:>3.0f}%", progress.MofNCompleteColumn(), - 'Time elapsed:', + "Time elapsed:", progress.TimeElapsedColumn(), - 'Time remaining:', + "Time remaining:", progress.TimeRemainingColumn(compact=True), refresh_per_second=10, # bit slower updates + transient=True, ) as progress2: - task = progress2.add_task('Executing method...', total=len(destination_paths)) + # make sure that method is one of the allowed values + assert method in [ + "move", + "copy", + "link", + ], "Method must be one of 'move', 'copy', or 'link'." + + match method: + case "move": + msg = "Moving files..." + case "copy": + msg = "Copying files..." + case "link": + msg = "Linking files..." + case _: + raise ValueError(f"Invalid method: {method}") + + task = progress2.add_task(f"{msg:.<21}", total=len(destination_paths)) + for source, destination in destination_paths.items(): + if destination.exists() and not overwrite: + print(f"Destination exists: {destination}") + if keep_going: + progress2.update(task, advance=1) + continue + else: + raise FileExistsError(f"Destination exists: {destination}") + if not destination.parent.exists(): destination.parent.mkdir(parents=True) - if method == 'move': + if method == "move": source.rename(destination) - elif method == 'copy': + elif method == "copy": copy(source, destination) - elif method == 'link': + elif method == "link": destination.symlink_to(source) progress2.update(task, advance=1) return None diff --git a/src/pydicomsorter/file_list.py b/src/pydicomsorter/file_list.py index b138acd..fd87fcc 100644 --- a/src/pydicomsorter/file_list.py +++ b/src/pydicomsorter/file_list.py @@ -6,6 +6,7 @@ from pydicomsorter.io import read_tags + class DICOMFileList: """A class to handle and manipulate a list of paths to dicom files.""" @@ -26,6 +27,7 @@ def read_tags(self, tags: list[str]) -> 'DICOMFileList': 'Time remaining:', progress.TimeRemainingColumn(compact=True), refresh_per_second=10, # bit slower updates + transient=True, ) as progress2: task = progress2.add_task('Reading DICOM tags...', total=len(self.files)) for file in self.files: diff --git a/src/pydicomsorter/io.py b/src/pydicomsorter/io.py index c6b6c95..084f233 100644 --- a/src/pydicomsorter/io.py +++ b/src/pydicomsorter/io.py @@ -6,9 +6,10 @@ from pydicom import dcmread from pydicom.errors import InvalidDicomError + def find_dicom_files(source_dir: Path) -> list[Path]: """Find all DICOM files in the source directory.""" - return [file.resolve() for file in source_dir.glob('**/*.dcm') if file.is_file()] + return [file.resolve() for file in source_dir.glob("**/*.dcm") if file.is_file()] def sanitize_file_name(filename: str) -> str: @@ -18,13 +19,13 @@ def sanitize_file_name(filename: str) -> str: # Define a pattern for disallowed filename characters and their replacements disallowed_characters_pattern = re.compile(r'[<>:"/\\|?*\x00-\x1f]') # Replace disallowed characters with an underscore - sanitized_name = disallowed_characters_pattern.sub('_', filename) + sanitized_name = disallowed_characters_pattern.sub("_", filename) # replace spaces with underscores - sanitized_name = sanitized_name.replace(' ', '_') + sanitized_name = sanitized_name.replace(" ", "_") # Remove subsequent underscores - sanitized_name = re.sub(r'(_{2,})', '_', sanitized_name) + sanitized_name = re.sub(r"(_{2,})", "_", sanitized_name) return sanitized_name @@ -47,12 +48,12 @@ def read_all(file: Path, tags: list[str]) -> dict[str, str]: try: dicom = dcmread(file, stop_before_pixels=True) except TypeError as te: - raise TypeError(f'Type error reading DICOM file: {file}') from te + raise TypeError(f"Type error reading DICOM file: {file}") from te except InvalidDicomError as ide: - raise InvalidDicomError(f'Invalid DICOM file: {file}') from ide + raise InvalidDicomError(f"Invalid DICOM file: {file}") from ide except ValueError as ve: - raise ValueError(f'Value error reading DICOM file: {file}') from ve - return {tag: str(dicom.get(tag, '')) for tag in tags} + raise ValueError(f"Value error reading DICOM file: {file}") from ve + return {tag: str(dicom.get(tag, "")) for tag in tags} def read_tags( @@ -65,34 +66,34 @@ def read_tags( try: dicom = dcmread(file, specific_tags=tags, stop_before_pixels=True) except TypeError as te: - raise TypeError(f'Type error reading DICOM file: {file}') from te + raise TypeError(f"Type error reading DICOM file: {file}") from te except InvalidDicomError as ide: - raise InvalidDicomError(f'Invalid DICOM file: {file}') from ide + raise InvalidDicomError(f"Invalid DICOM file: {file}") from ide except ValueError as ve: - raise ValueError(f'Value error reading DICOM file: {file}') from ve + raise ValueError(f"Value error reading DICOM file: {file}") from ve # for all tags, add to dict, but if ends in UID, then truncateUID mydict = {} for tag in tags: val = ( - truncate_uid(str(dicom.get(tag, ''))) - if tag.endswith('UID') and truncate - else str(dicom.get(tag, 'UNKOWN')) + truncate_uid(str(dicom.get(tag, ""))) + if tag.endswith("UID") and truncate + else str(dicom.get(tag, "UNKOWN")) ) - if val == 'UNKOWN': + if val == "UNKOWN": if ( - tag == 'InstanceNumber' + tag == "InstanceNumber" and dcmread( file, - specific_tags=['Modality'], + specific_tags=["Modality"], stop_before_pixels=True, - ).get('Modality') - == 'RTSTRUCT' + ).get("Modality") + == "RTSTRUCT" ): # sometimes the instance number is missing in RTSTRUCT files - val = '1' + val = "1" else: - print(f'Unknown tag: {tag} in file: {file}') + print(f"No value for tag: {tag} in file: {file}") mydict[tag] = sanitize_file_name(val) if sanitize else val