From 8155ed1eeb3782c8ba59d96d826991b4869035f4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:32:27 -0800 Subject: [PATCH 1/6] update pytest and version number --- setup.py | 2 +- tests/integration/test_end_to_end_workflow.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index ab0c30b5..1daa321f 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='gen3_tracker', - version='0.0.7rc1', + version='0.0.7rc2', description='A CLI for adding version control to Gen3 data submission projects.', long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 429ae5fb..86b03fb0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -58,7 +58,7 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "validate"]) # update the file - test_file = pathlib.Path("my-project-data/hello.txt") + test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) test_file.write_text('hello UPDATE\n') # re-add the file @@ -213,7 +213,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec run(runner, ["init", project_id, "--approve"]) result = run(runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0, + expected_exit_code=1, expected_files=[log_file_path] ) @@ -225,9 +225,8 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec lines = log_file.readlines() str_lines = str(lines) - assert "/content/0/attachment/creation" in str_lines, f"expected errors to describe to /content/0/attachment/creation, instead got: \n{str_lines}" - assert "jsonschema" in str_lines, f"expected errors to mention jsonschema, instead got: \n{str_lines}" - assert invalid_date in str_lines, f"expected invalid date {invalid_date} to be logged, instead got: \n{str_lines} " + for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: + assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): From ec045a3cf49d42c8ab604188e546d796da77e0b4 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 11:36:29 -0800 Subject: [PATCH 2/6] linting --- gen3_tracker/gen3/jobs.py | 128 ++- gen3_tracker/git/cli.py | 853 +++++++++++++----- gen3_tracker/meta/entities.py | 21 +- tests/__init__.py | 18 +- tests/integration/__init__.py | 33 +- tests/integration/conftest.py | 2 +- tests/integration/test_bucket_import.py | 17 +- tests/integration/test_bundle.py | 32 +- tests/integration/test_end_to_end_workflow.py | 195 +++- tests/unit/meta/conftest.py | 2 +- tests/unit/meta/test_meta.py | 136 ++- tests/unit/test_coding_conventions.py | 5 +- tests/unit/test_flatten_fhir_example.py | 277 ++++-- tests/unit/test_hash_types.py | 18 +- tests/unit/test_read_dvc.py | 18 +- 15 files changed, 1238 insertions(+), 517 deletions(-) diff --git a/gen3_tracker/gen3/jobs.py b/gen3_tracker/gen3/jobs.py index f544d4a6..24f6b3e7 100644 --- a/gen3_tracker/gen3/jobs.py +++ b/gen3_tracker/gen3/jobs.py @@ -14,26 +14,36 @@ from gen3_tracker import Config from gen3_tracker.common import Push, Commit from gen3_tracker.gen3.indexd import write_indexd -from gen3_tracker.git import calculate_hash, DVC, run_command, DVCMeta, DVCItem, modified_date +from gen3_tracker.git import ( + calculate_hash, + DVC, + run_command, + DVCMeta, + DVCItem, + modified_date, +) def _validate_parameters(from_: str) -> pathlib.Path: - assert len(urlparse(from_).scheme) == 0, f"{from_} appears to be an url. url to url cp not supported" + assert ( + len(urlparse(from_).scheme) == 0 + ), f"{from_} appears to be an url. url to url cp not supported" return from_ -def cp(config: Config, - from_: str, - project_id: str, - ignore_state: bool, - auth=None, - user=None, - object_name=None, - bucket_name=None, - metadata: dict = {}, - ): +def cp( + config: Config, + from_: str, + project_id: str, + ignore_state: bool, + auth=None, + user=None, + object_name=None, + bucket_name=None, + metadata: dict = {}, +): """Copy meta to bucket, used by etl_pod job""" from_ = _validate_parameters(str(from_)) if not isinstance(from_, pathlib.Path): @@ -41,13 +51,15 @@ def cp(config: Config, assert auth, "auth is required" - metadata = dict({'submitter': None, 'metadata_version': '0.0.1', 'is_metadata': True} | metadata) - if not metadata['submitter']: + metadata = dict( + {"submitter": None, "metadata_version": "0.0.1", "is_metadata": True} | metadata + ) + if not metadata["submitter"]: if not user: - user = auth.curl('/user/user').json() - metadata['submitter'] = user['name'] + user = auth.curl("/user/user").json() + metadata["submitter"] = user["name"] - program, project = project_id.split('-') + program, project = project_id.split("-") assert bucket_name, f"could not find bucket for {program}" @@ -57,27 +69,26 @@ def cp(config: Config, if not object_name: now = datetime.now().strftime("%Y%m%d-%H%M%S") - object_name = f'_{project_id}-{now}_meta.zip' + object_name = f"_{project_id}-{now}_meta.zip" zipfile_path = temp_dir / object_name - with ZipFile(zipfile_path, 'w') as zip_object: + with ZipFile(zipfile_path, "w") as zip_object: for _ in from_.glob("*.ndjson"): zip_object.write(_) stat = zipfile_path.stat() - md5_sum = calculate_hash('md5', zipfile_path) + md5_sum = calculate_hash("md5", zipfile_path) my_dvc = DVC( meta=DVCMeta(), outs=[ DVCItem( path=object_name, md5=md5_sum, - hash='md5', + hash="md5", modified=modified_date(zipfile_path), size=stat.st_size, - ) - ] + ], ) metadata = write_indexd( @@ -92,56 +103,81 @@ def cp(config: Config, # document = file_client.upload_file_to_guid(guid=id_, file_name=object_name, bucket=bucket_name) # print(document, file=sys.stderr) - run_command(f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", no_capture=False) + run_command( + f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", + no_capture=False, + ) - return {'msg': f"Uploaded {zipfile_path} to {bucket_name}", "object_id": my_dvc.object_id, "object_name": object_name} + return { + "msg": f"Uploaded {zipfile_path} to {bucket_name}", + "object_id": my_dvc.object_id, + "object_name": object_name, + } -def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None) -> dict: +def publish_commits( + config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None +) -> dict: """Publish commits to the portal.""" # TODO legacy fhir-import-export job: copies meta to bucket and triggers job, # meta information is already in git REPO, # we should consider changing the fhir_import_export job to use the git REPO - user = auth.curl('/user/user').json() + user = auth.curl("/user/user").json() # copy meta to bucket upload_result = cp( config=config, - from_='META', + from_="META", project_id=config.gen3.project_id, ignore_state=True, auth=auth, user=user, - bucket_name=bucket_name + bucket_name=bucket_name, ) - object_id = upload_result['object_id'] + object_id = upload_result["object_id"] push = Push(config=config) jobs_client = Gen3Jobs(auth_provider=auth) # create "legacy" commit object, read by fhir-import-export job - push.commits.append(Commit(object_id=object_id, message='From g3t-git', meta_path=upload_result['object_name'], commit_id=object_id)) - args = {'push': push.model_dump(), 'project_id': config.gen3.project_id, 'method': 'put'} + push.commits.append( + Commit( + object_id=object_id, + message="From g3t-git", + meta_path=upload_result["object_name"], + commit_id=object_id, + ) + ) + args = { + "push": push.model_dump(), + "project_id": config.gen3.project_id, + "method": "put", + } # capture logging from gen3.jobs from cdislogging import get_logger # noqa + cdis_logging = get_logger("__name__") cdis_logging.setLevel(logging.WARN) - + if wait: # async_run_job_and_wait monkeypatched below - _ = asyncio.run(jobs_client.async_run_job_and_wait(job_name='fhir_import_export', job_input=args, spinner=spinner)) + _ = asyncio.run( + jobs_client.async_run_job_and_wait( + job_name="fhir_import_export", job_input=args, spinner=spinner + ) + ) else: - _ = jobs_client.create_job('fhir_import_export', args) + _ = jobs_client.create_job("fhir_import_export", args) if not isinstance(_, dict): - _ = {'output': _} - if isinstance(_['output'], str): + _ = {"output": _} + if isinstance(_["output"], str): try: - _['output'] = json.loads(_['output']) + _["output"] = json.loads(_["output"]) except json.JSONDecodeError: pass return _ @@ -149,7 +185,9 @@ def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str # monkey patch for gen3.jobs.Gen3Jobs.async_run_job_and_wait # make it less noisy and sleep less (max of 30 seconds) -async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=None, **kwargs): +async def async_run_job_and_wait( + self, job_name, job_input, spinner=None, _ssl=None, **kwargs +): """ Asynchronous function to create a job, wait for output, and return. Will sleep in a linear delay until the job is done, starting with 1 second. @@ -188,12 +226,12 @@ async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=N if status.get("status") != "Completed": # write failed output to log file before raising exception response = await self.async_get_output(job_create_response.get("uid")) - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} - log_msg.update(response) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - + with open("logs/publish.log", "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} + log_msg.update(response) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + raise Exception(f"Job status not complete: {status.get('status')}") response = await self.async_get_output(job_create_response.get("uid")) diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py index e7c4cc40..c0222bd8 100644 --- a/gen3_tracker/git/cli.py +++ b/gen3_tracker/git/cli.py @@ -34,20 +34,36 @@ import gen3_tracker from gen3_tracker import Config -from gen3_tracker.common import CLIOutput, INFO_COLOR, ERROR_COLOR, is_url, filter_dicts, SUCCESS_COLOR, \ - read_ndjson_file +from gen3_tracker.common import ( + CLIOutput, + INFO_COLOR, + ERROR_COLOR, + is_url, + filter_dicts, + SUCCESS_COLOR, + read_ndjson_file, +) from gen3_tracker.config import init as config_init, ensure_auth from gen3_tracker.gen3.buckets import get_buckets -from gen3_tracker.git import git_files, to_indexd, to_remote, dvc_data, \ - data_file_changes, modified_date, git_status, DVC, MISSING_G3T_MESSAGE -from gen3_tracker.git import run_command, \ - MISSING_GIT_MESSAGE, git_repository_exists +from gen3_tracker.git import ( + git_files, + to_indexd, + to_remote, + dvc_data, + data_file_changes, + modified_date, + git_status, + DVC, + MISSING_G3T_MESSAGE, +) +from gen3_tracker.git import run_command, MISSING_GIT_MESSAGE, git_repository_exists from gen3_tracker.git.adder import url_path, write_dvc_file from gen3_tracker.git.cloner import ls from gen3_tracker.git.initializer import initialize_project_server_side from gen3_tracker.git.snapshotter import push_snapshot from gen3_tracker.meta.skeleton import meta_index, get_data_from_meta from gen3_tracker.common import _default_json_serializer + # logging.basicConfig(level=logging.INFO) _logger = logging.getLogger(__package__) @@ -66,14 +82,20 @@ # if debug: # _logger.setLevel(logging.DEBUG) + def _check_parameters(config, project_id): """Common parameter checks.""" if not project_id: raise AssertionError("project_id is required") - if not project_id.count('-') == 1: - raise AssertionError(f"project_id must be of the form program-project {project_id}") + if not project_id.count("-") == 1: + raise AssertionError( + f"project_id must be of the form program-project {project_id}" + ) if not config.gen3.profile: - click.secho("No profile set. Continuing in disconnected mode. Use `set profile `", fg='yellow') + click.secho( + "No profile set. Continuing in disconnected mode. Use `set profile `", + fg="yellow", + ) @click.group(cls=gen3_tracker.NaturalOrderGroup) @@ -84,10 +106,34 @@ def cli(): @cli.command(context_settings=dict(ignore_unknown_options=True)) # @click.option('--force', '-f', is_flag=True, help='Force the init.') -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID") -@click.option('--approve', '-a', help='Approve the addition (privileged)', is_flag=True, default=False, show_default=True) -@click.option('--no-server', help='Skip server setup (testing)', is_flag=True, default=False, show_default=True, hidden=True) -@click.option('--debug', is_flag=True, envvar='G3T_DEBUG', help='Enable debug mode. G3T_DEBUG environment variable can also be used.') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", +) +@click.option( + "--approve", + "-a", + help="Approve the addition (privileged)", + is_flag=True, + default=False, + show_default=True, +) +@click.option( + "--no-server", + help="Skip server setup (testing)", + is_flag=True, + default=False, + show_default=True, + hidden=True, +) +@click.option( + "--debug", + is_flag=True, + envvar="G3T_DEBUG", + help="Enable debug mode. G3T_DEBUG environment variable can also be used.", +) @click.pass_obj def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: bool): """Initialize a new repository.""" @@ -113,15 +159,29 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: ensure_git_repo(config) if not no_server: - init_logs, approval_needed = initialize_project_server_side(config, project_id) + init_logs, approval_needed = initialize_project_server_side( + config, project_id + ) logs.extend(init_logs) if approve and approval_needed: - run_command('g3t collaborator approve --all', dry_run=config.dry_run, no_capture=True) + run_command( + "g3t collaborator approve --all", + dry_run=config.dry_run, + no_capture=True, + ) elif approval_needed and not approve: - click.secho("Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", + fg=INFO_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Approval not needed. Project {project_id} has approved read/write", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Approval not needed. Project {project_id} has approved read/write", + fg=INFO_COLOR, + file=sys.stderr, + ) if config.debug: for _ in logs: @@ -135,26 +195,34 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: def ensure_git_repo(config): # ensure a git repo - if pathlib.Path('.git').exists(): + if pathlib.Path(".git").exists(): return - if not pathlib.Path('.git').exists(): - command = 'git init' + if not pathlib.Path(".git").exists(): + command = "git init" run_command(command, dry_run=config.dry_run, no_capture=True) else: - click.secho('Git repository already exists.', fg=INFO_COLOR, file=sys.stderr) - pathlib.Path('MANIFEST').mkdir(exist_ok=True) - pathlib.Path('META').mkdir(exist_ok=True) - pathlib.Path('LOGS').mkdir(exist_ok=True) - with open('.gitignore', 'w') as f: - f.write('LOGS/\n') - f.write('.g3t/state/\n') # legacy - with open('META/README.md', 'w') as f: - f.write('This directory contains metadata files for the data files in the MANIFEST directory.\n') - with open('MANIFEST/README.md', 'w') as f: - f.write('This directory contains dvc files that reference the data files.\n') - run_command('git add MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) - run_command('git commit -m "initialized" MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True) + click.secho("Git repository already exists.", fg=INFO_COLOR, file=sys.stderr) + pathlib.Path("MANIFEST").mkdir(exist_ok=True) + pathlib.Path("META").mkdir(exist_ok=True) + pathlib.Path("LOGS").mkdir(exist_ok=True) + with open(".gitignore", "w") as f: + f.write("LOGS/\n") + f.write(".g3t/state/\n") # legacy + with open("META/README.md", "w") as f: + f.write( + "This directory contains metadata files for the data files in the MANIFEST directory.\n" + ) + with open("MANIFEST/README.md", "w") as f: + f.write("This directory contains dvc files that reference the data files.\n") + run_command( + "git add MANIFEST META .gitignore .g3t", dry_run=config.dry_run, no_capture=True + ) + run_command( + 'git commit -m "initialized" MANIFEST META .gitignore .g3t', + dry_run=config.dry_run, + no_capture=True, + ) # Note: The commented code below is an example of how to use context settings to allow extra arguments. @@ -165,8 +233,8 @@ def ensure_git_repo(config): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('target') -@click.option('--no-git-add', default=False, is_flag=True, hidden=True) +@click.argument("target") +@click.option("--no-git-add", default=False, is_flag=True, hidden=True) @click.pass_context def add(ctx, target, no_git_add: bool): """ @@ -211,10 +279,10 @@ def add(ctx, target, no_git_add: bool): assert not config.no_config_found, MISSING_G3T_MESSAGE # needs to have a target - assert target, 'No targets specified.' + assert target, "No targets specified." # Expand wildcard paths - if is_url(target) and not target.startswith('file://'): + if is_url(target) and not target.startswith("file://"): all_changed_files, updates = add_url(ctx, target) else: all_changed_files, updates = add_file(ctx, target) @@ -224,8 +292,12 @@ def add(ctx, target, no_git_add: bool): # adds = [str(_) for _ in all_changed_files if _ not in updates] if adds and not no_git_add: - adds.append('.gitignore') - run_command(f'git add {" ".join([str(_) for _ in adds])}', dry_run=config.dry_run, no_capture=True) + adds.append(".gitignore") + run_command( + f'git add {" ".join([str(_) for _ in adds])}', + dry_run=config.dry_run, + no_capture=True, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -234,9 +306,15 @@ def add(ctx, target, no_git_add: bool): @cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True)) -@click.argument('targets', nargs=-1) -@click.option('--message', '-m', help='The commit message.') -@click.option('--all', '-a', is_flag=True, default=False, help='Automatically stage files that have been modified and deleted.') +@click.argument("targets", nargs=-1) +@click.option("--message", "-m", help="The commit message.") +@click.option( + "--all", + "-a", + is_flag=True, + default=False, + help="Automatically stage files that have been modified and deleted.", +) @click.pass_context def commit(ctx, targets, message, all): """Commit the changes @@ -269,11 +347,11 @@ def status(config): """Show changed files.""" soft_error = False try: - with Halo(text='Scanning', spinner='line', placement='right', color='white'): - manifest_path = pathlib.Path('MANIFEST') + with Halo(text="Scanning", spinner="line", placement="right", color="white"): + manifest_path = pathlib.Path("MANIFEST") changes = data_file_changes(manifest_path) # Get a list of all files in the MANIFEST directory and its subdirectories - files = glob.glob('MANIFEST/**/*.dvc', recursive=True) + files = glob.glob("MANIFEST/**/*.dvc", recursive=True) # Filter out directories, keep only files files = [f for f in files if os.path.isfile(f)] if not files: @@ -284,28 +362,46 @@ def status(config): document_reference_mtime = 0 - if pathlib.Path('META/DocumentReference.ndjson').exists(): + if pathlib.Path("META/DocumentReference.ndjson").exists(): # Get the modification time - document_reference_mtime = os.path.getmtime('META/DocumentReference.ndjson') + document_reference_mtime = os.path.getmtime( + "META/DocumentReference.ndjson" + ) latest_file_mtime = os.path.getmtime(latest_file) if document_reference_mtime < latest_file_mtime: - document_reference_mtime = datetime.fromtimestamp(document_reference_mtime).isoformat() - latest_file_mtime = datetime.fromtimestamp(latest_file_mtime).isoformat() - click.secho(f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson", fg=INFO_COLOR, file=sys.stderr) + document_reference_mtime = datetime.fromtimestamp( + document_reference_mtime + ).isoformat() + latest_file_mtime = datetime.fromtimestamp( + latest_file_mtime + ).isoformat() + click.secho( + f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True if changes: - click.secho(f"# There are {len(changes)} data files that you need to update via `g3t add`:", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"# There are {len(changes)} data files that you need to update via `g3t add`:", + fg=INFO_COLOR, + file=sys.stderr, + ) cwd = pathlib.Path.cwd() for _ in changes: - data_path = str(_.data_path).replace(str(cwd) + '/', "") - click.secho(f' g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}', fg=INFO_COLOR, file=sys.stderr) + data_path = str(_.data_path).replace(str(cwd) + "/", "") + click.secho( + f" g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}", + fg=INFO_COLOR, + file=sys.stderr, + ) soft_error = True else: click.secho("No data file changes.", fg=INFO_COLOR, file=sys.stderr) - _ = run_command('git status') + _ = run_command("git status") print(_.stdout) if soft_error: exit(1) @@ -316,27 +412,66 @@ def status(config): @cli.command() -@click.option('--step', - type=click.Choice(['index', 'upload', 'publish', 'all', 'fhir']), - default='all', - show_default=True, - help='The step to run ' - ) -@click.option('--transfer-method', - type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), - default='gen3', - show_default=True, - help='The upload method.' - ) -@click.option('--overwrite', is_flag=True, help='(index): Overwrite previously submitted files.') -@click.option('--wait', default=True, is_flag=True, show_default=True, help="(publish): Wait for metadata completion.") -@click.option('--dry-run', show_default=True, default=False, is_flag=True, help='Print the commands that would be executed, but do not execute them.') -@click.option('--re-run', show_default=True, default=False, is_flag=True, help='Re-run the last publish step') -@click.option('--fhir-server', show_default=True, default=False, is_flag=True, help='Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server') -@click.option('--debug', is_flag=True) -@click.option('--skip_validate', is_flag=True, help='Skip validation of the metadata') +@click.option( + "--step", + type=click.Choice(["index", "upload", "publish", "all", "fhir"]), + default="all", + show_default=True, + help="The step to run ", +) +@click.option( + "--transfer-method", + type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()), + default="gen3", + show_default=True, + help="The upload method.", +) +@click.option( + "--overwrite", is_flag=True, help="(index): Overwrite previously submitted files." +) +@click.option( + "--wait", + default=True, + is_flag=True, + show_default=True, + help="(publish): Wait for metadata completion.", +) +@click.option( + "--dry-run", + show_default=True, + default=False, + is_flag=True, + help="Print the commands that would be executed, but do not execute them.", +) +@click.option( + "--re-run", + show_default=True, + default=False, + is_flag=True, + help="Re-run the last publish step", +) +@click.option( + "--fhir-server", + show_default=True, + default=False, + is_flag=True, + help="Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server", +) +@click.option("--debug", is_flag=True) +@click.option("--skip_validate", is_flag=True, help="Skip validation of the metadata") @click.pass_context -def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wait: bool, dry_run: bool, fhir_server: bool, debug: bool, skip_validate: bool): +def push( + ctx, + step: str, + transfer_method: str, + overwrite: bool, + re_run: bool, + wait: bool, + dry_run: bool, + fhir_server: bool, + debug: bool, + skip_validate: bool, +): """Push changes to the remote repository. \b steps: @@ -362,27 +497,35 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa raise NotImplementedError("Re-run not implemented") try: - with Halo(text='Checking', spinner='line', placement='right', color='white'): + with Halo( + text="Checking", spinner="line", placement="right", color="white" + ): run_command("g3t status") if not skip_validate: run_command("g3t meta validate", no_capture=True) except Exception as e: - click.secho("Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr) + click.secho( + "Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr + ) click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) if config.debug: raise exit(1) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): # check git status branch, uncommitted = git_status() - assert not uncommitted, "Uncommitted changes found. Please commit or stash them first." + assert ( + not uncommitted + ), "Uncommitted changes found. Please commit or stash them first." # check dvc vs external files - changes = data_file_changes(pathlib.Path('MANIFEST')) - assert not changes, f"# There are {len(changes)} data files that you need to update. See `g3t status`" + changes = data_file_changes(pathlib.Path("MANIFEST")) + assert ( + not changes + ), f"# There are {len(changes)} data files that you need to update. See `g3t status`" # initialize dvc objects with this project_id committed_files, dvc_objects = manifest(config.gen3.project_id) @@ -392,133 +535,232 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa bucket_name = get_program_bucket(config=config, auth=auth) # check for new files - records = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)['records'] - dids = {_['did']: _['updated_date'] for _ in records} + records = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + )["records"] + dids = {_["did"]: _["updated_date"] for _ in records} new_dvc_objects = [_ for _ in dvc_objects if _.object_id not in dids] - updated_dvc_objects = [_ for _ in dvc_objects if _.object_id in dids and _.out.modified > dids[_.object_id]] + updated_dvc_objects = [ + _ + for _ in dvc_objects + if _.object_id in dids and _.out.modified > dids[_.object_id] + ] if step not in ["publish", "fhir"]: if not overwrite: dvc_objects = new_dvc_objects + updated_dvc_objects - assert dvc_objects, "No new files to index. Use --overwrite to force" - - click.secho(f'Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files', fg=INFO_COLOR, file=sys.stderr) + assert ( + dvc_objects + ), "No new files to index. Use --overwrite to force" + + click.secho( + f"Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files", + fg=INFO_COLOR, + file=sys.stderr, + ) if updated_dvc_objects: - click.secho(f'Found {len(updated_dvc_objects)} updated files. overwriting', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Found {len(updated_dvc_objects)} updated files. overwriting", + fg=INFO_COLOR, + file=sys.stderr, + ) overwrite = True - if step in ['index', 'all']: + if step in ["index", "all"]: # send to index if dry_run: - click.secho("Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr) + click.secho( + "Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr + ) yaml.dump( { - 'new': [_.model_dump() for _ in new_dvc_objects], - 'updated': [_.model_dump() for _ in updated_dvc_objects], + "new": [_.model_dump() for _ in new_dvc_objects], + "updated": [_.model_dump() for _ in updated_dvc_objects], }, - sys.stdout + sys.stdout, ) return for _ in tqdm( - to_indexd( - dvc_objects=dvc_objects, - auth=auth, - project_id=config.gen3.project_id, - bucket_name=bucket_name, - overwrite=overwrite, - restricted_project_id=None - - ), - desc='Indexing', unit='file', leave=False, total=len(committed_files)): + to_indexd( + dvc_objects=dvc_objects, + auth=auth, + project_id=config.gen3.project_id, + bucket_name=bucket_name, + overwrite=overwrite, + restricted_project_id=None, + ), + desc="Indexing", + unit="file", + leave=False, + total=len(committed_files), + ): pass - click.secho(f'Indexed {len(committed_files)} files.', fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Indexed {len(committed_files)} files.", fg=INFO_COLOR, file=sys.stderr + ) - if step in ['upload', 'all']: - click.secho(f'Checking {len(dvc_objects)} files for upload via {transfer_method}', fg=INFO_COLOR, file=sys.stderr) + if step in ["upload", "all"]: + click.secho( + f"Checking {len(dvc_objects)} files for upload via {transfer_method}", + fg=INFO_COLOR, + file=sys.stderr, + ) to_remote( upload_method=transfer_method, dvc_objects=dvc_objects, bucket_name=bucket_name, profile=config.gen3.profile, dry_run=config.dry_run, - work_dir=config.work_dir + work_dir=config.work_dir, ) - if fhir_server or step in ['fhir']: + if fhir_server or step in ["fhir"]: """Either there exists a Bundle.ndjson file in META signifying a revision to the data, or there is no bundle.json, - signifying that the data in the META directory should be upserted into gen34""" - meta_dir = pathlib.Path('META') + signifying that the data in the META directory should be upserted into gen34 + """ + meta_dir = pathlib.Path("META") bundle_file = meta_dir / "Bundle.ndjson" if os.path.isfile(bundle_file): - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): with open(bundle_file, "r") as file: json_string = file.read() bundle_data = orjson.loads(json_string) headers = {"Authorization": f"{auth._access_token}"} - result = requests.delete(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_data, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + result = requests.delete( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_data, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return project_id = config.gen3.project_id now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - bundle = Bundle(type='transaction', timestamp=now) - bundle.identifier = Identifier(value=project_id, system="https://aced-idp.org/project_id") + bundle = Bundle(type="transaction", timestamp=now) + bundle.identifier = Identifier( + value=project_id, system="https://aced-idp.org/project_id" + ) from gen3_tracker import ACED_NAMESPACE + bundle.id = str(uuid.uuid5(ACED_NAMESPACE, f"Bundle/{project_id}/{now}")) bundle.entry = [] for _ in get_data_from_meta(): bundle_entry = BundleEntry() # See https://build.fhir.org/bundle-definitions.html#Bundle.entry.request.url - bundle_entry.request = BundleEntryRequest(url=f"{_['resourceType']}/{_['id']}", method='PUT') + bundle_entry.request = BundleEntryRequest( + url=f"{_['resourceType']}/{_['id']}", method="PUT" + ) bundle_entry.resource = _ bundle.entry.append(bundle_entry) headers = {"Authorization": f"{auth._access_token}"} bundle_dict = bundle.dict() - with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'): - result = requests.put(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_dict, default=_default_json_serializer, - option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers) - - with open("logs/publish.log", 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"} - click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') + with Halo( + text="Sending to FHIR Server", + spinner="line", + placement="right", + color="white", + ): + result = requests.put( + url=f"{auth.endpoint}/Bundle", + data=orjson.dumps( + bundle_dict, + default=_default_json_serializer, + option=orjson.OPT_APPEND_NEWLINE, + ).decode(), + headers=headers, + ) + + with open("logs/publish.log", "a") as f: + log_msg = { + "timestamp": datetime.now(pytz.UTC).isoformat(), + "result": f"{result}", + } + click.secho( + "Published project. See logs/publish.log", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") return - if step in ['publish', 'all'] and not fhir_server: + if step in ["publish", "all"] and not fhir_server: log_path = "logs/publish.log" - with Halo(text='Uploading snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Uploading snapshot", + spinner="line", + placement="right", + color="white", + ): # push the snapshot of the `.git` sub-directory in the current directory push_snapshot(config, auth=auth) - if transfer_method == 'gen3': + if transfer_method == "gen3": try: # legacy, "old" fhir_import_export use publish_commits to publish the META - with Halo(text='Publishing', spinner='line', placement='right', color='white') as spinner: - _ = publish_commits(config, wait=wait, auth=auth, bucket_name=bucket_name, spinner=spinner) + with Halo( + text="Publishing", + spinner="line", + placement="right", + color="white", + ) as spinner: + _ = publish_commits( + config, + wait=wait, + auth=auth, + bucket_name=bucket_name, + spinner=spinner, + ) except Exception as e: - click.secho(f'Unable to publish project. See {log_path} for more info', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Unable to publish project. See {log_path} for more info", + fg=ERROR_COLOR, + file=sys.stderr, + ) raise e # print success message and save logs - with open(log_path, 'a') as f: - log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()} + with open(log_path, "a") as f: + log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()} log_msg.update(_) - f.write(json.dumps(log_msg, separators=(',', ':'))) - f.write('\n') - click.secho(f'Published project. Logs found at {log_path}', fg=SUCCESS_COLOR, file=sys.stderr) + f.write(json.dumps(log_msg, separators=(",", ":"))) + f.write("\n") + click.secho( + f"Published project. Logs found at {log_path}", + fg=SUCCESS_COLOR, + file=sys.stderr, + ) else: - click.secho(f'Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading', fg=ERROR_COLOR, file=sys.stderr) + click.secho( + f"Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading", + fg=ERROR_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -529,7 +771,7 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa def manifest(project_id) -> tuple[list[str], list[DVC]]: """Get the committed files and their dvc objects. Initialize dvc objects with this project_id""" - committed_files = [_ for _ in git_files() if _.endswith('.dvc')] + committed_files = [_ for _ in git_files() if _.endswith(".dvc")] dvc_objects = [_ for _ in dvc_data(committed_files)] for _ in dvc_objects: _.project_id = project_id @@ -537,26 +779,43 @@ def manifest(project_id) -> tuple[list[str], list[DVC]]: @cli.command() -@click.option('--remote', - type=click.Choice(['gen3', 's3', 'ln', 'scp']), - default='gen3', - show_default=True, - help='Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy' - ) -@click.option('--worker_count', '-w', default=(multiprocessing.cpu_count() - 1), show_default=True, - type=int, - help='Number of workers to use.') -@click.option('--data-only', help='Ignore git snapshot', is_flag=True, default=False, show_default=True) +@click.option( + "--remote", + type=click.Choice(["gen3", "s3", "ln", "scp"]), + default="gen3", + show_default=True, + help="Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy", +) +@click.option( + "--worker_count", + "-w", + default=(multiprocessing.cpu_count() - 1), + show_default=True, + type=int, + help="Number of workers to use.", +) +@click.option( + "--data-only", + help="Ignore git snapshot", + is_flag=True, + default=False, + show_default=True, +) @click.pass_obj def pull(config: Config, remote: str, worker_count: int, data_only: bool): - """ Fetch from and integrate with a remote repository.""" + """Fetch from and integrate with a remote repository.""" try: - with Halo(text='Authorizing', spinner='line', placement='right', color='white'): + with Halo(text="Authorizing", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) if not data_only: - with Halo(text='Pulling git snapshot', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling git snapshot", + spinner="line", + placement="right", + color="white", + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) @@ -567,35 +826,52 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): # Rename the directory shutil.move(".git", new_dir_name) # unzip the snapshot - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') - click.secho(f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + zip_ref.extractall(".") + click.secho( + f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr + ) manifest_files, dvc_objects = manifest(config.gen3.project_id) - if remote == 'gen3': + if remote == "gen3": # download the files - with Halo(text='Pulling from gen3', spinner='line', placement='right', color='white'): - object_ids = [{'object_id': _.object_id} for _ in dvc_objects] # if not _.out.source_url - current_time = datetime.now().strftime("%Y%m%d%H%M%S") # Format datetime as you need - manifest_file = pathlib.Path(config.work_dir) / f'manifest-{current_time}.json' - with open(manifest_file, 'w') as fp: + with Halo( + text="Pulling from gen3", + spinner="line", + placement="right", + color="white", + ): + object_ids = [ + {"object_id": _.object_id} for _ in dvc_objects + ] # if not _.out.source_url + current_time = datetime.now().strftime( + "%Y%m%d%H%M%S" + ) # Format datetime as you need + manifest_file = ( + pathlib.Path(config.work_dir) / f"manifest-{current_time}.json" + ) + with open(manifest_file, "w") as fp: json.dump(object_ids, fp) - cmd = f'gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}' + cmd = f"gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}" print(cmd) run_command(cmd, no_capture=True) - elif remote == 's3': - with Halo(text='Pulling from s3', spinner='line', placement='right', color='white'): + elif remote == "s3": + with Halo( + text="Pulling from s3", spinner="line", placement="right", color="white" + ): if not auth: auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) object_ids = [_.object_id for _ in dvc_objects] - for _ in results['records']: - if _['did'] in object_ids: - print('aws s3 cp ', _['urls'][0], _['file_name']) - elif remote == 'ln': + for _ in results["records"]: + if _["did"] in object_ids: + print("aws s3 cp ", _["urls"][0], _["file_name"]) + elif remote == "ln": for _ in dvc_objects: print(f"ln -s {_.out.realpath} {_.out.path}") - elif remote == 'scp': + elif remote == "scp": for _ in dvc_objects: print(f"scp USER@HOST:{_.out.realpath} {_.out.path}") @@ -609,56 +885,79 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool): @cli.command() -@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", metavar='PROJECT_ID') +@click.argument( + "project_id", + default=None, + required=False, + envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", + metavar="PROJECT_ID", +) @click.pass_obj def clone(config, project_id): """Clone a repository into a new directory""" try: config.gen3.project_id = project_id - assert not pathlib.Path(project_id).exists(), f"{project_id} already exists. Please remove it first." + assert not pathlib.Path( + project_id + ).exists(), f"{project_id} already exists. Please remove it first." os.mkdir(project_id) os.chdir(project_id) - with Halo(text='Cloning', spinner='line', placement='right', color='white'): + with Halo(text="Cloning", spinner="line", placement="right", color="white"): auth = gen3_tracker.config.ensure_auth(config=config) snapshot, zip_filepath = download_snapshot(auth, config) - assert not pathlib.Path('.git').exists(), "A git repository already exists. Please remove it, or move to another directory first." + assert not pathlib.Path( + ".git" + ).exists(), "A git repository already exists. Please remove it, or move to another directory first." # unzip - with zipfile.ZipFile(zip_filepath, 'r') as zip_ref: - zip_ref.extractall('.') + with zipfile.ZipFile(zip_filepath, "r") as zip_ref: + zip_ref.extractall(".") # if we just unzipped a .git these directories will exist - expected_dirs = ['.git', 'META', 'MANIFEST'] + expected_dirs = [".git", "META", "MANIFEST"] if not all([pathlib.Path(_).exists() for _ in expected_dirs]): # if not, we have downloaded a legacy SNAPSHOT.zip, so lets migrate the data to the expected drirectories - click.secho(f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", + fg=INFO_COLOR, + file=sys.stderr, + ) # legacy - was this a *SNAPSHOT.zip? - meta_files = (pathlib.Path('studies') / config.gen3.project) + meta_files = pathlib.Path("studies") / config.gen3.project # legacy - was this a *meta.zip? if not meta_files.exists(): - meta_files = pathlib.Path('.') + meta_files = pathlib.Path(".") # create local directories and git [_ for _ in config_init(config, project_id)] ensure_git_repo(config=config) # move ndjson from studies to META - for _ in meta_files.glob('*.ndjson'): - shutil.move(_, 'META/') + for _ in meta_files.glob("*.ndjson"): + shutil.move(_, "META/") # add to git - run_command('git add META/*.*') + run_command("git add META/*.*") # migrate DocumentReferences to MANIFEST references = meta_index() manifest_files = [] - for _ in read_ndjson_file('META/DocumentReference.ndjson'): + for _ in read_ndjson_file("META/DocumentReference.ndjson"): document_reference = DocumentReference.parse_obj(_) - dvc_object = DVC.from_document_reference(config, document_reference, references) - manifest_files.append(write_dvc_file(yaml_data=dvc_object.model_dump(), target=dvc_object.out.path)) + dvc_object = DVC.from_document_reference( + config, document_reference, references + ) + manifest_files.append( + write_dvc_file( + yaml_data=dvc_object.model_dump(), + target=dvc_object.out.path, + ) + ) # Get the current time in seconds since the epoch current_time = time.time() # Update the access and modification times of the file - os.utime('META/DocumentReference.ndjson', (current_time, current_time)) + os.utime("META/DocumentReference.ndjson", (current_time, current_time)) - run_command('git add MANIFEST/') - run_command('git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore') + run_command("git add MANIFEST/") + run_command( + 'git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore' + ) shutil.move(zip_filepath, config.work_dir / zip_filepath.name) click.secho(f"Cloned {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr) @@ -673,27 +972,30 @@ def clone(config, project_id): def download_snapshot(auth, config): """Download the latest snapshot.""" from gen3_tracker.git.cloner import find_latest_snapshot + snapshot = find_latest_snapshot(auth, config) gen3_file = Gen3File(auth) - pathlib.Path(snapshot['file_name']).parent.mkdir(exist_ok=True, parents=True) - ok = gen3_file.download_single(snapshot['did'], '.') + pathlib.Path(snapshot["file_name"]).parent.mkdir(exist_ok=True, parents=True) + ok = gen3_file.download_single(snapshot["did"], ".") assert ok, f"Failed to download {snapshot['did']}" - zip_filepath = pathlib.Path(snapshot['file_name']) + zip_filepath = pathlib.Path(snapshot["file_name"]) assert zip_filepath.exists(), f"Failed to download {snapshot['did']}" return snapshot, zip_filepath def file_name_or_guid(config, object_id) -> (str, pathlib.Path): """Check if the object_id is a file name or a GUID.""" - guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$') + guid_pattern = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" + ) path = None if not guid_pattern.match(object_id): if not is_url(object_id): - path = pathlib.Path('MANIFEST') / (object_id + ".dvc") + path = pathlib.Path("MANIFEST") / (object_id + ".dvc") else: - path = pathlib.Path('MANIFEST') / (url_path(object_id) + ".dvc") + path = pathlib.Path("MANIFEST") / (url_path(object_id) + ".dvc") if path.exists(): dvc_object = next(iter(dvc_data([str(path)])), None) @@ -702,20 +1004,33 @@ def file_name_or_guid(config, object_id) -> (str, pathlib.Path): object_id = dvc_object.object_id else: raise ValueError( - f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID).") + f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID)." + ) else: committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." - path = pathlib.Path('MANIFEST') / (dvc_objects[0].out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_objects[0].out.path + ".dvc") assert guid_pattern.match(object_id), f"{object_id} was not found in MANIFEST." return object_id, path @cli.command("ls") -@click.option('--long', '-l', 'long_flag', default=False, is_flag=True, help='Long listing format.', show_default=True) -@click.argument('target', default=None, required=False) +@click.option( + "--long", + "-l", + "long_flag", + default=False, + is_flag=True, + help="Long listing format.", + show_default=True, +) +@click.argument("target", default=None, required=False) @click.pass_obj def ls_cli(config: Config, long_flag: bool, target: str): """List files in the repository. @@ -724,10 +1039,14 @@ def ls_cli(config: Config, long_flag: bool, target: str): """ try: - with Halo(text='Pulling file list', spinner='line', placement='right', color='white'): + with Halo( + text="Pulling file list", spinner="line", placement="right", color="white" + ): auth = gen3_tracker.config.ensure_auth(config=config) - results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth) - indexd_records = results['records'] + results = ls( + config, metadata={"project_id": config.gen3.project_id}, auth=auth + ) + indexd_records = results["records"] committed_files, dvc_objects = manifest(config.gen3.project_id) # list all data files dvc_objects = {_.object_id: _ for _ in dvc_objects} @@ -742,44 +1061,49 @@ def _dvc_meta(dvc_object, full=False) -> dict: _[k] = v else: _ = dvc_object.model_dump(exclude_none=True) - _['object_id'] = dvc_object.object_id + _["object_id"] = dvc_object.object_id return _ if not long_flag: indexd_records = [ { - 'did': _['did'], - 'file_name': _['file_name'], - 'indexd_created_date': _['created_date'], - 'meta': _dvc_meta(dvc_objects.get(_['did'], None)), - 'urls': _['urls'] - } for _ in indexd_records + "did": _["did"], + "file_name": _["file_name"], + "indexd_created_date": _["created_date"], + "meta": _dvc_meta(dvc_objects.get(_["did"], None)), + "urls": _["urls"], + } + for _ in indexd_records ] - bucket_ids = {_['did'] for _ in indexd_records} + bucket_ids = {_["did"] for _ in indexd_records} - uncommitted = pathlib.Path('MANIFEST').glob('**/*.dvc') + uncommitted = pathlib.Path("MANIFEST").glob("**/*.dvc") uncommitted = [str(_) for _ in uncommitted] uncommitted = [str(_) for _ in uncommitted if _ not in committed_files] uncommitted = [_.model_dump(exclude_none=True) for _ in dvc_data(uncommitted)] _ = { - 'bucket': indexd_records, - 'committed': [_dvc_meta(v, full=True) for k, v in dvc_objects.items() if k not in bucket_ids], - 'uncommitted': uncommitted + "bucket": indexd_records, + "committed": [ + _dvc_meta(v, full=True) + for k, v in dvc_objects.items() + if k not in bucket_ids + ], + "uncommitted": uncommitted, } if target: # Escape special characters and replace wildcard '*' with '.*' for regex pattern pattern = re.escape(target).replace("\\*", ".*") filtered = { - 'bucket': filter_dicts(_.get('bucket', []), pattern), - 'committed': filter_dicts(_.get('committed', []), pattern), - 'uncommitted': filter_dicts(_.get('uncommitted', []), pattern) + "bucket": filter_dicts(_.get("bucket", []), pattern), + "committed": filter_dicts(_.get("committed", []), pattern), + "uncommitted": filter_dicts(_.get("uncommitted", []), pattern), } _ = filtered - if config.output.format == 'json': + if config.output.format == "json": print(json.dumps(_, indent=2)) else: yaml.dump(_, sys.stdout, default_flow_style=False) @@ -791,7 +1115,7 @@ def _dvc_meta(dvc_object, full=False) -> dict: @cli.command() -@click.argument('object_id', metavar='') +@click.argument("object_id", metavar="") @click.pass_obj def rm(config: Config, object_id: str): """Remove a single file from the server index, and MANIFEST. Does not alter META. @@ -800,29 +1124,50 @@ def rm(config: Config, object_id: str): """ try: - with Halo(text='Searching', spinner='line', placement='right', color='white'): + with Halo(text="Searching", spinner="line", placement="right", color="white"): object_id, path = file_name_or_guid(config, object_id) - with Halo(text='Deleting from server', spinner='line', placement='right', color='white'): + with Halo( + text="Deleting from server", + spinner="line", + placement="right", + color="white", + ): auth = gen3_tracker.config.ensure_auth(config=config) index = Gen3Index(auth) result = index.delete_record(object_id) if not result: if not path: - path = '' - click.secho(f"Failed to delete {object_id} from server. {path}", fg=ERROR_COLOR, file=sys.stderr) + path = "" + click.secho( + f"Failed to delete {object_id} from server. {path}", + fg=ERROR_COLOR, + file=sys.stderr, + ) else: - click.secho(f"Deleted {object_id} from server. {path}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {object_id} from server. {path}", + fg=INFO_COLOR, + file=sys.stderr, + ) - with Halo(text='Scanning', spinner='line', placement='right', color='white'): + with Halo(text="Scanning", spinner="line", placement="right", color="white"): committed_files, dvc_objects = manifest(config.gen3.project_id) - dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id] + dvc_objects = [ + dvc_object + for dvc_object in dvc_objects + if dvc_object.object_id == object_id + ] assert dvc_objects, f"{object_id} not found in MANIFEST." dvc_object = dvc_objects[0] - path = pathlib.Path('MANIFEST') / (dvc_object.out.path + ".dvc") + path = pathlib.Path("MANIFEST") / (dvc_object.out.path + ".dvc") assert path.exists(), f"{path} not found" path.unlink() - click.secho(f"Deleted {path} from MANIFEST. Please adjust META resources", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"Deleted {path} from MANIFEST. Please adjust META resources", + fg=INFO_COLOR, + file=sys.stderr, + ) except Exception as e: click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr) @@ -858,10 +1203,10 @@ def ping(config: Config): msgs.append(str(e)) ok = False except Gen3AuthError as e: - msg = str(e).split(':')[0] + msg = str(e).split(":")[0] msgs.append(msg) msg2 = str(e).split('

')[-1] - msg2 = msg2.split('

')[0] + msg2 = msg2.split("

")[0] msgs.append(msg2) ok = False @@ -871,34 +1216,42 @@ def ping(config: Config): _ = "Configuration ERROR: " output.exit_code = 1 - _ = {'msg': _ + ', '.join(msgs)} + _ = {"msg": _ + ", ".join(msgs)} if auth: - _['endpoint'] = auth.endpoint - user_info = auth.curl('/user/user').json() - _['username'] = user_info['username'] + _["endpoint"] = auth.endpoint + user_info = auth.curl("/user/user").json() + _["username"] = user_info["username"] buckets = get_buckets(config=config) bucket_info = {} program_info = defaultdict(list) - for k, v in buckets['S3_BUCKETS'].items(): + for k, v in buckets["S3_BUCKETS"].items(): bucket_info[k] = {} - if 'programs' not in v: + if "programs" not in v: bucket_info[k] = "No `programs` found" - click.secho(f"WARNING: No `programs` found for bucket {k}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: No `programs` found for bucket {k}", + fg=INFO_COLOR, + file=sys.stderr, + ) continue - bucket_info[k] = ",".join(v['programs']) - for program in v['programs']: + bucket_info[k] = ",".join(v["programs"]) + for program in v["programs"]: program_info[program].append(k) - _['bucket_programs'] = bucket_info + _["bucket_programs"] = bucket_info for k, v in program_info.items(): if len(v) > 1: - click.secho(f"WARNING: {k} is in multiple buckets: {', '.join(v)}", fg=INFO_COLOR, file=sys.stderr) + click.secho( + f"WARNING: {k} is in multiple buckets: {', '.join(v)}", + fg=INFO_COLOR, + file=sys.stderr, + ) - assert 'authz' in user_info, "No authz found" + assert "authz" in user_info, "No authz found" authz_info = defaultdict(dict) - for k, v in user_info['authz'].items(): - authz_info[k] = ",".join(set([_['method'] for _ in v])) - _['your_access'] = dict(authz_info) + for k, v in user_info["authz"].items(): + authz_info[k] = ",".join(set([_["method"] for _ in v])) + _["your_access"] = dict(authz_info) output.update(_) diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py index 888cf903..c6beda14 100644 --- a/gen3_tracker/meta/entities.py +++ b/gen3_tracker/meta/entities.py @@ -246,7 +246,6 @@ def scalars(self) -> dict: if (not isinstance(v, list) and not isinstance(v, dict)) } - @computed_field @property def codings(self) -> dict: @@ -261,8 +260,12 @@ def codings(self) -> dict: if isinstance(elem, dict): # TODO: implement hierarchy of codes rather than just taking last code? for value, source in normalize_coding(elem): - if len(v) > 1 and get_nested_value(elem, [source, 0, 'system']): - _codings[elem[source][0]["system"].split("/")[-1]] = value + if len(v) > 1 and get_nested_value( + elem, [source, 0, "system"] + ): + _codings[elem[source][0]["system"].split("/")[-1]] = ( + value + ) else: _codings[k] = value elif isinstance(v, dict): @@ -281,10 +284,15 @@ def identifiers(self) -> dict: if not identifiers_len: return {"identifier": None} elif identifiers_len == 1: - return {"identifier": identifiers[0].get('value')} + return {"identifier": identifiers[0].get("value")} else: - base_identifier = {"identifier": identifiers[0].get('value')} - base_identifier.update({identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers[1:]}) + base_identifier = {"identifier": identifiers[0].get("value")} + base_identifier.update( + { + identifier.get("system").split("/")[-1]: identifier.get("value") + for identifier in identifiers[1:] + } + ) return base_identifier @computed_field @@ -375,7 +383,6 @@ def values(self) -> dict: if "code" in self.resource and "text" in self.resource["code"]: _values["observation_code"] = self.resource["code"]["text"] - assert len(_values) > 0, f"no values found in Observation: {self.resource}" return _values diff --git a/tests/__init__.py b/tests/__init__.py index 204633dd..7ff71bef 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,7 +5,13 @@ from gen3_tracker.cli import cli -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -15,16 +21,20 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print("result.stdout", result.stdout) print("result.output", result.output) print("result.exception", result.exception) print("CWD", pathlib.Path.cwd()) - assert result.exit_code == expected_exit_code, f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index fac2d49a..68c1c087 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -7,7 +7,13 @@ from gen3.query import Gen3Query -def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result: +def run( + runner: CliRunner, + args: list[str], + expected_output: list[str] = [], + expected_exit_code: int = 0, + expected_files: list[pathlib.Path] = [], +) -> Result: """Run a command and check the output, exit code and expected files.""" if isinstance(args, str): args = args.split() @@ -17,13 +23,17 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp expected_files = [expected_files] expected_files = [pathlib.Path(_) for _ in expected_files] - print('------------------------------------------------------------') + print("------------------------------------------------------------") print("g3t " + " ".join(args)) result = runner.invoke(cli, args) print(result.stdout) - assert result.exit_code == expected_exit_code, f"exit_code: {result.exit_code}, expected: {expected_exit_code}" + assert ( + result.exit_code == expected_exit_code + ), f"exit_code: {result.exit_code}, expected: {expected_exit_code}" for line in expected_output: - assert line in result.output, f"output: {result.output}, expected: {expected_output}" + assert ( + line in result.output + ), f"output: {result.output}, expected: {expected_output}" print(f"{line} found in output.") for file in expected_files: assert file.exists(), f"{file} does not exist." @@ -37,11 +47,12 @@ def validate_document_in_grip(did: str, auth=None, project_id=None): if not auth: auth = ensure_auth(config=default()) token = auth.get_access_token() - result = requests.get(f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", - headers={"Authorization": f"bearer {token}"} - ).json() - assert 'data' in result, f"Failed to query grip for {did} {result}" - assert result['data']['gid'] == did + result = requests.get( + f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}", + headers={"Authorization": f"bearer {token}"}, + ).json() + assert "data" in result, f"Failed to query grip for {did} {result}" + assert result["data"]["gid"] == did def validate_document_in_elastic(did, auth): @@ -55,7 +66,7 @@ def validate_document_in_elastic(did, auth): } } """, - variables={"filter": {"AND": [{"IN": {"id": [did]}}]}} + variables={"filter": {"AND": [{"IN": {"id": [did]}}]}}, ) print(result) - assert result['data']['file'][0]['id'] == did + assert result["data"]["file"][0]["id"] == did diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b171120e..8de22443 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -16,7 +16,7 @@ def program() -> str: @pytest.fixture def project() -> str: - project = uuid.uuid4().hex.replace('-', '_') + project = uuid.uuid4().hex.replace("-", "_") return project diff --git a/tests/integration/test_bucket_import.py b/tests/integration/test_bucket_import.py index 5edc31d3..9a0959cc 100644 --- a/tests/integration/test_bucket_import.py +++ b/tests/integration/test_bucket_import.py @@ -42,8 +42,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in SHOULD_SUCCEED: run(runner, _.split()) @@ -60,11 +63,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - for _ in ['bucket', 'committed', 'uncommitted']: + for _ in ["bucket", "committed", "uncommitted"]: assert _ in listing # files should appear in uncommitted - assert len(listing['uncommitted']) == len(SHOULD_SUCCEED) + assert len(listing["uncommitted"]) == len(SHOULD_SUCCEED) # commit the changes run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -72,11 +75,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None: # test the ls command, should now be in committed result = run(runner, ["--debug", "--format", "json", "ls"]) listing = json.loads(result.stdout) - assert len(listing['committed']) == len(SHOULD_SUCCEED) + assert len(listing["committed"]) == len(SHOULD_SUCCEED) # test the ls filter for _ in EXPECTED_MANIFEST_PATHS: - bucket_name = _.split('/')[1] + bucket_name = _.split("/")[1] result = run(runner, ["--debug", "--format", "json", "ls", bucket_name]) listing = json.loads(result.stdout) - assert len(listing['committed']) == 1 + assert len(listing["committed"]) == 1 diff --git a/tests/integration/test_bundle.py b/tests/integration/test_bundle.py index f98d0e8c..d27d99ad 100644 --- a/tests/integration/test_bundle.py +++ b/tests/integration/test_bundle.py @@ -11,21 +11,21 @@ CHANGE_PATIENT = [ "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1-prime", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] # user made a mistake and added the wrong file CHANGE_FILE = [ "--debug add s3://s3-bucket/p1-object-mistake.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"initial commit\"", + '--debug commit -am "initial commit"', "--debug rm s3://s3-bucket/p1-object-mistake.txt", "--debug add s3://s3-bucket/p1-object-correct.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1", "--debug meta init", - "--debug commit -am \"prime commit\"", + '--debug commit -am "prime commit"', ] @@ -37,8 +37,11 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_PATIENT: run(runner, _.split()) @@ -54,8 +57,10 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('Patient') for _ in urls]), "Expected to delete a Patient." - assert any([_.startswith('ResearchSubject') for _ in urls]), "Expected to delete a ResearchSubject." + assert any([_.startswith("Patient") for _ in urls]), "Expected to delete a Patient." + assert any( + [_.startswith("ResearchSubject") for _ in urls] + ), "Expected to delete a ResearchSubject." def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: @@ -66,8 +71,11 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: print(project_id) - run(runner, ["--debug", "init", project_id, "--approve", "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve", "--no-server"], + expected_files=[".g3t", ".git"], + ) for _ in CHANGE_FILE: run(runner, _.split()) @@ -83,4 +91,6 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None: assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected." urls = [_.request.url for _ in bundle.entry] - assert any([_.startswith('DocumentReference') for _ in urls]), "Expected to delete a DocumentReference." + assert any( + [_.startswith("DocumentReference") for _ in urls] + ), "Expected to delete a DocumentReference." diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 86b03fb0..6237edab 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -17,23 +17,36 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # check ping - run(runner, ["--debug", "ping"], expected_output=["bucket_programs", "your_access", "endpoint", "username"]) + run( + runner, + ["--debug", "ping"], + expected_output=["bucket_programs", "your_access", "endpoint", "username"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -49,7 +62,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -60,10 +77,18 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # update the file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello UPDATE\n') + test_file.write_text("hello UPDATE\n") # re-add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) run(runner, ["--debug", "commit", "-am", "updated"]) run(runner, ["--debug", "meta", "validate"]) @@ -71,7 +96,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: run(runner, ["--debug", "meta", "graph"], expected_files=["meta.html"]) # create a dataframe - run(runner, ["--debug", "meta", "dataframe", 'DocumentReference'], expected_files=["DocumentReference.csv"]) + run( + runner, + ["--debug", "meta", "dataframe", "DocumentReference"], + expected_files=["DocumentReference.csv"], + ) # push to the server run(runner, ["--debug", "push"]) @@ -103,12 +132,25 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # check the files exist in the cloned directory run_command("ls -l") - assert Path("my-project-data/hello.txt").exists(), "hello.txt does not exist in the cloned directory." + assert Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist in the cloned directory." # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) # TODO fix `collaborator rm` # arborist logs: "Policy `data_upload` does not exist for user `xxx@xxx.xxx`: not revoking. Check if it is assigned through a group." @@ -116,13 +158,27 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None: # run(runner, ["--debug", "collaborator", "rm", username, "--approve"], expected_output=[username]) # add a user with write permissions - run(runner, ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"]) + run( + runner, + ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"], + ) # add a user from another directory (without config) os.mkdir("empty") os.chdir("empty") program, project = project_id.split("-") - run(runner, ["--debug", "collaborator", "add", "foo2@bar.com", f"/programs/{program}/projects/{project}", "--write", "--approve"]) + run( + runner, + [ + "--debug", + "collaborator", + "add", + "foo2@bar.com", + f"/programs/{program}/projects/{project}", + "--write", + "--approve", + ], + ) def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> None: @@ -131,20 +187,29 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N assert tmpdir.chdir() print(Path.cwd()) - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." print(project_id) - run(runner, ["--debug", "init", project_id, "--approve"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "init", project_id, "--approve"], + expected_files=[".g3t", ".git"], + ) # create a test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # add the file - run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["--debug", "add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) # should create a dvc file dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc") @@ -160,7 +225,11 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N object_id = dvc.object_id # create the meta file - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"]) + run( + runner, + ["--debug", "meta", "init"], + expected_files=["META/DocumentReference.ndjson"], + ) # commit the changes, delegating to git run(runner, ["--debug", "commit", "-am", "initial commit"]) @@ -184,24 +253,44 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N # remove the project from the server. # TODO note, this does not remove the files from the bucket (UChicago bug) # See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229 - run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"]) - - -def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, project_id: str, tmp_path: Path): + run( + runner, + [ + "--debug", + "projects", + "empty", + "--project_id", + project_id, + "--confirm", + "empty", + ], + ) + + +def test_push_fails_with_invalid_doc_ref_creation_date( + runner: CliRunner, project_id: str, tmp_path: Path +): # check - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." # copy fixture to temp test dir project_dir = "fhir-gdc-examples" fixtures_path = Path(os.path.dirname(__file__)).parent / "fixtures" fhir_gdc_dir = fixtures_path / project_dir - modified_doc_ref_path = fixtures_path / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + modified_doc_ref_path = ( + fixtures_path + / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson" + ) # init project new_project_dir = tmp_path / project_dir shutil.copytree(fhir_gdc_dir, new_project_dir) - shutil.copy(modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" ) + shutil.copy( + modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" + ) # get invalid date from fixture doc_ref_content = pd.read_json(modified_doc_ref_path, lines=True)["content"][0] @@ -211,14 +300,17 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec log_file_path = "logs/publish.log" os.chdir(new_project_dir) run(runner, ["init", project_id, "--approve"]) - result = run(runner, - ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, - expected_files=[log_file_path] - ) + result = run( + runner, + ["push", "--skip_validate", "--overwrite"], + expected_exit_code=1, + expected_files=[log_file_path], + ) # ensure push has useful useful error logs - assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}" + assert ( + log_file_path in result.output + ), f"expected log file path in stdout, instead got:\n{result.output}" # ensure saved log file contains info about invalid date with open(log_file_path, "r") as log_file: @@ -226,40 +318,53 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec str_lines = str(lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: - assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' + assert ( + keyword in str_lines + ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}' -def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path): +def test_push_fails_with_no_write_permissions( + runner: CliRunner, project_id: str, tmp_path: Path +): # setup - assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set." + assert os.environ.get( + "G3T_PROFILE" + ), "G3T_PROFILE environment variable must be set." os.chdir(tmp_path) # initialize project without approving permissions log_file_path = "logs/publish.log" - run(runner, [ "init", project_id], - expected_files=[".g3t", ".git"]) + run(runner, ["init", project_id], expected_files=[".g3t", ".git"]) # create test file test_file = Path("my-project-data/hello.txt") test_file.parent.mkdir(parents=True, exist_ok=True) - test_file.write_text('hello\n') + test_file.write_text("hello\n") # prepare test file for submission - run(runner, ["add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"]) + run( + runner, + ["add", str(test_file)], + expected_files=["MANIFEST/my-project-data/hello.txt.dvc"], + ) run(runner, ["meta", "init"], expected_files=["META/DocumentReference.ndjson"]) - print("current directory:",os.getcwd()) + print("current directory:", os.getcwd()) run(runner, ["commit", "-m", "initial commit"]) # push result = run(runner, ["push"], expected_exit_code=1, expected_files=[log_file_path]) # ensure stdout mentions log files - assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}" + assert ( + log_file_path in result.output + ), f"expected log file path in stdout, instead got:\n{result.output}" # check valid error messages within with open(log_file_path, "r") as log_file: # grab last line - line = [l for l in log_file.readlines()][-1] + line = [_ for _ in log_file.readlines()][-1] for output in ["401", "permission"]: - assert "401" in line, f"expected {log_file_path} to contain {output}, instead got: \n{line}" + assert ( + "401" in line + ), f"expected {log_file_path} to contain {output}, instead got: \n{line}" diff --git a/tests/unit/meta/conftest.py b/tests/unit/meta/conftest.py index b171120e..8de22443 100644 --- a/tests/unit/meta/conftest.py +++ b/tests/unit/meta/conftest.py @@ -16,7 +16,7 @@ def program() -> str: @pytest.fixture def project() -> str: - project = uuid.uuid4().hex.replace('-', '_') + project = uuid.uuid4().hex.replace("-", "_") return project diff --git a/tests/unit/meta/test_meta.py b/tests/unit/meta/test_meta.py index 818ba083..d602a5c6 100644 --- a/tests/unit/meta/test_meta.py +++ b/tests/unit/meta/test_meta.py @@ -10,15 +10,20 @@ from tests import run -def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_object_id_invalid_on_project_id_change( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test object_id validation command.""" # change to the temporary directory os.chdir(tmp_path) print(pathlib.Path.cwd()) print(project_id) - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -27,42 +32,67 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." + assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) # now change the project_id to something new # this should cause invalid object_id errors config = gen3_tracker.config.default() config.gen3.project_id = config.gen3.project_id + "XXXX" - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", "change-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "change-project_id", ".g3t/config.yaml"]) # should error now run(runner, ["--debug", "meta", "validate"], expected_exit_code=1) run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=1) # also check skip_validate - run(runner, ["--debug", "push", "--dry-run", "--skip_validate"], expected_exit_code=0) + run( + runner, + ["--debug", "push", "--dry-run", "--skip_validate"], + expected_exit_code=0, + ) # should pass now config.gen3.project_id = config.gen3.project_id.replace("XXXX", "") - with open('.g3t/config.yaml', 'w') as f: + with open(".g3t/config.yaml", "w") as f: yaml.dump(config.model_dump(), f) - run(runner, ["commit", "-m", "restore-project_id", '.g3t/config.yaml']) + run(runner, ["commit", "-m", "restore-project_id", ".g3t/config.yaml"]) # ensure we can validate without passing project id results = validate(directory_path="META") @@ -72,7 +102,9 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=0) -def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None: +def test_assert_add_specimen_after_init( + runner: CliRunner, project_id, tmp_path: pathlib.Path +) -> None: """Test meta skeleton handles re-add of data with new specimen""" # change to the temporary directory os.chdir(tmp_path) @@ -80,8 +112,11 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: print(project_id) # init the project, no server - run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"], - expected_files=[".g3t", ".git"]) + run( + runner, + ["--debug", "--profile", "local", "init", project_id, "--no-server"], + expected_files=[".g3t", ".git"], + ) # create test files cmds = """ @@ -90,34 +125,73 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: echo "hello" > my-project-data/hello.txt echo "big-data" > my-read-only-data/big-file.txt ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt - """.split('\n') + """.split( + "\n" + ) for cmd in cmds: run_command(cmd, no_capture=True) - assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist." - assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist." - assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/hello.txt" + ).exists(), "hello.txt does not exist." + assert pathlib.Path( + "my-read-only-data/big-file.txt" + ).exists(), "my-read-only-data/big-file.txt does not exist." + assert pathlib.Path( + "my-project-data/big-file.txt" + ).exists(), "my-project-data/big-file.txt does not exist." def _files_with_patients(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] for f, p in zip(files, patients): - run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) def _files_with_patients_and_specimens(): files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"] patients = ["P1", "P2"] specimens = ["S1", "S2"] for f, p, s in zip(files, patients, specimens): - run(runner, ["--debug", "add", str(f), "--patient", p, "--specimen", s], expected_files=[f"MANIFEST/{f}.dvc"]) - - run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson", "META/Specimen.ndjson"]) + run( + runner, + ["--debug", "add", str(f), "--patient", p, "--specimen", s], + expected_files=[f"MANIFEST/{f}.dvc"], + ) + + run( + runner, + ["--debug", "meta", "init"], + expected_files=[ + "META/DocumentReference.ndjson", + "META/Patient.ndjson", + "META/ResearchStudy.ndjson", + "META/ResearchSubject.ndjson", + "META/Specimen.ndjson", + ], + ) run(runner, ["--debug", "meta", "validate"]) - run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]) + run( + runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"] + ) # create initial association between patients and files _files_with_patients() diff --git a/tests/unit/test_coding_conventions.py b/tests/unit/test_coding_conventions.py index 34171929..e88b5cb6 100644 --- a/tests/unit/test_coding_conventions.py +++ b/tests/unit/test_coding_conventions.py @@ -7,7 +7,10 @@ def test_coding_conventions(): """Check python conventions on key directories""" script_dir = os.path.dirname(os.path.abspath(__file__)) - directories = [os.path.join(script_dir, "../../gen3_tracker"), os.path.join(script_dir, "../../tests")] + directories = [ + os.path.join(script_dir, "../../gen3_tracker"), + os.path.join(script_dir, "../../tests"), + ] failures = [] for directory in directories: cmd_str = f"flake8 {directory} --max-line-length 256 --exclude test_flatten_fhir_example.py" diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py index 82acc470..9b923d88 100644 --- a/tests/unit/test_flatten_fhir_example.py +++ b/tests/unit/test_flatten_fhir_example.py @@ -29,19 +29,40 @@ # test data ------------------------------------------------------------ # The following fixtures provide test data for the tests below. + @pytest.fixture def patient_dict() -> dict: # TODO - read the patient example from a file - patient_dict = {"resourceType": "Patient", "id": "3", "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, - "text": {"status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e"}, - "identifier": [{"type": { - "coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "SS", "display": "Social Security number"}]}, - "system": "http://hl7.org/fhir/sid/us-ssn", "value": "444555555"}], "active": True, - "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], - "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], "gender": "female", - "address": [{"use": "home", "line": ["2222 Home Street"]}], - "managingOrganization": {"reference": "Organization/hl7"}} + patient_dict = { + "resourceType": "Patient", + "id": "3", + "meta": {"lastUpdated": "2012-05-29T23:45:32Z"}, + "text": { + "status": "generated", + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e', + }, + "identifier": [ + { + "type": { + "coding": [ + { + "system": "http://terminology.hl7.org/CodeSystem/v2-0203", + "code": "SS", + "display": "Social Security number", + } + ] + }, + "system": "http://hl7.org/fhir/sid/us-ssn", + "value": "444555555", + } + ], + "active": True, + "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}], + "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], + "gender": "female", + "address": [{"use": "home", "line": ["2222 Home Street"]}], + "managingOrganization": {"reference": "Organization/hl7"}, + } yield patient_dict @@ -52,38 +73,42 @@ def specimen_dict(): "id": "denovo-3", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://browser.ihtsdotools.org/\"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"Patient-denovoFather.html\"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d\"ServiceRequest-genomicServiceRequest.html\"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d\"grid\"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d\"Practitioner-practitioner01.html\"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html\"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://browser.ihtsdotools.org/"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"Patient-denovoFather.html"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d"ServiceRequest-genomicServiceRequest.html"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d"grid"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d"Practitioner-practitioner01.html"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e', + }, "identifier": [ { "system": "http://www.somesystemabc.net/identifiers/specimens", - "value": "3"}], + "value": "3", + } + ], "status": "available", "type": { "coding": [ { "system": "http://snomed.info/sct", "code": "122555007", - "display": "Venous blood specimen"}]}, - "subject": { - "reference": "Patient/denovoFather", - "display": "John Doe"}, + "display": "Venous blood specimen", + } + ] + }, + "subject": {"reference": "Patient/denovoFather", "display": "John Doe"}, "receivedTime": "2021-01-01T01:01:01Z", - "request": [ - { - "reference": "ServiceRequest/genomicServiceRequest"}], + "request": [{"reference": "ServiceRequest/genomicServiceRequest"}], "collection": { - "collector": { - "reference": "Practitioner/practitioner01"}, + "collector": {"reference": "Practitioner/practitioner01"}, "collectedDateTime": "2021-01-01T01:01:00Z", - "quantity": { - "value": 1, - "unit": "mL"}, + "quantity": {"value": 1, "unit": "mL"}, "method": { "coding": [ { "system": "http://terminology.hl7.org/CodeSystem/v2-0488", "code": "LNV", - "display": "Line, Venous"}]}}} + "display": "Line, Venous", + } + ] + }, + }, + } @pytest.fixture @@ -93,14 +118,14 @@ def observation_eye_color_dict(): "id": "eye-color", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"eye-color\"\u003e \u003c/a\u003e\u003ca name\u003d\"hceye-color\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"eye-color"\u003e \u003c/a\u003e\u003ca name\u003d"hceye-color"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e', + }, "status": "final", - "code": { - "text": "eye color"}, - "subject": { - "reference": "Patient/example"}, + "code": {"text": "eye color"}, + "subject": {"reference": "Patient/example"}, "effectiveDateTime": "2016-05-18", - "valueString": "blue"} + "valueString": "blue", + } @pytest.fixture @@ -110,7 +135,8 @@ def observation_bmi_dict(): "id": "bmi-using-related", "text": { "status": "generated", - "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"bmi-using-related\"\u003e \u003c/a\u003e\u003ca name\u003d\"hcbmi-using-related\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d\" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n \"\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html\"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://loinc.org/\"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d\"background: LightGoldenRodYellow\"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d\"broken-link.html\"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"observation-example.html\"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e"}, + "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"bmi-using-related"\u003e \u003c/a\u003e\u003ca name\u003d"hcbmi-using-related"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n "\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://loinc.org/"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d"background: LightGoldenRodYellow"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d"broken-link.html"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d"observation-example.html"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e', + }, "status": "final", "category": [ { @@ -118,35 +144,41 @@ def observation_bmi_dict(): { "system": "http://terminology.hl7.org/CodeSystem/observation-category", "code": "vital-signs", - "display": "Vital Signs"}], - "text": "Vital Signs"}], + "display": "Vital Signs", + } + ], + "text": "Vital Signs", + } + ], "code": { "coding": [ { "system": "http://loinc.org", "code": "39156-5", - "display": "Body mass index (BMI) [Ratio]"}], - "text": "BMI"}, - "subject": { - "reference": "Patient/example"}, + "display": "Body mass index (BMI) [Ratio]", + } + ], + "text": "BMI", + }, + "subject": {"reference": "Patient/example"}, "effectiveDateTime": "1999-07-02", "valueQuantity": { "value": 16.2, "unit": "kg/m2", "system": "http://unitsofmeasure.org", - "code": "kg/m2"}, + "code": "kg/m2", + }, "derivedFrom": [ - { - "reference": "Observation/bodyheight", - "display": "Body Height"}, - { - "reference": "Observation/example", - "display": "Body Weight"}]} + {"reference": "Observation/bodyheight", "display": "Body Height"}, + {"reference": "Observation/example", "display": "Body Weight"}, + ], + } # flatteners ------------------------------------------------------------ # The following functions are used to flatten the FHIR resources. + def flatten_simple(self: DomainResource): """Convert the DomainResource instance to just an id.""" return self.id @@ -161,13 +193,17 @@ def _isodate(v): def flatten_scalars(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - _ = {k: _isodate(v) for k, v in self.dict().items() if not isinstance(v, (list, dict))} + _ = { + k: _isodate(v) + for k, v in self.dict().items() + if not isinstance(v, (list, dict)) + } return _ def flatten_references(self: DomainResource) -> dict: """Convert the DomainResource instance to a dictionary.""" - fields = [_ for _ in self.__fields__.keys() if not _.endswith('__ext')] + fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")] _ = {} # if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference for k in fields: @@ -181,14 +217,16 @@ def flatten_references(self: DomainResource) -> dict: def flatten_identifier(self: Identifier) -> dict: """Convert the Identifier instance to a key value, use a simplified system as key.""" parsed_url = urlparse(self.system) - path_parts = parsed_url.path.split('/') # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn - key = path_parts[-1] if path_parts else 'identifier' + path_parts = parsed_url.path.split( + "/" + ) # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn + key = path_parts[-1] if path_parts else "identifier" return {key: self.value} def flatten_coding(self: Coding) -> dict: """Convert the DomainResource instance to a dictionary.""" - return {'display': self.display} + return {"display": self.display} def flatten_scalars_and_references(self: DomainResource) -> dict: @@ -213,12 +251,12 @@ def flatten_observation(self: Observation) -> dict: _ = flatten_scalars_references_identifiers(self) # normalize all the valueXXXXX to 'value' if self.valueQuantity: - _['value'] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" + _["value"] = f"{self.valueQuantity.value} {self.valueQuantity.unit}" elif self.valueString: - _['value'] = self.valueString - del _['valueString'] + _["value"] = self.valueString + del _["valueString"] elif self.valueCodeableConcept: - _['value'] = self.valueCodeableConcept.text + _["value"] = self.valueCodeableConcept.text # there are many other value types, but we'll ignore them for now # see https://build.fhir.org/observation-definitions.html#Observation.value_x_ # Quantity|CodeableConcept|string|boolean|integer|Range|Ratio|SampledData|time|dateTime|Period|Attachment|Reference(MolecularSequence) @@ -229,6 +267,7 @@ def flatten_observation(self: Observation) -> dict: # patchers ------------------------------------------------------------ # The following fixtures are used to patch the DomainResource class to add the desired method. + @pytest.fixture def patched_domain_resource_simple() -> bool: """Patch the DomainResource class to add a flatten method.""" @@ -284,66 +323,126 @@ def patched_scalars_references_identifiers_observation() -> bool: # tests ------------------------------------------------------------ + def test_patient_without_flatten(patient_dict: dict): """This patient object should NOT have a 'flatten' method.""" # without path dependency, just have a plain patient object with no flatten method patient = Patient.parse_obj(patient_dict) - assert not hasattr(patient, 'flatten'), "Patient object should not have a 'flatten' method" + assert not hasattr( + patient, "flatten" + ), "Patient object should not have a 'flatten' method" def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict): """This patient object should have a 'flatten' method.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == patient.id, f"Patient.flatten() should return {patient.id}" + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert ( + patient.flatten() == patient.id + ), f"Patient.flatten() should return {patient.id}" def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict): """This patient object should have a 'flatten' method that returns a dict of scalar values.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values" - - -def test_patient_with_scalars_and_references(patched_scalars_and_references: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values" + + +def test_patient_with_scalars_and_references( + patched_scalars_and_references: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_patient_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, patient_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_patient_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, patient_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" patient = Patient.parse_obj(patient_dict) - assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method" - assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient', 'us-ssn': '444555555'}, "Patient.flatten() should return a dict of all scalar values and references" - - -def test_specimen_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, specimen_dict: dict): + assert hasattr( + patient, "flatten" + ), "Patient object does not have a 'flatten' method" + assert patient.flatten() == { + "active": True, + "gender": "female", + "id": "3", + "managingOrganization": "Organization/hl7", + "resourceType": "Patient", + "us-ssn": "444555555", + }, "Patient.flatten() should return a dict of all scalar values and references" + + +def test_specimen_with_scalars_references_identifiers( + patched_scalars_references_identifiers: bool, specimen_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" specimen = Specimen.parse_obj(specimen_dict) - assert hasattr(specimen, 'flatten'), "Specimen object does not have a 'flatten' method" - assert specimen.flatten() == {'resourceType': 'Specimen', 'id': 'denovo-3', 'status': 'available', - 'receivedTime': '2021-01-01T01:01:01+00:00', - 'subject': 'Patient/denovoFather', 'specimens': '3'} + assert hasattr( + specimen, "flatten" + ), "Specimen object does not have a 'flatten' method" + assert specimen.flatten() == { + "resourceType": "Specimen", + "id": "denovo-3", + "status": "available", + "receivedTime": "2021-01-01T01:01:01+00:00", + "subject": "Patient/denovoFather", + "specimens": "3", + } -def test_eye_color_observation(patched_scalars_references_identifiers_observation: bool, observation_eye_color_dict: dict): +def test_eye_color_observation( + patched_scalars_references_identifiers_observation: bool, + observation_eye_color_dict: dict, +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_eye_color_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'resourceType': 'Observation', 'id': 'eye-color', 'status': 'final', - 'effectiveDateTime': '2016-05-18', 'value': 'blue', - 'subject': 'Patient/example'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + assert observation.flatten() == { + "resourceType": "Observation", + "id": "eye-color", + "status": "final", + "effectiveDateTime": "2016-05-18", + "value": "blue", + "subject": "Patient/example", + } -def test_bmi_observation(patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict): +def test_bmi_observation( + patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict +): """This patient object should have a 'flatten' method that returns a dict of scalar values and references.""" observation = Observation.parse_obj(observation_bmi_dict) - assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method" - assert observation.flatten() == {'effectiveDateTime': '1999-07-02', - 'id': 'bmi-using-related', - 'resourceType': 'Observation', - 'status': 'final', - 'subject': 'Patient/example', - 'value': '16.2 kg/m2'} + assert hasattr( + observation, "flatten" + ), "Observation object does not have a 'flatten' method" + assert observation.flatten() == { + "effectiveDateTime": "1999-07-02", + "id": "bmi-using-related", + "resourceType": "Observation", + "status": "final", + "subject": "Patient/example", + "value": "16.2 kg/m2", + } diff --git a/tests/unit/test_hash_types.py b/tests/unit/test_hash_types.py index ab51944c..68559e6f 100644 --- a/tests/unit/test_hash_types.py +++ b/tests/unit/test_hash_types.py @@ -5,20 +5,20 @@ from gen3_tracker.git import DVCItem VALID_HASHES = { - 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8', - 'sha1': '2ef7bde608ce5404e97d5f042f95f89f1c232871', - 'sha256': '5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf', - 'sha512': '3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78', - 'crc': '3e25960a', - 'etag': 'acbd18db4cc2f85cedef654fccc4a4d8-3' + "md5": "acbd18db4cc2f85cedef654fccc4a4d8", + "sha1": "2ef7bde608ce5404e97d5f042f95f89f1c232871", + "sha256": "5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf", + "sha512": "3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78", + "crc": "3e25960a", + "etag": "acbd18db4cc2f85cedef654fccc4a4d8-3", } def test_invalid_hash_values(): """Test that invalid hash values raise a ValidationError.""" for hash_type in ACCEPTABLE_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) - _[hash_type] = 'foo' + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) + _[hash_type] = "foo" print(_) with pytest.raises(ValidationError): item = DVCItem(**_) @@ -28,7 +28,7 @@ def test_invalid_hash_values(): def test_valid_hash_values(): """Test that valid hash values do raise a ValidationError.""" for hash_type in VALID_HASHES.keys(): - _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1) + _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1) _[hash_type] = VALID_HASHES[hash_type] print(_) item = DVCItem(**_) diff --git a/tests/unit/test_read_dvc.py b/tests/unit/test_read_dvc.py index 7feef182..c9a5c231 100644 --- a/tests/unit/test_read_dvc.py +++ b/tests/unit/test_read_dvc.py @@ -3,15 +3,23 @@ def test_read_dvc(data_path: Path): - dvc = to_dvc(data_path / 'hello.txt.dvc') + dvc = to_dvc(data_path / "hello.txt.dvc") assert dvc assert dvc.outs - assert dvc.outs[0].path == 'my-project-data/hello.txt' + assert dvc.outs[0].path == "my-project-data/hello.txt" def test_read_dvc_item(): - _ = {'hash': 'md5', 'is_symlink': False, 'md5': 'b1946ac92492d2347c6235b4d2611184', 'mime': 'text/plain', 'modified': '2024-04-30T17:46:30.819143+00:00', - 'path': 'my-project-data/hello.txt', 'realpath': '/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt', 'size': 6} + _ = { + "hash": "md5", + "is_symlink": False, + "md5": "b1946ac92492d2347c6235b4d2611184", + "mime": "text/plain", + "modified": "2024-04-30T17:46:30.819143+00:00", + "path": "my-project-data/hello.txt", + "realpath": "/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt", + "size": 6, + } item = DVCItem(**_) assert item - assert item.hash == 'md5' + assert item.hash == "md5" From 2cb2feb138aff124c258f107ccfbfb420608a070 Mon Sep 17 00:00:00 2001 From: quinnwai Date: Mon, 2 Dec 2024 13:00:20 -0800 Subject: [PATCH 3/6] ensure dataframer unit tests pass --- tests/unit/dataframer/test_dataframer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/dataframer/test_dataframer.py b/tests/unit/dataframer/test_dataframer.py index 87a51f22..422665f6 100644 --- a/tests/unit/dataframer/test_dataframer.py +++ b/tests/unit/dataframer/test_dataframer.py @@ -93,6 +93,7 @@ def simplified_resources( "category": "Laboratory", "sample_type": "Primary Solid Tumor", "library_id": "12345", + "observation_code": "sample type abc", "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", @@ -284,6 +285,7 @@ def specimen_row(simplified_resources, specimen_key): "tissue_type": "Tumor", "treatments": "Trastuzumab", "allocated_for_site": "TEST Clinical Research", + "observation_code": "sample type abc", "indexed_collection_date": "365", "biopsy_specimens": "specimenA, specimenB, specimenC", "biopsy_procedure_type": "Biopsy - Core", From 4a71cf1f57755610e3516e8e9b4abcfbda5e094e Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Mon, 2 Dec 2024 13:34:55 -0800 Subject: [PATCH 4/6] fix test --- tests/integration/test_end_to_end_workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 6237edab..d4c5f407 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1, + expected_exit_code=0 , expected_files=[log_file_path], ) @@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) + print("log lines: ", str_lines) for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( From 885bfce2e0828c37724fcd63f33a73ae74678a46 Mon Sep 17 00:00:00 2001 From: matthewpeterkort Date: Tue, 3 Dec 2024 14:45:09 -0800 Subject: [PATCH 5/6] fix test to work with new output --- tests/integration/test_end_to_end_workflow.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index d4c5f407..911ffc2e 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=0 , + expected_exit_code=1 , expected_files=[log_file_path], ) @@ -316,8 +316,6 @@ def test_push_fails_with_invalid_doc_ref_creation_date( with open(log_file_path, "r") as log_file: lines = log_file.readlines() str_lines = str(lines) - print("log lines: ", str_lines) - for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]: assert ( keyword in str_lines From 6d46e966914e362e94a1a24e00b1b91ff6ec2d2c Mon Sep 17 00:00:00 2001 From: quinnwai Date: Tue, 3 Dec 2024 14:52:18 -0800 Subject: [PATCH 6/6] fix spacing --- tests/integration/test_end_to_end_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py index 911ffc2e..5fdca0c0 100644 --- a/tests/integration/test_end_to_end_workflow.py +++ b/tests/integration/test_end_to_end_workflow.py @@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date( result = run( runner, ["push", "--skip_validate", "--overwrite"], - expected_exit_code=1 , + expected_exit_code=1, expected_files=[log_file_path], )