From 8155ed1eeb3782c8ba59d96d826991b4869035f4 Mon Sep 17 00:00:00 2001
From: quinnwai
Date: Mon, 2 Dec 2024 11:32:27 -0800
Subject: [PATCH 1/6] update pytest and version number
---
setup.py | 2 +-
tests/integration/test_end_to_end_workflow.py | 9 ++++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/setup.py b/setup.py
index ab0c30b5..1daa321f 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
setup(
name='gen3_tracker',
- version='0.0.7rc1',
+ version='0.0.7rc2',
description='A CLI for adding version control to Gen3 data submission projects.',
long_description=long_description,
long_description_content_type='text/markdown',
diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py
index 429ae5fb..86b03fb0 100644
--- a/tests/integration/test_end_to_end_workflow.py
+++ b/tests/integration/test_end_to_end_workflow.py
@@ -58,7 +58,7 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
run(runner, ["--debug", "meta", "validate"])
# update the file
- test_file = pathlib.Path("my-project-data/hello.txt")
+ test_file = Path("my-project-data/hello.txt")
test_file.parent.mkdir(parents=True, exist_ok=True)
test_file.write_text('hello UPDATE\n')
# re-add the file
@@ -213,7 +213,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec
run(runner, ["init", project_id, "--approve"])
result = run(runner,
["push", "--skip_validate", "--overwrite"],
- expected_exit_code=0,
+ expected_exit_code=1,
expected_files=[log_file_path]
)
@@ -225,9 +225,8 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec
lines = log_file.readlines()
str_lines = str(lines)
- assert "/content/0/attachment/creation" in str_lines, f"expected errors to describe to /content/0/attachment/creation, instead got: \n{str_lines}"
- assert "jsonschema" in str_lines, f"expected errors to mention jsonschema, instead got: \n{str_lines}"
- assert invalid_date in str_lines, f"expected invalid date {invalid_date} to be logged, instead got: \n{str_lines} "
+ for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
+ assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}'
def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path):
From ec045a3cf49d42c8ab604188e546d796da77e0b4 Mon Sep 17 00:00:00 2001
From: quinnwai
Date: Mon, 2 Dec 2024 11:36:29 -0800
Subject: [PATCH 2/6] linting
---
gen3_tracker/gen3/jobs.py | 128 ++-
gen3_tracker/git/cli.py | 853 +++++++++++++-----
gen3_tracker/meta/entities.py | 21 +-
tests/__init__.py | 18 +-
tests/integration/__init__.py | 33 +-
tests/integration/conftest.py | 2 +-
tests/integration/test_bucket_import.py | 17 +-
tests/integration/test_bundle.py | 32 +-
tests/integration/test_end_to_end_workflow.py | 195 +++-
tests/unit/meta/conftest.py | 2 +-
tests/unit/meta/test_meta.py | 136 ++-
tests/unit/test_coding_conventions.py | 5 +-
tests/unit/test_flatten_fhir_example.py | 277 ++++--
tests/unit/test_hash_types.py | 18 +-
tests/unit/test_read_dvc.py | 18 +-
15 files changed, 1238 insertions(+), 517 deletions(-)
diff --git a/gen3_tracker/gen3/jobs.py b/gen3_tracker/gen3/jobs.py
index f544d4a6..24f6b3e7 100644
--- a/gen3_tracker/gen3/jobs.py
+++ b/gen3_tracker/gen3/jobs.py
@@ -14,26 +14,36 @@
from gen3_tracker import Config
from gen3_tracker.common import Push, Commit
from gen3_tracker.gen3.indexd import write_indexd
-from gen3_tracker.git import calculate_hash, DVC, run_command, DVCMeta, DVCItem, modified_date
+from gen3_tracker.git import (
+ calculate_hash,
+ DVC,
+ run_command,
+ DVCMeta,
+ DVCItem,
+ modified_date,
+)
def _validate_parameters(from_: str) -> pathlib.Path:
- assert len(urlparse(from_).scheme) == 0, f"{from_} appears to be an url. url to url cp not supported"
+ assert (
+ len(urlparse(from_).scheme) == 0
+ ), f"{from_} appears to be an url. url to url cp not supported"
return from_
-def cp(config: Config,
- from_: str,
- project_id: str,
- ignore_state: bool,
- auth=None,
- user=None,
- object_name=None,
- bucket_name=None,
- metadata: dict = {},
- ):
+def cp(
+ config: Config,
+ from_: str,
+ project_id: str,
+ ignore_state: bool,
+ auth=None,
+ user=None,
+ object_name=None,
+ bucket_name=None,
+ metadata: dict = {},
+):
"""Copy meta to bucket, used by etl_pod job"""
from_ = _validate_parameters(str(from_))
if not isinstance(from_, pathlib.Path):
@@ -41,13 +51,15 @@ def cp(config: Config,
assert auth, "auth is required"
- metadata = dict({'submitter': None, 'metadata_version': '0.0.1', 'is_metadata': True} | metadata)
- if not metadata['submitter']:
+ metadata = dict(
+ {"submitter": None, "metadata_version": "0.0.1", "is_metadata": True} | metadata
+ )
+ if not metadata["submitter"]:
if not user:
- user = auth.curl('/user/user').json()
- metadata['submitter'] = user['name']
+ user = auth.curl("/user/user").json()
+ metadata["submitter"] = user["name"]
- program, project = project_id.split('-')
+ program, project = project_id.split("-")
assert bucket_name, f"could not find bucket for {program}"
@@ -57,27 +69,26 @@ def cp(config: Config,
if not object_name:
now = datetime.now().strftime("%Y%m%d-%H%M%S")
- object_name = f'_{project_id}-{now}_meta.zip'
+ object_name = f"_{project_id}-{now}_meta.zip"
zipfile_path = temp_dir / object_name
- with ZipFile(zipfile_path, 'w') as zip_object:
+ with ZipFile(zipfile_path, "w") as zip_object:
for _ in from_.glob("*.ndjson"):
zip_object.write(_)
stat = zipfile_path.stat()
- md5_sum = calculate_hash('md5', zipfile_path)
+ md5_sum = calculate_hash("md5", zipfile_path)
my_dvc = DVC(
meta=DVCMeta(),
outs=[
DVCItem(
path=object_name,
md5=md5_sum,
- hash='md5',
+ hash="md5",
modified=modified_date(zipfile_path),
size=stat.st_size,
-
)
- ]
+ ],
)
metadata = write_indexd(
@@ -92,56 +103,81 @@ def cp(config: Config,
# document = file_client.upload_file_to_guid(guid=id_, file_name=object_name, bucket=bucket_name)
# print(document, file=sys.stderr)
- run_command(f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}", no_capture=False)
+ run_command(
+ f"gen3-client upload-single --bucket {bucket_name} --guid {my_dvc.object_id} --file {zipfile_path} --profile {config.gen3.profile}",
+ no_capture=False,
+ )
- return {'msg': f"Uploaded {zipfile_path} to {bucket_name}", "object_id": my_dvc.object_id, "object_name": object_name}
+ return {
+ "msg": f"Uploaded {zipfile_path} to {bucket_name}",
+ "object_id": my_dvc.object_id,
+ "object_name": object_name,
+ }
-def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None) -> dict:
+def publish_commits(
+ config: Config, wait: bool, auth: Gen3Auth, bucket_name: str, spinner=None
+) -> dict:
"""Publish commits to the portal."""
# TODO legacy fhir-import-export job: copies meta to bucket and triggers job,
# meta information is already in git REPO,
# we should consider changing the fhir_import_export job to use the git REPO
- user = auth.curl('/user/user').json()
+ user = auth.curl("/user/user").json()
# copy meta to bucket
upload_result = cp(
config=config,
- from_='META',
+ from_="META",
project_id=config.gen3.project_id,
ignore_state=True,
auth=auth,
user=user,
- bucket_name=bucket_name
+ bucket_name=bucket_name,
)
- object_id = upload_result['object_id']
+ object_id = upload_result["object_id"]
push = Push(config=config)
jobs_client = Gen3Jobs(auth_provider=auth)
# create "legacy" commit object, read by fhir-import-export job
- push.commits.append(Commit(object_id=object_id, message='From g3t-git', meta_path=upload_result['object_name'], commit_id=object_id))
- args = {'push': push.model_dump(), 'project_id': config.gen3.project_id, 'method': 'put'}
+ push.commits.append(
+ Commit(
+ object_id=object_id,
+ message="From g3t-git",
+ meta_path=upload_result["object_name"],
+ commit_id=object_id,
+ )
+ )
+ args = {
+ "push": push.model_dump(),
+ "project_id": config.gen3.project_id,
+ "method": "put",
+ }
# capture logging from gen3.jobs
from cdislogging import get_logger # noqa
+
cdis_logging = get_logger("__name__")
cdis_logging.setLevel(logging.WARN)
-
+
if wait:
# async_run_job_and_wait monkeypatched below
- _ = asyncio.run(jobs_client.async_run_job_and_wait(job_name='fhir_import_export', job_input=args, spinner=spinner))
+ _ = asyncio.run(
+ jobs_client.async_run_job_and_wait(
+ job_name="fhir_import_export", job_input=args, spinner=spinner
+ )
+ )
else:
- _ = jobs_client.create_job('fhir_import_export', args)
+ _ = jobs_client.create_job("fhir_import_export", args)
if not isinstance(_, dict):
- _ = {'output': _}
- if isinstance(_['output'], str):
+ _ = {"output": _}
+ if isinstance(_["output"], str):
try:
- _['output'] = json.loads(_['output'])
+ _["output"] = json.loads(_["output"])
except json.JSONDecodeError:
pass
return _
@@ -149,7 +185,9 @@ def publish_commits(config: Config, wait: bool, auth: Gen3Auth, bucket_name: str
# monkey patch for gen3.jobs.Gen3Jobs.async_run_job_and_wait
# make it less noisy and sleep less (max of 30 seconds)
-async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=None, **kwargs):
+async def async_run_job_and_wait(
+ self, job_name, job_input, spinner=None, _ssl=None, **kwargs
+):
"""
Asynchronous function to create a job, wait for output, and return. Will
sleep in a linear delay until the job is done, starting with 1 second.
@@ -188,12 +226,12 @@ async def async_run_job_and_wait(self, job_name, job_input, spinner=None, _ssl=N
if status.get("status") != "Completed":
# write failed output to log file before raising exception
response = await self.async_get_output(job_create_response.get("uid"))
- with open("logs/publish.log", 'a') as f:
- log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()}
- log_msg.update(response)
- f.write(json.dumps(log_msg, separators=(',', ':')))
- f.write('\n')
-
+ with open("logs/publish.log", "a") as f:
+ log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()}
+ log_msg.update(response)
+ f.write(json.dumps(log_msg, separators=(",", ":")))
+ f.write("\n")
+
raise Exception(f"Job status not complete: {status.get('status')}")
response = await self.async_get_output(job_create_response.get("uid"))
diff --git a/gen3_tracker/git/cli.py b/gen3_tracker/git/cli.py
index e7c4cc40..c0222bd8 100644
--- a/gen3_tracker/git/cli.py
+++ b/gen3_tracker/git/cli.py
@@ -34,20 +34,36 @@
import gen3_tracker
from gen3_tracker import Config
-from gen3_tracker.common import CLIOutput, INFO_COLOR, ERROR_COLOR, is_url, filter_dicts, SUCCESS_COLOR, \
- read_ndjson_file
+from gen3_tracker.common import (
+ CLIOutput,
+ INFO_COLOR,
+ ERROR_COLOR,
+ is_url,
+ filter_dicts,
+ SUCCESS_COLOR,
+ read_ndjson_file,
+)
from gen3_tracker.config import init as config_init, ensure_auth
from gen3_tracker.gen3.buckets import get_buckets
-from gen3_tracker.git import git_files, to_indexd, to_remote, dvc_data, \
- data_file_changes, modified_date, git_status, DVC, MISSING_G3T_MESSAGE
-from gen3_tracker.git import run_command, \
- MISSING_GIT_MESSAGE, git_repository_exists
+from gen3_tracker.git import (
+ git_files,
+ to_indexd,
+ to_remote,
+ dvc_data,
+ data_file_changes,
+ modified_date,
+ git_status,
+ DVC,
+ MISSING_G3T_MESSAGE,
+)
+from gen3_tracker.git import run_command, MISSING_GIT_MESSAGE, git_repository_exists
from gen3_tracker.git.adder import url_path, write_dvc_file
from gen3_tracker.git.cloner import ls
from gen3_tracker.git.initializer import initialize_project_server_side
from gen3_tracker.git.snapshotter import push_snapshot
from gen3_tracker.meta.skeleton import meta_index, get_data_from_meta
from gen3_tracker.common import _default_json_serializer
+
# logging.basicConfig(level=logging.INFO)
_logger = logging.getLogger(__package__)
@@ -66,14 +82,20 @@
# if debug:
# _logger.setLevel(logging.DEBUG)
+
def _check_parameters(config, project_id):
"""Common parameter checks."""
if not project_id:
raise AssertionError("project_id is required")
- if not project_id.count('-') == 1:
- raise AssertionError(f"project_id must be of the form program-project {project_id}")
+ if not project_id.count("-") == 1:
+ raise AssertionError(
+ f"project_id must be of the form program-project {project_id}"
+ )
if not config.gen3.profile:
- click.secho("No profile set. Continuing in disconnected mode. Use `set profile `", fg='yellow')
+ click.secho(
+ "No profile set. Continuing in disconnected mode. Use `set profile `",
+ fg="yellow",
+ )
@click.group(cls=gen3_tracker.NaturalOrderGroup)
@@ -84,10 +106,34 @@ def cli():
@cli.command(context_settings=dict(ignore_unknown_options=True))
# @click.option('--force', '-f', is_flag=True, help='Force the init.')
-@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID")
-@click.option('--approve', '-a', help='Approve the addition (privileged)', is_flag=True, default=False, show_default=True)
-@click.option('--no-server', help='Skip server setup (testing)', is_flag=True, default=False, show_default=True, hidden=True)
-@click.option('--debug', is_flag=True, envvar='G3T_DEBUG', help='Enable debug mode. G3T_DEBUG environment variable can also be used.')
+@click.argument(
+ "project_id",
+ default=None,
+ required=False,
+ envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID",
+)
+@click.option(
+ "--approve",
+ "-a",
+ help="Approve the addition (privileged)",
+ is_flag=True,
+ default=False,
+ show_default=True,
+)
+@click.option(
+ "--no-server",
+ help="Skip server setup (testing)",
+ is_flag=True,
+ default=False,
+ show_default=True,
+ hidden=True,
+)
+@click.option(
+ "--debug",
+ is_flag=True,
+ envvar="G3T_DEBUG",
+ help="Enable debug mode. G3T_DEBUG environment variable can also be used.",
+)
@click.pass_obj
def init(config: Config, project_id: str, approve: bool, no_server: bool, debug: bool):
"""Initialize a new repository."""
@@ -113,15 +159,29 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug:
ensure_git_repo(config)
if not no_server:
- init_logs, approval_needed = initialize_project_server_side(config, project_id)
+ init_logs, approval_needed = initialize_project_server_side(
+ config, project_id
+ )
logs.extend(init_logs)
if approve and approval_needed:
- run_command('g3t collaborator approve --all', dry_run=config.dry_run, no_capture=True)
+ run_command(
+ "g3t collaborator approve --all",
+ dry_run=config.dry_run,
+ no_capture=True,
+ )
elif approval_needed and not approve:
- click.secho("Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ "Approval needed. to approve the project, a privileged user must run `g3t collaborator approve --all`",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
else:
- click.secho(f"Approval not needed. Project {project_id} has approved read/write", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"Approval not needed. Project {project_id} has approved read/write",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
if config.debug:
for _ in logs:
@@ -135,26 +195,34 @@ def init(config: Config, project_id: str, approve: bool, no_server: bool, debug:
def ensure_git_repo(config):
# ensure a git repo
- if pathlib.Path('.git').exists():
+ if pathlib.Path(".git").exists():
return
- if not pathlib.Path('.git').exists():
- command = 'git init'
+ if not pathlib.Path(".git").exists():
+ command = "git init"
run_command(command, dry_run=config.dry_run, no_capture=True)
else:
- click.secho('Git repository already exists.', fg=INFO_COLOR, file=sys.stderr)
- pathlib.Path('MANIFEST').mkdir(exist_ok=True)
- pathlib.Path('META').mkdir(exist_ok=True)
- pathlib.Path('LOGS').mkdir(exist_ok=True)
- with open('.gitignore', 'w') as f:
- f.write('LOGS/\n')
- f.write('.g3t/state/\n') # legacy
- with open('META/README.md', 'w') as f:
- f.write('This directory contains metadata files for the data files in the MANIFEST directory.\n')
- with open('MANIFEST/README.md', 'w') as f:
- f.write('This directory contains dvc files that reference the data files.\n')
- run_command('git add MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True)
- run_command('git commit -m "initialized" MANIFEST META .gitignore .g3t', dry_run=config.dry_run, no_capture=True)
+ click.secho("Git repository already exists.", fg=INFO_COLOR, file=sys.stderr)
+ pathlib.Path("MANIFEST").mkdir(exist_ok=True)
+ pathlib.Path("META").mkdir(exist_ok=True)
+ pathlib.Path("LOGS").mkdir(exist_ok=True)
+ with open(".gitignore", "w") as f:
+ f.write("LOGS/\n")
+ f.write(".g3t/state/\n") # legacy
+ with open("META/README.md", "w") as f:
+ f.write(
+ "This directory contains metadata files for the data files in the MANIFEST directory.\n"
+ )
+ with open("MANIFEST/README.md", "w") as f:
+ f.write("This directory contains dvc files that reference the data files.\n")
+ run_command(
+ "git add MANIFEST META .gitignore .g3t", dry_run=config.dry_run, no_capture=True
+ )
+ run_command(
+ 'git commit -m "initialized" MANIFEST META .gitignore .g3t',
+ dry_run=config.dry_run,
+ no_capture=True,
+ )
# Note: The commented code below is an example of how to use context settings to allow extra arguments.
@@ -165,8 +233,8 @@ def ensure_git_repo(config):
@cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
-@click.argument('target')
-@click.option('--no-git-add', default=False, is_flag=True, hidden=True)
+@click.argument("target")
+@click.option("--no-git-add", default=False, is_flag=True, hidden=True)
@click.pass_context
def add(ctx, target, no_git_add: bool):
"""
@@ -211,10 +279,10 @@ def add(ctx, target, no_git_add: bool):
assert not config.no_config_found, MISSING_G3T_MESSAGE
# needs to have a target
- assert target, 'No targets specified.'
+ assert target, "No targets specified."
# Expand wildcard paths
- if is_url(target) and not target.startswith('file://'):
+ if is_url(target) and not target.startswith("file://"):
all_changed_files, updates = add_url(ctx, target)
else:
all_changed_files, updates = add_file(ctx, target)
@@ -224,8 +292,12 @@ def add(ctx, target, no_git_add: bool):
#
adds = [str(_) for _ in all_changed_files if _ not in updates]
if adds and not no_git_add:
- adds.append('.gitignore')
- run_command(f'git add {" ".join([str(_) for _ in adds])}', dry_run=config.dry_run, no_capture=True)
+ adds.append(".gitignore")
+ run_command(
+ f'git add {" ".join([str(_) for _ in adds])}',
+ dry_run=config.dry_run,
+ no_capture=True,
+ )
except Exception as e:
click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr)
@@ -234,9 +306,15 @@ def add(ctx, target, no_git_add: bool):
@cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
-@click.argument('targets', nargs=-1)
-@click.option('--message', '-m', help='The commit message.')
-@click.option('--all', '-a', is_flag=True, default=False, help='Automatically stage files that have been modified and deleted.')
+@click.argument("targets", nargs=-1)
+@click.option("--message", "-m", help="The commit message.")
+@click.option(
+ "--all",
+ "-a",
+ is_flag=True,
+ default=False,
+ help="Automatically stage files that have been modified and deleted.",
+)
@click.pass_context
def commit(ctx, targets, message, all):
"""Commit the changes
@@ -269,11 +347,11 @@ def status(config):
"""Show changed files."""
soft_error = False
try:
- with Halo(text='Scanning', spinner='line', placement='right', color='white'):
- manifest_path = pathlib.Path('MANIFEST')
+ with Halo(text="Scanning", spinner="line", placement="right", color="white"):
+ manifest_path = pathlib.Path("MANIFEST")
changes = data_file_changes(manifest_path)
# Get a list of all files in the MANIFEST directory and its subdirectories
- files = glob.glob('MANIFEST/**/*.dvc', recursive=True)
+ files = glob.glob("MANIFEST/**/*.dvc", recursive=True)
# Filter out directories, keep only files
files = [f for f in files if os.path.isfile(f)]
if not files:
@@ -284,28 +362,46 @@ def status(config):
document_reference_mtime = 0
- if pathlib.Path('META/DocumentReference.ndjson').exists():
+ if pathlib.Path("META/DocumentReference.ndjson").exists():
# Get the modification time
- document_reference_mtime = os.path.getmtime('META/DocumentReference.ndjson')
+ document_reference_mtime = os.path.getmtime(
+ "META/DocumentReference.ndjson"
+ )
latest_file_mtime = os.path.getmtime(latest_file)
if document_reference_mtime < latest_file_mtime:
- document_reference_mtime = datetime.fromtimestamp(document_reference_mtime).isoformat()
- latest_file_mtime = datetime.fromtimestamp(latest_file_mtime).isoformat()
- click.secho(f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson", fg=INFO_COLOR, file=sys.stderr)
+ document_reference_mtime = datetime.fromtimestamp(
+ document_reference_mtime
+ ).isoformat()
+ latest_file_mtime = datetime.fromtimestamp(
+ latest_file_mtime
+ ).isoformat()
+ click.secho(
+ f"WARNING: DocumentReference.ndjson is out of date {document_reference_mtime}. The most recently changed file is {latest_file} {latest_file_mtime}. Please check DocumentReferences.ndjson",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
soft_error = True
if changes:
- click.secho(f"# There are {len(changes)} data files that you need to update via `g3t add`:", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"# There are {len(changes)} data files that you need to update via `g3t add`:",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
cwd = pathlib.Path.cwd()
for _ in changes:
- data_path = str(_.data_path).replace(str(cwd) + '/', "")
- click.secho(f' g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}', fg=INFO_COLOR, file=sys.stderr)
+ data_path = str(_.data_path).replace(str(cwd) + "/", "")
+ click.secho(
+ f" g3t add {data_path} # changed: {modified_date(_.data_path)}, last added: {modified_date(_.dvc_path)}",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
soft_error = True
else:
click.secho("No data file changes.", fg=INFO_COLOR, file=sys.stderr)
- _ = run_command('git status')
+ _ = run_command("git status")
print(_.stdout)
if soft_error:
exit(1)
@@ -316,27 +412,66 @@ def status(config):
@cli.command()
-@click.option('--step',
- type=click.Choice(['index', 'upload', 'publish', 'all', 'fhir']),
- default='all',
- show_default=True,
- help='The step to run '
- )
-@click.option('--transfer-method',
- type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()),
- default='gen3',
- show_default=True,
- help='The upload method.'
- )
-@click.option('--overwrite', is_flag=True, help='(index): Overwrite previously submitted files.')
-@click.option('--wait', default=True, is_flag=True, show_default=True, help="(publish): Wait for metadata completion.")
-@click.option('--dry-run', show_default=True, default=False, is_flag=True, help='Print the commands that would be executed, but do not execute them.')
-@click.option('--re-run', show_default=True, default=False, is_flag=True, help='Re-run the last publish step')
-@click.option('--fhir-server', show_default=True, default=False, is_flag=True, help='Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server')
-@click.option('--debug', is_flag=True)
-@click.option('--skip_validate', is_flag=True, help='Skip validation of the metadata')
+@click.option(
+ "--step",
+ type=click.Choice(["index", "upload", "publish", "all", "fhir"]),
+ default="all",
+ show_default=True,
+ help="The step to run ",
+)
+@click.option(
+ "--transfer-method",
+ type=click.Choice(gen3_tracker.FILE_TRANSFER_METHODS.keys()),
+ default="gen3",
+ show_default=True,
+ help="The upload method.",
+)
+@click.option(
+ "--overwrite", is_flag=True, help="(index): Overwrite previously submitted files."
+)
+@click.option(
+ "--wait",
+ default=True,
+ is_flag=True,
+ show_default=True,
+ help="(publish): Wait for metadata completion.",
+)
+@click.option(
+ "--dry-run",
+ show_default=True,
+ default=False,
+ is_flag=True,
+ help="Print the commands that would be executed, but do not execute them.",
+)
+@click.option(
+ "--re-run",
+ show_default=True,
+ default=False,
+ is_flag=True,
+ help="Re-run the last publish step",
+)
+@click.option(
+ "--fhir-server",
+ show_default=True,
+ default=False,
+ is_flag=True,
+ help="Push data in META directory to FHIR Server. Whatever FHIR data that exists in META dir will be upserted into the fhir server",
+)
+@click.option("--debug", is_flag=True)
+@click.option("--skip_validate", is_flag=True, help="Skip validation of the metadata")
@click.pass_context
-def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wait: bool, dry_run: bool, fhir_server: bool, debug: bool, skip_validate: bool):
+def push(
+ ctx,
+ step: str,
+ transfer_method: str,
+ overwrite: bool,
+ re_run: bool,
+ wait: bool,
+ dry_run: bool,
+ fhir_server: bool,
+ debug: bool,
+ skip_validate: bool,
+):
"""Push changes to the remote repository.
\b
steps:
@@ -362,27 +497,35 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa
raise NotImplementedError("Re-run not implemented")
try:
- with Halo(text='Checking', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Checking", spinner="line", placement="right", color="white"
+ ):
run_command("g3t status")
if not skip_validate:
run_command("g3t meta validate", no_capture=True)
except Exception as e:
- click.secho("Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr)
+ click.secho(
+ "Please correct issues before pushing.", fg=ERROR_COLOR, file=sys.stderr
+ )
click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr)
if config.debug:
raise
exit(1)
- with Halo(text='Scanning', spinner='line', placement='right', color='white'):
+ with Halo(text="Scanning", spinner="line", placement="right", color="white"):
# check git status
branch, uncommitted = git_status()
- assert not uncommitted, "Uncommitted changes found. Please commit or stash them first."
+ assert (
+ not uncommitted
+ ), "Uncommitted changes found. Please commit or stash them first."
# check dvc vs external files
- changes = data_file_changes(pathlib.Path('MANIFEST'))
- assert not changes, f"# There are {len(changes)} data files that you need to update. See `g3t status`"
+ changes = data_file_changes(pathlib.Path("MANIFEST"))
+ assert (
+ not changes
+ ), f"# There are {len(changes)} data files that you need to update. See `g3t status`"
# initialize dvc objects with this project_id
committed_files, dvc_objects = manifest(config.gen3.project_id)
@@ -392,133 +535,232 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa
bucket_name = get_program_bucket(config=config, auth=auth)
# check for new files
- records = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)['records']
- dids = {_['did']: _['updated_date'] for _ in records}
+ records = ls(
+ config, metadata={"project_id": config.gen3.project_id}, auth=auth
+ )["records"]
+ dids = {_["did"]: _["updated_date"] for _ in records}
new_dvc_objects = [_ for _ in dvc_objects if _.object_id not in dids]
- updated_dvc_objects = [_ for _ in dvc_objects if _.object_id in dids and _.out.modified > dids[_.object_id]]
+ updated_dvc_objects = [
+ _
+ for _ in dvc_objects
+ if _.object_id in dids and _.out.modified > dids[_.object_id]
+ ]
if step not in ["publish", "fhir"]:
if not overwrite:
dvc_objects = new_dvc_objects + updated_dvc_objects
- assert dvc_objects, "No new files to index. Use --overwrite to force"
-
- click.secho(f'Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files', fg=INFO_COLOR, file=sys.stderr)
+ assert (
+ dvc_objects
+ ), "No new files to index. Use --overwrite to force"
+
+ click.secho(
+ f"Scanned new: {len(new_dvc_objects)}, updated: {len(updated_dvc_objects)} files",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
if updated_dvc_objects:
- click.secho(f'Found {len(updated_dvc_objects)} updated files. overwriting', fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"Found {len(updated_dvc_objects)} updated files. overwriting",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
overwrite = True
- if step in ['index', 'all']:
+ if step in ["index", "all"]:
# send to index
if dry_run:
- click.secho("Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ "Dry run: not indexing files", fg=INFO_COLOR, file=sys.stderr
+ )
yaml.dump(
{
- 'new': [_.model_dump() for _ in new_dvc_objects],
- 'updated': [_.model_dump() for _ in updated_dvc_objects],
+ "new": [_.model_dump() for _ in new_dvc_objects],
+ "updated": [_.model_dump() for _ in updated_dvc_objects],
},
- sys.stdout
+ sys.stdout,
)
return
for _ in tqdm(
- to_indexd(
- dvc_objects=dvc_objects,
- auth=auth,
- project_id=config.gen3.project_id,
- bucket_name=bucket_name,
- overwrite=overwrite,
- restricted_project_id=None
-
- ),
- desc='Indexing', unit='file', leave=False, total=len(committed_files)):
+ to_indexd(
+ dvc_objects=dvc_objects,
+ auth=auth,
+ project_id=config.gen3.project_id,
+ bucket_name=bucket_name,
+ overwrite=overwrite,
+ restricted_project_id=None,
+ ),
+ desc="Indexing",
+ unit="file",
+ leave=False,
+ total=len(committed_files),
+ ):
pass
- click.secho(f'Indexed {len(committed_files)} files.', fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"Indexed {len(committed_files)} files.", fg=INFO_COLOR, file=sys.stderr
+ )
- if step in ['upload', 'all']:
- click.secho(f'Checking {len(dvc_objects)} files for upload via {transfer_method}', fg=INFO_COLOR, file=sys.stderr)
+ if step in ["upload", "all"]:
+ click.secho(
+ f"Checking {len(dvc_objects)} files for upload via {transfer_method}",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
to_remote(
upload_method=transfer_method,
dvc_objects=dvc_objects,
bucket_name=bucket_name,
profile=config.gen3.profile,
dry_run=config.dry_run,
- work_dir=config.work_dir
+ work_dir=config.work_dir,
)
- if fhir_server or step in ['fhir']:
+ if fhir_server or step in ["fhir"]:
"""Either there exists a Bundle.ndjson file in META signifying a revision to the data, or there is no bundle.json,
- signifying that the data in the META directory should be upserted into gen34"""
- meta_dir = pathlib.Path('META')
+ signifying that the data in the META directory should be upserted into gen34
+ """
+ meta_dir = pathlib.Path("META")
bundle_file = meta_dir / "Bundle.ndjson"
if os.path.isfile(bundle_file):
- with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Sending to FHIR Server",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
with open(bundle_file, "r") as file:
json_string = file.read()
bundle_data = orjson.loads(json_string)
headers = {"Authorization": f"{auth._access_token}"}
- result = requests.delete(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_data, default=_default_json_serializer,
- option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers)
-
- with open("logs/publish.log", 'a') as f:
- log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"}
- click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr)
- f.write(json.dumps(log_msg, separators=(',', ':')))
- f.write('\n')
+ result = requests.delete(
+ url=f"{auth.endpoint}/Bundle",
+ data=orjson.dumps(
+ bundle_data,
+ default=_default_json_serializer,
+ option=orjson.OPT_APPEND_NEWLINE,
+ ).decode(),
+ headers=headers,
+ )
+
+ with open("logs/publish.log", "a") as f:
+ log_msg = {
+ "timestamp": datetime.now(pytz.UTC).isoformat(),
+ "result": f"{result}",
+ }
+ click.secho(
+ "Published project. See logs/publish.log",
+ fg=SUCCESS_COLOR,
+ file=sys.stderr,
+ )
+ f.write(json.dumps(log_msg, separators=(",", ":")))
+ f.write("\n")
return
project_id = config.gen3.project_id
now = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
- bundle = Bundle(type='transaction', timestamp=now)
- bundle.identifier = Identifier(value=project_id, system="https://aced-idp.org/project_id")
+ bundle = Bundle(type="transaction", timestamp=now)
+ bundle.identifier = Identifier(
+ value=project_id, system="https://aced-idp.org/project_id"
+ )
from gen3_tracker import ACED_NAMESPACE
+
bundle.id = str(uuid.uuid5(ACED_NAMESPACE, f"Bundle/{project_id}/{now}"))
bundle.entry = []
for _ in get_data_from_meta():
bundle_entry = BundleEntry()
# See https://build.fhir.org/bundle-definitions.html#Bundle.entry.request.url
- bundle_entry.request = BundleEntryRequest(url=f"{_['resourceType']}/{_['id']}", method='PUT')
+ bundle_entry.request = BundleEntryRequest(
+ url=f"{_['resourceType']}/{_['id']}", method="PUT"
+ )
bundle_entry.resource = _
bundle.entry.append(bundle_entry)
headers = {"Authorization": f"{auth._access_token}"}
bundle_dict = bundle.dict()
- with Halo(text='Sending to FHIR Server', spinner='line', placement='right', color='white'):
- result = requests.put(url=f'{auth.endpoint}/Bundle', data=orjson.dumps(bundle_dict, default=_default_json_serializer,
- option=orjson.OPT_APPEND_NEWLINE).decode(), headers=headers)
-
- with open("logs/publish.log", 'a') as f:
- log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat(), "result": f"{result}"}
- click.secho('Published project. See logs/publish.log', fg=SUCCESS_COLOR, file=sys.stderr)
- f.write(json.dumps(log_msg, separators=(',', ':')))
- f.write('\n')
+ with Halo(
+ text="Sending to FHIR Server",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
+ result = requests.put(
+ url=f"{auth.endpoint}/Bundle",
+ data=orjson.dumps(
+ bundle_dict,
+ default=_default_json_serializer,
+ option=orjson.OPT_APPEND_NEWLINE,
+ ).decode(),
+ headers=headers,
+ )
+
+ with open("logs/publish.log", "a") as f:
+ log_msg = {
+ "timestamp": datetime.now(pytz.UTC).isoformat(),
+ "result": f"{result}",
+ }
+ click.secho(
+ "Published project. See logs/publish.log",
+ fg=SUCCESS_COLOR,
+ file=sys.stderr,
+ )
+ f.write(json.dumps(log_msg, separators=(",", ":")))
+ f.write("\n")
return
- if step in ['publish', 'all'] and not fhir_server:
+ if step in ["publish", "all"] and not fhir_server:
log_path = "logs/publish.log"
- with Halo(text='Uploading snapshot', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Uploading snapshot",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
# push the snapshot of the `.git` sub-directory in the current directory
push_snapshot(config, auth=auth)
- if transfer_method == 'gen3':
+ if transfer_method == "gen3":
try:
# legacy, "old" fhir_import_export use publish_commits to publish the META
- with Halo(text='Publishing', spinner='line', placement='right', color='white') as spinner:
- _ = publish_commits(config, wait=wait, auth=auth, bucket_name=bucket_name, spinner=spinner)
+ with Halo(
+ text="Publishing",
+ spinner="line",
+ placement="right",
+ color="white",
+ ) as spinner:
+ _ = publish_commits(
+ config,
+ wait=wait,
+ auth=auth,
+ bucket_name=bucket_name,
+ spinner=spinner,
+ )
except Exception as e:
- click.secho(f'Unable to publish project. See {log_path} for more info', fg=ERROR_COLOR, file=sys.stderr)
+ click.secho(
+ f"Unable to publish project. See {log_path} for more info",
+ fg=ERROR_COLOR,
+ file=sys.stderr,
+ )
raise e
# print success message and save logs
- with open(log_path, 'a') as f:
- log_msg = {'timestamp': datetime.now(pytz.UTC).isoformat()}
+ with open(log_path, "a") as f:
+ log_msg = {"timestamp": datetime.now(pytz.UTC).isoformat()}
log_msg.update(_)
- f.write(json.dumps(log_msg, separators=(',', ':')))
- f.write('\n')
- click.secho(f'Published project. Logs found at {log_path}', fg=SUCCESS_COLOR, file=sys.stderr)
+ f.write(json.dumps(log_msg, separators=(",", ":")))
+ f.write("\n")
+ click.secho(
+ f"Published project. Logs found at {log_path}",
+ fg=SUCCESS_COLOR,
+ file=sys.stderr,
+ )
else:
- click.secho(f'Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading', fg=ERROR_COLOR, file=sys.stderr)
+ click.secho(
+ f"Auto-publishing not supported for {transfer_method}. Please use --step publish after uploading",
+ fg=ERROR_COLOR,
+ file=sys.stderr,
+ )
except Exception as e:
click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr)
@@ -529,7 +771,7 @@ def push(ctx, step: str, transfer_method: str, overwrite: bool, re_run: bool, wa
def manifest(project_id) -> tuple[list[str], list[DVC]]:
"""Get the committed files and their dvc objects. Initialize dvc objects with this project_id"""
- committed_files = [_ for _ in git_files() if _.endswith('.dvc')]
+ committed_files = [_ for _ in git_files() if _.endswith(".dvc")]
dvc_objects = [_ for _ in dvc_data(committed_files)]
for _ in dvc_objects:
_.project_id = project_id
@@ -537,26 +779,43 @@ def manifest(project_id) -> tuple[list[str], list[DVC]]:
@cli.command()
-@click.option('--remote',
- type=click.Choice(['gen3', 's3', 'ln', 'scp']),
- default='gen3',
- show_default=True,
- help='Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy'
- )
-@click.option('--worker_count', '-w', default=(multiprocessing.cpu_count() - 1), show_default=True,
- type=int,
- help='Number of workers to use.')
-@click.option('--data-only', help='Ignore git snapshot', is_flag=True, default=False, show_default=True)
+@click.option(
+ "--remote",
+ type=click.Choice(["gen3", "s3", "ln", "scp"]),
+ default="gen3",
+ show_default=True,
+ help="Specify the remote storage type. gen3:download, s3:s3 cp, ln: symbolic link, scp: scp copy",
+)
+@click.option(
+ "--worker_count",
+ "-w",
+ default=(multiprocessing.cpu_count() - 1),
+ show_default=True,
+ type=int,
+ help="Number of workers to use.",
+)
+@click.option(
+ "--data-only",
+ help="Ignore git snapshot",
+ is_flag=True,
+ default=False,
+ show_default=True,
+)
@click.pass_obj
def pull(config: Config, remote: str, worker_count: int, data_only: bool):
- """ Fetch from and integrate with a remote repository."""
+ """Fetch from and integrate with a remote repository."""
try:
- with Halo(text='Authorizing', spinner='line', placement='right', color='white'):
+ with Halo(text="Authorizing", spinner="line", placement="right", color="white"):
auth = gen3_tracker.config.ensure_auth(config=config)
if not data_only:
- with Halo(text='Pulling git snapshot', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Pulling git snapshot",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
if not auth:
auth = gen3_tracker.config.ensure_auth(config=config)
snapshot, zip_filepath = download_snapshot(auth, config)
@@ -567,35 +826,52 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool):
# Rename the directory
shutil.move(".git", new_dir_name)
# unzip the snapshot
- with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
- zip_ref.extractall('.')
- click.secho(f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr)
+ with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
+ zip_ref.extractall(".")
+ click.secho(
+ f"Pulled {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr
+ )
manifest_files, dvc_objects = manifest(config.gen3.project_id)
- if remote == 'gen3':
+ if remote == "gen3":
# download the files
- with Halo(text='Pulling from gen3', spinner='line', placement='right', color='white'):
- object_ids = [{'object_id': _.object_id} for _ in dvc_objects] # if not _.out.source_url
- current_time = datetime.now().strftime("%Y%m%d%H%M%S") # Format datetime as you need
- manifest_file = pathlib.Path(config.work_dir) / f'manifest-{current_time}.json'
- with open(manifest_file, 'w') as fp:
+ with Halo(
+ text="Pulling from gen3",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
+ object_ids = [
+ {"object_id": _.object_id} for _ in dvc_objects
+ ] # if not _.out.source_url
+ current_time = datetime.now().strftime(
+ "%Y%m%d%H%M%S"
+ ) # Format datetime as you need
+ manifest_file = (
+ pathlib.Path(config.work_dir) / f"manifest-{current_time}.json"
+ )
+ with open(manifest_file, "w") as fp:
json.dump(object_ids, fp)
- cmd = f'gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}'
+ cmd = f"gen3-client download-multiple --no-prompt --profile {config.gen3.profile} --manifest {manifest_file} --numparallel {worker_count}"
print(cmd)
run_command(cmd, no_capture=True)
- elif remote == 's3':
- with Halo(text='Pulling from s3', spinner='line', placement='right', color='white'):
+ elif remote == "s3":
+ with Halo(
+ text="Pulling from s3", spinner="line", placement="right", color="white"
+ ):
if not auth:
auth = gen3_tracker.config.ensure_auth(config=config)
- results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)
+ results = ls(
+ config, metadata={"project_id": config.gen3.project_id}, auth=auth
+ )
object_ids = [_.object_id for _ in dvc_objects]
- for _ in results['records']:
- if _['did'] in object_ids:
- print('aws s3 cp ', _['urls'][0], _['file_name'])
- elif remote == 'ln':
+ for _ in results["records"]:
+ if _["did"] in object_ids:
+ print("aws s3 cp ", _["urls"][0], _["file_name"])
+ elif remote == "ln":
for _ in dvc_objects:
print(f"ln -s {_.out.realpath} {_.out.path}")
- elif remote == 'scp':
+ elif remote == "scp":
for _ in dvc_objects:
print(f"scp USER@HOST:{_.out.realpath} {_.out.path}")
@@ -609,56 +885,79 @@ def pull(config: Config, remote: str, worker_count: int, data_only: bool):
@cli.command()
-@click.argument('project_id', default=None, required=False, envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID", metavar='PROJECT_ID')
+@click.argument(
+ "project_id",
+ default=None,
+ required=False,
+ envvar=f"{gen3_tracker.ENV_VARIABLE_PREFIX}PROJECT_ID",
+ metavar="PROJECT_ID",
+)
@click.pass_obj
def clone(config, project_id):
"""Clone a repository into a new directory"""
try:
config.gen3.project_id = project_id
- assert not pathlib.Path(project_id).exists(), f"{project_id} already exists. Please remove it first."
+ assert not pathlib.Path(
+ project_id
+ ).exists(), f"{project_id} already exists. Please remove it first."
os.mkdir(project_id)
os.chdir(project_id)
- with Halo(text='Cloning', spinner='line', placement='right', color='white'):
+ with Halo(text="Cloning", spinner="line", placement="right", color="white"):
auth = gen3_tracker.config.ensure_auth(config=config)
snapshot, zip_filepath = download_snapshot(auth, config)
- assert not pathlib.Path('.git').exists(), "A git repository already exists. Please remove it, or move to another directory first."
+ assert not pathlib.Path(
+ ".git"
+ ).exists(), "A git repository already exists. Please remove it, or move to another directory first."
# unzip
- with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
- zip_ref.extractall('.')
+ with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
+ zip_ref.extractall(".")
# if we just unzipped a .git these directories will exist
- expected_dirs = ['.git', 'META', 'MANIFEST']
+ expected_dirs = [".git", "META", "MANIFEST"]
if not all([pathlib.Path(_).exists() for _ in expected_dirs]):
# if not, we have downloaded a legacy SNAPSHOT.zip, so lets migrate the data to the expected drirectories
- click.secho(f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"{expected_dirs} not found after downloading {snapshot['file_name']} processing legacy snapshot",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
# legacy - was this a *SNAPSHOT.zip?
- meta_files = (pathlib.Path('studies') / config.gen3.project)
+ meta_files = pathlib.Path("studies") / config.gen3.project
# legacy - was this a *meta.zip?
if not meta_files.exists():
- meta_files = pathlib.Path('.')
+ meta_files = pathlib.Path(".")
# create local directories and git
[_ for _ in config_init(config, project_id)]
ensure_git_repo(config=config)
# move ndjson from studies to META
- for _ in meta_files.glob('*.ndjson'):
- shutil.move(_, 'META/')
+ for _ in meta_files.glob("*.ndjson"):
+ shutil.move(_, "META/")
# add to git
- run_command('git add META/*.*')
+ run_command("git add META/*.*")
# migrate DocumentReferences to MANIFEST
references = meta_index()
manifest_files = []
- for _ in read_ndjson_file('META/DocumentReference.ndjson'):
+ for _ in read_ndjson_file("META/DocumentReference.ndjson"):
document_reference = DocumentReference.parse_obj(_)
- dvc_object = DVC.from_document_reference(config, document_reference, references)
- manifest_files.append(write_dvc_file(yaml_data=dvc_object.model_dump(), target=dvc_object.out.path))
+ dvc_object = DVC.from_document_reference(
+ config, document_reference, references
+ )
+ manifest_files.append(
+ write_dvc_file(
+ yaml_data=dvc_object.model_dump(),
+ target=dvc_object.out.path,
+ )
+ )
# Get the current time in seconds since the epoch
current_time = time.time()
# Update the access and modification times of the file
- os.utime('META/DocumentReference.ndjson', (current_time, current_time))
+ os.utime("META/DocumentReference.ndjson", (current_time, current_time))
- run_command('git add MANIFEST/')
- run_command('git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore')
+ run_command("git add MANIFEST/")
+ run_command(
+ 'git commit -m "migrated from legacy" MANIFEST/ META/ .gitignore'
+ )
shutil.move(zip_filepath, config.work_dir / zip_filepath.name)
click.secho(f"Cloned {snapshot['file_name']}", fg=INFO_COLOR, file=sys.stderr)
@@ -673,27 +972,30 @@ def clone(config, project_id):
def download_snapshot(auth, config):
"""Download the latest snapshot."""
from gen3_tracker.git.cloner import find_latest_snapshot
+
snapshot = find_latest_snapshot(auth, config)
gen3_file = Gen3File(auth)
- pathlib.Path(snapshot['file_name']).parent.mkdir(exist_ok=True, parents=True)
- ok = gen3_file.download_single(snapshot['did'], '.')
+ pathlib.Path(snapshot["file_name"]).parent.mkdir(exist_ok=True, parents=True)
+ ok = gen3_file.download_single(snapshot["did"], ".")
assert ok, f"Failed to download {snapshot['did']}"
- zip_filepath = pathlib.Path(snapshot['file_name'])
+ zip_filepath = pathlib.Path(snapshot["file_name"])
assert zip_filepath.exists(), f"Failed to download {snapshot['did']}"
return snapshot, zip_filepath
def file_name_or_guid(config, object_id) -> (str, pathlib.Path):
"""Check if the object_id is a file name or a GUID."""
- guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$')
+ guid_pattern = re.compile(
+ r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
+ )
path = None
if not guid_pattern.match(object_id):
if not is_url(object_id):
- path = pathlib.Path('MANIFEST') / (object_id + ".dvc")
+ path = pathlib.Path("MANIFEST") / (object_id + ".dvc")
else:
- path = pathlib.Path('MANIFEST') / (url_path(object_id) + ".dvc")
+ path = pathlib.Path("MANIFEST") / (url_path(object_id) + ".dvc")
if path.exists():
dvc_object = next(iter(dvc_data([str(path)])), None)
@@ -702,20 +1004,33 @@ def file_name_or_guid(config, object_id) -> (str, pathlib.Path):
object_id = dvc_object.object_id
else:
raise ValueError(
- f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID).")
+ f"{object_id} was not found in the MANIFEST and does not appear to be an object identifier (GUID)."
+ )
else:
committed_files, dvc_objects = manifest(config.gen3.project_id)
- dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id]
+ dvc_objects = [
+ dvc_object
+ for dvc_object in dvc_objects
+ if dvc_object.object_id == object_id
+ ]
assert dvc_objects, f"{object_id} not found in MANIFEST."
- path = pathlib.Path('MANIFEST') / (dvc_objects[0].out.path + ".dvc")
+ path = pathlib.Path("MANIFEST") / (dvc_objects[0].out.path + ".dvc")
assert guid_pattern.match(object_id), f"{object_id} was not found in MANIFEST."
return object_id, path
@cli.command("ls")
-@click.option('--long', '-l', 'long_flag', default=False, is_flag=True, help='Long listing format.', show_default=True)
-@click.argument('target', default=None, required=False)
+@click.option(
+ "--long",
+ "-l",
+ "long_flag",
+ default=False,
+ is_flag=True,
+ help="Long listing format.",
+ show_default=True,
+)
+@click.argument("target", default=None, required=False)
@click.pass_obj
def ls_cli(config: Config, long_flag: bool, target: str):
"""List files in the repository.
@@ -724,10 +1039,14 @@ def ls_cli(config: Config, long_flag: bool, target: str):
"""
try:
- with Halo(text='Pulling file list', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Pulling file list", spinner="line", placement="right", color="white"
+ ):
auth = gen3_tracker.config.ensure_auth(config=config)
- results = ls(config, metadata={'project_id': config.gen3.project_id}, auth=auth)
- indexd_records = results['records']
+ results = ls(
+ config, metadata={"project_id": config.gen3.project_id}, auth=auth
+ )
+ indexd_records = results["records"]
committed_files, dvc_objects = manifest(config.gen3.project_id)
# list all data files
dvc_objects = {_.object_id: _ for _ in dvc_objects}
@@ -742,44 +1061,49 @@ def _dvc_meta(dvc_object, full=False) -> dict:
_[k] = v
else:
_ = dvc_object.model_dump(exclude_none=True)
- _['object_id'] = dvc_object.object_id
+ _["object_id"] = dvc_object.object_id
return _
if not long_flag:
indexd_records = [
{
- 'did': _['did'],
- 'file_name': _['file_name'],
- 'indexd_created_date': _['created_date'],
- 'meta': _dvc_meta(dvc_objects.get(_['did'], None)),
- 'urls': _['urls']
- } for _ in indexd_records
+ "did": _["did"],
+ "file_name": _["file_name"],
+ "indexd_created_date": _["created_date"],
+ "meta": _dvc_meta(dvc_objects.get(_["did"], None)),
+ "urls": _["urls"],
+ }
+ for _ in indexd_records
]
- bucket_ids = {_['did'] for _ in indexd_records}
+ bucket_ids = {_["did"] for _ in indexd_records}
- uncommitted = pathlib.Path('MANIFEST').glob('**/*.dvc')
+ uncommitted = pathlib.Path("MANIFEST").glob("**/*.dvc")
uncommitted = [str(_) for _ in uncommitted]
uncommitted = [str(_) for _ in uncommitted if _ not in committed_files]
uncommitted = [_.model_dump(exclude_none=True) for _ in dvc_data(uncommitted)]
_ = {
- 'bucket': indexd_records,
- 'committed': [_dvc_meta(v, full=True) for k, v in dvc_objects.items() if k not in bucket_ids],
- 'uncommitted': uncommitted
+ "bucket": indexd_records,
+ "committed": [
+ _dvc_meta(v, full=True)
+ for k, v in dvc_objects.items()
+ if k not in bucket_ids
+ ],
+ "uncommitted": uncommitted,
}
if target:
# Escape special characters and replace wildcard '*' with '.*' for regex pattern
pattern = re.escape(target).replace("\\*", ".*")
filtered = {
- 'bucket': filter_dicts(_.get('bucket', []), pattern),
- 'committed': filter_dicts(_.get('committed', []), pattern),
- 'uncommitted': filter_dicts(_.get('uncommitted', []), pattern)
+ "bucket": filter_dicts(_.get("bucket", []), pattern),
+ "committed": filter_dicts(_.get("committed", []), pattern),
+ "uncommitted": filter_dicts(_.get("uncommitted", []), pattern),
}
_ = filtered
- if config.output.format == 'json':
+ if config.output.format == "json":
print(json.dumps(_, indent=2))
else:
yaml.dump(_, sys.stdout, default_flow_style=False)
@@ -791,7 +1115,7 @@ def _dvc_meta(dvc_object, full=False) -> dict:
@cli.command()
-@click.argument('object_id', metavar='')
+@click.argument("object_id", metavar="")
@click.pass_obj
def rm(config: Config, object_id: str):
"""Remove a single file from the server index, and MANIFEST. Does not alter META.
@@ -800,29 +1124,50 @@ def rm(config: Config, object_id: str):
"""
try:
- with Halo(text='Searching', spinner='line', placement='right', color='white'):
+ with Halo(text="Searching", spinner="line", placement="right", color="white"):
object_id, path = file_name_or_guid(config, object_id)
- with Halo(text='Deleting from server', spinner='line', placement='right', color='white'):
+ with Halo(
+ text="Deleting from server",
+ spinner="line",
+ placement="right",
+ color="white",
+ ):
auth = gen3_tracker.config.ensure_auth(config=config)
index = Gen3Index(auth)
result = index.delete_record(object_id)
if not result:
if not path:
- path = ''
- click.secho(f"Failed to delete {object_id} from server. {path}", fg=ERROR_COLOR, file=sys.stderr)
+ path = ""
+ click.secho(
+ f"Failed to delete {object_id} from server. {path}",
+ fg=ERROR_COLOR,
+ file=sys.stderr,
+ )
else:
- click.secho(f"Deleted {object_id} from server. {path}", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"Deleted {object_id} from server. {path}",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
- with Halo(text='Scanning', spinner='line', placement='right', color='white'):
+ with Halo(text="Scanning", spinner="line", placement="right", color="white"):
committed_files, dvc_objects = manifest(config.gen3.project_id)
- dvc_objects = [dvc_object for dvc_object in dvc_objects if dvc_object.object_id == object_id]
+ dvc_objects = [
+ dvc_object
+ for dvc_object in dvc_objects
+ if dvc_object.object_id == object_id
+ ]
assert dvc_objects, f"{object_id} not found in MANIFEST."
dvc_object = dvc_objects[0]
- path = pathlib.Path('MANIFEST') / (dvc_object.out.path + ".dvc")
+ path = pathlib.Path("MANIFEST") / (dvc_object.out.path + ".dvc")
assert path.exists(), f"{path} not found"
path.unlink()
- click.secho(f"Deleted {path} from MANIFEST. Please adjust META resources", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"Deleted {path} from MANIFEST. Please adjust META resources",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
except Exception as e:
click.secho(str(e), fg=ERROR_COLOR, file=sys.stderr)
@@ -858,10 +1203,10 @@ def ping(config: Config):
msgs.append(str(e))
ok = False
except Gen3AuthError as e:
- msg = str(e).split(':')[0]
+ msg = str(e).split(":")[0]
msgs.append(msg)
msg2 = str(e).split('')[-1]
-        msg2 = msg2.split('<br/>')[0]
+        msg2 = msg2.split("<br/>")[0]
msgs.append(msg2)
ok = False
@@ -871,34 +1216,42 @@ def ping(config: Config):
_ = "Configuration ERROR: "
output.exit_code = 1
- _ = {'msg': _ + ', '.join(msgs)}
+ _ = {"msg": _ + ", ".join(msgs)}
if auth:
- _['endpoint'] = auth.endpoint
- user_info = auth.curl('/user/user').json()
- _['username'] = user_info['username']
+ _["endpoint"] = auth.endpoint
+ user_info = auth.curl("/user/user").json()
+ _["username"] = user_info["username"]
buckets = get_buckets(config=config)
bucket_info = {}
program_info = defaultdict(list)
- for k, v in buckets['S3_BUCKETS'].items():
+ for k, v in buckets["S3_BUCKETS"].items():
bucket_info[k] = {}
- if 'programs' not in v:
+ if "programs" not in v:
bucket_info[k] = "No `programs` found"
- click.secho(f"WARNING: No `programs` found for bucket {k}", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"WARNING: No `programs` found for bucket {k}",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
continue
- bucket_info[k] = ",".join(v['programs'])
- for program in v['programs']:
+ bucket_info[k] = ",".join(v["programs"])
+ for program in v["programs"]:
program_info[program].append(k)
- _['bucket_programs'] = bucket_info
+ _["bucket_programs"] = bucket_info
for k, v in program_info.items():
if len(v) > 1:
- click.secho(f"WARNING: {k} is in multiple buckets: {', '.join(v)}", fg=INFO_COLOR, file=sys.stderr)
+ click.secho(
+ f"WARNING: {k} is in multiple buckets: {', '.join(v)}",
+ fg=INFO_COLOR,
+ file=sys.stderr,
+ )
- assert 'authz' in user_info, "No authz found"
+ assert "authz" in user_info, "No authz found"
authz_info = defaultdict(dict)
- for k, v in user_info['authz'].items():
- authz_info[k] = ",".join(set([_['method'] for _ in v]))
- _['your_access'] = dict(authz_info)
+ for k, v in user_info["authz"].items():
+ authz_info[k] = ",".join(set([_["method"] for _ in v]))
+ _["your_access"] = dict(authz_info)
output.update(_)
diff --git a/gen3_tracker/meta/entities.py b/gen3_tracker/meta/entities.py
index 888cf903..c6beda14 100644
--- a/gen3_tracker/meta/entities.py
+++ b/gen3_tracker/meta/entities.py
@@ -246,7 +246,6 @@ def scalars(self) -> dict:
if (not isinstance(v, list) and not isinstance(v, dict))
}
-
@computed_field
@property
def codings(self) -> dict:
@@ -261,8 +260,12 @@ def codings(self) -> dict:
if isinstance(elem, dict):
# TODO: implement hierarchy of codes rather than just taking last code?
for value, source in normalize_coding(elem):
- if len(v) > 1 and get_nested_value(elem, [source, 0, 'system']):
- _codings[elem[source][0]["system"].split("/")[-1]] = value
+ if len(v) > 1 and get_nested_value(
+ elem, [source, 0, "system"]
+ ):
+ _codings[elem[source][0]["system"].split("/")[-1]] = (
+ value
+ )
else:
_codings[k] = value
elif isinstance(v, dict):
@@ -281,10 +284,15 @@ def identifiers(self) -> dict:
if not identifiers_len:
return {"identifier": None}
elif identifiers_len == 1:
- return {"identifier": identifiers[0].get('value')}
+ return {"identifier": identifiers[0].get("value")}
else:
- base_identifier = {"identifier": identifiers[0].get('value')}
- base_identifier.update({identifier.get("system").split("/")[-1]: identifier.get("value") for identifier in identifiers[1:]})
+ base_identifier = {"identifier": identifiers[0].get("value")}
+ base_identifier.update(
+ {
+ identifier.get("system").split("/")[-1]: identifier.get("value")
+ for identifier in identifiers[1:]
+ }
+ )
return base_identifier
@computed_field
@@ -375,7 +383,6 @@ def values(self) -> dict:
if "code" in self.resource and "text" in self.resource["code"]:
_values["observation_code"] = self.resource["code"]["text"]
-
assert len(_values) > 0, f"no values found in Observation: {self.resource}"
return _values
diff --git a/tests/__init__.py b/tests/__init__.py
index 204633dd..7ff71bef 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -5,7 +5,13 @@
from gen3_tracker.cli import cli
-def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result:
+def run(
+ runner: CliRunner,
+ args: list[str],
+ expected_output: list[str] = [],
+ expected_exit_code: int = 0,
+ expected_files: list[pathlib.Path] = [],
+) -> Result:
"""Run a command and check the output, exit code and expected files."""
if isinstance(args, str):
args = args.split()
@@ -15,16 +21,20 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp
expected_files = [expected_files]
expected_files = [pathlib.Path(_) for _ in expected_files]
- print('------------------------------------------------------------')
+ print("------------------------------------------------------------")
print("g3t " + " ".join(args))
result = runner.invoke(cli, args)
print("result.stdout", result.stdout)
print("result.output", result.output)
print("result.exception", result.exception)
print("CWD", pathlib.Path.cwd())
- assert result.exit_code == expected_exit_code, f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}"
+ assert (
+ result.exit_code == expected_exit_code
+ ), f"g3t {' '.join(args)} exit_code: {result.exit_code}, expected: {expected_exit_code}"
for line in expected_output:
- assert line in result.output, f"output: {result.output}, expected: {expected_output}"
+ assert (
+ line in result.output
+ ), f"output: {result.output}, expected: {expected_output}"
print(f"{line} found in output.")
for file in expected_files:
assert file.exists(), f"{file} does not exist."
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
index fac2d49a..68c1c087 100644
--- a/tests/integration/__init__.py
+++ b/tests/integration/__init__.py
@@ -7,7 +7,13 @@
from gen3.query import Gen3Query
-def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], expected_exit_code: int = 0, expected_files: list[pathlib.Path] = []) -> Result:
+def run(
+ runner: CliRunner,
+ args: list[str],
+ expected_output: list[str] = [],
+ expected_exit_code: int = 0,
+ expected_files: list[pathlib.Path] = [],
+) -> Result:
"""Run a command and check the output, exit code and expected files."""
if isinstance(args, str):
args = args.split()
@@ -17,13 +23,17 @@ def run(runner: CliRunner, args: list[str], expected_output: list[str] = [], exp
expected_files = [expected_files]
expected_files = [pathlib.Path(_) for _ in expected_files]
- print('------------------------------------------------------------')
+ print("------------------------------------------------------------")
print("g3t " + " ".join(args))
result = runner.invoke(cli, args)
print(result.stdout)
- assert result.exit_code == expected_exit_code, f"exit_code: {result.exit_code}, expected: {expected_exit_code}"
+ assert (
+ result.exit_code == expected_exit_code
+ ), f"exit_code: {result.exit_code}, expected: {expected_exit_code}"
for line in expected_output:
- assert line in result.output, f"output: {result.output}, expected: {expected_output}"
+ assert (
+ line in result.output
+ ), f"output: {result.output}, expected: {expected_output}"
print(f"{line} found in output.")
for file in expected_files:
assert file.exists(), f"{file} does not exist."
@@ -37,11 +47,12 @@ def validate_document_in_grip(did: str, auth=None, project_id=None):
if not auth:
auth = ensure_auth(config=default())
token = auth.get_access_token()
- result = requests.get(f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}",
- headers={"Authorization": f"bearer {token}"}
- ).json()
- assert 'data' in result, f"Failed to query grip for {did} {result}"
- assert result['data']['gid'] == did
+ result = requests.get(
+ f"{auth.endpoint}/grip/writer/graphql/CALIPER/get-vertex/{did}/{project_id}",
+ headers={"Authorization": f"bearer {token}"},
+ ).json()
+ assert "data" in result, f"Failed to query grip for {did} {result}"
+ assert result["data"]["gid"] == did
def validate_document_in_elastic(did, auth):
@@ -55,7 +66,7 @@ def validate_document_in_elastic(did, auth):
}
}
""",
- variables={"filter": {"AND": [{"IN": {"id": [did]}}]}}
+ variables={"filter": {"AND": [{"IN": {"id": [did]}}]}},
)
print(result)
- assert result['data']['file'][0]['id'] == did
+ assert result["data"]["file"][0]["id"] == did
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index b171120e..8de22443 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -16,7 +16,7 @@ def program() -> str:
@pytest.fixture
def project() -> str:
- project = uuid.uuid4().hex.replace('-', '_')
+ project = uuid.uuid4().hex.replace("-", "_")
return project
diff --git a/tests/integration/test_bucket_import.py b/tests/integration/test_bucket_import.py
index 5edc31d3..9a0959cc 100644
--- a/tests/integration/test_bucket_import.py
+++ b/tests/integration/test_bucket_import.py
@@ -42,8 +42,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None:
print(project_id)
- run(runner, ["--debug", "init", project_id, "--approve", "--no-server"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "init", project_id, "--approve", "--no-server"],
+ expected_files=[".g3t", ".git"],
+ )
for _ in SHOULD_SUCCEED:
run(runner, _.split())
@@ -60,11 +63,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None:
result = run(runner, ["--debug", "--format", "json", "ls"])
listing = json.loads(result.stdout)
- for _ in ['bucket', 'committed', 'uncommitted']:
+ for _ in ["bucket", "committed", "uncommitted"]:
assert _ in listing
# files should appear in uncommitted
- assert len(listing['uncommitted']) == len(SHOULD_SUCCEED)
+ assert len(listing["uncommitted"]) == len(SHOULD_SUCCEED)
# commit the changes
run(runner, ["--debug", "commit", "-am", "initial commit"])
@@ -72,11 +75,11 @@ def test_bucket_import(runner: CliRunner, project_id, tmpdir) -> None:
# test the ls command, should now be in committed
result = run(runner, ["--debug", "--format", "json", "ls"])
listing = json.loads(result.stdout)
- assert len(listing['committed']) == len(SHOULD_SUCCEED)
+ assert len(listing["committed"]) == len(SHOULD_SUCCEED)
# test the ls filter
for _ in EXPECTED_MANIFEST_PATHS:
- bucket_name = _.split('/')[1]
+ bucket_name = _.split("/")[1]
result = run(runner, ["--debug", "--format", "json", "ls", bucket_name])
listing = json.loads(result.stdout)
- assert len(listing['committed']) == 1
+ assert len(listing["committed"]) == 1
diff --git a/tests/integration/test_bundle.py b/tests/integration/test_bundle.py
index f98d0e8c..d27d99ad 100644
--- a/tests/integration/test_bundle.py
+++ b/tests/integration/test_bundle.py
@@ -11,21 +11,21 @@
CHANGE_PATIENT = [
"--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1",
"--debug meta init",
- "--debug commit -am \"initial commit\"",
+ '--debug commit -am "initial commit"',
"--debug add s3://s3-bucket/p1-object.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1-prime",
"--debug meta init",
- "--debug commit -am \"prime commit\"",
+ '--debug commit -am "prime commit"',
]
# user made a mistake and added the wrong file
CHANGE_FILE = [
"--debug add s3://s3-bucket/p1-object-mistake.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1",
"--debug meta init",
- "--debug commit -am \"initial commit\"",
+ '--debug commit -am "initial commit"',
"--debug rm s3://s3-bucket/p1-object-mistake.txt",
"--debug add s3://s3-bucket/p1-object-correct.txt --size 1 --modified 2024-05-05T07:26:29-0700 --md5 acbd18db4cc2f85cedef654fccc4a4d8 --patient P1",
"--debug meta init",
- "--debug commit -am \"prime commit\"",
+ '--debug commit -am "prime commit"',
]
@@ -37,8 +37,11 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None:
print(project_id)
- run(runner, ["--debug", "init", project_id, "--approve", "--no-server"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "init", project_id, "--approve", "--no-server"],
+ expected_files=[".g3t", ".git"],
+ )
for _ in CHANGE_PATIENT:
run(runner, _.split())
@@ -54,8 +57,10 @@ def test_change_patient(runner: CliRunner, project_id, tmpdir) -> None:
assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected."
urls = [_.request.url for _ in bundle.entry]
- assert any([_.startswith('Patient') for _ in urls]), "Expected to delete a Patient."
- assert any([_.startswith('ResearchSubject') for _ in urls]), "Expected to delete a ResearchSubject."
+ assert any([_.startswith("Patient") for _ in urls]), "Expected to delete a Patient."
+ assert any(
+ [_.startswith("ResearchSubject") for _ in urls]
+ ), "Expected to delete a ResearchSubject."
def test_change_file(runner: CliRunner, project_id, tmpdir) -> None:
@@ -66,8 +71,11 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None:
print(project_id)
- run(runner, ["--debug", "init", project_id, "--approve", "--no-server"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "init", project_id, "--approve", "--no-server"],
+ expected_files=[".g3t", ".git"],
+ )
for _ in CHANGE_FILE:
run(runner, _.split())
@@ -83,4 +91,6 @@ def test_change_file(runner: CliRunner, project_id, tmpdir) -> None:
assert all([_ == "DELETE" for _ in methods]), "Only DELETE method is expected."
urls = [_.request.url for _ in bundle.entry]
- assert any([_.startswith('DocumentReference') for _ in urls]), "Expected to delete a DocumentReference."
+ assert any(
+ [_.startswith("DocumentReference") for _ in urls]
+ ), "Expected to delete a DocumentReference."
diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py
index 86b03fb0..6237edab 100644
--- a/tests/integration/test_end_to_end_workflow.py
+++ b/tests/integration/test_end_to_end_workflow.py
@@ -17,23 +17,36 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
assert tmpdir.chdir()
print(Path.cwd())
- assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set."
+ assert os.environ.get(
+ "G3T_PROFILE"
+ ), "G3T_PROFILE environment variable must be set."
print(project_id)
- run(runner, ["--debug", "init", project_id, "--approve"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "init", project_id, "--approve"],
+ expected_files=[".g3t", ".git"],
+ )
# check ping
- run(runner, ["--debug", "ping"], expected_output=["bucket_programs", "your_access", "endpoint", "username"])
+ run(
+ runner,
+ ["--debug", "ping"],
+ expected_output=["bucket_programs", "your_access", "endpoint", "username"],
+ )
# create a test file
test_file = Path("my-project-data/hello.txt")
test_file.parent.mkdir(parents=True, exist_ok=True)
- test_file.write_text('hello\n')
+ test_file.write_text("hello\n")
# add the file
- run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
+ run(
+ runner,
+ ["--debug", "add", str(test_file)],
+ expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+ )
# should create a dvc file
dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc")
@@ -49,7 +62,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
object_id = dvc.object_id
# create the meta file
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"])
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=["META/DocumentReference.ndjson"],
+ )
# commit the changes, delegating to git
run(runner, ["--debug", "commit", "-am", "initial commit"])
@@ -60,10 +77,18 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
# update the file
test_file = Path("my-project-data/hello.txt")
test_file.parent.mkdir(parents=True, exist_ok=True)
- test_file.write_text('hello UPDATE\n')
+ test_file.write_text("hello UPDATE\n")
# re-add the file
- run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"])
+ run(
+ runner,
+ ["--debug", "add", str(test_file)],
+ expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+ )
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=["META/DocumentReference.ndjson"],
+ )
run(runner, ["--debug", "commit", "-am", "updated"])
run(runner, ["--debug", "meta", "validate"])
@@ -71,7 +96,11 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
run(runner, ["--debug", "meta", "graph"], expected_files=["meta.html"])
# create a dataframe
- run(runner, ["--debug", "meta", "dataframe", 'DocumentReference'], expected_files=["DocumentReference.csv"])
+ run(
+ runner,
+ ["--debug", "meta", "dataframe", "DocumentReference"],
+ expected_files=["DocumentReference.csv"],
+ )
# push to the server
run(runner, ["--debug", "push"])
@@ -103,12 +132,25 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
# check the files exist in the cloned directory
run_command("ls -l")
- assert Path("my-project-data/hello.txt").exists(), "hello.txt does not exist in the cloned directory."
+ assert Path(
+ "my-project-data/hello.txt"
+ ).exists(), "hello.txt does not exist in the cloned directory."
# remove the project from the server.
# TODO note, this does not remove the files from the bucket (UChicago bug)
# See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229
- run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"])
+ run(
+ runner,
+ [
+ "--debug",
+ "projects",
+ "empty",
+ "--project_id",
+ project_id,
+ "--confirm",
+ "empty",
+ ],
+ )
# TODO fix `collaborator rm`
# arborist logs: "Policy `data_upload` does not exist for user `xxx@xxx.xxx`: not revoking. Check if it is assigned through a group."
@@ -116,13 +158,27 @@ def test_simple_workflow(runner: CliRunner, project_id, tmpdir) -> None:
# run(runner, ["--debug", "collaborator", "rm", username, "--approve"], expected_output=[username])
# add a user with write permissions
- run(runner, ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"])
+ run(
+ runner,
+ ["--debug", "collaborator", "add", "foo@bar.com", "--write", "--approve"],
+ )
# add a user from another directory (without config)
os.mkdir("empty")
os.chdir("empty")
program, project = project_id.split("-")
- run(runner, ["--debug", "collaborator", "add", "foo2@bar.com", f"/programs/{program}/projects/{project}", "--write", "--approve"])
+ run(
+ runner,
+ [
+ "--debug",
+ "collaborator",
+ "add",
+ "foo2@bar.com",
+ f"/programs/{program}/projects/{project}",
+ "--write",
+ "--approve",
+ ],
+ )
def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> None:
@@ -131,20 +187,29 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N
assert tmpdir.chdir()
print(Path.cwd())
- assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set."
+ assert os.environ.get(
+ "G3T_PROFILE"
+ ), "G3T_PROFILE environment variable must be set."
print(project_id)
- run(runner, ["--debug", "init", project_id, "--approve"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "init", project_id, "--approve"],
+ expected_files=[".g3t", ".git"],
+ )
# create a test file
test_file = Path("my-project-data/hello.txt")
test_file.parent.mkdir(parents=True, exist_ok=True)
- test_file.write_text('hello\n')
+ test_file.write_text("hello\n")
# add the file
- run(runner, ["--debug", "add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
+ run(
+ runner,
+ ["--debug", "add", str(test_file)],
+ expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+ )
# should create a dvc file
dvc_path = Path("MANIFEST/my-project-data/hello.txt.dvc")
@@ -160,7 +225,11 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N
object_id = dvc.object_id
# create the meta file
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson"])
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=["META/DocumentReference.ndjson"],
+ )
# commit the changes, delegating to git
run(runner, ["--debug", "commit", "-am", "initial commit"])
@@ -184,24 +253,44 @@ def test_simple_fhir_server_workflow(runner: CliRunner, project_id, tmpdir) -> N
# remove the project from the server.
# TODO note, this does not remove the files from the bucket (UChicago bug)
# See https://ohsucomputationalbio.slack.com/archives/C043HPV0VMY/p1714065633867229
- run(runner, ["--debug", "projects", "empty", "--project_id", project_id, "--confirm", "empty"])
-
-
-def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, project_id: str, tmp_path: Path):
+ run(
+ runner,
+ [
+ "--debug",
+ "projects",
+ "empty",
+ "--project_id",
+ project_id,
+ "--confirm",
+ "empty",
+ ],
+ )
+
+
+def test_push_fails_with_invalid_doc_ref_creation_date(
+ runner: CliRunner, project_id: str, tmp_path: Path
+):
# check
- assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set."
+ assert os.environ.get(
+ "G3T_PROFILE"
+ ), "G3T_PROFILE environment variable must be set."
# copy fixture to temp test dir
project_dir = "fhir-gdc-examples"
fixtures_path = Path(os.path.dirname(__file__)).parent / "fixtures"
fhir_gdc_dir = fixtures_path / project_dir
- modified_doc_ref_path = fixtures_path / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson"
+ modified_doc_ref_path = (
+ fixtures_path
+ / "negative-examples/fhir-gdc-DocumentReference-invalid-date.ndjson"
+ )
# init project
new_project_dir = tmp_path / project_dir
shutil.copytree(fhir_gdc_dir, new_project_dir)
- shutil.copy(modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson" )
+ shutil.copy(
+ modified_doc_ref_path, new_project_dir / "META" / "DocumentReference.ndjson"
+ )
# get invalid date from fixture
doc_ref_content = pd.read_json(modified_doc_ref_path, lines=True)["content"][0]
@@ -211,14 +300,17 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec
log_file_path = "logs/publish.log"
os.chdir(new_project_dir)
run(runner, ["init", project_id, "--approve"])
- result = run(runner,
- ["push", "--skip_validate", "--overwrite"],
- expected_exit_code=1,
- expected_files=[log_file_path]
- )
+ result = run(
+ runner,
+ ["push", "--skip_validate", "--overwrite"],
+ expected_exit_code=1,
+ expected_files=[log_file_path],
+ )
    # ensure push has useful error logs
- assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}"
+ assert (
+ log_file_path in result.output
+ ), f"expected log file path in stdout, instead got:\n{result.output}"
# ensure saved log file contains info about invalid date
with open(log_file_path, "r") as log_file:
@@ -226,40 +318,53 @@ def test_push_fails_with_invalid_doc_ref_creation_date(runner: CliRunner, projec
str_lines = str(lines)
for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
- assert keyword in str_lines, f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}'
+ assert (
+ keyword in str_lines
+ ), f'expected log file to contain keyword "{keyword}", instead got: \n{str_lines}'
-def test_push_fails_with_no_write_permissions(runner: CliRunner, project_id: str, tmp_path: Path):
+def test_push_fails_with_no_write_permissions(
+ runner: CliRunner, project_id: str, tmp_path: Path
+):
# setup
- assert os.environ.get("G3T_PROFILE"), "G3T_PROFILE environment variable must be set."
+ assert os.environ.get(
+ "G3T_PROFILE"
+ ), "G3T_PROFILE environment variable must be set."
os.chdir(tmp_path)
# initialize project without approving permissions
log_file_path = "logs/publish.log"
- run(runner, [ "init", project_id],
- expected_files=[".g3t", ".git"])
+ run(runner, ["init", project_id], expected_files=[".g3t", ".git"])
# create test file
test_file = Path("my-project-data/hello.txt")
test_file.parent.mkdir(parents=True, exist_ok=True)
- test_file.write_text('hello\n')
+ test_file.write_text("hello\n")
# prepare test file for submission
- run(runner, ["add", str(test_file)], expected_files=["MANIFEST/my-project-data/hello.txt.dvc"])
+ run(
+ runner,
+ ["add", str(test_file)],
+ expected_files=["MANIFEST/my-project-data/hello.txt.dvc"],
+ )
run(runner, ["meta", "init"], expected_files=["META/DocumentReference.ndjson"])
- print("current directory:",os.getcwd())
+ print("current directory:", os.getcwd())
run(runner, ["commit", "-m", "initial commit"])
# push
result = run(runner, ["push"], expected_exit_code=1, expected_files=[log_file_path])
# ensure stdout mentions log files
- assert log_file_path in result.output, f"expected log file path in stdout, instead got:\n{result.output}"
+ assert (
+ log_file_path in result.output
+ ), f"expected log file path in stdout, instead got:\n{result.output}"
    # check for valid error messages within the log file
with open(log_file_path, "r") as log_file:
# grab last line
- line = [l for l in log_file.readlines()][-1]
+ line = [_ for _ in log_file.readlines()][-1]
for output in ["401", "permission"]:
- assert "401" in line, f"expected {log_file_path} to contain {output}, instead got: \n{line}"
+ assert (
+                output in line
+ ), f"expected {log_file_path} to contain {output}, instead got: \n{line}"
diff --git a/tests/unit/meta/conftest.py b/tests/unit/meta/conftest.py
index b171120e..8de22443 100644
--- a/tests/unit/meta/conftest.py
+++ b/tests/unit/meta/conftest.py
@@ -16,7 +16,7 @@ def program() -> str:
@pytest.fixture
def project() -> str:
- project = uuid.uuid4().hex.replace('-', '_')
+ project = uuid.uuid4().hex.replace("-", "_")
return project
diff --git a/tests/unit/meta/test_meta.py b/tests/unit/meta/test_meta.py
index 818ba083..d602a5c6 100644
--- a/tests/unit/meta/test_meta.py
+++ b/tests/unit/meta/test_meta.py
@@ -10,15 +10,20 @@
from tests import run
-def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None:
+def test_assert_object_id_invalid_on_project_id_change(
+ runner: CliRunner, project_id, tmp_path: pathlib.Path
+) -> None:
"""Test object_id validation command."""
# change to the temporary directory
os.chdir(tmp_path)
print(pathlib.Path.cwd())
print(project_id)
- run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "--profile", "local", "init", project_id, "--no-server"],
+ expected_files=[".g3t", ".git"],
+ )
# create test files
cmds = """
@@ -27,42 +32,67 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec
echo "hello" > my-project-data/hello.txt
echo "big-data" > my-read-only-data/big-file.txt
ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt
- """.split('\n')
+ """.split(
+ "\n"
+ )
for cmd in cmds:
run_command(cmd, no_capture=True)
- assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist."
- assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist."
- assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist."
+ assert pathlib.Path(
+ "my-project-data/hello.txt"
+ ).exists(), "hello.txt does not exist."
+ assert pathlib.Path(
+ "my-read-only-data/big-file.txt"
+ ).exists(), "my-read-only-data/big-file.txt does not exist."
+ assert pathlib.Path(
+ "my-project-data/big-file.txt"
+ ).exists(), "my-project-data/big-file.txt does not exist."
files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"]
patients = ["P1", "P2"]
for f, p in zip(files, patients):
- run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"])
-
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"])
+ run(
+ runner,
+ ["--debug", "add", str(f), "--patient", p],
+ expected_files=[f"MANIFEST/{f}.dvc"],
+ )
+
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=[
+ "META/DocumentReference.ndjson",
+ "META/Patient.ndjson",
+ "META/ResearchStudy.ndjson",
+ "META/ResearchSubject.ndjson",
+ ],
+ )
run(runner, ["--debug", "meta", "validate"])
- run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"])
+ run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"])
# now change the project_id to something new
# this should cause invalid object_id errors
config = gen3_tracker.config.default()
config.gen3.project_id = config.gen3.project_id + "XXXX"
- with open('.g3t/config.yaml', 'w') as f:
+ with open(".g3t/config.yaml", "w") as f:
yaml.dump(config.model_dump(), f)
- run(runner, ["commit", "-m", "change-project_id", '.g3t/config.yaml'])
+ run(runner, ["commit", "-m", "change-project_id", ".g3t/config.yaml"])
# should error now
run(runner, ["--debug", "meta", "validate"], expected_exit_code=1)
run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=1)
# also check skip_validate
- run(runner, ["--debug", "push", "--dry-run", "--skip_validate"], expected_exit_code=0)
+ run(
+ runner,
+ ["--debug", "push", "--dry-run", "--skip_validate"],
+ expected_exit_code=0,
+ )
# should pass now
config.gen3.project_id = config.gen3.project_id.replace("XXXX", "")
- with open('.g3t/config.yaml', 'w') as f:
+ with open(".g3t/config.yaml", "w") as f:
yaml.dump(config.model_dump(), f)
- run(runner, ["commit", "-m", "restore-project_id", '.g3t/config.yaml'])
+ run(runner, ["commit", "-m", "restore-project_id", ".g3t/config.yaml"])
# ensure we can validate without passing project id
results = validate(directory_path="META")
@@ -72,7 +102,9 @@ def test_assert_object_id_invalid_on_project_id_change(runner: CliRunner, projec
run(runner, ["--debug", "push", "--dry-run"], expected_exit_code=0)
-def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path: pathlib.Path) -> None:
+def test_assert_add_specimen_after_init(
+ runner: CliRunner, project_id, tmp_path: pathlib.Path
+) -> None:
"""Test meta skeleton handles re-add of data with new specimen"""
# change to the temporary directory
os.chdir(tmp_path)
@@ -80,8 +112,11 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path:
print(project_id)
# init the project, no server
- run(runner, ["--debug", "--profile", "local", "init", project_id, "--no-server"],
- expected_files=[".g3t", ".git"])
+ run(
+ runner,
+ ["--debug", "--profile", "local", "init", project_id, "--no-server"],
+ expected_files=[".g3t", ".git"],
+ )
# create test files
cmds = """
@@ -90,34 +125,73 @@ def test_assert_add_specimen_after_init(runner: CliRunner, project_id, tmp_path:
echo "hello" > my-project-data/hello.txt
echo "big-data" > my-read-only-data/big-file.txt
ln -s $PWD/my-read-only-data/big-file.txt my-project-data/big-file.txt
- """.split('\n')
+ """.split(
+ "\n"
+ )
for cmd in cmds:
run_command(cmd, no_capture=True)
- assert pathlib.Path("my-project-data/hello.txt").exists(), "hello.txt does not exist."
- assert pathlib.Path("my-read-only-data/big-file.txt").exists(), "my-read-only-data/big-file.txt does not exist."
- assert pathlib.Path("my-project-data/big-file.txt").exists(), "my-project-data/big-file.txt does not exist."
+ assert pathlib.Path(
+ "my-project-data/hello.txt"
+ ).exists(), "hello.txt does not exist."
+ assert pathlib.Path(
+ "my-read-only-data/big-file.txt"
+ ).exists(), "my-read-only-data/big-file.txt does not exist."
+ assert pathlib.Path(
+ "my-project-data/big-file.txt"
+ ).exists(), "my-project-data/big-file.txt does not exist."
def _files_with_patients():
files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"]
patients = ["P1", "P2"]
for f, p in zip(files, patients):
- run(runner, ["--debug", "add", str(f), "--patient", p], expected_files=[f"MANIFEST/{f}.dvc"])
-
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson"])
+ run(
+ runner,
+ ["--debug", "add", str(f), "--patient", p],
+ expected_files=[f"MANIFEST/{f}.dvc"],
+ )
+
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=[
+ "META/DocumentReference.ndjson",
+ "META/Patient.ndjson",
+ "META/ResearchStudy.ndjson",
+ "META/ResearchSubject.ndjson",
+ ],
+ )
run(runner, ["--debug", "meta", "validate"])
- run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"])
+ run(
+ runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]
+ )
def _files_with_patients_and_specimens():
files = ["my-project-data/hello.txt", "my-project-data/big-file.txt"]
patients = ["P1", "P2"]
specimens = ["S1", "S2"]
for f, p, s in zip(files, patients, specimens):
- run(runner, ["--debug", "add", str(f), "--patient", p, "--specimen", s], expected_files=[f"MANIFEST/{f}.dvc"])
-
- run(runner, ["--debug", "meta", "init"], expected_files=["META/DocumentReference.ndjson", "META/Patient.ndjson", "META/ResearchStudy.ndjson", "META/ResearchSubject.ndjson", "META/Specimen.ndjson"])
+ run(
+ runner,
+ ["--debug", "add", str(f), "--patient", p, "--specimen", s],
+ expected_files=[f"MANIFEST/{f}.dvc"],
+ )
+
+ run(
+ runner,
+ ["--debug", "meta", "init"],
+ expected_files=[
+ "META/DocumentReference.ndjson",
+ "META/Patient.ndjson",
+ "META/ResearchStudy.ndjson",
+ "META/ResearchSubject.ndjson",
+ "META/Specimen.ndjson",
+ ],
+ )
run(runner, ["--debug", "meta", "validate"])
- run(runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"])
+ run(
+ runner, ["commit", "-m", "init", "MANIFEST/", "META/", ".g3t", ".gitignore"]
+ )
# create initial association between patients and files
_files_with_patients()
diff --git a/tests/unit/test_coding_conventions.py b/tests/unit/test_coding_conventions.py
index 34171929..e88b5cb6 100644
--- a/tests/unit/test_coding_conventions.py
+++ b/tests/unit/test_coding_conventions.py
@@ -7,7 +7,10 @@
def test_coding_conventions():
"""Check python conventions on key directories"""
script_dir = os.path.dirname(os.path.abspath(__file__))
- directories = [os.path.join(script_dir, "../../gen3_tracker"), os.path.join(script_dir, "../../tests")]
+ directories = [
+ os.path.join(script_dir, "../../gen3_tracker"),
+ os.path.join(script_dir, "../../tests"),
+ ]
failures = []
for directory in directories:
cmd_str = f"flake8 {directory} --max-line-length 256 --exclude test_flatten_fhir_example.py"
diff --git a/tests/unit/test_flatten_fhir_example.py b/tests/unit/test_flatten_fhir_example.py
index 82acc470..9b923d88 100644
--- a/tests/unit/test_flatten_fhir_example.py
+++ b/tests/unit/test_flatten_fhir_example.py
@@ -29,19 +29,40 @@
# test data ------------------------------------------------------------
# The following fixtures provide test data for the tests below.
+
@pytest.fixture
def patient_dict() -> dict:
# TODO - read the patient example from a file
- patient_dict = {"resourceType": "Patient", "id": "3", "meta": {"lastUpdated": "2012-05-29T23:45:32Z"},
- "text": {"status": "generated",
- "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e"},
- "identifier": [{"type": {
- "coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "SS", "display": "Social Security number"}]},
- "system": "http://hl7.org/fhir/sid/us-ssn", "value": "444555555"}], "active": True,
- "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}],
- "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}], "gender": "female",
- "address": [{"use": "home", "line": ["2222 Home Street"]}],
- "managingOrganization": {"reference": "Organization/hl7"}}
+ patient_dict = {
+ "resourceType": "Patient",
+ "id": "3",
+ "meta": {"lastUpdated": "2012-05-29T23:45:32Z"},
+ "text": {
+ "status": "generated",
+ "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003eKidd, Kari. SSN:\n 444555555\u003c/div\u003e',
+ },
+ "identifier": [
+ {
+ "type": {
+ "coding": [
+ {
+ "system": "http://terminology.hl7.org/CodeSystem/v2-0203",
+ "code": "SS",
+ "display": "Social Security number",
+ }
+ ]
+ },
+ "system": "http://hl7.org/fhir/sid/us-ssn",
+ "value": "444555555",
+ }
+ ],
+ "active": True,
+ "name": [{"use": "official", "family": "Kidd", "given": ["Kari"]}],
+ "telecom": [{"system": "phone", "value": "555-555-2005", "use": "work"}],
+ "gender": "female",
+ "address": [{"use": "home", "line": ["2222 Home Street"]}],
+ "managingOrganization": {"reference": "Organization/hl7"},
+ }
yield patient_dict
@@ -52,38 +73,42 @@ def specimen_dict():
"id": "denovo-3",
"text": {
"status": "generated",
- "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://browser.ihtsdotools.org/\"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"Patient-denovoFather.html\"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d\"ServiceRequest-genomicServiceRequest.html\"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d\"grid\"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d\"Practitioner-practitioner01.html\"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html\"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e"},
+ "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative\u003c/b\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource \u0026quot;denovo-3\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003eidentifier\u003c/b\u003e: id: 3\u003c/p\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: available\u003c/p\u003e\u003cp\u003e\u003cb\u003etype\u003c/b\u003e: Venous blood specimen \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://browser.ihtsdotools.org/"\u003eSNOMED CT\u003c/a\u003e#122555007)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"Patient-denovoFather.html"\u003ePatient/denovoFather: John Doe\u003c/a\u003e \u0026quot; DOE\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003ereceivedTime\u003c/b\u003e: 2021-01-01 01:01:01+0000\u003c/p\u003e\u003cp\u003e\u003cb\u003erequest\u003c/b\u003e: \u003ca href\u003d"ServiceRequest-genomicServiceRequest.html"\u003eServiceRequest/genomicServiceRequest\u003c/a\u003e\u003c/p\u003e\u003ch3\u003eCollections\u003c/h3\u003e\u003ctable class\u003d"grid"\u003e\u003ctr\u003e\u003ctd\u003e-\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollector\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eCollected[x]\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eQuantity\u003c/b\u003e\u003c/td\u003e\u003ctd\u003e\u003cb\u003eMethod\u003c/b\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd\u003e*\u003c/td\u003e\u003ctd\u003e\u003ca href\u003d"Practitioner-practitioner01.html"\u003ePractitioner/practitioner01\u003c/a\u003e \u0026quot; DOEL\u0026quot;\u003c/td\u003e\u003ctd\u003e2021-01-01 01:01:00+0000\u003c/td\u003e\u003ctd\u003e1 mL\u003c/td\u003e\u003ctd\u003eLine, Venous \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/3.1.0/CodeSystem-v2-0488.html"\u003especimenCollectionMethod\u003c/a\u003e#LNV)\u003c/span\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/table\u003e\u003c/div\u003e',
+ },
"identifier": [
{
"system": "http://www.somesystemabc.net/identifiers/specimens",
- "value": "3"}],
+ "value": "3",
+ }
+ ],
"status": "available",
"type": {
"coding": [
{
"system": "http://snomed.info/sct",
"code": "122555007",
- "display": "Venous blood specimen"}]},
- "subject": {
- "reference": "Patient/denovoFather",
- "display": "John Doe"},
+ "display": "Venous blood specimen",
+ }
+ ]
+ },
+ "subject": {"reference": "Patient/denovoFather", "display": "John Doe"},
"receivedTime": "2021-01-01T01:01:01Z",
- "request": [
- {
- "reference": "ServiceRequest/genomicServiceRequest"}],
+ "request": [{"reference": "ServiceRequest/genomicServiceRequest"}],
"collection": {
- "collector": {
- "reference": "Practitioner/practitioner01"},
+ "collector": {"reference": "Practitioner/practitioner01"},
"collectedDateTime": "2021-01-01T01:01:00Z",
- "quantity": {
- "value": 1,
- "unit": "mL"},
+ "quantity": {"value": 1, "unit": "mL"},
"method": {
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/v2-0488",
"code": "LNV",
- "display": "Line, Venous"}]}}}
+ "display": "Line, Venous",
+ }
+ ]
+ },
+ },
+ }
@pytest.fixture
@@ -93,14 +118,14 @@ def observation_eye_color_dict():
"id": "eye-color",
"text": {
"status": "generated",
- "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"eye-color\"\u003e \u003c/a\u003e\u003ca name\u003d\"hceye-color\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e"},
+ "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"eye-color"\u003e \u003c/a\u003e\u003ca name\u003d"hceye-color"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;eye-color\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: final\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: eye color \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e ()\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 2016-05-18\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: blue\u003c/p\u003e\u003c/div\u003e',
+ },
"status": "final",
- "code": {
- "text": "eye color"},
- "subject": {
- "reference": "Patient/example"},
+ "code": {"text": "eye color"},
+ "subject": {"reference": "Patient/example"},
"effectiveDateTime": "2016-05-18",
- "valueString": "blue"}
+ "valueString": "blue",
+ }
@pytest.fixture
@@ -110,7 +135,8 @@ def observation_bmi_dict():
"id": "bmi-using-related",
"text": {
"status": "generated",
- "div": "\u003cdiv xmlns\u003d\"http://www.w3.org/1999/xhtml\"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d\"bmi-using-related\"\u003e \u003c/a\u003e\u003ca name\u003d\"hcbmi-using-related\"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d\"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%\"\u003e\u003cp style\u003d\"margin-bottom: 0px\"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d\" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n \"\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html\"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d\"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki\"\u003e (\u003ca href\u003d\"https://loinc.org/\"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d\"patient-example.html\"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d\"background: LightGoldenRodYellow\"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d\"broken-link.html\"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"observation-example.html\"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e"},
+ "div": '\u003cdiv xmlns\u003d"http://www.w3.org/1999/xhtml"\u003e\u003cp\u003e\u003cb\u003eGenerated Narrative: Observation\u003c/b\u003e\u003ca name\u003d"bmi-using-related"\u003e \u003c/a\u003e\u003ca name\u003d"hcbmi-using-related"\u003e \u003c/a\u003e\u003c/p\u003e\u003cdiv style\u003d"display: inline-block; background-color: #d9e0e7; padding: 6px; margin: 4px; border: 1px solid #8da1b4; border-radius: 5px; line-height: 60%"\u003e\u003cp style\u003d"margin-bottom: 0px"\u003eResource Observation \u0026quot;bmi-using-related\u0026quot; \u003c/p\u003e\u003c/div\u003e\u003cp\u003e\u003cb\u003estatus\u003c/b\u003e: \u003cspan title\u003d" \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d need to fix vitals to removed fixed value \u0027has-member\u0027 \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\t\u0026lt;meta\u0026gt;\n\t\t\u0026lt;profile value\u003d\u0026quot;http://hl7.org/fhir/StructureDefinition/vitalsigns\u0026quot;/\u0026gt;\n\t\u0026lt;/meta\u0026gt;\n "\u003efinal\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecategory\u003c/b\u003e: Vital Signs \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"http://terminology.hl7.org/5.5.0/CodeSystem-observation-category.html"\u003eObservation Category Codes\u003c/a\u003e#vital-signs)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ecode\u003c/b\u003e: BMI \u003cspan style\u003d"background: LightGoldenRodYellow; margin: 4px; border: 1px solid khaki"\u003e (\u003ca href\u003d"https://loinc.org/"\u003eLOINC\u003c/a\u003e#39156-5 \u0026quot;Body mass index (BMI) [Ratio]\u0026quot;)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003esubject\u003c/b\u003e: \u003ca href\u003d"patient-example.html"\u003ePatient/example\u003c/a\u003e \u0026quot;Peter CHALMERS\u0026quot;\u003c/p\u003e\u003cp\u003e\u003cb\u003eeffective\u003c/b\u003e: 1999-07-02\u003c/p\u003e\u003cp\u003e\u003cb\u003evalue\u003c/b\u003e: 16.2 kg/m2\u003cspan style\u003d"background: LightGoldenRodYellow"\u003e (Details: UCUM code kg/m2 \u003d \u0027kg/m2\u0027)\u003c/span\u003e\u003c/p\u003e\u003cp\u003e\u003cb\u003ederivedFrom\u003c/b\u003e: \u003c/p\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d"broken-link.html"\u003eObservation/bodyheight: Body Height\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d"observation-example.html"\u003eObservation/example: Body Weight\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e',
+ },
"status": "final",
"category": [
{
@@ -118,35 +144,41 @@ def observation_bmi_dict():
{
"system": "http://terminology.hl7.org/CodeSystem/observation-category",
"code": "vital-signs",
- "display": "Vital Signs"}],
- "text": "Vital Signs"}],
+ "display": "Vital Signs",
+ }
+ ],
+ "text": "Vital Signs",
+ }
+ ],
"code": {
"coding": [
{
"system": "http://loinc.org",
"code": "39156-5",
- "display": "Body mass index (BMI) [Ratio]"}],
- "text": "BMI"},
- "subject": {
- "reference": "Patient/example"},
+ "display": "Body mass index (BMI) [Ratio]",
+ }
+ ],
+ "text": "BMI",
+ },
+ "subject": {"reference": "Patient/example"},
"effectiveDateTime": "1999-07-02",
"valueQuantity": {
"value": 16.2,
"unit": "kg/m2",
"system": "http://unitsofmeasure.org",
- "code": "kg/m2"},
+ "code": "kg/m2",
+ },
"derivedFrom": [
- {
- "reference": "Observation/bodyheight",
- "display": "Body Height"},
- {
- "reference": "Observation/example",
- "display": "Body Weight"}]}
+ {"reference": "Observation/bodyheight", "display": "Body Height"},
+ {"reference": "Observation/example", "display": "Body Weight"},
+ ],
+ }
# flatteners ------------------------------------------------------------
# The following functions are used to flatten the FHIR resources.
+
def flatten_simple(self: DomainResource):
"""Convert the DomainResource instance to just an id."""
return self.id
@@ -161,13 +193,17 @@ def _isodate(v):
def flatten_scalars(self: DomainResource) -> dict:
"""Convert the DomainResource instance to a dictionary."""
- _ = {k: _isodate(v) for k, v in self.dict().items() if not isinstance(v, (list, dict))}
+ _ = {
+ k: _isodate(v)
+ for k, v in self.dict().items()
+ if not isinstance(v, (list, dict))
+ }
return _
def flatten_references(self: DomainResource) -> dict:
"""Convert the DomainResource instance to a dictionary."""
- fields = [_ for _ in self.__fields__.keys() if not _.endswith('__ext')]
+ fields = [_ for _ in self.__fields__.keys() if not _.endswith("__ext")]
_ = {}
# if any top level field in this resource is a Reference, use the Reference.reference https://build.fhir.org/references-definitions.html#Reference.reference
for k in fields:
@@ -181,14 +217,16 @@ def flatten_references(self: DomainResource) -> dict:
def flatten_identifier(self: Identifier) -> dict:
"""Convert the Identifier instance to a key value, use a simplified system as key."""
parsed_url = urlparse(self.system)
- path_parts = parsed_url.path.split('/') # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn
- key = path_parts[-1] if path_parts else 'identifier'
+ path_parts = parsed_url.path.split(
+ "/"
+ ) # e.g. "http://hl7.org/fhir/sid/us-ssn" -> us-ssn
+ key = path_parts[-1] if path_parts else "identifier"
return {key: self.value}
def flatten_coding(self: Coding) -> dict:
"""Convert the DomainResource instance to a dictionary."""
- return {'display': self.display}
+ return {"display": self.display}
def flatten_scalars_and_references(self: DomainResource) -> dict:
@@ -213,12 +251,12 @@ def flatten_observation(self: Observation) -> dict:
_ = flatten_scalars_references_identifiers(self)
# normalize all the valueXXXXX to 'value'
if self.valueQuantity:
- _['value'] = f"{self.valueQuantity.value} {self.valueQuantity.unit}"
+ _["value"] = f"{self.valueQuantity.value} {self.valueQuantity.unit}"
elif self.valueString:
- _['value'] = self.valueString
- del _['valueString']
+ _["value"] = self.valueString
+ del _["valueString"]
elif self.valueCodeableConcept:
- _['value'] = self.valueCodeableConcept.text
+ _["value"] = self.valueCodeableConcept.text
# there are many other value types, but we'll ignore them for now
# see https://build.fhir.org/observation-definitions.html#Observation.value_x_
# Quantity|CodeableConcept|string|boolean|integer|Range|Ratio|SampledData|time|dateTime|Period|Attachment|Reference(MolecularSequence)
@@ -229,6 +267,7 @@ def flatten_observation(self: Observation) -> dict:
# patchers ------------------------------------------------------------
# The following fixtures are used to patch the DomainResource class to add the desired method.
+
@pytest.fixture
def patched_domain_resource_simple() -> bool:
"""Patch the DomainResource class to add a flatten method."""
@@ -284,66 +323,126 @@ def patched_scalars_references_identifiers_observation() -> bool:
# tests ------------------------------------------------------------
+
def test_patient_without_flatten(patient_dict: dict):
"""This patient object should NOT have a 'flatten' method."""
    # without the patch fixture dependency, just have a plain patient object with no flatten method
patient = Patient.parse_obj(patient_dict)
- assert not hasattr(patient, 'flatten'), "Patient object should not have a 'flatten' method"
+ assert not hasattr(
+ patient, "flatten"
+ ), "Patient object should not have a 'flatten' method"
def test_patient_with_simple(patched_domain_resource_simple: bool, patient_dict: dict):
"""This patient object should have a 'flatten' method."""
patient = Patient.parse_obj(patient_dict)
- assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method"
- assert patient.flatten() == patient.id, f"Patient.flatten() should return {patient.id}"
+ assert hasattr(
+ patient, "flatten"
+ ), "Patient object does not have a 'flatten' method"
+ assert (
+ patient.flatten() == patient.id
+ ), f"Patient.flatten() should return {patient.id}"
def test_patient_with_scalars(patched_scalars: bool, patient_dict: dict):
"""This patient object should have a 'flatten' method that returns a dict of scalar values."""
patient = Patient.parse_obj(patient_dict)
- assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method"
- assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values"
-
-
-def test_patient_with_scalars_and_references(patched_scalars_and_references: bool, patient_dict: dict):
+ assert hasattr(
+ patient, "flatten"
+ ), "Patient object does not have a 'flatten' method"
+ assert patient.flatten() == {
+ "active": True,
+ "gender": "female",
+ "id": "3",
+ "resourceType": "Patient",
+ }, "Patient.flatten() should return a dict of all scalar values"
+
+
+def test_patient_with_scalars_and_references(
+ patched_scalars_and_references: bool, patient_dict: dict
+):
"""This patient object should have a 'flatten' method that returns a dict of scalar values and references."""
patient = Patient.parse_obj(patient_dict)
- assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method"
- assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient'}, "Patient.flatten() should return a dict of all scalar values and references"
-
-
-def test_patient_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, patient_dict: dict):
+ assert hasattr(
+ patient, "flatten"
+ ), "Patient object does not have a 'flatten' method"
+ assert patient.flatten() == {
+ "active": True,
+ "gender": "female",
+ "id": "3",
+ "managingOrganization": "Organization/hl7",
+ "resourceType": "Patient",
+ }, "Patient.flatten() should return a dict of all scalar values and references"
+
+
+def test_patient_with_scalars_references_identifiers(
+ patched_scalars_references_identifiers: bool, patient_dict: dict
+):
"""This patient object should have a 'flatten' method that returns a dict of scalar values and references."""
patient = Patient.parse_obj(patient_dict)
- assert hasattr(patient, 'flatten'), "Patient object does not have a 'flatten' method"
- assert patient.flatten() == {'active': True, 'gender': 'female', 'id': '3', 'managingOrganization': 'Organization/hl7', 'resourceType': 'Patient', 'us-ssn': '444555555'}, "Patient.flatten() should return a dict of all scalar values and references"
-
-
-def test_specimen_with_scalars_references_identifiers(patched_scalars_references_identifiers: bool, specimen_dict: dict):
+ assert hasattr(
+ patient, "flatten"
+ ), "Patient object does not have a 'flatten' method"
+ assert patient.flatten() == {
+ "active": True,
+ "gender": "female",
+ "id": "3",
+ "managingOrganization": "Organization/hl7",
+ "resourceType": "Patient",
+ "us-ssn": "444555555",
+ }, "Patient.flatten() should return a dict of all scalar values and references"
+
+
+def test_specimen_with_scalars_references_identifiers(
+ patched_scalars_references_identifiers: bool, specimen_dict: dict
+):
"""This patient object should have a 'flatten' method that returns a dict of scalar values and references."""
specimen = Specimen.parse_obj(specimen_dict)
- assert hasattr(specimen, 'flatten'), "Specimen object does not have a 'flatten' method"
- assert specimen.flatten() == {'resourceType': 'Specimen', 'id': 'denovo-3', 'status': 'available',
- 'receivedTime': '2021-01-01T01:01:01+00:00',
- 'subject': 'Patient/denovoFather', 'specimens': '3'}
+ assert hasattr(
+ specimen, "flatten"
+ ), "Specimen object does not have a 'flatten' method"
+ assert specimen.flatten() == {
+ "resourceType": "Specimen",
+ "id": "denovo-3",
+ "status": "available",
+ "receivedTime": "2021-01-01T01:01:01+00:00",
+ "subject": "Patient/denovoFather",
+ "specimens": "3",
+ }
-def test_eye_color_observation(patched_scalars_references_identifiers_observation: bool, observation_eye_color_dict: dict):
+def test_eye_color_observation(
+ patched_scalars_references_identifiers_observation: bool,
+ observation_eye_color_dict: dict,
+):
"""This patient object should have a 'flatten' method that returns a dict of scalar values and references."""
observation = Observation.parse_obj(observation_eye_color_dict)
- assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method"
- assert observation.flatten() == {'resourceType': 'Observation', 'id': 'eye-color', 'status': 'final',
- 'effectiveDateTime': '2016-05-18', 'value': 'blue',
- 'subject': 'Patient/example'}
+ assert hasattr(
+ observation, "flatten"
+ ), "Observation object does not have a 'flatten' method"
+ assert observation.flatten() == {
+ "resourceType": "Observation",
+ "id": "eye-color",
+ "status": "final",
+ "effectiveDateTime": "2016-05-18",
+ "value": "blue",
+ "subject": "Patient/example",
+ }
-def test_bmi_observation(patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict):
+def test_bmi_observation(
+ patched_scalars_references_identifiers_observation: bool, observation_bmi_dict: dict
+):
"""This patient object should have a 'flatten' method that returns a dict of scalar values and references."""
observation = Observation.parse_obj(observation_bmi_dict)
- assert hasattr(observation, 'flatten'), "Observation object does not have a 'flatten' method"
- assert observation.flatten() == {'effectiveDateTime': '1999-07-02',
- 'id': 'bmi-using-related',
- 'resourceType': 'Observation',
- 'status': 'final',
- 'subject': 'Patient/example',
- 'value': '16.2 kg/m2'}
+ assert hasattr(
+ observation, "flatten"
+ ), "Observation object does not have a 'flatten' method"
+ assert observation.flatten() == {
+ "effectiveDateTime": "1999-07-02",
+ "id": "bmi-using-related",
+ "resourceType": "Observation",
+ "status": "final",
+ "subject": "Patient/example",
+ "value": "16.2 kg/m2",
+ }
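For orientation, the contract these flatten tests exercise can be summarized in a short sketch. This is illustrative only: the real implementation likely lives in gen3_tracker/meta/entities.py and is patched onto the FHIR models by the `patched_*` fixtures, so the helper name and logic below are assumptions.

# Illustrative sketch only -- not the project's implementation. It keeps
# scalar fields and collapses FHIR Reference objects to "Type/id" strings,
# which is the shape the assertions above expect.
def flatten_sketch(resource: dict) -> dict:
    flat = {}
    for key, value in resource.items():
        if isinstance(value, (str, int, float, bool)):
            flat[key] = value
        elif isinstance(value, dict) and "reference" in value:
            flat[key] = value["reference"]
    return flat

assert flatten_sketch(
    {
        "resourceType": "Patient",
        "id": "3",
        "active": True,
        "gender": "female",
        "managingOrganization": {"reference": "Organization/hl7"},
    }
) == {
    "resourceType": "Patient",
    "id": "3",
    "active": True,
    "gender": "female",
    "managingOrganization": "Organization/hl7",
}
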
diff --git a/tests/unit/test_hash_types.py b/tests/unit/test_hash_types.py
index ab51944c..68559e6f 100644
--- a/tests/unit/test_hash_types.py
+++ b/tests/unit/test_hash_types.py
@@ -5,20 +5,20 @@
from gen3_tracker.git import DVCItem
VALID_HASHES = {
- 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8',
- 'sha1': '2ef7bde608ce5404e97d5f042f95f89f1c232871',
- 'sha256': '5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf',
- 'sha512': '3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78',
- 'crc': '3e25960a',
- 'etag': 'acbd18db4cc2f85cedef654fccc4a4d8-3'
+ "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+ "sha1": "2ef7bde608ce5404e97d5f042f95f89f1c232871",
+ "sha256": "5bf8aa57fc5a6bc547decf1cc6db63f10deb55a3c6c5df497d631fb3d95e1abf",
+ "sha512": "3ba2942ed1d05551d4360a2a7bb6298c2359061dc07b368949bd3fb7feca3344221257672d772ce456075b7cfa50fd7ce41eaefe529d056bf23dd665de668b78",
+ "crc": "3e25960a",
+ "etag": "acbd18db4cc2f85cedef654fccc4a4d8-3",
}
def test_invalid_hash_values():
"""Test that invalid hash values raise a ValidationError."""
for hash_type in ACCEPTABLE_HASHES.keys():
- _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1)
- _[hash_type] = 'foo'
+ _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1)
+ _[hash_type] = "foo"
print(_)
with pytest.raises(ValidationError):
item = DVCItem(**_)
@@ -28,7 +28,7 @@ def test_invalid_hash_values():
def test_valid_hash_values():
"""Test that valid hash values do raise a ValidationError."""
for hash_type in VALID_HASHES.keys():
- _ = dict(hash=hash_type, modified='2013-07-01T16:10-04:00', path='dddd', size=1)
+ _ = dict(hash=hash_type, modified="2013-07-01T16:10-04:00", path="dddd", size=1)
_[hash_type] = VALID_HASHES[hash_type]
print(_)
item = DVCItem(**_)
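Standalone, the validation pattern the hash tests above rely on looks roughly like the following. DVCItem is the pydantic model imported by the test module; the digest and datetime strings are taken from the tests, while the path is arbitrary.

import pytest
from pydantic import ValidationError

from gen3_tracker.git import DVCItem

# A well-formed md5 digest validates...
item = DVCItem(
    hash="md5",
    md5="acbd18db4cc2f85cedef654fccc4a4d8",
    modified="2013-07-01T16:10-04:00",
    path="hello.txt",
    size=1,
)
assert item.hash == "md5"

# ...while a malformed digest is rejected by the model.
with pytest.raises(ValidationError):
    DVCItem(hash="md5", md5="foo", modified="2013-07-01T16:10-04:00", path="hello.txt", size=1)
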
diff --git a/tests/unit/test_read_dvc.py b/tests/unit/test_read_dvc.py
index 7feef182..c9a5c231 100644
--- a/tests/unit/test_read_dvc.py
+++ b/tests/unit/test_read_dvc.py
@@ -3,15 +3,23 @@
def test_read_dvc(data_path: Path):
- dvc = to_dvc(data_path / 'hello.txt.dvc')
+ dvc = to_dvc(data_path / "hello.txt.dvc")
assert dvc
assert dvc.outs
- assert dvc.outs[0].path == 'my-project-data/hello.txt'
+ assert dvc.outs[0].path == "my-project-data/hello.txt"
def test_read_dvc_item():
- _ = {'hash': 'md5', 'is_symlink': False, 'md5': 'b1946ac92492d2347c6235b4d2611184', 'mime': 'text/plain', 'modified': '2024-04-30T17:46:30.819143+00:00',
- 'path': 'my-project-data/hello.txt', 'realpath': '/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt', 'size': 6}
+ _ = {
+ "hash": "md5",
+ "is_symlink": False,
+ "md5": "b1946ac92492d2347c6235b4d2611184",
+ "mime": "text/plain",
+ "modified": "2024-04-30T17:46:30.819143+00:00",
+ "path": "my-project-data/hello.txt",
+ "realpath": "/Users/walsbr/aced/g3t-git/attic/cbds-test39/my-project-data/hello.txt",
+ "size": 6,
+ }
item = DVCItem(**_)
assert item
- assert item.hash == 'md5'
+ assert item.hash == "md5"
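For context, to_dvc reads a .dvc sidecar file and returns an object whose outs entries mirror DVCItem. The YAML layout below is an assumption modeled on upstream DVC's format, and the import location of to_dvc is not shown in this patch, so treat both as illustrative.

# Hypothetical my-project-data/hello.txt.dvc contents (layout assumed):
#
#   outs:
#   - md5: b1946ac92492d2347c6235b4d2611184
#     hash: md5
#     size: 6
#     path: my-project-data/hello.txt
#
from pathlib import Path

from gen3_tracker.git import to_dvc  # assumed import location

dvc = to_dvc(Path("my-project-data/hello.txt.dvc"))
assert dvc.outs
assert dvc.outs[0].path == "my-project-data/hello.txt"
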
From 2cb2feb138aff124c258f107ccfbfb420608a070 Mon Sep 17 00:00:00 2001
From: quinnwai
Date: Mon, 2 Dec 2024 13:00:20 -0800
Subject: [PATCH 3/6] ensure dataframer unit tests pass
---
tests/unit/dataframer/test_dataframer.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/unit/dataframer/test_dataframer.py b/tests/unit/dataframer/test_dataframer.py
index 87a51f22..422665f6 100644
--- a/tests/unit/dataframer/test_dataframer.py
+++ b/tests/unit/dataframer/test_dataframer.py
@@ -93,6 +93,7 @@ def simplified_resources(
"category": "Laboratory",
"sample_type": "Primary Solid Tumor",
"library_id": "12345",
+ "observation_code": "sample type abc",
"tissue_type": "Tumor",
"treatments": "Trastuzumab",
"allocated_for_site": "TEST Clinical Research",
@@ -284,6 +285,7 @@ def specimen_row(simplified_resources, specimen_key):
"tissue_type": "Tumor",
"treatments": "Trastuzumab",
"allocated_for_site": "TEST Clinical Research",
+ "observation_code": "sample type abc",
"indexed_collection_date": "365",
"biopsy_specimens": "specimenA, specimenB, specimenC",
"biopsy_procedure_type": "Biopsy - Core",
From 4a71cf1f57755610e3516e8e9b4abcfbda5e094e Mon Sep 17 00:00:00 2001
From: matthewpeterkort
Date: Mon, 2 Dec 2024 13:34:55 -0800
Subject: [PATCH 4/6] fix test
---
tests/integration/test_end_to_end_workflow.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py
index 6237edab..d4c5f407 100644
--- a/tests/integration/test_end_to_end_workflow.py
+++ b/tests/integration/test_end_to_end_workflow.py
@@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
result = run(
runner,
["push", "--skip_validate", "--overwrite"],
- expected_exit_code=1,
+ expected_exit_code=0 ,
expected_files=[log_file_path],
)
@@ -316,6 +316,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
str_lines = str(lines)
+ print("log lines: ", str_lines)
for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
assert (
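Patches 4 through 6 go back and forth on the expected_exit_code argument of the run() test helper. The helper itself is not shown in these diffs; the sketch below is a hedged reconstruction of its likely shape, built on click's CliRunner.invoke, so only the keyword arguments used above should be taken as given and the entry-point import is an assumption.

from pathlib import Path

from click.testing import CliRunner, Result

from gen3_tracker.git.cli import cli  # assumed entry point


def run(
    runner: CliRunner,
    args: list,
    expected_exit_code: int = 0,
    expected_files: list = None,
) -> Result:
    """Invoke the CLI, then assert on the exit code and on expected output files."""
    result = runner.invoke(cli, args)
    assert result.exit_code == expected_exit_code, result.output
    for path in expected_files or []:
        assert Path(path).exists(), f"expected {path} to exist"
    return result
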
From 885bfce2e0828c37724fcd63f33a73ae74678a46 Mon Sep 17 00:00:00 2001
From: matthewpeterkort
Date: Tue, 3 Dec 2024 14:45:09 -0800
Subject: [PATCH 5/6] fix test to work with new output
---
tests/integration/test_end_to_end_workflow.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py
index d4c5f407..911ffc2e 100644
--- a/tests/integration/test_end_to_end_workflow.py
+++ b/tests/integration/test_end_to_end_workflow.py
@@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
result = run(
runner,
["push", "--skip_validate", "--overwrite"],
- expected_exit_code=0 ,
+ expected_exit_code=1 ,
expected_files=[log_file_path],
)
@@ -316,8 +316,6 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
str_lines = str(lines)
- print("log lines: ", str_lines)
-
for keyword in ["/content/0/attachment/creation", "jsonschema", invalid_date]:
assert (
keyword in str_lines
From 6d46e966914e362e94a1a24e00b1b91ff6ec2d2c Mon Sep 17 00:00:00 2001
From: quinnwai
Date: Tue, 3 Dec 2024 14:52:18 -0800
Subject: [PATCH 6/6] fix spacing
---
tests/integration/test_end_to_end_workflow.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/integration/test_end_to_end_workflow.py b/tests/integration/test_end_to_end_workflow.py
index 911ffc2e..5fdca0c0 100644
--- a/tests/integration/test_end_to_end_workflow.py
+++ b/tests/integration/test_end_to_end_workflow.py
@@ -303,7 +303,7 @@ def test_push_fails_with_invalid_doc_ref_creation_date(
result = run(
runner,
["push", "--skip_validate", "--overwrite"],
- expected_exit_code=1 ,
+ expected_exit_code=1,
expected_files=[log_file_path],
)