Skip to content

Commit

Permalink
Use RuntimeApiSiteClient instead of defining a new NMDCApiClient class
Browse files Browse the repository at this point in the history
  • Loading branch information
sujaypatil96 committed May 29, 2024
1 parent e57b41c commit e9ac70d
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 114 deletions.
22 changes: 9 additions & 13 deletions nmdc_runtime/site/export/ncbi_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,26 @@
load_mappings,
validate_xml,
)
from nmdc_runtime.site.export.nmdc_api_client import NMDCApiClient


class NCBISubmissionXML:
def __init__(self, ncbi_submission_fields: dict):
def __init__(self, nmdc_study_id: str, ncbi_submission_metadata: dict):
self.root = ET.Element("Submission")
self.nmdc_study_id = ncbi_submission_fields.get("nmdc_study_id")
self.nmdc_ncbi_attribute_mapping_file_url = ncbi_submission_fields.get(

self.nmdc_study_id = nmdc_study_id

self.nmdc_ncbi_attribute_mapping_file_url = ncbi_submission_metadata.get(
"nmdc_ncbi_attribute_mapping_file_url"
)
self.ncbi_submission_metadata = ncbi_submission_fields.get(
self.ncbi_submission_metadata = ncbi_submission_metadata.get(
"ncbi_submission_metadata", {}
)
self.ncbi_bioproject_metadata = ncbi_submission_fields.get(
self.ncbi_bioproject_metadata = ncbi_submission_metadata.get(
"ncbi_bioproject_metadata", {}
)
self.ncbi_biosample_metadata = ncbi_submission_fields.get(
self.ncbi_biosample_metadata = ncbi_submission_metadata.get(
"ncbi_biosample_metadata", {}
)
self.nmdc_api_client = NMDCApiClient()

# dispatcher dictionary capturing handlers for NMDC object to NCBI flat Attribute
# type handlers
Expand Down Expand Up @@ -250,7 +250,7 @@ def set_biosample(
)
self.root.append(action)

def get_submission_xml(self):
def get_submission_xml(self, biosamples_list: list):
self.set_description(
email=self.ncbi_submission_metadata.get("email", ""),
user=self.ncbi_submission_metadata.get("user", ""),
Expand All @@ -267,10 +267,6 @@ def get_submission_xml(self):
org=self.ncbi_submission_metadata.get("organization", ""),
)

biosamples_list = self.nmdc_api_client.get_biosamples_part_of_study(
self.nmdc_study_id
)

self.set_biosample(
organism_name=self.ncbi_biosample_metadata.get("organism_name", ""),
package=self.ncbi_biosample_metadata.get("package", ""),
Expand Down
41 changes: 0 additions & 41 deletions nmdc_runtime/site/export/nmdc_api_client.py

This file was deleted.

29 changes: 25 additions & 4 deletions nmdc_runtime/site/export/study_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import csv
from io import StringIO

import requests
from dagster import (
op,
get_dagster_logger,
Expand All @@ -26,13 +25,27 @@ def get_all_docs(client, collection, filter_):
per_page = 200
url_base = f"/{collection}?filter={filter_}&per_page={per_page}"
results = []
rv = client.request("GET", url_base).json()
response = client.request("GET", url_base)
if response.status_code != 200:
raise Exception(
f"Runtime API request failed with status {response.status_code}."
f" Check URL: {url_base}"
)
rv = response.json()
results.extend(rv.get("results", []))
page, count = rv["meta"]["page"], rv["meta"]["count"]
assert count <= 10_000
while page * per_page < count:
rv = requests.get(url_base + f"&page={page + 1}").json()
results.extend(rv["results"])
page += 1
url = f"{url_base}&page={page}"
response = client.request("GET", url)
if response.status_code != 200:
raise Exception(
f"Runtime API request failed with status {response.status_code}."
f" Check URL: {url}"
)
rv = response.json()
results.extend(rv.get("results", []))
return results


Expand Down Expand Up @@ -115,3 +128,11 @@ def export_study_biosamples_as_csv(context: OpExecutionContext, study_export_inf
def export_study_biosamples_metadata():
outputs = export_study_biosamples_as_csv(get_study_biosamples_metadata())
add_output_run_event(outputs)


@op(required_resource_keys={"runtime_api_site_client"})
def get_biosamples_by_study_id(context: OpExecutionContext, nmdc_study_id: str):
    """Fetch the biosample documents that are part of one NMDC study.

    The study id is taken from the ``nmdc_study_id`` argument. The lookup is
    delegated to ``get_all_docs`` against the ``biosamples`` collection, using
    the ``runtime_api_site_client`` resource and the filter
    ``part_of:<nmdc_study_id>``.

    Returns:
        Whatever ``get_all_docs`` returns for that collection and filter.
    """
    api_client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
    return get_all_docs(api_client, "biosamples", f"part_of:{nmdc_study_id}")
10 changes: 8 additions & 2 deletions nmdc_runtime/site/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@
get_neon_pipeline_inputs,
get_df_from_url,
site_code_mapping,
get_ncbi_export_pipeline_study_id,
get_ncbi_export_pipeline_inputs,
ncbi_submission_xml_from_nmdc_study,
ncbi_submission_xml_asset,
)
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id


@graph
Expand Down Expand Up @@ -388,6 +390,10 @@ def ingest_neon_surface_water_metadata():

@graph
def nmdc_study_to_ncbi_submission_export():
ncbi_submission_fields = get_ncbi_export_pipeline_inputs()
xml_data = ncbi_submission_xml_from_nmdc_study(ncbi_submission_fields)
nmdc_study_id = get_ncbi_export_pipeline_study_id()
biosamples = get_biosamples_by_study_id(nmdc_study_id)
ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
xml_data = ncbi_submission_xml_from_nmdc_study(
nmdc_study_id, ncbi_submission_metadata, biosamples
)
ncbi_submission_xml_asset(xml_data)
14 changes: 9 additions & 5 deletions nmdc_runtime/site/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,9 +1000,13 @@ def site_code_mapping() -> dict:
)


@op(config_schema={"nmdc_study_id": str})
def get_ncbi_export_pipeline_study_id(context: OpExecutionContext) -> str:
    """Return the ``nmdc_study_id`` value from this op's config.

    Raises:
        KeyError: if ``nmdc_study_id`` is absent from ``context.op_config``.
    """
    study_id = context.op_config["nmdc_study_id"]
    return study_id


@op(
config_schema={
"nmdc_study_id": str,
"nmdc_ncbi_attribute_mapping_file_url": str,
"ncbi_submission_metadata": Field(
Permissive(
Expand Down Expand Up @@ -1043,7 +1047,6 @@ def site_code_mapping() -> dict:
out=Out(Dict),
)
def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
nmdc_study_id = context.op_config["nmdc_study_id"]
nmdc_ncbi_attribute_mapping_file_url = context.op_config[
"nmdc_ncbi_attribute_mapping_file_url"
]
Expand All @@ -1052,7 +1055,6 @@ def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})

return {
"nmdc_study_id": nmdc_study_id,
"nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
"ncbi_submission_metadata": ncbi_submission_metadata,
"ncbi_bioproject_metadata": ncbi_bioproject_metadata,
Expand All @@ -1063,8 +1065,10 @@ def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
@op
def ncbi_submission_xml_from_nmdc_study(
context: OpExecutionContext,
nmdc_study_id: str,
ncbi_exporter_metadata: dict,
biosamples: list,
) -> str:
ncbi_exporter = NCBISubmissionXML(ncbi_exporter_metadata)
ncbi_xml = ncbi_exporter.get_submission_xml()
ncbi_exporter = NCBISubmissionXML(nmdc_study_id, ncbi_exporter_metadata)
ncbi_xml = ncbi_exporter.get_submission_xml(biosamples)
return ncbi_xml
21 changes: 20 additions & 1 deletion nmdc_runtime/site/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,13 +855,32 @@ def biosample_submission_ingest():

@repository
def biosample_export():
normal_resources = run_config_frozen__normal_env["resources"]
return [
nmdc_study_to_ncbi_submission_export.to_job(
resource_defs=resource_defs,
config={
"resources": merge(
unfreeze(normal_resources),
{
"runtime_api_site_client": {
"config": {
"base_url": {"env": "API_HOST"},
"client_id": {"env": "API_SITE_CLIENT_ID"},
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
"site_id": {"env": "API_SITE_ID"},
},
},
},
),
"ops": {
"get_ncbi_export_pipeline_inputs": {
"get_ncbi_export_pipeline_study_id": {
"config": {
"nmdc_study_id": "",
}
},
"get_ncbi_export_pipeline_inputs": {
"config": {
"nmdc_ncbi_attribute_mapping_file_url": "",
"ncbi_submission_metadata": {
"email": "",
Expand Down
77 changes: 29 additions & 48 deletions tests/test_data/test_ncbi_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
handle_float_value,
handle_string_value,
)
from nmdc_runtime.site.export.nmdc_api_client import NMDCApiClient

MOCK_SUBMISSION_FIELDS = {
"nmdc_study_id": "nmdc:sty-11-12345",
MOCK_NCBI_NMDC_STUDY_ID = "nmdc:sty-11-12345"

MOCK_NCBI_SUBMISSION_METADATA = {
"nmdc_ncbi_attribute_mapping_file_url": "http://example.com/mappings.tsv",
"ncbi_submission_metadata": {
"email": "user@example.com",
Expand All @@ -43,12 +43,10 @@

@pytest.fixture
def ncbi_submission_client():
return NCBISubmissionXML(ncbi_submission_fields=MOCK_SUBMISSION_FIELDS)


@pytest.fixture
def nmdc_api_client():
return NMDCApiClient(api_base_url="http://fakeapi.com/")
return NCBISubmissionXML(
nmdc_study_id=MOCK_NCBI_NMDC_STUDY_ID,
ncbi_submission_metadata=MOCK_NCBI_SUBMISSION_METADATA,
)


@pytest.fixture
Expand Down Expand Up @@ -89,11 +87,11 @@ def test_set_element(self, ncbi_submission_client):

def test_set_description(self, ncbi_submission_client):
ncbi_submission_client.set_description(
MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["email"],
MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["user"],
MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["first"],
MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["last"],
MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["organization"],
MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"]["email"],
MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"]["user"],
MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"]["first"],
MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"]["last"],
MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"]["organization"],
)
description = ET.tostring(
ncbi_submission_client.root.find("Description"), "unicode"
Expand All @@ -116,13 +114,19 @@ def test_set_description(self, ncbi_submission_client):

def test_set_bioproject(self, ncbi_submission_client):
ncbi_submission_client.set_bioproject(
title=MOCK_SUBMISSION_FIELDS["ncbi_bioproject_metadata"]["title"],
project_id=MOCK_SUBMISSION_FIELDS["ncbi_bioproject_metadata"]["project_id"],
description=MOCK_SUBMISSION_FIELDS["ncbi_bioproject_metadata"][
title=MOCK_NCBI_SUBMISSION_METADATA["ncbi_bioproject_metadata"]["title"],
project_id=MOCK_NCBI_SUBMISSION_METADATA["ncbi_bioproject_metadata"][
"project_id"
],
description=MOCK_NCBI_SUBMISSION_METADATA["ncbi_bioproject_metadata"][
"description"
],
data_type=MOCK_SUBMISSION_FIELDS["ncbi_bioproject_metadata"]["data_type"],
org=MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["organization"],
data_type=MOCK_NCBI_SUBMISSION_METADATA["ncbi_bioproject_metadata"][
"data_type"
],
org=MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"][
"organization"
],
)
bioproject_xml = ET.tostring(
ncbi_submission_client.root.find(".//Project"), "unicode"
Expand Down Expand Up @@ -178,11 +182,13 @@ def test_set_biosample(self, ncbi_submission_client, nmdc_biosample, mocker):
),
)
ncbi_submission_client.set_biosample(
organism_name=MOCK_SUBMISSION_FIELDS["ncbi_biosample_metadata"][
organism_name=MOCK_NCBI_SUBMISSION_METADATA["ncbi_biosample_metadata"][
"organism_name"
],
package=MOCK_SUBMISSION_FIELDS["ncbi_biosample_metadata"]["package"],
org=MOCK_SUBMISSION_FIELDS["ncbi_submission_metadata"]["organization"],
package=MOCK_NCBI_SUBMISSION_METADATA["ncbi_biosample_metadata"]["package"],
org=MOCK_NCBI_SUBMISSION_METADATA["ncbi_submission_metadata"][
"organization"
],
nmdc_biosamples=nmdc_biosample,
)
biosample_xml = ET.tostring(
Expand Down Expand Up @@ -237,11 +243,7 @@ def test_get_submission_xml(self, mocker, ncbi_submission_client, nmdc_biosample
),
)

mocker.patch.object(
NMDCApiClient, "get_biosamples_part_of_study", return_value=nmdc_biosample
)

submission_xml = ncbi_submission_client.get_submission_xml()
submission_xml = ncbi_submission_client.get_submission_xml(nmdc_biosample)

assert "nmdc:bsm-12-gnfpt483" in submission_xml
assert "E. coli" in submission_xml
Expand All @@ -252,27 +254,6 @@ def test_get_submission_xml(self, mocker, ncbi_submission_client, nmdc_biosample
assert "Test Project" in submission_xml


class TestNMDCApiClient:
def test_get_biosamples_part_of_study_success(self, mocker, nmdc_api_client):
mock_response = mocker.MagicMock()
mock_response.json.return_value = {
"resources": [
{"id": "nmdc:bsm-12-gnfpt483", "part_of": ["nmdc:sty-11-hht5sb92"]}
],
"next_page_token": None,
}
mocker.patch("requests.get", return_value=mock_response)
result = nmdc_api_client.get_biosamples_part_of_study("nmdc:sty-11-hht5sb92")
assert result == [
{"id": "nmdc:bsm-12-gnfpt483", "part_of": ["nmdc:sty-11-hht5sb92"]}
]

def test_get_biosamples_part_of_study_failure(self, mocker, nmdc_api_client):
mocker.patch("requests.get", side_effect=HTTPError("API Error"))
with pytest.raises(HTTPError):
nmdc_api_client.get_biosamples_part_of_study("nmdc:sty-11-hht5sb92")


class TestNCBIXMLUtils:
def test_handle_quantity_value(self):
assert (
Expand Down

0 comments on commit e9ac70d

Please sign in to comment.