Skip to content

Commit

Permalink
Ensure basic validation occurs for updates via POST /queries:run (#782
Browse files Browse the repository at this point in the history
)

* feat: use overlaydb to apply queries:run updates for validation

for #774

* fix: queries:run update validation

init test

* Update nmdc_runtime/api/endpoints/queries.py

Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com>

* Update tests/test_api/test_endpoints.py

Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com>

* style: reformat

* docs: docstring

---------

Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com>
Co-authored-by: github-actions <github-actions@github.com>
  • Loading branch information
3 people authored Nov 21, 2024
1 parent fb91571 commit 8013f3f
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 4 deletions.
39 changes: 38 additions & 1 deletion nmdc_runtime/api/endpoints/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
get_mongo_db,
get_nonempty_nmdc_schema_collection_names,
)
from nmdc_runtime.api.endpoints.util import permitted, users_allowed
from nmdc_runtime.api.endpoints.util import permitted, users_allowed, strip_oid
from nmdc_runtime.api.models.query import (
Query,
QueryResponseOptions,
Expand All @@ -23,6 +23,7 @@
UpdateCommand,
)
from nmdc_runtime.api.models.user import get_current_active_user, User
from nmdc_runtime.util import OverlayDB, validate_json

router = APIRouter()

Expand Down Expand Up @@ -165,6 +166,42 @@ def _run_query(query, mdb) -> CommandResponse:
{"filter": up_statement.q, "limit": 0 if up_statement.multi else 1}
for up_statement in query.cmd.updates
]
# Execute this "update" command on a temporary "overlay" database so we can
# validate its outcome before executing it on the real database. If its outcome
# is invalid, we will abort and raise an "HTTP 422" exception.
#
# TODO: Consider wrapping this entire "preview-then-apply" sequence within a
# MongoDB transaction so as to avoid race conditions where the overlay
# database at "preview" time does not reflect the state of the database
# at "apply" time. This will be necessary once the "preview" step
# accounts for referential integrity.
#
with OverlayDB(mdb) as odb:
odb.apply_updates(
collection_name,
[u.model_dump(mode="json", exclude="hint") for u in query.cmd.updates],
)
_ids_to_check = set()
for spec in update_specs:
for doc in mdb[collection_name].find(
filter=spec["filter"],
limit=spec["limit"],
projection={
"_id": 1
}, # unique `id` not guaranteed (see e.g. `functional_annotation_agg`)
):
_ids_to_check.add(doc["_id"])
docs_to_check = odb._top_db[collection_name].find(
{"_id": {"$in": list(_ids_to_check)}}
)
rv = validate_json(
{collection_name: [strip_oid(d) for d in docs_to_check]}, mdb
)
if rv["result"] == "errors":
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Schema document(s) would be invalid after proposed update: {rv['detail']}",
)
for spec in update_specs:
docs = list(mdb[collection_name].find(**spec))
if not docs:
Expand Down
43 changes: 40 additions & 3 deletions tests/test_api/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from nmdc_runtime.api.models.user import UserInDB, UserIn, User
from nmdc_runtime.site.ops import materialize_alldocs
from nmdc_runtime.site.repository import run_config_frozen__normal_env
from nmdc_runtime.site.resources import get_mongo, RuntimeApiSiteClient, mongo_resource
from nmdc_runtime.site.resources import (
get_mongo,
RuntimeApiSiteClient,
mongo_resource,
RuntimeApiUserClient,
)
from nmdc_runtime.util import REPO_ROOT_DIR, ensure_unique_id_indexes


Expand Down Expand Up @@ -180,6 +185,13 @@ def api_site_client():
return RuntimeApiSiteClient(base_url=os.getenv("API_HOST"), **rs["site_client"])


@pytest.fixture
def api_user_client():
mdb = get_mongo_db()
rs = ensure_test_resources(mdb)
return RuntimeApiUserClient(base_url=os.getenv("API_HOST"), **rs["user"])


def test_metadata_validate_json_0(api_site_client):
rv = api_site_client.request(
"POST",
Expand Down Expand Up @@ -289,10 +301,10 @@ def test_submit_workflow_activities(api_site_client):
"has_output": [
"nmdc:dobj-11-w5dak635",
"nmdc:dobj-11-g6d71n77",
"nmdc:dobj-11-bds7qq03"
"nmdc:dobj-11-bds7qq03",
],
"type": "nmdc:ReadQcAnalysis",
"version": "v1.0.8"
"version": "v1.0.8",
}
]
}
Expand Down Expand Up @@ -379,6 +391,7 @@ def test_find_planned_processes(api_site_client):
)
assert rv.json()["meta"]["count"] >= 9


def test_find_planned_process_by_id(api_site_client):
# Seed the database with documents that represent instances of the `PlannedProcess` class or any of its subclasses.
mdb = get_mongo_db()
Expand Down Expand Up @@ -418,3 +431,27 @@ def test_find_planned_process_by_id(api_site_client):
"GET",
f"/planned_processes/nmdc:sty-11-00000001",
)


def test_queries_run_update(api_user_client):
"""Submit a request to store data that does not comply with the schema."""
mdb = get_mongo_db()
allow_spec = {
"username": api_user_client.username,
"action": "/queries:run(query_cmd:DeleteCommand)",
}
mdb["_runtime.api.allow"].replace_one(allow_spec, allow_spec, upsert=True)
with pytest.raises(requests.HTTPError):
api_user_client.request(
"POST",
"/queries:run",
{
"update": "biosample_set",
"updates": [
{
"q": {"id": "nmdc:bsm-13-amrnys72"},
"u": {"$set": {"associated_studies.0": 42}},
}
],
},
)

0 comments on commit 8013f3f

Please sign in to comment.