Skip to content

Commit

Permalink
Merge pull request #167 from kbase/develop
Browse files Browse the repository at this point in the history
Merge Develop -> Main for release 0.0.22
  • Loading branch information
MrCreosote authored Aug 16, 2022
2 parents d3edf07 + a486b20 commit 927a833
Show file tree
Hide file tree
Showing 20 changed files with 389 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .bandit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
assert_used:
skips: ['*_test.py', '*test_*.py']
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.22] 2022-08-15
### Changed
- The NCBI taxa scientific name lookup queries below were updated to make use of the new
NCBI taxa loader `species_or_below` flag and note that the `strain` flag is deprecated.
See https://github.com/kbase/relation_engine_importers/blob/d8f87fb74e984cae1c94985b82349b13bc7f277e/docs/NCBI_taxa_sciname_lookup_issues_22_07.md
- taxonomy_search_species_strain
- taxonomy_search_species_strain_no_sort
- The NCBI and GTDB taxon collections were updated to include the new flag.

## [0.0.21] 2022-06-13
### Added
- Dependabot
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.7-alpine
FROM python:3.10-alpine

ARG DEVELOPMENT
ARG BUILD_DATE
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,10 @@ The CI service is available in the `relationapi` service
* Merge from Develop to Main
* Draft a release using semantic versioning (e.g. 1.2.3) by creating a tag with the target of Main
* Ensure release notes are updated manually and/or by AutoGenerate Release Notes

## Testing

To run tests:
```
make test
```
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.21
0.0.22
19 changes: 10 additions & 9 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
mypy>=0.630
bandit==1.5.1
mypy>=0.971
bandit==1.7.4
# mccabe 0.7.0 is not compatible with flake8 4.0.1
mccabe==0.6.1
flake8==3.5.0
grequests==0.3.0
coverage==5.2.1
typed-ast>=1.4.0
black==22.3.0
pytest==6.2.5
jinja2==3.0.3
flake8==4.0.1
grequests==0.6.0
coverage==6.4.2
black==22.6.0
pytest==7.1.2
python-arango==7.4.1
frozendict==2.3.4
6 changes: 3 additions & 3 deletions relation_engine_server/test/test_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
_CONF = get_config()

# Use the mock auth tokens
NON_ADMIN_TOKEN = "non_admin_token"
ADMIN_TOKEN = "admin_token"
INVALID_TOKEN = "invalid_token"
NON_ADMIN_TOKEN = "non_admin_token" # nosec
ADMIN_TOKEN = "admin_token" # nosec
INVALID_TOKEN = "invalid_token" # nosec

# Use the docker-compose url of the running flask server
URL = os.environ.get("TEST_URL", "http://localhost:5000")
Expand Down
10 changes: 5 additions & 5 deletions relation_engine_server/test/test_json_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ def test_non_validation_validator_errors(self):
with self.assertRaisesRegex(TypeError, err_msg):
run_validator(schema_file=test_file, data={})

# invalid jsonpointer string - note the grammar error is from jsonpointer
err_str = "location must starts with /"
# invalid jsonpointer string
err_str = "Location must start with /"
json_loc = "start validating here"
with self.assertRaisesRegex(JsonPointerException, err_str):
run_validator(schema=test_schema, data={}, validate_at=json_loc)
Expand Down Expand Up @@ -271,7 +271,7 @@ def test_pattern_validation(self, schema_arg=None, schema_file_arg=None):
"distance": 3,
},
"file": "invalid_pattern",
"err_str": '"what\'s-the-problem with-this-string\?" does not match .*?',
"err_str": r'"what\'s-the-problem with-this-string\?" does not match .*?',
},
{
"input": {"name": "No_problem_with_this_string", "distance": 3},
Expand Down Expand Up @@ -309,7 +309,7 @@ def test_uri_validation(self, schema_arg=None, schema_file_arg=None):
{
"input": {"name": "invalid_uri", "home_page": "where is it?"},
"file": "invalid_uri",
"err_str": "'where is it\?' is not a 'uri'",
"err_str": r"'where is it\?' is not a 'uri'",
},
]

Expand Down Expand Up @@ -350,7 +350,7 @@ def test_date_format_validation(self, schema_arg=None, schema_file_arg=None):

# pyyaml-specific issue: dates get automatically parsed into datetime objects (doh!)
file_path = os_path.join(json_validation_dir, "unquoted_date.yaml")
err_str = "datetime.date\(2020, 6, 6\) is not of type 'string'"
err_str = r"datetime.date\(2020, 6, 6\) is not of type 'string'"
with self.assertRaisesRegex(ValidationError, err_str):
run_validator(
schema=schema_arg,
Expand Down
2 changes: 1 addition & 1 deletion relation_engine_server/test/test_spec_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def test_get_schema_for_doc(self):

fake_name = "fake_name/12345"
# a nonexistent file raises the appropriate error
err_msg = f"Collection 'fake_name' does not exist."
err_msg = "Collection 'fake_name' does not exist."
with self.assertRaisesRegex(SchemaNonexistent, err_msg):
spec_loader.get_schema_for_doc(fake_name, path_only=True)

Expand Down
19 changes: 11 additions & 8 deletions relation_engine_server/utils/ensure_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,22 +229,25 @@ def excise_namespace(analyzer_name: str) -> str:


def is_obj_subset_rec(
l: Union[dict, list, float, str, int],
r: Union[dict, list, float, str, int],
left: Union[dict, list, float, str, int],
right: Union[dict, list, float, str, int],
):
"""
Compare two JSON objects, to see if, essentially, l <= r
Compare two JSON objects, to see if, essentially, left <= right
If comparing dicts, recursively compare
If comparing lists, shallowly compare. For now, YAGN more
"""
if isinstance(l, dict) and isinstance(r, dict):
if isinstance(left, dict) and isinstance(right, dict):
return all(
[k in r.keys() and is_obj_subset_rec(l[k], r[k]) for k in l.keys()]
[
k in right.keys() and is_obj_subset_rec(left[k], right[k])
for k in left.keys()
]
) # ignore: typing
elif isinstance(l, list) and isinstance(r, list):
return all([le in r for le in l])
elif isinstance(left, list) and isinstance(right, list):
return all([le in right for le in left])
else:
return l == r # noqa: E741
return left == right


def mod_obj_literal(
Expand Down
9 changes: 4 additions & 5 deletions relation_engine_server/utils/json_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@
"""
from jsonschema import validators, Draft7Validator, FormatChecker, RefResolver

from jsonschema.compat import (
urlopen,
urlsplit,
)
from jsonschema.exceptions import ValidationError
from jsonpointer import resolve_pointer
from urllib.request import urlopen
from urllib.parse import urlsplit
import yaml
import json
import requests
Expand Down Expand Up @@ -165,7 +163,8 @@ def resolve_remote(self, uri):
result = requests.get(uri).json()
else:
# Otherwise, pass off to urllib and assume utf-8
with urlopen(uri) as url:
# TODO https://github.com/kbase/relation_engine/issues/156
with urlopen(uri) as url: # nosec
content = url.read().decode("utf-8")
if uri.endswith(".yaml") or uri.endswith(".yml"):
result = yaml.safe_load(content)
Expand Down
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Flask==2.1.2
itsdangerous==2.0.1
greenlet==0.4.16
Flask==2.1.3
itsdangerous==2.1.2
greenlet==1.1.2
gunicorn==20.1.0
gevent==1.3.7
simplejson==3.16.0
python-dotenv==0.9.1
requests==2.28.0
jsonpointer==2.0
jsonschema==3.2.0
jsonschema[format]==3.2.0
gevent==21.12.0
simplejson==3.17.6
python-dotenv==0.20.0
requests==2.28.1
jsonpointer==2.3
jsonschema==4.8.0
jsonschema[format]==4.8.0
pyyaml==6.0
rfc3987==1.3.8
jinja2==3.0.3
jinja2==3.1.2
12 changes: 9 additions & 3 deletions scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,24 @@ set -e
(cd /app/relation_engine_server/test/spec_release && \
tar czvf spec.tar.gz sample_spec_release)

echo "> black"
black .
echo "> flake8"
flake8 --max-complexity 20 /app
echo "> mypy"
mypy --ignore-missing-imports /app
bandit -r /app
echo "> bandit"
bandit -r -c .bandit.yaml /app

# start server, using the specs in /spec/repo
sh /app/scripts/start_server.sh &
coverage erase
# spec validation
python -m spec.validate
# wait for the RE service to be up so integration tests can pass
python -m relation_engine_server.utils.wait_for api
# run importer/, relation_engine_server/, and spec/ tests
coverage run --branch -m pytest
# RE client tests
coverage run --branch -m pytest -vv
# RE client tests - seems like these tests were already run in the line above, why run them again?
PYTHONPATH=client_src python -m pytest client_src/test
coverage html --omit=*/test_*
5 changes: 5 additions & 0 deletions spec/collections/gtdb/gtdb_taxon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,8 @@ schema:
type: string
title: Taxonomic rank
examples: [domain, phylum]
species_or_below:
type: boolean
title: Species or below
description: A flag that denotes the taxon is of rank species or a lower rank, e.g.
strain, forma, genome, subspecies, etc.
16 changes: 13 additions & 3 deletions spec/collections/ncbi/ncbi_taxon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,22 @@ schema:
type: string
title: Taxonomic rank
examples: ["Domain", "Phylum", "no rank"]
species_or_below:
type: boolean
title: Species or below flag
description: A flag that denotes the taxon is of rank species or a lower rank, e.g.
strain, forma, genome, subspecies, etc.
strain:
type: boolean
title: Strain flag
description: Whether this node corresponds to a strain. Strains are considered to be nodes
title: Strain flag (DEPRECATED)
description: DEPRECATED. See https://github.com/kbase/relation_engine_importers/blob/29cbf68b289335ec5c27201efba0058a501c6b74/docs/NCBI_taxa_sciname_lookup_issues_22_07.md
This field should not be used in any new code, but is left intact as it may be contained in
already loaded data.
Original description was
"Whether this node corresponds to a strain. Strains are considered to be nodes
that have a rank of "no rank" and whose parents' rank is either species or subspecies or
where the parent's strain flag is true.
where the parent's strain flag is true."

ncbi_taxon_id:
type: integer
title: The NCBI taxon ID as a number
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ query: |
)
FOR doc IN FULLTEXT(@@taxon_coll, @sciname_field, search_text__fulltext)
FILTER @ts ? doc.created <= @ts AND doc.expired >= @ts : true
FILTER doc.rank IN ["species", "strain"] OR doc.strain
// note that doc.strain is deprecated but is retained for backwards compatibility
// see https://github.com/kbase/relation_engine_importers/blob/d8f87fb74e984cae1c94985b82349b13bc7f277e/docs/NCBI_taxa_sciname_lookup_issues_22_07.md
FILTER doc.rank IN ["species", "strain"] OR doc.strain OR doc.species_or_below
LET doc_sciname__norm = REGEX_REPLACE(LOWER(TRIM(doc.scientific_name)), "\\s+", " ") // for exact matching
LET contains_ind = CONTAINS(doc_sciname__norm, search_text__norm, true)
SORT contains_ind == 0 DESC, // prefix match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ query: |
)
FOR doc IN FULLTEXT(@@taxon_coll, @sciname_field, search_text__fulltext)
FILTER @ts ? doc.created <= @ts AND doc.expired >= @ts : true
FILTER doc.rank IN ["species", "strain"] OR doc.strain
// note that doc.strain is deprecated but is retained for backwards compatibility
// see https://github.com/kbase/relation_engine_importers/blob/d8f87fb74e984cae1c94985b82349b13bc7f277e/docs/NCBI_taxa_sciname_lookup_issues_22_07.md
FILTER doc.rank IN ["species", "strain"] OR doc.strain OR doc.species_or_below
LIMIT @offset ? @offset : 0, @limit ? @limit : 20
RETURN @select ? KEEP(doc, @select) : doc
Loading

0 comments on commit 927a833

Please sign in to comment.