Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve testing of round trip serialization #480

Merged
merged 30 commits into master from issue-321
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
831a58e
Convert SSSOM TSV => JSON => TSV and confirm the MappingSetDataFrame …
hrshdhgd Sep 26, 2023
1854d29
Add more explicit tests
cthoyt Sep 26, 2023
e00f4d4
Merge branch 'master' into issue-321
hrshdhgd Oct 3, 2023
d8db602
Merge branch 'master' into issue-321
hrshdhgd Dec 13, 2023
8f9a334
SSSOM tsv => json => tsv fixed.
hrshdhgd Dec 14, 2023
9702d8c
Now gets both metadata and prefix_map from JSON
hrshdhgd Dec 14, 2023
7ca9934
undo changes for util.py
hrshdhgd Dec 14, 2023
43e4555
undo changes for util.py
hrshdhgd Dec 14, 2023
b117b38
not needed
hrshdhgd Dec 14, 2023
f03b612
undo util change
hrshdhgd Dec 14, 2023
2eac05b
Refactor @context => JSON_CONTEXT_KEY
hrshdhgd Dec 14, 2023
4c480a5
changed @context backto what it was
hrshdhgd Dec 14, 2023
fc8c43c
Use all attr of MappingSet instead of the previous two
hrshdhgd Dec 14, 2023
f87db65
Update parsers.py
matentzn Jan 4, 2024
a5c6509
Update test_parsers.py
matentzn Jan 4, 2024
93bbe68
Add roundtrip test for RDF parsing as well
matentzn Jan 4, 2024
c7bb6c8
Update test_parsers.py
matentzn Jan 5, 2024
2081f47
Update tests
cthoyt Jan 5, 2024
1bf8756
Add TSV test and minor refactor
cthoyt Jan 5, 2024
f72ff1a
Remove redundant test
cthoyt Jan 5, 2024
70325d3
Update test_parsers.py
cthoyt Jan 5, 2024
5bcc9fb
Refactor the rdf parser to use linkml
matentzn Jan 12, 2024
655012b
Some linting
matentzn Jan 12, 2024
e21ab07
Ignore unmapped predicates
matentzn Jan 12, 2024
77dee25
Update parsers.py
cthoyt Jan 13, 2024
3a679a1
Fix bimap usage
cthoyt Jan 13, 2024
249ee1c
Add missing field to RDF
cthoyt Jan 13, 2024
52cb27f
Fix broken oio references
cthoyt Jan 13, 2024
de4f9c6
Update broken test data in rdf
matentzn Jan 13, 2024
556728e
Update wrong test count
matentzn Jan 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
CONFIDENCE,
CURIE_MAP,
DEFAULT_MAPPING_PROPERTIES,
LICENSE,
MAPPING_JUSTIFICATION,
MAPPING_JUSTIFICATION_UNSPECIFIED,
MAPPING_SET_ID,
OBJECT_ID,
OBJECT_LABEL,
OBJECT_SOURCE,
Expand Down Expand Up @@ -247,22 +249,27 @@ def parse_sssom_rdf(


def parse_sssom_json(
file_path: str,
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
**kwargs
# mapping_predicates: Optional[List[str]] = None,
file_path: str, prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None, **kwargs
) -> MappingSetDataFrame:
"""Parse a TSV to a :class:`MappingSetDocument` to a :class`MappingSetDataFrame`."""
"""Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
raise_for_bad_path(file_path)
converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)

with open(file_path) as json_file:
jsondoc = json.load(json_file)

# Initialize meta if it's None
if meta is None:
meta = {}

# Update metadata with values from JSON document
meta_keys_to_update = [MAPPING_SET_ID, LICENSE]
meta.update({key: jsondoc[key] for key in meta_keys_to_update if key in jsondoc})

converter_from_jsonld = Converter.from_jsonld(file_path)
converter_via_metadata, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)
converter = curies.chain([converter_from_jsonld, converter_via_metadata])

msdf = from_sssom_json(jsondoc=jsondoc, prefix_map=converter, meta=meta)
# df: pd.DataFrame = msdf.df
# if mapping_predicates and not df.empty():
# msdf.df = df[df["predicate_id"].isin(mapping_predicates)]
return msdf


Expand Down
1 change: 1 addition & 0 deletions tests/data/basic_subset.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# b: "http://example.org/b/"
# c: "http://example.org/c/"
# d: "http://example.org/d/"
# orcid: "https://orcid.org/"
subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification subject_source object_source mapping_tool confidence subject_match_field object_match_field subject_category object_category match_string comment
x:appendage appendage owl:equivalentClass y:appendage appendages semapv:ManualMappingCuration x y rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
x:appendage appendage owl:equivalentClass z:appendage APPENDAGE semapv:ManualMappingCuration x z rdf_matcher 0.840714406 rdfs:label rdfs:label biolink:AnatomicalEntity biolink:AnatomicalEntity appendag .
29 changes: 26 additions & 3 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import json
import math
import os
import tempfile
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from xml.dom import minidom

Expand All @@ -18,7 +18,7 @@

from sssom.constants import CURIE_MAP, DEFAULT_LICENSE, SSSOM_URI_PREFIX, get_default_metadata
from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter
from sssom.io import parse_file
from sssom.io import convert_file, parse_file
from sssom.parsers import (
_open_input,
_read_pandas_and_metadata,
Expand Down Expand Up @@ -378,7 +378,7 @@ def test_round_trip(self):
set(msdf.prefix_map),
)

with tempfile.TemporaryDirectory() as directory:
with TemporaryDirectory() as directory:
directory = Path(directory)
path = directory.joinpath("test.sssom.tsv")
with path.open("w") as file:
Expand Down Expand Up @@ -410,3 +410,26 @@ def test_round_trip(self):

# This checks that nothing funny gets added unexpectedly
self.assertEqual(expected_prefix_map, reconsitited_msdf.prefix_map)

def test_round_trip_tsv_json_tsv(self):
    """Check that a mapping set survives a TSV => JSON => TSV round trip.

    The input table is converted to JSON with ``convert_file``, the JSON
    is written back out as TSV with ``parse_file``, and the re-parsed
    result is compared against the original data frame, prefix map, and
    metadata.
    """
    source_tsv = test_data_dir.joinpath("basic_subset.tsv")
    expected_msdf = parse_sssom_table(source_tsv)
    expected_msdf.clean_prefix_map()

    with TemporaryDirectory() as tmp_name:
        workdir = Path(tmp_name)
        json_path = workdir / "basic_subset.json"
        tsv_path = workdir / "json_to_tsv.tsv"

        # Step 1: TSV => JSON
        with json_path.open("w") as json_handle:
            convert_file(input_path=source_tsv, output_format="json", output=json_handle)

        # Step 2: JSON => TSV
        with tsv_path.open("w") as tsv_handle:
            parse_file(
                input_path=str(json_path),
                input_format="json",
                prefix_map_mode="merged",
                output=tsv_handle,
            )

        # Re-read the regenerated TSV only after its handle is closed, and
        # while the temporary directory still exists.
        actual_msdf = parse_sssom_table(tsv_path)
        pd.testing.assert_frame_equal(expected_msdf.df, actual_msdf.df)
        self.assertEqual(expected_msdf.prefix_map, actual_msdf.prefix_map)
        self.assertEqual(expected_msdf.metadata, actual_msdf.metadata)