Skip to content

Commit

Permalink
_get_specific_type
Browse files Browse the repository at this point in the history
  • Loading branch information
arcangelo7 committed Nov 29, 2024
1 parent 8bd6298 commit e7a8e96
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 6 deletions.
37 changes: 32 additions & 5 deletions oc_ocdm/graph/graph_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import TYPE_CHECKING

from oc_ocdm.abstract_entity import AbstractEntity
from rdflib import Graph, Namespace, URIRef
from rdflib import RDF, Graph, Namespace, URIRef

if TYPE_CHECKING:
from typing import ClassVar, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -260,6 +260,20 @@ def mark_as_to_be_deleted(self) -> None:

self._to_be_deleted = True

def _get_specific_type(self) -> Optional[URIRef]:
    """
    Return an ``rdf:type`` value of this entity other than its base type.

    The base type is the IRI stored in ``short_name_to_type_iri`` under this
    entity's ``short_name``. The ``rdf:type`` statements about ``self.res``
    in ``self.g`` are scanned, and the first object that differs from the
    base type is returned. If several such types are present, the one the
    graph yields first wins.

    Returns:
        The first non-base type IRI found, or ``None`` if there is none.
    """
    generic_type = self.short_name_to_type_iri[self.short_name]
    # Lazy generator: scanning stops at the first type that is not the base one.
    non_base_types = (
        triple[2]
        for triple in self.g.triples((self.res, RDF.type, None))
        if triple[2] != generic_type
    )
    return next(non_base_types, None)

def merge(self, other: GraphEntity, prefer_self: bool = False) -> None:
"""
**WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
Expand All @@ -282,9 +296,22 @@ def merge(self, other: GraphEntity, prefer_self: bool = False) -> None:
new_triple = (triple[0], triple[1], self.res)
entity.g.add(new_triple)

types: List[URIRef] = other.get_types()
for cur_type in types:
self._create_type(cur_type)
self_specific_type = self._get_specific_type()
other_specific_type = other._get_specific_type()

final_specific_type = None
if prefer_self and self_specific_type:
final_specific_type = self_specific_type
elif other_specific_type:
final_specific_type = other_specific_type
elif self_specific_type:
final_specific_type = self_specific_type

self.g.remove((self.res, RDF.type, None))
base_type = self.short_name_to_type_iri[self.short_name]
self.g.add((self.res, RDF.type, base_type))
if final_specific_type:
self.g.add((self.res, RDF.type, final_specific_type))

label: Optional[str] = other.get_label()
if label is not None:
Expand All @@ -307,4 +334,4 @@ def commit_changes(self):
self.preexisting_graph.add(triple)
self._to_be_deleted = False
self._was_merged = False
self._merge_list = tuple()
self._merge_list = tuple()
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,84 @@ def test_assign_more_types_br(self):
triple2 = self.br1.res, RDF.type, GraphEntity.iri_preprint
self.assertIn(triple2, self.br1.g) and not self.assertIn(triple1, self.br1.g)

def test_merge_specific_types(self):
    """Check the ``rdf:type`` values an entity holds after ``merge``.

    Six scenarios are exercised, each on a fresh pair of entities obtained
    from ``self.graph_set.add_br``: specific into generic, generic into
    specific, two different specific types with ``prefer_self`` set to
    ``True`` and to ``False``, and generic into specific with ``prefer_self``
    given explicitly as ``True`` and as ``False``.
    """

    def new_pair():
        # Create two fresh entities, always in the same (first, second) order.
        first = self.graph_set.add_br(self.resp_agent)
        second = self.graph_set.add_br(self.resp_agent)
        return first, second

    def check_has(entity, type_iri):
        # The entity's own graph must state that it has the given type.
        self.assertIn((entity.res, RDF.type, type_iri), entity.g)

    def check_lacks(entity, type_iri):
        # The entity's own graph must NOT state that it has the given type.
        self.assertNotIn((entity.res, RDF.type, type_iri), entity.g)

    # Scenario 1: an entity with a specific type is merged into a generic one.
    typed, untyped = new_pair()
    typed.create_journal_article()

    # Starting point: the typed entity has both types, the untyped one only the base.
    check_has(typed, GraphEntity.iri_journal_article)
    check_has(typed, GraphEntity.iri_expression)
    check_has(untyped, GraphEntity.iri_expression)

    # After the merge the generic entity carries the specific type as well.
    untyped.merge(typed)
    check_has(untyped, GraphEntity.iri_journal_article)
    check_has(untyped, GraphEntity.iri_expression)

    # Scenario 2: a generic entity is merged into one with a specific type.
    typed, untyped = new_pair()
    typed.create_journal_article()

    typed.merge(untyped)
    check_has(typed, GraphEntity.iri_journal_article)
    check_has(typed, GraphEntity.iri_expression)

    # Scenario 3: two different specific types, prefer_self=True keeps the receiver's.
    article, book = new_pair()
    article.create_journal_article()
    book.create_book()

    article.merge(book, prefer_self=True)
    check_has(article, GraphEntity.iri_journal_article)
    check_has(article, GraphEntity.iri_expression)
    check_lacks(article, GraphEntity.iri_book)

    # Scenario 4: two different specific types, prefer_self=False takes the other's.
    article, book = new_pair()
    article.create_journal_article()
    book.create_book()

    article.merge(book, prefer_self=False)
    check_has(article, GraphEntity.iri_book)
    check_has(article, GraphEntity.iri_expression)
    check_lacks(article, GraphEntity.iri_journal_article)

    # Scenario 5: generic merged into specific with prefer_self=True.
    typed, untyped = new_pair()
    typed.create_journal_article()

    # Starting point, checked again for this fresh pair.
    check_has(typed, GraphEntity.iri_journal_article)
    check_has(typed, GraphEntity.iri_expression)
    check_has(untyped, GraphEntity.iri_expression)

    # The specific type survives the merge.
    typed.merge(untyped, prefer_self=True)
    check_has(typed, GraphEntity.iri_journal_article)
    check_has(typed, GraphEntity.iri_expression)

    # Scenario 6: generic merged into specific with prefer_self=False.
    typed, untyped = new_pair()
    typed.create_journal_article()

    typed.merge(untyped, prefer_self=False)
    check_has(typed, GraphEntity.iri_expression)
    check_has(typed, GraphEntity.iri_journal_article)

# Run unittest's command-line entry point when this module is executed directly.
if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "oc_ocdm"
version = "9.2.0"
version = "9.2.1"
description = "Object mapping library for manipulating RDF graphs that are compliant with the OpenCitations datamodel."
authors = [
"Silvio Peroni <essepuntato@gmail.com>",
Expand Down

0 comments on commit e7a8e96

Please sign in to comment.