diff --git a/oc_ocdm/graph/graph_entity.py b/oc_ocdm/graph/graph_entity.py
index a201f68..c9c3295 100644
--- a/oc_ocdm/graph/graph_entity.py
+++ b/oc_ocdm/graph/graph_entity.py
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING
 
 
 from oc_ocdm.abstract_entity import AbstractEntity
-from rdflib import Graph, Namespace, URIRef
+from rdflib import RDF, Graph, Namespace, URIRef
 
 if TYPE_CHECKING:
     from typing import ClassVar, Dict, List, Optional, Tuple
@@ -260,6 +260,20 @@ def mark_as_to_be_deleted(self) -> None:
 
         self._to_be_deleted = True
 
+    def _get_specific_type(self) -> Optional[URIRef]:
+        """
+        Return one ``rdf:type`` of this entity that differs from its base type, if any
+        (e.g., JournalArticle). The base type is ``short_name_to_type_iri[self.short_name]``.
+
+        Returns:
+            The first non-base type IRI yielded by ``self.g``, None if there is none
+        """
+        base_type = self.short_name_to_type_iri[self.short_name]  # type IRI registered for this short name
+        for _, _, type_uri in self.g.triples((self.res, RDF.type, None)):  # every rdf:type triple of self.res
+            if type_uri != base_type:
+                return type_uri  # first match wins; any further non-base types are not inspected
+        return None
+
     def merge(self, other: GraphEntity, prefer_self: bool = False) -> None:
         """
         **WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
@@ -282,9 +296,22 @@ def merge(self, other: GraphEntity, prefer_self: bool = False) -> None:
                 new_triple = (triple[0], triple[1], self.res)
                 entity.g.add(new_triple)
 
-        types: List[URIRef] = other.get_types()
-        for cur_type in types:
-            self._create_type(cur_type)
+        self_specific_type = self._get_specific_type()
+        other_specific_type = other._get_specific_type()
+
+        final_specific_type = None  # stays None when neither entity has a non-base type
+        if prefer_self and self_specific_type:  # caller asked to keep self's own type
+            final_specific_type = self_specific_type
+        elif other_specific_type:  # otherwise other's specific type takes precedence
+            final_specific_type = other_specific_type
+        elif self_specific_type:  # other has no specific type: keep the one self already had
+            final_specific_type = self_specific_type
+
+        self.g.remove((self.res, RDF.type, None))  # drop every rdf:type triple of self.res
+        base_type = self.short_name_to_type_iri[self.short_name]
+        self.g.add((self.res, RDF.type, base_type))  # the base type is always re-asserted
+        if final_specific_type:
+            self.g.add((self.res, RDF.type, final_specific_type))
 
         label: Optional[str] = other.get_label()
         if label is not None:
diff --git a/oc_ocdm/test/graph/entities/bibliographic/test_bibliographic_resource.py b/oc_ocdm/test/graph/entities/bibliographic/test_bibliographic_resource.py
index dea9a3d..7b04487 100644
--- a/oc_ocdm/test/graph/entities/bibliographic/test_bibliographic_resource.py
+++ b/oc_ocdm/test/graph/entities/bibliographic/test_bibliographic_resource.py
@@ -353,6 +353,84 @@ def test_assign_more_types_br(self):
         triple2 = self.br1.res, RDF.type, GraphEntity.iri_preprint
         self.assertIn(triple2, self.br1.g) and not self.assertIn(triple1, self.br1.g)
 
+    def test_merge_specific_types(self):
+        """Check which rdf:type triples the receiving BR holds after merge(), case by case."""
+
+        # Case 1: generic.merge(specific) -> the receiver gains the specific type
+        br_specific = self.graph_set.add_br(self.resp_agent)
+        br_generic = self.graph_set.add_br(self.resp_agent)
+
+        br_specific.create_journal_article()
+
+        # Verify initial types: the specific BR carries both types, the generic one only the base type
+        self.assertIn((br_specific.res, RDF.type, GraphEntity.iri_journal_article), br_specific.g)
+        self.assertIn((br_specific.res, RDF.type, GraphEntity.iri_expression), br_specific.g)
+        self.assertIn((br_generic.res, RDF.type, GraphEntity.iri_expression), br_generic.g)
+
+        # Merge and verify the receiver now has the base type plus the specific type
+        br_generic.merge(br_specific)
+        self.assertIn((br_generic.res, RDF.type, GraphEntity.iri_journal_article), br_generic.g)
+        self.assertIn((br_generic.res, RDF.type, GraphEntity.iri_expression), br_generic.g)
+
+        # Case 2: specific.merge(generic) -> the receiver keeps its own specific type
+        br_specific_2 = self.graph_set.add_br(self.resp_agent)
+        br_generic_2 = self.graph_set.add_br(self.resp_agent)
+
+        br_specific_2.create_journal_article()
+
+        br_specific_2.merge(br_generic_2)
+        self.assertIn((br_specific_2.res, RDF.type, GraphEntity.iri_journal_article), br_specific_2.g)
+        self.assertIn((br_specific_2.res, RDF.type, GraphEntity.iri_expression), br_specific_2.g)
+
+        # Case 3: both specific, prefer_self=True -> the receiver's type wins, the other's is absent
+        br_article = self.graph_set.add_br(self.resp_agent)
+        br_book = self.graph_set.add_br(self.resp_agent)
+
+        br_article.create_journal_article()
+        br_book.create_book()
+
+        br_article.merge(br_book, prefer_self=True)
+        self.assertIn((br_article.res, RDF.type, GraphEntity.iri_journal_article), br_article.g)
+        self.assertIn((br_article.res, RDF.type, GraphEntity.iri_expression), br_article.g)
+        self.assertNotIn((br_article.res, RDF.type, GraphEntity.iri_book), br_article.g)
+
+        # Case 4: both specific, prefer_self=False -> the other's type replaces the receiver's
+        br_article_2 = self.graph_set.add_br(self.resp_agent)
+        br_book_2 = self.graph_set.add_br(self.resp_agent)
+
+        br_article_2.create_journal_article()
+        br_book_2.create_book()
+
+        br_article_2.merge(br_book_2, prefer_self=False)
+        self.assertIn((br_article_2.res, RDF.type, GraphEntity.iri_book), br_article_2.g)
+        self.assertIn((br_article_2.res, RDF.type, GraphEntity.iri_expression), br_article_2.g)
+        self.assertNotIn((br_article_2.res, RDF.type, GraphEntity.iri_journal_article), br_article_2.g)
+
+        # Case 5: specific.merge(generic, prefer_self=True) -> the receiver keeps its specific type
+        br_specific_3 = self.graph_set.add_br(self.resp_agent)
+        br_generic_3 = self.graph_set.add_br(self.resp_agent)
+
+        br_specific_3.create_journal_article()
+
+        # Verify initial types: same starting point as Case 1
+        self.assertIn((br_specific_3.res, RDF.type, GraphEntity.iri_journal_article), br_specific_3.g)
+        self.assertIn((br_specific_3.res, RDF.type, GraphEntity.iri_expression), br_specific_3.g)
+        self.assertIn((br_generic_3.res, RDF.type, GraphEntity.iri_expression), br_generic_3.g)
+
+        # Merge and verify both types are still present on the receiver
+        br_specific_3.merge(br_generic_3, prefer_self=True)
+        self.assertIn((br_specific_3.res, RDF.type, GraphEntity.iri_journal_article), br_specific_3.g)
+        self.assertIn((br_specific_3.res, RDF.type, GraphEntity.iri_expression), br_specific_3.g)
+
+        # Case 6: specific.merge(generic, prefer_self=False) -> the generic other does not erase the type
+        br_specific_4 = self.graph_set.add_br(self.resp_agent)
+        br_generic_4 = self.graph_set.add_br(self.resp_agent)
+
+        br_specific_4.create_journal_article()
+
+        br_specific_4.merge(br_generic_4, prefer_self=False)
+        self.assertIn((br_specific_4.res, RDF.type, GraphEntity.iri_expression), br_specific_4.g)
+        self.assertIn((br_specific_4.res, RDF.type, GraphEntity.iri_journal_article), br_specific_4.g)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pyproject.toml b/pyproject.toml
index a1fd8c6..8cfb1a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "oc_ocdm"
-version = "9.2.0"
+version = "9.2.1"
 description = "Object mapping library for manipulating RDF graphs that are compliant with the OpenCitations datamodel."
 authors = [
     "Silvio Peroni ",