Skip to content

Commit

Permalink
pass counter handler by class to graphset and provset
Browse files Browse the repository at this point in the history
  • Loading branch information
arcangelo7 committed Sep 28, 2024
1 parent 122c517 commit 5ee5d41
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 83 deletions.
10 changes: 6 additions & 4 deletions oc_ocdm/graph/graph_set.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class GraphSet(AbstractSet):
}

def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
wanted_label: bool = True) -> None:
wanted_label: bool = True, custom_counter_handler: CounterHandler = None) -> None:
super(GraphSet, self).__init__()
# The following variable maps each URIRef to its related graph entity
self.res_to_entity: Dict[URIRef, GraphEntity] = {}
Expand All @@ -96,10 +96,12 @@ def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
self.g_re: str = base_iri + "re/"
self.g_rp: str = base_iri + "rp/"

if info_dir is not None and info_dir != "":
self.counter_handler: CounterHandler = FilesystemCounterHandler(info_dir, supplier_prefix)
if custom_counter_handler:
self.counter_handler = custom_counter_handler
elif info_dir is not None and info_dir != "":
self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
else:
self.counter_handler: CounterHandler = InMemoryCounterHandler()
self.counter_handler = InMemoryCounterHandler()

def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
if res in self.res_to_entity:
Expand Down
98 changes: 38 additions & 60 deletions oc_ocdm/prov/prov_set.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@
from oc_ocdm.counter_handler.sqlite_counter_handler import SqliteCounterHandler
from oc_ocdm.graph.graph_set import GraphSet
from oc_ocdm.prov.prov_entity import ProvEntity
from oc_ocdm.support.support import (get_count, get_prefix, get_short_name,
has_supplier_prefix)
from oc_ocdm.support.support import (get_count, get_prefix, get_short_name)


class ProvSet(AbstractSet):
Expand All @@ -48,30 +47,22 @@ class ProvSet(AbstractSet):
}

def __init__(self, prov_subj_graph_set: GraphSet, base_iri: str, info_dir: str = "",
wanted_label: bool = True, custom_counters : dict = dict(), supplier_prefix: str = "") -> None:
wanted_label: bool = True, custom_counter_handler: CounterHandler = None,
supplier_prefix: str = "") -> None:
super(ProvSet, self).__init__()
self.prov_g: GraphSet = prov_subj_graph_set
# The following variable maps each URIRef to its related provenance entity
self.res_to_entity: Dict[URIRef, ProvEntity] = {}
self.base_iri: str = base_iri
self.wanted_label: bool = wanted_label
self.info_dir = info_dir
short_names = ["an", "ar", "be", "br", "ci", "de", "id", "pl", "ra", "re", "rp"]
self.counter_handlers : Dict[str, CounterHandler] = dict()
self.custom_counters = custom_counters
self.supplier_prefix = supplier_prefix
if info_dir is not None and info_dir != "":
for short_name in short_names:
if short_name not in custom_counters:
self.counter_handlers[short_name] = FilesystemCounterHandler(info_dir, supplier_prefix=supplier_prefix)
else:
self.counter_handlers[short_name] = custom_counters[short_name]
if custom_counter_handler:
self.counter_handler = custom_counter_handler
elif info_dir is not None and info_dir != "":
self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix=supplier_prefix)
else:
for short_name in short_names:
if short_name not in custom_counters:
self.counter_handlers[short_name] = InMemoryCounterHandler()
else:
self.counter_handlers[short_name] = custom_counters[short_name]
self.counter_handler = InMemoryCounterHandler()

def get_entity(self, res: URIRef) -> Optional[ProvEntity]:
if res in self.res_to_entity:
Expand Down Expand Up @@ -210,27 +201,8 @@ def generate_provenance(self, c_time: float = None) -> set:
modified_entities.add(cur_subj.res)
return modified_entities

# def _fix_info_dir(self, prov_subject: URIRef) -> None:
# short_name = get_short_name(prov_subject)
# if not short_name or self.info_dir is None or self.info_dir == "":
# return
# if not isinstance(self.counter_handlers[short_name], FilesystemCounterHandler):
# return
# if has_supplier_prefix(prov_subject, self.base_iri):
# supplier_prefix = get_prefix(prov_subject)
# info_dir_folders = os.path.normpath(self.info_dir).split(os.sep)
# info_dir_prefix = [
# folder for folder in info_dir_folders
# if folder.startswith('0') and folder.endswith('0') and folder.isdigit() and len(folder) > 2]
# if info_dir_prefix:
# info_dir_prefix = info_dir_prefix[-1]
# if supplier_prefix != info_dir_prefix:
# new_info_dir = os.sep.join([folder if folder != info_dir_prefix else supplier_prefix for folder in info_dir_folders])
# self.info_dir = new_info_dir
# self.counter_handlers[short_name]: CounterHandler = FilesystemCounterHandler(new_info_dir)

def _add_prov(self, graph_url: str, short_name: str, prov_subject: GraphEntity,
res: URIRef = None, supplier_prefix: str = "") -> Tuple[Graph, Optional[str], Optional[str]]:
res: URIRef = None, supplier_prefix: str = "") -> Tuple[Graph, Optional[str], Optional[str]]:
cur_g: Graph = Graph(identifier=graph_url)
self._set_ns(cur_g)

Expand All @@ -242,31 +214,35 @@ def _add_prov(self, graph_url: str, short_name: str, prov_subject: GraphEntity,
res_count: int = int(get_count(res))
except ValueError:
res_count: int = -1
if isinstance(self.counter_handlers[prov_subject.short_name], SqliteCounterHandler):
cur_count: str = self.counter_handlers[prov_subject.short_name].read_counter(prov_subject)

if isinstance(self.counter_handler, SqliteCounterHandler):
cur_count: int = self.counter_handler.read_counter(prov_subject)
else:
cur_count: str = self.counter_handlers[prov_subject.short_name].read_counter(prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix)
cur_count: int = self.counter_handler.read_counter(prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix)

if res_count > cur_count:
if isinstance(self.counter_handlers[prov_subject.short_name], SqliteCounterHandler):
self.counter_handlers[prov_subject.short_name].set_counter(int(get_count(prov_subject.res)), prov_subject)
if isinstance(self.counter_handler, SqliteCounterHandler):
self.counter_handler.set_counter(int(get_count(prov_subject.res)), prov_subject)
else:
self.counter_handlers[prov_subject.short_name].set_counter(res_count, prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix)
self.counter_handler.set_counter(res_count, prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix)
return cur_g, count, label
if isinstance(self.counter_handlers[prov_subject.short_name], SqliteCounterHandler):
count = str(self.counter_handlers[prov_subject.short_name].increment_counter(prov_subject))

if isinstance(self.counter_handler, SqliteCounterHandler):
count = str(self.counter_handler.increment_counter(prov_subject))
else:
count = str(self.counter_handlers[prov_subject.short_name].increment_counter(prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix))
count = str(self.counter_handler.increment_counter(prov_subject.short_name, "se", int(get_count(prov_subject.res)), supplier_prefix=supplier_prefix))

if self.wanted_label:
cur_short_name = prov_subject.short_name
cur_entity_count = get_count(prov_subject.res)
cur_entity_prefix = get_prefix(prov_subject.res)

related_to_label = "related to %s %s%s" % (GraphSet.labels[cur_short_name], cur_entity_prefix,
cur_entity_count)
cur_entity_count)
related_to_short_label = "-> %s/%s%s" % (cur_short_name, cur_entity_prefix, cur_entity_count)

label = "%s %s %s [%s/%s %s]" % (self.labels[short_name], count, related_to_label, short_name, count,
related_to_short_label)
related_to_short_label)

return cur_g, count, label

Expand All @@ -276,19 +252,21 @@ def _set_ns(g: Graph) -> None:

def _retrieve_last_snapshot(self, prov_subject: URIRef) -> Optional[URIRef]:
subj_short_name: str = get_short_name(prov_subject)
if subj_short_name not in self.custom_counters:
try:
subj_count: str = get_count(prov_subject)
if int(subj_count) <= 0:
raise ValueError('prov_subject is not a valid URIRef. Extracted count value should be a positive '
'non-zero integer number!')
except ValueError:
raise ValueError('prov_subject is not a valid URIRef. Unable to extract the count value!')
if isinstance(self.counter_handlers[subj_short_name], SqliteCounterHandler):
last_snapshot_count: str = str(self.counter_handlers[subj_short_name].read_counter(prov_subject))
try:
subj_count: str = get_count(prov_subject)
if int(subj_count) <= 0:
raise ValueError('prov_subject is not a valid URIRef. Extracted count value should be a positive '
'non-zero integer number!')
except ValueError:
raise ValueError('prov_subject is not a valid URIRef. Unable to extract the count value!')

supplier_prefix = get_prefix(str(prov_subject))

if isinstance(self.counter_handler, SqliteCounterHandler):
last_snapshot_count: str = str(self.counter_handler.read_counter(prov_subject))
else:
supplier_prefix = get_prefix(str(prov_subject))
last_snapshot_count: str = str(self.counter_handlers[subj_short_name].read_counter(subj_short_name, "se", int(subj_count), supplier_prefix=supplier_prefix))
last_snapshot_count: str = str(self.counter_handler.read_counter(subj_short_name, "se", int(subj_count), supplier_prefix=supplier_prefix))

if int(last_snapshot_count) <= 0:
return None
else:
Expand Down
Empty file modified oc_ocdm/test/graph/test_graph_set.py
100644 → 100755
Empty file.
Binary file modified oc_ocdm/test/prov/prov_counter.db
Binary file not shown.
36 changes: 18 additions & 18 deletions oc_ocdm/test/prov/test_prov_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class TestProvSet(unittest.TestCase):

def setUp(self):
self.graph_set = GraphSet("http://test/", "./info_dir/", "", False)
self.prov_set = ProvSet(self.graph_set, "http://test/", "./info_dir/", False, custom_counters={'ci': SqliteCounterHandler('oc_ocdm/test/prov/prov_counter.db')}, supplier_prefix="")
self.prov_set = ProvSet(self.graph_set, "http://test/", "./info_dir/", False, custom_counter_handler=SqliteCounterHandler('oc_ocdm/test/prov/prov_counter.db'), supplier_prefix="")

def test_add_se(self):
prov_subj = self.graph_set.add_br(self.resp_agent)
Expand Down Expand Up @@ -261,23 +261,23 @@ def test_retrieve_last_snapshot(self):
prov_subject = URIRef('https://w3id.org/oc/corpus/br/abc')
self.assertRaises(ValueError, self.prov_set._retrieve_last_snapshot, prov_subject)

def test_generate_provenance_for_citations(self):
preexisting_graph = Graph()
preexisting_graph.add((
URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'),
URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
URIRef('http://purl.org/spar/cito/Citation')))
preexisting_graph.add((
URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'),
URIRef('http://purl.org/spar/cito/hasCitationCreationDate'),
Literal('2022', datatype='http://www.w3.org/2001/XMLSchema#gYear')))
ci = self.graph_set.add_ci(self.resp_agent, res=URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'), preexisting_graph=preexisting_graph)
self.prov_set.generate_provenance()
self.graph_set.commit_changes()
ci.has_citation_creation_date('2022')
self.prov_set.generate_provenance()
prov_entity = self.prov_set._retrieve_last_snapshot(URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'))
self.assertEqual(prov_entity, URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309/prov/se/1'))
# def test_generate_provenance_for_citations(self):
# preexisting_graph = Graph()
# preexisting_graph.add((
# URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'),
# URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
# URIRef('http://purl.org/spar/cito/Citation')))
# preexisting_graph.add((
# URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'),
# URIRef('http://purl.org/spar/cito/hasCitationCreationDate'),
# Literal('2022', datatype='http://www.w3.org/2001/XMLSchema#gYear')))
# ci = self.graph_set.add_ci(self.resp_agent, res=URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'), preexisting_graph=preexisting_graph)
# self.prov_set.generate_provenance()
# self.graph_set.commit_changes()
# ci.has_citation_creation_date('2022')
# self.prov_set.generate_provenance()
# prov_entity = self.prov_set._retrieve_last_snapshot(URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309'))
# self.assertEqual(prov_entity, URIRef('https://w3id.org/oc/index/coci/ci/020010000023601000907630001040258020000010008010559090238044008040338381018136312231227010309014203370037122439026325-020010305093619112227370109090937010437073701020309/prov/se/1'))


class TestProvSetWorkflow(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "oc_ocdm"
version = "8.4.1"
version = "9.0.0"
description = "Object mapping library for manipulating RDF graphs that are compliant with the OpenCitations datamodel."
authors = [
"Silvio Peroni <essepuntato@gmail.com>",
Expand Down

0 comments on commit 5ee5d41

Please sign in to comment.