Skip to content

Commit

Permalink
_save_query
Browse files Browse the repository at this point in the history
  • Loading branch information
arcangelo7 committed Jun 9, 2024
1 parent d5ac127 commit d51a349
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 5 deletions.
23 changes: 20 additions & 3 deletions oc_ocdm/storer.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def _class_to_entity_type(entity: AbstractEntity) -> Optional[str]:
else:
return None

def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int = 10) -> bool:
def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int = 10, save_queries: bool = False) -> bool:
self.repok.new_article()
self.reperr.new_article()

Expand All @@ -266,6 +266,10 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
skipped_queries: int = 0
result: bool = True

if save_queries:
to_be_uploaded_dir = os.path.join(base_dir, "to_be_uploaded")
os.makedirs(to_be_uploaded_dir, exist_ok=True)

for idx, entity in enumerate(self.a_set.res_to_entity.values()):
update_query, n_added, n_removed = get_update_query(entity, entity_type=self._class_to_entity_type(entity))

Expand All @@ -280,7 +284,10 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
removed_statements = n_removed
elif index % batch_size == 0:
# batch_size-multiple query
result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
if save_queries:
self._save_query(query_string, to_be_uploaded_dir, added_statements, removed_statements)
else:
result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
query_string = update_query
added_statements = n_added
removed_statements = n_removed
Expand All @@ -291,10 +298,20 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
removed_statements += n_removed

if query_string != "":
result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
if save_queries:
self._save_query(query_string, to_be_uploaded_dir, added_statements, removed_statements)
else:
result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)

return result

def _save_query(self, query_string: str, directory: str, added_statements: int, removed_statements: int) -> None:
timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
file_name = f"{timestamp}_add{added_statements}_remove{removed_statements}.sparql"
file_path = os.path.join(directory, file_name)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(query_string)

def upload(self, entity: AbstractEntity, triplestore_url: str, base_dir: str = None) -> bool:
self.repok.new_article()
self.reperr.new_article()
Expand Down
25 changes: 24 additions & 1 deletion oc_ocdm/test/storer/test_storer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import json
import os
import unittest
from platform import system
from zipfile import ZipFile
from multiprocessing import Pool
from SPARQLWrapper import POST, SPARQLWrapper
Expand Down Expand Up @@ -182,6 +181,30 @@ def test_provenance(self):
with Pool(processes=3) as pool:
pool.starmap(process_entity, entities_to_process)

def test_store_graphs_save_queries(self):
    """upload_all(save_queries=True) must write the batched SPARQL updates
    to a 'to_be_uploaded' directory instead of sending them to the triplestore."""
    out_dir = os.path.join("oc_ocdm", "test", "storer", "data", "rdf_save_queries") + os.sep
    graph_storer = Storer(self.graph_set, context_map={}, dir_split=10000, n_file_item=1000, default_dir="_", output_format='json-ld', zip_output=False)
    self.prov_set.generate_provenance()
    provenance_storer = Storer(self.prov_set, context_map={}, dir_split=10000, n_file_item=1000, default_dir="_", output_format='json-ld', zip_output=False)
    graph_storer.store_all(out_dir, self.base_iri)
    provenance_storer.store_all(out_dir, self.base_iri)

    graph_storer.upload_all(self.ts, out_dir, save_queries=True)
    queries_dir = os.path.join(out_dir, "to_be_uploaded")

    # The to_be_uploaded directory must have been created
    self.assertTrue(os.path.exists(queries_dir))

    # and it must contain at least one saved query file
    query_files = os.listdir(queries_dir)
    self.assertGreater(len(query_files), 0)

    # Inspect one of the saved files: it should hold an insertion query
    first_query_path = os.path.join(queries_dir, query_files[0])
    with open(first_query_path, 'r', encoding='utf-8') as query_file:
        self.assertIn("INSERT DATA", query_file.read())

def process_entity(entity):
base_iri = "http://test/"
ts = 'http://127.0.0.1:9999/blazegraph/sparql'
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "oc_ocdm"
version = "8.1.2"
version = "8.2.0"
description = "Object mapping library for manipulating RDF graphs that are compliant with the OpenCitations datamodel."
authors = [
"Silvio Peroni <essepuntato@gmail.com>",
Expand Down

0 comments on commit d51a349

Please sign in to comment.