From d51a349d0a96c3fc08bd8a7cd3defaf792d6c9a9 Mon Sep 17 00:00:00 2001
From: arcangelo7
Date: Sun, 9 Jun 2024 13:02:03 +0200
Subject: [PATCH] _save_query

---
 oc_ocdm/storer.py                  | 23 ++++++++++++++++++++---
 oc_ocdm/test/storer/test_storer.py | 25 ++++++++++++++++++++++++-
 pyproject.toml                     |  2 +-
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/oc_ocdm/storer.py b/oc_ocdm/storer.py
index e32dd46..3c10513 100644
--- a/oc_ocdm/storer.py
+++ b/oc_ocdm/storer.py
@@ -253,7 +253,7 @@ def _class_to_entity_type(entity: AbstractEntity) -> Optional[str]:
         else:
             return None
 
-    def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int = 10) -> bool:
+    def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int = 10, save_queries: bool = False) -> bool:
         self.repok.new_article()
         self.reperr.new_article()
 
@@ -266,6 +266,10 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
         skipped_queries: int = 0
         result: bool = True
 
+        if save_queries:
+            to_be_uploaded_dir = os.path.join(base_dir, "to_be_uploaded")
+            os.makedirs(to_be_uploaded_dir, exist_ok=True)
+
         for idx, entity in enumerate(self.a_set.res_to_entity.values()):
             update_query, n_added, n_removed = get_update_query(entity, entity_type=self._class_to_entity_type(entity))
 
@@ -280,7 +284,10 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
                     removed_statements = n_removed
                 elif index % batch_size == 0: # batch_size-multiple query
-                    result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
+                    if save_queries:
+                        self._save_query(query_string, to_be_uploaded_dir, added_statements, removed_statements)
+                    else:
+                        result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
                     query_string = update_query
                     added_statements = n_added
                     removed_statements = n_removed
 
@@ -291,10 +298,20 @@ def upload_all(self, triplestore_url: str, base_dir: str = None, batch_size: int
                     removed_statements += n_removed
 
         if query_string != "":
-            result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
+            if save_queries:
+                self._save_query(query_string, to_be_uploaded_dir, added_statements, removed_statements)
+            else:
+                result &= self._query(query_string, triplestore_url, base_dir, added_statements, removed_statements)
 
         return result
 
+    def _save_query(self, query_string: str, directory: str, added_statements: int, removed_statements: int) -> None:
+        timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
+        file_name = f"{timestamp}_add{added_statements}_remove{removed_statements}.sparql"
+        file_path = os.path.join(directory, file_name)
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(query_string)
+
     def upload(self, entity: AbstractEntity, triplestore_url: str, base_dir: str = None) -> bool:
         self.repok.new_article()
         self.reperr.new_article()
diff --git a/oc_ocdm/test/storer/test_storer.py b/oc_ocdm/test/storer/test_storer.py
index 4fcb798..b786a0b 100644
--- a/oc_ocdm/test/storer/test_storer.py
+++ b/oc_ocdm/test/storer/test_storer.py
@@ -16,7 +16,6 @@
 import json
 import os
 import unittest
-from platform import system
 from zipfile import ZipFile
 from multiprocessing import Pool
 from SPARQLWrapper import POST, SPARQLWrapper
@@ -182,6 +181,30 @@ def test_provenance(self):
         with Pool(processes=3) as pool:
             pool.starmap(process_entity, entities_to_process)
 
+    def test_store_graphs_save_queries(self):
+        base_dir = os.path.join("oc_ocdm", "test", "storer", "data", "rdf_save_queries") + os.sep
+        storer = Storer(self.graph_set, context_map={}, dir_split=10000, n_file_item=1000, default_dir="_", output_format='json-ld', zip_output=False)
+        self.prov_set.generate_provenance()
+        prov_storer = Storer(self.prov_set, context_map={}, dir_split=10000, n_file_item=1000, default_dir="_", output_format='json-ld', zip_output=False)
+        storer.store_all(base_dir, self.base_iri)
+        prov_storer.store_all(base_dir, self.base_iri)
+
+        to_be_uploaded_dir = os.path.join(base_dir, "to_be_uploaded")
+        storer.upload_all(self.ts, base_dir, save_queries=True)
+
+        # Check that the to_be_uploaded directory exists
+        self.assertTrue(os.path.exists(to_be_uploaded_dir))
+
+        # Check that there is at least one file in the to_be_uploaded directory
+        saved_queries = os.listdir(to_be_uploaded_dir)
+        self.assertGreater(len(saved_queries), 0)
+
+        # Check the content of one of the saved files
+        query_file = os.path.join(to_be_uploaded_dir, saved_queries[0])
+        with open(query_file, 'r', encoding='utf-8') as f:
+            query_content = f.read()
+        self.assertIn("INSERT DATA", query_content)  # Verify that an insert query was saved
+
 def process_entity(entity):
     base_iri = "http://test/"
     ts = 'http://127.0.0.1:9999/blazegraph/sparql'
diff --git a/pyproject.toml b/pyproject.toml
index 51eb629..972f98a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "oc_ocdm"
-version = "8.1.2"
+version = "8.2.0"
 description = "Object mapping library for manipulating RDF graphs that are compliant with the OpenCitations datamodel."
 authors = [
     "Silvio Peroni ",