From 8c2818d6e6b3427dfc582711f73321be66d6b510 Mon Sep 17 00:00:00 2001 From: Xuemin Duan Date: Mon, 11 Nov 2024 10:48:26 +0100 Subject: [PATCH] feat: support rml and ontology formats including: Turtle, N-Triples, N-Quads, Trig, Trix, RDF/XML, JSON-LD, Notation-3 --- fastapp.py | 35 +++++++++++------- scoop/ontology_format_detection.py | 59 ------------------------------ scoop/rdf_graph_load.py | 28 ++++++++++++++ 3 files changed, 50 insertions(+), 72 deletions(-) delete mode 100644 scoop/ontology_format_detection.py create mode 100644 scoop/rdf_graph_load.py diff --git a/fastapp.py b/fastapp.py index c104256..b39588b 100644 --- a/fastapp.py +++ b/fastapp.py @@ -16,7 +16,7 @@ from pyshacl import validate import multiprocessing as mp from scoop.main import main -from scoop.ontology_format_detection import get_ontology_format +from scoop.rdf_graph_load import load_rdf_graph from scoop.SCOOP.shape_integration_priority import ShapeIntegrationPriority from scoop.SCOOP.shape_integration_priority_r import ShapeIntegrationPriorityR @@ -94,22 +94,31 @@ async def translate(request_data: TranslationRequest): if request_data.rmlData!=[]: for index, data in enumerate(request_data.rmlData): - rml_file = os.path.join(inputrml_folder, f"rml{index}.ttl") - rdflib.Graph().parse(data=data, format="turtle").serialize(destination=rml_file, format="turtle") + g, rdf_format = load_rdf_graph(data) + if rdf_format: + rml_file = os.path.join(inputrml_folder, f"rml{index}.ttl") + g.serialize(destination=rml_file, format='ttl') + else: + # unrecognized format + return JSONResponse(content={"error": "Invalid RML format"}, status_code=400) args.extend(['-m', inputrml_folder, '-xr', inputrml_folder]) - # if request_data.owlData!=[]: - # for index, data in enumerate(request_data.owlData): - # owl_file = os.path.join(inputowl_folder, f"owl{index}.txt") - # open(owl_file, 'w', encoding='utf-8').write(data) - # args.extend(['-o', inputowl_folder]) + if request_data.owlData != []: for index, data in 
enumerate(request_data.owlData): - rdf_format = get_ontology_format(data) - print("HERE",rdf_format) - owl_file = os.path.join(inputowl_folder, f"owl{index}.{rdf_format}") - with open(owl_file, 'w', encoding='utf-8') as f: - f.write(data) + print("Start loading ontology") + g, rdf_format = load_rdf_graph(data) + print(f"Detected RDF format: {rdf_format}") + if rdf_format: + print("Here") + owl_file = os.path.join(inputowl_folder, f"owl{index}.ttl") + print(f"OWL file: {owl_file}") + g.serialize(destination=owl_file, format='ttl') + print("Ontology loaded") + else: + # unrecognized format + return JSONResponse(content={"error": "Invalid Ontology format"}, status_code=400) args.extend(['-o', inputowl_folder]) + if request_data.xsdData!=[]: for index, data in enumerate(request_data.xsdData): xsd_file = os.path.join(inputxsd_folder, f"xsd{index}.xsd") diff --git a/scoop/ontology_format_detection.py b/scoop/ontology_format_detection.py deleted file mode 100644 index 4681c8d..0000000 --- a/scoop/ontology_format_detection.py +++ /dev/null @@ -1,59 +0,0 @@ -import json - -def is_json_ld(data): - try: - json.loads(data) - return True - except ValueError: - return False - -def get_ontology_format(data): - data = "\n".join([line.strip() for line in data.strip().splitlines() if line.strip()]) - - if is_json_ld(data): - return 'jsonld' - - elif data.startswith("' in line and '.' in line and len(line.split()) == 4 for line in data.splitlines()): - return 'nt' - - elif all('<' in line and '>' in line and '.' in line and len(line.split()) == 5 for line in data.splitlines()): - return 'nq' - - elif data.count('{') > 0 and data.count('}') > 0: - return 'trig' - - elif data.startswith("trdf:"): - return 'trdf' - - elif data.startswith("{") and "rdf:" in data: - return 'rj' - - elif data.startswith("") and data.count("") > 0: - return 'trix' - - return 'ttl' - -if __name__ == '__main__': - - rdf_strings = [ - '@prefix ex: . 
ex:subject ex:predicate ex:object .', # Turtle
-        '', # RDF/XML
-        '{"@context": {"ex": "http://example.org/"},"@id": "ex:subject", "ex:predicate": "ex:object"}', # JSON-LD
-        ' .', # N-Triples
-        ' .', # N-Quads
-        '{ .}', # TriG
-        'trdf:{"graph": "ex:graph", "subject": "ex:subject", "predicate": "ex:predicate", "object": "ex:object"}', # RDF Thrift
-        '{"rdf:subject": "ex:subject", "rdf:predicate": "ex:predicate", "rdf:object": "ex:object"}', # RDF/JSON
-        'ex:subject', # TriX
-    ]
-
-
-    for rdf_string in rdf_strings:
-        format_detected = get_ontology_format(rdf_string)
-        print(f"Detected RDF format: {format_detected}")
diff --git a/scoop/rdf_graph_load.py b/scoop/rdf_graph_load.py
new file mode 100644
index 0000000..acb666d
--- /dev/null
+++ b/scoop/rdf_graph_load.py
@@ -0,0 +1,28 @@
+import rdflib
+import logging
+
+logging.basicConfig(level=logging.ERROR)
+
+def load_rdf_graph(data):  # returns (graph, format) on success, (None, None) if nothing parses
+    formats = ['turtle', 'nt', 'xml', 'json-ld', 'n3', 'trig', 'trix', 'nquads']
+    for fmt in formats:  # tried in list order; the first format that parses wins
+        graph = rdflib.Graph()  # fresh graph per attempt; NOTE(review): confirm a plain Graph keeps named-graph data for trig/trix/nquads
+        try:
+            graph.parse(data=data, format=fmt)
+            return graph, fmt
+        except Exception:  # this format did not parse; KeyboardInterrupt/SystemExit still propagate
+            continue
+
+    return None, None
+
+if __name__ == '__main__':
+    rdf_strings = [
+        '@prefix ex: . ex:subject ex:predicate ex:object .', # Turtle
+        '{"@context": {"ex": "http://example.org/"},"@id": "ex:subject", "ex:predicate": "ex:object"}', # JSON-LD
+        ' .', # N-Triples
+        ' .', # N-Quads
+    ]
+
+    for rdf_string in rdf_strings:
+        graph, rdf_format = load_rdf_graph(rdf_string)  # was get_rdf_format, which is not defined anywhere
+        print(f"Detected RDF format: {rdf_format}, Graph: {graph}")