Skip to content

Commit

Permalink
add shacl_validate/infer functions and use these as the entrypoint. A…
Browse files Browse the repository at this point in the history
…ugment tests to check both shacl engines
  • Loading branch information
gtfierro committed Mar 12, 2024
1 parent 87e2cbd commit cdeba72
Show file tree
Hide file tree
Showing 9 changed files with 1,197 additions and 1,228 deletions.
12 changes: 2 additions & 10 deletions buildingmotif/dataclasses/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Union

import pygit2
import pyshacl
import rdflib
import sqlalchemy
import yaml
Expand All @@ -23,6 +22,7 @@
from buildingmotif.utils import (
get_ontology_files,
get_template_parts_from_shape,
shacl_inference,
skip_uri,
)

Expand Down Expand Up @@ -248,15 +248,7 @@ def _load_from_ontology(
# expand the ontology graph before we insert it into the database. This will ensure
# that the output of compiled models will not contain triples that really belong to
# the ontology
pyshacl.validate(
data_graph=ontology,
shacl_graph=ontology,
ont_graph=ontology,
advanced=True,
inplace=True,
js=True,
allow_warnings=True,
)
shacl_inference(ontology, ontology)

lib = cls.create(ontology_name, overwrite=overwrite)

Expand Down
29 changes: 18 additions & 11 deletions buildingmotif/dataclasses/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from buildingmotif.dataclasses.shape_collection import ShapeCollection
from buildingmotif.dataclasses.validation import ValidationContext
from buildingmotif.namespaces import A
from buildingmotif.utils import Triple, copy_graph, rewrite_shape_graph
from buildingmotif.utils import (
Triple,
copy_graph,
rewrite_shape_graph,
shacl_inference,
shacl_validate,
)

if TYPE_CHECKING:
from buildingmotif import BuildingMOTIF
Expand Down Expand Up @@ -141,6 +147,7 @@ def validate(
self,
shape_collections: Optional[List[ShapeCollection]] = None,
error_on_missing_imports: bool = True,
engine: str = "pyshacl",
) -> "ValidationContext":
"""Validates this model against the given list of ShapeCollections.
If no list is provided, the model will be validated against the model's "manifest".
Expand All @@ -157,6 +164,10 @@ def validate(
ontologies are missing (i.e. they need to be loaded into BuildingMOTIF), defaults
to True
:type error_on_missing_imports: bool, optional
:param engine: the engine to use for validation. "pyshacl" or "topquadrant". Using topquadrant
requires Java to be installed on this machine, and the "topquadrant" feature on BuildingMOTIF,
defaults to "pyshacl"
:type engine: str, optional
:return: An object containing useful properties/methods to deal with
the validation results
:rtype: ValidationContext
Expand All @@ -176,16 +187,12 @@ def validate(
shapeg = rewrite_shape_graph(shapeg)
# TODO: do we want to preserve the materialized triples added to data_graph via reasoning?
data_graph = copy_graph(self.graph)
valid, report_g, report_str = pyshacl.validate(
data_graph,
shacl_graph=shapeg,
ont_graph=shapeg,
advanced=True,
js=True,
allow_warnings=True,
# inplace=True,
)
assert isinstance(report_g, rdflib.Graph)

# perform inference on the data graph
shacl_inference(data_graph, shapeg)

# validate the data graph
valid, report_g, report_str = shacl_validate(data_graph, shapeg, engine)
return ValidationContext(
shape_collections,
valid,
Expand Down
39 changes: 38 additions & 1 deletion buildingmotif/dataclasses/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import cached_property
from itertools import chain
from secrets import token_hex
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union

import rdflib
from rdflib import Graph, URIRef
Expand Down Expand Up @@ -277,6 +277,43 @@ def as_templates(self) -> List["Template"]:
"""
return diffset_to_templates(self.diffset)

def get_reasons_with_severity(
    self, severity: Union[URIRef, str]
) -> Dict[Optional[URIRef], Set[GraphDiff]]:
    """
    Like diffset, but only includes ValidationResults with the given severity.
    Permitted values are:
    - SH.Violation or "Violation" for violations
    - SH.Warning or "Warning" for warnings
    - SH.Info or "Info" for info

    Every focus node of the diffset is kept as a key; a focus node with no
    diffs of the requested severity maps to an empty set.

    :param severity: the severity to filter by
    :type severity: Union[URIRef, str]
    :raises ValueError: if the severity is not SH.Violation, SH.Warning or SH.Info
    :return: a dictionary of focus nodes to the reasons with the given severity
    :rtype: Dict[Optional[URIRef], Set[GraphDiff]]
    """

    # URIRef is itself a subclass of str, so testing isinstance(severity, str)
    # would also match SH.Violation and prefix it with the SH namespace a second
    # time. Only expand values that are not already URIRefs.
    if not isinstance(severity, URIRef):
        severity = SH[severity]

    # check if the severity is a valid SHACL severity
    if severity not in {SH.Violation, SH.Warning, SH.Info}:
        raise ValueError(
            f"Invalid severity: {severity}. Must be one of SH.Violation, SH.Warning, or SH.Info"
        )

    # for each focus node in the diffset, keep only the diffs whose validation
    # result carries the requested sh:resultSeverity
    return {
        focus: {
            diff
            for diff in diffs
            if diff.validation_result.value(diff._result_uri, SH.resultSeverity)
            == severity
        }
        for focus, diffs in self.diffset.items()
    }

def _report_to_diffset(self) -> Dict[Optional[URIRef], Set[GraphDiff]]:
"""Interpret a SHACL validation report and say what is missing.
Expand Down
74 changes: 74 additions & 0 deletions buildingmotif/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple

import pyshacl
from rdflib import BNode, Graph, Literal, URIRef
from rdflib.paths import ZeroOrOne
from rdflib.term import Node
Expand Down Expand Up @@ -532,3 +533,76 @@ def skip_uri(uri: URIRef) -> bool:
if uri.startswith(ns):
return True
return False


def shacl_validate(
    data_graph: Graph, shape_graph: Optional[Graph] = None, engine="topquadrant"
) -> Tuple[bool, Graph, str]:
    """
    Validate the data graph against the shape graph.

    With the default ``engine="topquadrant"`` the TopQuadrant SHACL engine
    (the ``brick_tq_shacl`` package) is used when it can be imported; if it is
    not installed, an info message is logged and PySHACL is used instead.
    Any other value of ``engine`` uses PySHACL.

    :param data_graph: the graph to validate
    :type data_graph: Graph
    :param shape_graph: the shape graph to validate against; the TopQuadrant
        path substitutes an empty graph when this is None or empty
    :type shape_graph: Graph, optional
    :param engine: the SHACL engine to use, defaults to "topquadrant"
    :type engine: str, optional
    :return: the validation outcome as (valid, report graph, report text)
    :rtype: Tuple[bool, Graph, str]
    """

    if engine == "topquadrant":
        # Guard only the import: an ImportError raised while the engine is
        # actually running should surface rather than be mistaken for
        # "engine not installed".
        try:
            from brick_tq_shacl.topquadrant_shacl import validate as tq_validate
        except ImportError:
            logging.info(
                "TopQuadrant SHACL engine not available. Using PySHACL instead."
            )
        else:
            return tq_validate(data_graph, shape_graph or Graph())  # type: ignore

    # PySHACL path: the shape graph doubles as the ontology graph
    return pyshacl.validate(
        data_graph,
        shacl_graph=shape_graph,
        ont_graph=shape_graph,
        advanced=True,
        js=True,
        allow_warnings=True,
    )  # type: ignore


def shacl_inference(
    data_graph: Graph, shape_graph: Optional[Graph], engine="topquadrant"
):
    """
    Infer new triples in the data graph using the shape graph.

    With the default ``engine="topquadrant"`` the TopQuadrant SHACL engine
    (the ``brick_tq_shacl`` package) is used when it can be imported; if it is
    not installed, an info message is logged and PySHACL is used instead.
    Any other value of ``engine`` uses PySHACL.

    The PySHACL path edits the data graph in place and returns it.
    NOTE(review): the TopQuadrant path returns whatever ``tq_infer`` returns;
    whether it also mutates ``data_graph`` in place is not visible here --
    confirm against brick_tq_shacl before relying on in-place behaviour.

    :param data_graph: the graph to infer new triples in
    :type data_graph: Graph
    :param shape_graph: the shape graph to use for inference; the TopQuadrant
        path substitutes an empty graph when this is None or empty
    :type shape_graph: Optional[Graph]
    :param engine: the SHACL engine to use, defaults to "topquadrant"
    :type engine: str, optional
    :return: the result of the TopQuadrant engine, or the (mutated) data graph
        when PySHACL is used
    """
    if engine == "topquadrant":
        # Guard only the import: an ImportError raised while the engine is
        # actually running should surface rather than be mistaken for
        # "engine not installed".
        try:
            from brick_tq_shacl.topquadrant_shacl import infer as tq_infer
        except ImportError:
            logging.info(
                "TopQuadrant SHACL engine not available. Using PySHACL instead."
            )
        else:
            return tq_infer(data_graph, shape_graph or Graph())

    # PySHACL has no separate inference entry point: run validation with
    # inplace=True so inferred triples are written into data_graph, and discard
    # the validation report.
    pyshacl.validate(
        data_graph=data_graph,
        shacl_graph=shape_graph,
        ont_graph=shape_graph,
        advanced=True,
        inplace=True,
        js=True,
        allow_warnings=True,
    )
    # Return the graph so both engine paths hand a graph back to the caller.
    return data_graph
Loading

0 comments on commit cdeba72

Please sign in to comment.