Skip to content

Commit

Permalink
Add ability to use TopQuadrant SHACL implementation as engine (#308)
Browse files Browse the repository at this point in the history
* add shacl_validate/infer functions and use these as the entrypoint. Augment tests to check both shacl engines

* fix interactions with shacl inference

* tightening up the implementation and use of the shacl_* methods

* support specifying shacl engine in the API

* update tests; test both pyshacl and topquadrant

* add brick-tq-shacl dep

* add TODOs

* Formatting

* no more 3.8!

* ignoring some imported packages without type annotations

* more type annotations

* add types, ignore type errors for imports

* update mypy, fix some issues and ignore some others

* fix union type annotation

* update docker containers

* 3.8.1 python for higher

* add back python 3.8

* change 3.8 version

* add test for finding reasons with a given severity

* update brick-tq-shacl, fix type signature

* remove debug serializations

* bump shacl version

* fixing skolemization for validation

* move shacl engine config inside buildingmotif object
  • Loading branch information
gtfierro authored Apr 10, 2024
1 parent e374bc0 commit 59fa3a4
Show file tree
Hide file tree
Showing 20 changed files with 1,199 additions and 2,876 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- name: lint
run: poetry run flake8 buildingmotif
- name: type check
run: poetry run mypy
run: poetry run mypy --ignore-missing-imports
- name: unit tests
run: poetry run pytest tests/unit --cov=./ --cov-report=xml
- name: integration tests
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ repos:
entry: poetry run flake8 buildingmotif
# can't poetry run because not present in repository https://github.com/pre-commit/mirrors-mypy
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.931
rev: v1.9.0
hooks:
- id: mypy
args: ["--install-types", "--non-interactive", "--ignore-missing-imports", "--follow-imports=skip"]
args: ["--install-types", "--non-interactive", "--ignore-missing-imports", "--follow-imports=skip", "--disable-error-code=import-untyped"]
additional_dependencies: [sqlalchemy2-stubs <= 0.0.2a20, SQLAlchemy <= 1.4]
exclude: docs/conf.py
2 changes: 1 addition & 1 deletion buildingmotif/api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.8
FROM python:3.9

# Copy project
ADD buildingmotif /opt/buildingmotif
Expand Down
4 changes: 2 additions & 2 deletions buildingmotif/api/views/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def validate_model(models_id: int) -> flask.Response:

shape_collections = []

# no body provided -- default to model manifest
# no body provided -- default to model manifest and default SHACL engine
if request.content_length is None:
shape_collections = [model.get_manifest()]
else:
Expand Down Expand Up @@ -204,7 +204,7 @@ def validate_model(models_id: int) -> flask.Response:
"message": vaildation_context.report_string,
"valid": vaildation_context.valid,
"reasons": {
focus_node: [gd.reason() for gd in grahdiffs]
focus_node: list(set(gd.reason() for gd in grahdiffs))
for focus_node, grahdiffs in vaildation_context.diffset.items()
},
}, status.HTTP_200_OK
Expand Down
13 changes: 12 additions & 1 deletion buildingmotif/building_motif/building_motif.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
from contextlib import contextmanager
from typing import Optional

from rdflib import Graph
from rdflib.namespace import NamespaceManager
Expand All @@ -24,16 +25,26 @@
class BuildingMOTIF(metaclass=Singleton):
"""Manages BuildingMOTIF data classes."""

def __init__(self, db_uri: str, log_level=logging.WARNING) -> None:
def __init__(
self,
db_uri: str,
shacl_engine: Optional[str] = "pyshacl",
log_level=logging.WARNING,
) -> None:
"""Class constructor.
:param db_uri: database URI
:type db_uri: str
:param shacl_engine: the name of the engine to use for validation: "pyshacl" or "topquadrant". Using topquadrant
requires Java to be installed on this machine, and the "topquadrant" feature on BuildingMOTIF,
defaults to "pyshacl"
:type shacl_engine: str, optional
:param log_level: logging level of detail
:type log_level: int
:default log_level: WARNING
"""
self.db_uri = db_uri
self.shacl_engine = shacl_engine
self.engine = create_engine(
db_uri,
echo=False,
Expand Down
13 changes: 3 additions & 10 deletions buildingmotif/dataclasses/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Union

import pygit2
import pyshacl
import rdflib
import sqlalchemy
import yaml
Expand All @@ -23,6 +22,7 @@
from buildingmotif.utils import (
get_ontology_files,
get_template_parts_from_shape,
shacl_inference,
skip_uri,
)

Expand Down Expand Up @@ -248,15 +248,7 @@ def _load_from_ontology(
# expand the ontology graph before we insert it into the database. This will ensure
# that the output of compiled models will not contain triples that really belong to
# the ontology
pyshacl.validate(
data_graph=ontology,
shacl_graph=ontology,
ont_graph=ontology,
advanced=True,
inplace=True,
js=True,
allow_warnings=True,
)
ontology = shacl_inference(ontology, engine=get_building_motif().shacl_engine)

lib = cls.create(ontology_name, overwrite=overwrite)

Expand Down Expand Up @@ -284,6 +276,7 @@ def _infer_shapes_from_graph(self, graph: rdflib.Graph):
dependency_cache: Dict[int, List[Dict[Any, Any]]] = {}
for candidate in candidates:
assert isinstance(candidate, rdflib.URIRef)
# TODO: mincount 0 (or unspecified) should be optional args on the generated template
partial_body, deps = get_template_parts_from_shape(candidate, graph)
templ = self.create_template(str(candidate), partial_body)
dependency_cache[templ.id] = deps
Expand Down
84 changes: 33 additions & 51 deletions buildingmotif/dataclasses/model.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, List, Optional

import pyshacl
import rdflib
import rfc3987
from rdflib import URIRef

from buildingmotif import get_building_motif
from buildingmotif.dataclasses.shape_collection import ShapeCollection
from buildingmotif.dataclasses.validation import ValidationContext
from buildingmotif.namespaces import A
from buildingmotif.utils import Triple, copy_graph, rewrite_shape_graph
from buildingmotif.namespaces import OWL, A
from buildingmotif.utils import (
Triple,
copy_graph,
rewrite_shape_graph,
shacl_inference,
shacl_validate,
skolemize_shapes,
)

if TYPE_CHECKING:
from buildingmotif import BuildingMOTIF
Expand Down Expand Up @@ -174,20 +180,24 @@ def validate(
).graph
# inline sh:node for interpretability
shapeg = rewrite_shape_graph(shapeg)

# remove imports from sg
shapeg.remove((None, OWL.imports, None))

# skolemize the shape graph so we have consistent identifiers across
# validation through the interpretation of the validation report
shapeg = skolemize_shapes(shapeg)

# TODO: do we want to preserve the materialized triples added to data_graph via reasoning?
data_graph = copy_graph(self.graph)
valid, report_g, report_str = pyshacl.validate(
data_graph,
shacl_graph=shapeg,
ont_graph=shapeg,
advanced=True,
js=True,
allow_warnings=True,
# inplace=True,

# validate the data graph
valid, report_g, report_str = shacl_validate(
data_graph, shapeg, engine=self._bm.shacl_engine
)
assert isinstance(report_g, rdflib.Graph)
return ValidationContext(
shape_collections,
shapeg,
valid,
report_g,
report_str,
Expand All @@ -208,43 +218,13 @@ def compile(self, shape_collections: List["ShapeCollection"]):
for shape_collection in shape_collections:
ontology_graph += shape_collection.graph

ontology_graph = ontology_graph.skolemize()
ontology_graph = skolemize_shapes(ontology_graph)

model_graph = copy_graph(self.graph).skolemize()

# We use a fixed-point computation approach to 'compiling' RDF models.
# We accomplish this by keeping track of the size of the graph before and after
# the inference step. If the size of the graph changes, then we know that the
# inference has had some effect. We do this at most 3 times to avoid looping
# forever.
pre_compile_length = len(model_graph) # type: ignore
pyshacl.validate(
data_graph=model_graph,
shacl_graph=ontology_graph,
ont_graph=ontology_graph,
advanced=True,
inplace=True,
js=True,
allow_warnings=True,
return shacl_inference(
model_graph, ontology_graph, engine=self._bm.shacl_engine
)
post_compile_length = len(model_graph) # type: ignore

attempts = 3
while attempts > 0 and post_compile_length != pre_compile_length:
pre_compile_length = len(model_graph) # type: ignore
pyshacl.validate(
data_graph=model_graph,
shacl_graph=ontology_graph,
ont_graph=ontology_graph,
advanced=True,
inplace=True,
js=True,
allow_warnings=True,
)
post_compile_length = len(model_graph) # type: ignore
attempts -= 1
model_graph -= ontology_graph
return model_graph.de_skolemize()

def test_model_against_shapes(
self,
Expand Down Expand Up @@ -291,15 +271,17 @@ def test_model_against_shapes(

temp_model_graph += ontology_graph.cbd(shape_uri)

valid, report_g, report_str = pyshacl.validate(
data_graph=temp_model_graph,
ont_graph=ontology_graph,
allow_warnings=True,
advanced=True,
js=True,
# skolemize the shape graph so we have consistent identifiers across
# validation through the interpretation of the validation report
ontology_graph = ontology_graph.skolemize()

valid, report_g, report_str = shacl_validate(
temp_model_graph, ontology_graph
)

results[shape_uri] = ValidationContext(
shape_collections,
ontology_graph,
valid,
report_g,
report_str,
Expand Down
1 change: 1 addition & 0 deletions buildingmotif/dataclasses/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def evaluate(
parameters were provided
:rtype: Union[Template, rdflib.Graph]
"""
# TODO: handle datatype properties
templ = self.in_memory_copy()
# put all of the parameter names into the PARAM namespace so they can be
# directly substituted in the template body
Expand Down
48 changes: 42 additions & 6 deletions buildingmotif/dataclasses/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import cached_property
from itertools import chain
from secrets import token_hex
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union

import rdflib
from rdflib import Graph, URIRef
Expand Down Expand Up @@ -252,6 +252,9 @@ class ValidationContext:
"""

shape_collections: List[ShapeCollection]
# the shapes graph that was used to validate the model
# This will be skolemized!
shapes_graph: Graph
valid: bool
report: rdflib.Graph
report_string: str
Expand All @@ -264,10 +267,6 @@ def diffset(self) -> Dict[Optional[URIRef], Set[GraphDiff]]:
"""
return self._report_to_diffset()

@cached_property
def _context(self) -> Graph:
return sum((sc.graph for sc in self.shape_collections), start=Graph()) # type: ignore

def as_templates(self) -> List["Template"]:
"""Produces the set of templates that reconcile the GraphDiffs from the
SHACL validation report.
Expand All @@ -277,6 +276,43 @@ def as_templates(self) -> List["Template"]:
"""
return diffset_to_templates(self.diffset)

def get_reasons_with_severity(
    self, severity: Union[URIRef, str]
) -> Dict[Optional[URIRef], Set[GraphDiff]]:
    """
    Return the diffset restricted to validation results of one severity.

    The severity may be given either as a SHACL term or as its local name:
    - SH.Violation or "Violation" for violations
    - SH.Warning or "Warning" for warnings
    - SH.Info or "Info" for info

    Every focus node present in the diffset is kept as a key in the
    result; a focus node with no reasons of the requested severity maps
    to an empty set.

    :param severity: the severity to filter by
    :type severity: Union[URIRef, str]
    :raises ValueError: if the severity is not SH.Violation, SH.Warning,
        or SH.Info
    :return: a dictionary of focus nodes to the reasons with the given severity
    :rtype: Dict[Optional[URIRef], Set[GraphDiff]]
    """
    # a plain string is treated as a local name inside the SHACL namespace
    severity = severity if isinstance(severity, URIRef) else SH[severity]

    # reject anything that is not one of the three severities accepted here
    if severity not in (SH.Violation, SH.Warning, SH.Info):
        raise ValueError(
            f"Invalid severity: {severity}. Must be one of SH.Violation, SH.Warning, or SH.Info"
        )

    filtered: Dict[Optional[URIRef], Set[GraphDiff]] = {}
    for focus_node, graph_diffs in self.diffset.items():
        matching: Set[GraphDiff] = set()
        for graph_diff in graph_diffs:
            # the severity is read from the validation result graph attached
            # to each diff, keyed by that diff's result URI
            found = graph_diff.validation_result.value(
                graph_diff._result_uri, SH.resultSeverity
            )
            if found == severity:
                matching.add(graph_diff)
        filtered[focus_node] = matching
    return filtered

def _report_to_diffset(self) -> Dict[Optional[URIRef], Set[GraphDiff]]:
"""Interpret a SHACL validation report and say what is missing.
Expand All @@ -288,7 +324,7 @@ def _report_to_diffset(self) -> Dict[Optional[URIRef], Set[GraphDiff]]:
# TODO: for future use
# proppath = SH["property"] | (SH.qualifiedValueShape / SH["property"]) # type: ignore

g = self.report + self._context
g = self.report + self.shapes_graph
diffs: Dict[Optional[URIRef], Set[GraphDiff]] = defaultdict(set)
for result in g.objects(predicate=SH.result):
# check if the failure is due to our count constraint component
Expand Down
4 changes: 2 additions & 2 deletions buildingmotif/template_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def parents(self, ntype: Node, ontology: Graph) -> Set[Node]:
cache = self.sc_cache[id(ontology)]
# populate cache if necessary
if ntype not in cache:
cache[ntype] = set(ontology.transitive_objects(ntype, RDFS.subClassOf))
cache[ntype] = set(ontology.transitive_objects(ntype, RDFS.subClassOf)) # type: ignore
return cache[ntype]

def superproperties(self, ntype: Node, ontology: Graph) -> Set[Node]:
Expand Down Expand Up @@ -428,7 +428,7 @@ def building_mapping_subgraphs_iter(
subgraph = self.building_subgraph_from_mapping(mapping)
if not subgraph.connected():
continue
key = tuple(sorted(subgraph.all_nodes()))
key = tuple(sorted(subgraph.all_nodes())) # type: ignore
if key in cache:
continue
cache.add(key)
Expand Down
Loading

0 comments on commit 59fa3a4

Please sign in to comment.