Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BMS naming convention parsing #286

Merged
merged 28 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
bedbe76
adding label parsing initial implementation with tests
gtfierro Oct 17, 2023
82b0648
fix bug in many parsing
gtfierro Oct 17, 2023
f4d0f02
test first_true
gtfierro Oct 17, 2023
b32c4ef
add ingress and example
gtfierro Oct 17, 2023
a0f2f18
adding/testing maybe parser
gtfierro Oct 26, 2023
28099f7
add errors
gtfierro Oct 26, 2023
e7b63aa
propagate errors
gtfierro Oct 26, 2023
8893db7
Merge remote-tracking branch 'origin/develop' into gtf-label-parsing
gtfierro Oct 29, 2023
f968adb
update notebook
gtfierro Nov 28, 2023
52f5128
Merge remote-tracking branch 'origin/develop' into gtf-label-parsing
gtfierro Nov 28, 2023
2abe210
fix test import
gtfierro Nov 28, 2023
8d90a81
capture errors in the token stream
gtfierro Nov 29, 2023
b173a22
fix notebook kernel
gtfierro Nov 29, 2023
5c7749c
some changes
gtfierro Mar 22, 2024
18dbb45
Make Parsers classes
haneslinger May 2, 2024
a00fa6d
Add Application Explorer (#292)
haneslinger Dec 5, 2023
41de422
Generate SPARQL Queries from SHACL shapes (#273)
gtfierro Feb 14, 2024
47f31c8
Fixing template inlining (#298)
gtfierro Feb 19, 2024
ba91087
Allow Library to find template definitions from dependent libraries (…
gtfierro Mar 15, 2024
7d20083
remove unused import, fix type signature
gtfierro May 3, 2024
fd8d930
Merge remote-tracking branch 'origin/develop' into gtf-label-parsing
gtfierro May 13, 2024
a4adfc9
fix poetry lock
gtfierro May 13, 2024
7f1d7a9
moving tokens/parser to different files
gtfierro May 22, 2024
098af29
adding combinators module
gtfierro May 22, 2024
05554f4
add some negative cases
gtfierro May 22, 2024
f387cbb
add missing combinators file
gtfierro May 22, 2024
90bdd53
Merge branch 'develop' into gtf-label-parsing
gtfierro May 22, 2024
b6590ee
fix notebook
gtfierro May 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion buildingmotif/ingresses/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging

from buildingmotif.ingresses.base import Record # noqa
from buildingmotif.ingresses.csv import CSVIngress # noqa
from buildingmotif.ingresses.csvingress import CSVIngress # noqa
from buildingmotif.ingresses.naming_convention import NamingConventionIngress # noqa
from buildingmotif.ingresses.template import ( # noqa
TemplateIngress,
TemplateIngressWithChooser,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def __init__(
self.rtype = str(filename)

elif data:
# if data is a string, convert to StringIO
if isinstance(data, str):
data = StringIO(data)
self.dict_reader = DictReader(data, delimiter=",")
self.rtype = "data stream"

Expand Down
65 changes: 65 additions & 0 deletions buildingmotif/ingresses/naming_convention.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from functools import cached_property
from typing import List

from buildingmotif.ingresses.base import Record, RecordIngressHandler
from buildingmotif.label_parsing import (
Parser,
analyze_failures,
parse_list,
results_to_tokens,
)


class NamingConventionIngress(RecordIngressHandler):
    """
    Ingress handler that parses labels using a naming convention parser.
    This returns a Record for each input label, with the parsed tokens as a field.
    You will need to attach this to a GraphIngressHandler to turn the tokens into a graph.

    Parsing is lazy: it runs the first time ``records`` is accessed, and the
    ``results`` and ``failures`` attributes are populated as a side effect of
    that access.
    """

    def __init__(
        self,
        upstream: RecordIngressHandler,
        naming_convention: Parser,
    ):
        """
        Create a new NamingConventionIngress.

        :param upstream: The upstream ingress handler to get records from. This should return records with a "label" field.
        :type upstream: RecordIngressHandler
        :param naming_convention: The naming convention parser to use.
        :type naming_convention: Parser
        """
        self.upstream = upstream
        self.naming_convention = naming_convention

    def dump_failed_labels(self):
        """
        Print the labels that failed to parse, grouped as returned by
        ``analyze_failures``, with the largest groups first.

        If the labels have not been parsed yet, this triggers parsing so that
        the failure list exists before it is reported.
        """
        # `self.failures` is only assigned while computing `records`; touching
        # the cached property here avoids an AttributeError when this method
        # is called before `records` has ever been read.
        _ = self.records

        sorted_groups = sorted(
            analyze_failures(self.failures).items(),
            key=lambda x: len(x[1]),
            reverse=True,
        )
        for group, failures in sorted_groups:
            print(f"Unparsed label: {group} ({len(failures)} failures)")
            for failure in failures:
                print(f"\t{failure}")
            print()

    @cached_property
    def records(self) -> List[Record]:
        """
        Parse the "label" field of every upstream record and return one
        "token" Record per entry produced by ``results_to_tokens``.

        The raw parse output is kept on the instance as ``self.results`` and
        ``self.failures``. Because this is a cached property, the parse runs
        once per instance.

        :return: Records with rtype "token" and fields "label" and "tokens".
        :rtype: List[Record]
        """
        results, failures = parse_list(
            self.naming_convention, [x.fields["label"] for x in self.upstream.records]
        )
        # Keep the raw parse output around for inspection (see dump_failed_labels).
        self.failures = failures
        self.results = results
        tokens = results_to_tokens(results)
        return [
            Record(
                rtype="token",
                fields={
                    "label": t["label"],
                    "tokens": t["tokens"],
                },
            )
            for t in tokens
        ]
31 changes: 31 additions & 0 deletions buildingmotif/label_parsing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from buildingmotif.label_parsing.parser import (
Parser,
analyze_failures,
parse_list,
results_to_tokens,
)
from buildingmotif.label_parsing.tokens import (
Constant,
Delimiter,
ErrorTokenResult,
Identifier,
Null,
Token,
TokenOrConstructor,
TokenResult,
)

# Public API of the label_parsing package: every name listed here is
# re-exported from one of the submodules imported above.
__all__ = [
# re-exported from buildingmotif.label_parsing.parser
"Parser",
"analyze_failures",
"parse_list",
"results_to_tokens",
# re-exported from buildingmotif.label_parsing.tokens
"Token",
"TokenResult",
"TokenOrConstructor",
"Identifier",
"Constant",
"Delimiter",
"Null",
"ErrorTokenResult",
]
Loading
Loading