diff --git a/.gitignore b/.gitignore index b7cafe7..9696ebb 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,5 @@ cython_debug/ notebooks/ .vscode/ -poetry.lock \ No newline at end of file +poetry.lock +cov.xml \ No newline at end of file diff --git a/README.md b/README.md index 92e6fd9..ad58c7d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,14 @@ # Getting started with `fave-recode` + +![PyPI](https://img.shields.io/pypi/v/fave-recode.png) [![codecov](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode/graph/badge.svg?token=C23B1H3DAX)](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode) [![Maintainability](https://api.codeclimate.com/v1/badges/2375ddfef5d77ba1681d/maintainability.png)](https://codeclimate.com/github/Forced-Alignment-and-Vowel-Extraction/fave-recode/maintainability) [![FAVE Python CI](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml/badge.svg?branch=dev)](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml) [![Build Docs](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/build-docs.yml/badge.svg)](https://forced-alignment-and-vowel-extraction.github.io/fave-recode/) +[![DOI](https://zenodo.org/badge/605740158.svg)](https://zenodo.org/badge/latestdoi/605740158) The idea behind `fave-recode` is that no matter how much you may adjust the dictionary of a forced-aligner, you may still want to make @@ -41,6 +44,7 @@ fave_recode --help -d, --output_dest PATH An output directory Other options: + -a, --parser TEXT Label set parser. Built in options are cmu_parser -s, --scheme TEXT Recoding scheme. 
Built in options are cmu2labov and cmu2phila [required] -r, --recode_stem TEXT Stem to append to recoded TextGrid file names @@ -59,7 +63,7 @@ ls data KY25A_1.TextGrid josef-fruehwald_speaker.TextGrid ``` bash -fave_recode -i data/josef-fruehwald_speaker.TextGrid -s cmu2phila +fave_recode -i data/josef-fruehwald_speaker.TextGrid -s cmu2phila -a cmu_parser ls data ``` diff --git a/README.qmd b/README.qmd index 9849c35..7ea51bd 100644 --- a/README.qmd +++ b/README.qmd @@ -3,11 +3,12 @@ title: Getting started with `fave-recode` engine: jupyter format: gfm --- - +![PyPI](https://img.shields.io/pypi/v/fave-recode) [![codecov](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode/graph/badge.svg?token=C23B1H3DAX)](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode) [![Maintainability](https://api.codeclimate.com/v1/badges/2375ddfef5d77ba1681d/maintainability)](https://codeclimate.com/github/Forced-Alignment-and-Vowel-Extraction/fave-recode/maintainability) [![FAVE Python CI](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml/badge.svg?branch=dev)](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml) [![Build Docs](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/build-docs.yml/badge.svg)](https://forced-alignment-and-vowel-extraction.github.io/fave-recode/) +[![DOI](https://zenodo.org/badge/605740158.svg)](https://zenodo.org/badge/latestdoi/605740158) The idea behind `fave-recode` is that no matter how much you may adjust the dictionary of a forced-aligner, you may still want to make programmatic changes to the output. 
@@ -47,13 +48,13 @@ ls data ``` ```bash -fave_recode -i data/josef-fruehwald_speaker.TextGrid -s cmu2phila +fave_recode -i data/josef-fruehwald_speaker.TextGrid -s cmu2phila -a cmu_parser ls data ``` ```{python} #| echo: false -!fave_recode -i docs/getting-started/data/josef-fruehwald_speaker.TextGrid -s cmu2phila +!fave_recode -i docs/getting-started/data/josef-fruehwald_speaker.TextGrid -s cmu2phila -a cmu_parser !ls docs/getting-started/data ``` diff --git a/docs/_extensions/jofrhwld/codeblocklabel/_extension.yml b/docs/_extensions/jofrhwld/codeblocklabel/_extension.yml new file mode 100644 index 0000000..3c068e5 --- /dev/null +++ b/docs/_extensions/jofrhwld/codeblocklabel/_extension.yml @@ -0,0 +1,8 @@ +title: Codeblocklabel +author: Josef Fruehwald +version: 1.0.0 +quarto-required: ">=1.3.0" +contributes: + filters: + - codeblocklabel.lua + diff --git a/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.css b/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.css new file mode 100644 index 0000000..fdb38d0 --- /dev/null +++ b/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.css @@ -0,0 +1,10 @@ +.langname { + margin-bottom: 0%; + padding-bottom: 0%; + font-style: italic; + font-size:smaller; +} + +.sourceCode[id]{ + margin-top: 0%; +} diff --git a/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.lua b/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.lua new file mode 100644 index 0000000..677e3a5 --- /dev/null +++ b/docs/_extensions/jofrhwld/codeblocklabel/codeblocklabel.lua @@ -0,0 +1,30 @@ + +-- function Div(el) +-- if el.content[1].t == "CodeBlock" then +-- return pandoc.Para("CodeBlock!") +-- end +-- end + +quarto.doc.add_html_dependency({ + name = 'codenamelabel', + stylesheets = {'codeblocklabel.css'} + }) + +function CodeBlock(block) + local newblock = block + if (FORMAT:match "html") and + (block.classes[1]) then + local langname = block.classes[1] + out = {pandoc.Div( + pandoc.RawInline("html", + "
"..block.classes[1].."
" + ), + pandoc.Attr("", {"langname"}, {}) + ), + newblock + } + else + out = newblock + end + return out +end \ No newline at end of file diff --git a/docs/_quarto.yml b/docs/_quarto.yml index c29cceb..30e51fe 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -5,6 +5,7 @@ project: website: page-navigation: true image: assets/logo.png + favicon: assets/logo.png navbar: left: - file: getting-started/overview.qmd @@ -35,14 +36,19 @@ website: contents: - getting-started/condition-attributes.qmd - getting-started/condition-relations.qmd - + - section: Labelset Parsing + contents: + - getting-started/label_set_parser.qmd format: html: theme: - light: flatly - dark: darkly + light: [flatly, styles/light.scss] + dark: [darkly, styles/dark.scss] + css: styles/styles.css toc: true + filters: + - codeblocklabel # tell quarto to read the generated sidebar metadata-files: @@ -75,6 +81,11 @@ quartodoc: - rule_classes.Condition - rule_classes.Rule - rule_classes.RuleSet + - title: Label Set Parsers + desc: Label set parsers + contents: + - labelset_parser.LabelSetParser + - labelset_parser.LabelSetParserProperties - title: Relations - subtitle: "`in`, `not in`" contents: diff --git a/docs/getting-started/label_set_parser.qmd b/docs/getting-started/label_set_parser.qmd new file mode 100644 index 0000000..099dcde --- /dev/null +++ b/docs/getting-started/label_set_parser.qmd @@ -0,0 +1,69 @@ +--- +title: Label Set Parsers +engine: jupyter +toc: true +--- + +There are some properties of label sets that you might want to include in your output labels. 
+For example, the CMU dictionary encodes vowel stress like so: + +| label | meaning | +| ---- | ---- | +| `AY0` | unstressed /ay/ | +| `AY2` | secondary stressed /ay/ | +| `AY1` | primary stressed /ay/ | + +A labelset parser can make these properties available so you can write a recoding rule like so: + +```yaml +- rule: ay + conditions: + - attribute: label + relation: contains + set: AY + return: ay_{stress} +``` + +`fave_recode` has a built-in parser for CMU labels called `cmu_parser` that you can include like so: + +```bash +fave_recode \ + -i data/josef-fruehwald_speaker.TextGrid \ + -s cmu2phila \ + -a cmu_parser +``` + +## Label Set Parser Basics + +A labelset parser has two top-level attributes: + +```yaml +parser: CMU +properties: [] +``` + +- `parser` just names the parser +- `properties` is a list of properties you wish to make available. + +### A property + +A single property that parses primary stress out of the CMU label would look like this: + +```yaml +name: stress +updates: stress +default: "" +rules: + - rule: "1" + conditions: + - attribute: label + relation: contains + set: "1" + return: "1" +``` + +The `rule` component is identical to [rules for recoding](rule-scheme-basics.qmd). + +The `updates` field defines the variable name you want to use to access the value "1" in our recoding rule. + +Unlike a recoding rule, every segment will be given some value for "stress", so a `default` value also needs to be provided. 
\ No newline at end of file diff --git a/docs/getting-started/overview.qmd b/docs/getting-started/overview.qmd index cc183a0..1bc549d 100644 --- a/docs/getting-started/overview.qmd +++ b/docs/getting-started/overview.qmd @@ -4,7 +4,7 @@ aliases: - ../index.html engine: jupyter --- - +![PyPI](https://img.shields.io/pypi/v/fave-recode) [![codecov](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode/graph/badge.svg?token=C23B1H3DAX)](https://codecov.io/gh/Forced-Alignment-and-Vowel-Extraction/fave-recode) [![Maintainability](https://api.codeclimate.com/v1/badges/2375ddfef5d77ba1681d/maintainability)](https://codeclimate.com/github/Forced-Alignment-and-Vowel-Extraction/fave-recode/maintainability) [![FAVE Python CI](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml/badge.svg?branch=dev)](https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-recode/actions/workflows/test-and-run.yml) @@ -48,7 +48,10 @@ ls data ``` ```bash -fave_recode -i data/josef-fruehwald_speaker.TextGrid -s cmu2phila +fave_recode \ + -i data/josef-fruehwald_speaker.TextGrid \ + -a cmu_parser \ + -s cmu2phila ls data ``` diff --git a/docs/objects.json b/docs/objects.json index 0427fc6..259baab 100644 --- a/docs/objects.json +++ b/docs/objects.json @@ -1 +1 @@ -{"project": "fave_recode", "version": "0.0.9999", "count": 16, "items": [{"name": "fave_recode.rule_classes.Condition.check_condition", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition.check_condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Condition.validate_condition", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition.validate_condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Condition", "domain": "py", "role": "class", "priority": "1", "uri": 
"reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule.apply_rule", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule.apply_rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule.validate_rule", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule.validate_rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule", "domain": "py", "role": "class", "priority": "1", "uri": "reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.apply_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.apply_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.map_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.map_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.read_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.read_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet", "domain": "py", "role": "class", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet", "dispname": "-"}, {"name": "fave_recode.relations.in_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.in_relation.html#fave_recode.relations.in_relation", "dispname": "-"}, {"name": "fave_recode.relations.not_in_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.not_in_relation.html#fave_recode.relations.not_in_relation", "dispname": "-"}, {"name": 
"fave_recode.relations.equals_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.equals_relation.html#fave_recode.relations.equals_relation", "dispname": "-"}, {"name": "fave_recode.relations.not_equals_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.not_equals_relation.html#fave_recode.relations.not_equals_relation", "dispname": "-"}, {"name": "fave_recode.relations.rematches_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.rematches_relation.html#fave_recode.relations.rematches_relation", "dispname": "-"}, {"name": "fave_recode.relations.reunmatches_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.reunmatches_relation.html#fave_recode.relations.reunmatches_relation", "dispname": "-"}]} \ No newline at end of file +{"project": "fave_recode", "version": "0.0.9999", "count": 23, "items": [{"name": "fave_recode.rule_classes.Condition.check_condition", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition.check_condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Condition.validate_condition", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition.validate_condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Condition", "domain": "py", "role": "class", "priority": "1", "uri": "reference/rule_classes.Condition.html#fave_recode.rule_classes.Condition", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule.apply_rule", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule.apply_rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule.validate_rule", "domain": "py", "role": "function", "priority": "1", "uri": 
"reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule.validate_rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.Rule", "domain": "py", "role": "class", "priority": "1", "uri": "reference/rule_classes.Rule.html#fave_recode.rule_classes.Rule", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.apply_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.apply_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.map_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.map_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet.read_ruleset", "domain": "py", "role": "function", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet.read_ruleset", "dispname": "-"}, {"name": "fave_recode.rule_classes.RuleSet", "domain": "py", "role": "class", "priority": "1", "uri": "reference/rule_classes.RuleSet.html#fave_recode.rule_classes.RuleSet", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParser.apply_parser", "domain": "py", "role": "function", "priority": "1", "uri": "reference/labelset_parser.LabelSetParser.html#fave_recode.labelset_parser.LabelSetParser.apply_parser", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParser.map_parser", "domain": "py", "role": "function", "priority": "1", "uri": "reference/labelset_parser.LabelSetParser.html#fave_recode.labelset_parser.LabelSetParser.map_parser", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParser.read_parser", "domain": "py", "role": "function", "priority": "1", "uri": "reference/labelset_parser.LabelSetParser.html#fave_recode.labelset_parser.LabelSetParser.read_parser", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParser.validate_parser", "domain": "py", "role": "function", 
"priority": "1", "uri": "reference/labelset_parser.LabelSetParser.html#fave_recode.labelset_parser.LabelSetParser.validate_parser", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParser", "domain": "py", "role": "class", "priority": "1", "uri": "reference/labelset_parser.LabelSetParser.html#fave_recode.labelset_parser.LabelSetParser", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParserProperties.validate_property", "domain": "py", "role": "function", "priority": "1", "uri": "reference/labelset_parser.LabelSetParserProperties.html#fave_recode.labelset_parser.LabelSetParserProperties.validate_property", "dispname": "-"}, {"name": "fave_recode.labelset_parser.LabelSetParserProperties", "domain": "py", "role": "class", "priority": "1", "uri": "reference/labelset_parser.LabelSetParserProperties.html#fave_recode.labelset_parser.LabelSetParserProperties", "dispname": "-"}, {"name": "fave_recode.relations.in_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.in_relation.html#fave_recode.relations.in_relation", "dispname": "-"}, {"name": "fave_recode.relations.not_in_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.not_in_relation.html#fave_recode.relations.not_in_relation", "dispname": "-"}, {"name": "fave_recode.relations.equals_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.equals_relation.html#fave_recode.relations.equals_relation", "dispname": "-"}, {"name": "fave_recode.relations.not_equals_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.not_equals_relation.html#fave_recode.relations.not_equals_relation", "dispname": "-"}, {"name": "fave_recode.relations.rematches_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.rematches_relation.html#fave_recode.relations.rematches_relation", "dispname": "-"}, {"name": 
"fave_recode.relations.reunmatches_relation", "domain": "py", "role": "function", "priority": "1", "uri": "reference/relations.reunmatches_relation.html#fave_recode.relations.reunmatches_relation", "dispname": "-"}]} \ No newline at end of file diff --git a/docs/reference/_sidebar.yml b/docs/reference/_sidebar.yml index a5bb4d4..eba87de 100644 --- a/docs/reference/_sidebar.yml +++ b/docs/reference/_sidebar.yml @@ -7,6 +7,10 @@ website: - reference/rule_classes.Rule.qmd - reference/rule_classes.RuleSet.qmd section: Rule Classes + - contents: + - reference/labelset_parser.LabelSetParser.qmd + - reference/labelset_parser.LabelSetParserProperties.qmd + section: Label Set Parsers - contents: - contents: - reference/relations.in_relation.qmd diff --git a/docs/reference/index.qmd b/docs/reference/index.qmd index 2fb8987..c9ff83d 100644 --- a/docs/reference/index.qmd +++ b/docs/reference/index.qmd @@ -10,6 +10,15 @@ Rule application classes | [rule_classes.Rule](rule_classes.Rule.qmd#fave_recode.rule_classes.Rule) | _A rule class_ | | [rule_classes.RuleSet](rule_classes.RuleSet.qmd#fave_recode.rule_classes.RuleSet) | A rule set class | +## Label Set Parsers + +Label set parsers + +| | | +| --- | --- | +| [labelset_parser.LabelSetParser](labelset_parser.LabelSetParser.qmd#fave_recode.labelset_parser.LabelSetParser) | A labelset parser object | +| [labelset_parser.LabelSetParserProperties](labelset_parser.LabelSetParserProperties.qmd#fave_recode.labelset_parser.LabelSetParserProperties) | A property of the labelset, including rules that | + ## Relations ### `in`, `not in` diff --git a/docs/reference/labelset_parser.LabelSetParser.qmd b/docs/reference/labelset_parser.LabelSetParser.qmd new file mode 100644 index 0000000..5881348 --- /dev/null +++ b/docs/reference/labelset_parser.LabelSetParser.qmd @@ -0,0 +1,75 @@ +# labelset_parser.LabelSetParser { #fave_recode.labelset_parser.LabelSetParser } + +`labelset_parser.LabelSetParser(self, parser=None, parser_path=None)` + +A 
labelset parser object + +## Parameters + +| Name | Type | Description | Default | +|---------------|------------------------|-------------------------------------------------------------------|-----------| +| `parser` | [dict](`dict`) | A dictionary defining the parser rules. Defaults to None. | `None` | +| `parser_path` | [Path](`pathlib.Path`) | A path to a yaml file definition of the parser. Defaults to None. | `None` | + +## Methods + +| Name | Description | +| --- | --- | +| [apply_parser](#fave_recode.labelset_parser.LabelSetParser.apply_parser) | Apply the parser to a single interval | +| [map_parser](#fave_recode.labelset_parser.LabelSetParser.map_parser) | Map the parser to an entire sequence tier. | +| [read_parser](#fave_recode.labelset_parser.LabelSetParser.read_parser) | Read in a yaml file defining the parser | +| [validate_parser](#fave_recode.labelset_parser.LabelSetParser.validate_parser) | Validate wellformedness of parser | + +### apply_parser { #fave_recode.labelset_parser.LabelSetParser.apply_parser } + +`labelset_parser.LabelSetParser.apply_parser(obj)` + +Apply the parser to a single interval + +#### Parameters + +| Name | Type | Description | Default | +|--------|-----------------------------------------------------------------------------|--------------------|------------| +| `obj` | [SequenceInterval](`aligned_textgrid.sequences.sequences.SequenceInterval`) | A SequenceInterval | _required_ | + +### map_parser { #fave_recode.labelset_parser.LabelSetParser.map_parser } + +`labelset_parser.LabelSetParser.map_parser(obj)` + +Map the parser to an entire sequence tier. 
+ +#### Parameters + +| Name | Type | Description | Default | +|--------|-----------------------------------------------------------------|----------------|------------| +| `obj` | [SequenceTier](`aligned_textgrid.sequences.tiers.SequenceTier`) | A SequenceTier | _required_ | + +### read_parser { #fave_recode.labelset_parser.LabelSetParser.read_parser } + +`labelset_parser.LabelSetParser.read_parser(path)` + +Read in a yaml file defining the parser + +#### Parameters + +| Name | Type | Description | Default | +|--------|------------------------|-----------------------------------|------------| +| `path` | [Path](`pathlib.Path`) | Path to the yaml file definition. | _required_ | + +### validate_parser { #fave_recode.labelset_parser.LabelSetParser.validate_parser } + +`labelset_parser.LabelSetParser.validate_parser(parser)` + +Validate wellformedness of parser + +#### Parameters + +| Name | Type | Description | Default | +|----------|----------------|-------------------|------------| +| `parser` | [dict](`dict`) | parser dictionary | _required_ | + +#### Raises + +| Type | Description | +|--------------------------|------------------------------------| +| [Exception](`Exception`) | Any errors raised by the validator | \ No newline at end of file diff --git a/docs/reference/labelset_parser.LabelSetParserProperties.qmd b/docs/reference/labelset_parser.LabelSetParserProperties.qmd new file mode 100644 index 0000000..fd4f7a2 --- /dev/null +++ b/docs/reference/labelset_parser.LabelSetParserProperties.qmd @@ -0,0 +1,36 @@ +# labelset_parser.LabelSetParserProperties { #fave_recode.labelset_parser.LabelSetParserProperties } + +`labelset_parser.LabelSetParserProperties(self, property=None)` + +A property of the labelset, including rules that +ought to be applied and the SequenceInterval property to update. 
+ +## Parameters + +| Name | Type | Description | Default | +|------------|----------------|-------------------------------------------------------|-----------| +| `property` | [dict](`dict`) | A dictionary defining the property. Defaults to None. | `None` | + +## Methods + +| Name | Description | +| --- | --- | +| [validate_property](#fave_recode.labelset_parser.LabelSetParserProperties.validate_property) | Validate wellformedness of parser property | + +### validate_property { #fave_recode.labelset_parser.LabelSetParserProperties.validate_property } + +`labelset_parser.LabelSetParserProperties.validate_property(property)` + +Validate wellformedness of parser property + +#### Parameters + +| Name | Type | Description | Default | +|----------|----------------|---------------------|------------| +| `property` | [dict](`dict`) | property dictionary | _required_ | + +#### Raises + +| Type | Description | +|--------------------------|------------------------------------| +| [Exception](`Exception`) | Any errors raised by the validator | \ No newline at end of file diff --git a/docs/styles/dark.scss b/docs/styles/dark.scss new file mode 100644 index 0000000..b1ce1d6 --- /dev/null +++ b/docs/styles/dark.scss @@ -0,0 +1,10 @@ +/*-- scss:defaults --*/ + +/*-- scss:rules --*/ + + + +/*-- scss:rules --*/ +.cell:not(.page-full):has(.cell-output){ + background-color: $gray-600; +} \ No newline at end of file diff --git a/docs/styles/light.scss b/docs/styles/light.scss new file mode 100644 index 0000000..f4f5ed7 --- /dev/null +++ b/docs/styles/light.scss @@ -0,0 +1,11 @@ +/*-- scss:defaults --*/ + +/*-- scss:rules --*/ + + + +/*-- scss:rules --*/ + + .cell:not(.page-full):has(.cell-output){ + background-color: $gray-100; + } \ No newline at end of file diff --git a/docs/styles/styles.css b/docs/styles/styles.css new file mode 100644 index 0000000..c24e338 --- /dev/null +++ b/docs/styles/styles.css @@ -0,0 +1,5 @@ +.cell:not(.page-full):has(.cell-output){ + padding: 2%; + 
border-radius: 10px; + margin-bottom: 1em; + } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 28c71bb..f809c7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fave-recode" -version = "0.2.0" +version = "0.3.0" description = "A package for recoding Praat TextGrids" authors = ["JoFrhwld "] license = "GPLv3" @@ -16,7 +16,7 @@ repository = "https://github.com/Forced-Alignment-and-Vowel-Extraction/fave-reco [tool.poetry.dependencies] python = "^3.10" -aligned-textgrid = "^v0.4.1" +aligned-textgrid = "^0.6.4" pyyaml = "^6.0" cerberus = "^1.3.5" click = "^8.1.7" @@ -41,7 +41,8 @@ build-backend = "poetry.core.masonry.api" addopts = [ "--import-mode=importlib", "--cov-config=.coveragerc", - "--cov" + "--cov", + "--cov-report=xml:cov.xml" ] filterwarnings =[ "ignore::UserWarning" diff --git a/src/fave_recode/fave_recode.py b/src/fave_recode/fave_recode.py index 5cce3af..f692e79 100644 --- a/src/fave_recode/fave_recode.py +++ b/src/fave_recode/fave_recode.py @@ -1,5 +1,6 @@ from aligned_textgrid import AlignedTextGrid, custom_classes, Word, Phone from fave_recode.rule_classes import RuleSet +from fave_recode.labelset_parser import LabelSetParser from fave_recode.schemes import all_schemes from pathlib import Path from typing import Union @@ -42,6 +43,10 @@ cloup.constraints.mutually_exclusive, ["input_path", "output_file"] ) +@click.option("-a","--parser", + type = click.STRING, + help = "Label set parser."\ + " Built in options are cmu_parser") @click.option("-s", "--scheme", type=click.STRING, help = "Recoding scheme."\ @@ -60,17 +65,20 @@ def fave_recode( input_path = None, output_file = None, output_dest = None, + parser = None, scheme = None, save_recode = True, recode_stem = "_recoded", target_tier = "Phone" )->Union[AlignedTextGrid,list]: + parser = get_parser(parser) rules = get_rules(scheme) if input_file: input_p = Path(input_file.name) ratg = process_file( input_path=input_p, 
output_file=output_file, + parser = parser, scheme = rules, recode_stem = recode_stem, save_recode = save_recode, @@ -79,6 +87,7 @@ def fave_recode( ratg = process_directory( input_path = input_path, output_dest = output_dest, + parser = parser, scheme = rules, recode_stem = recode_stem, target_tier = target_tier, @@ -97,10 +106,24 @@ def get_rules( return RuleSet(rule_path = str(scheme_path)) raise Exception(f"Cannot find rule schema file: {scheme}") +def get_parser( + parser: str + ) -> LabelSetParser: + if not parser: + return LabelSetParser() + if parser in all_schemes: + return LabelSetParser(parser_path = Path(all_schemes[parser])) + scheme_path = Path(parser) + if scheme_path.is_file(): + return LabelSetParser(parser_path = scheme_path) + raise Exception(f"Cannot find label set parser file: {parser}") + + def process_directory( input_path: str, scheme: RuleSet, save_recode: bool, + parser: LabelSetParser = None, output_dest: Union[str,None] = None, recode_stem: str = "_recoded", target_tier: str = "Phone" @@ -112,6 +135,7 @@ def process_directory( ratg = process_file( input_path = tg, + parser = parser, scheme = scheme, save_recode = save_recode, output_file = output_dest, @@ -126,12 +150,15 @@ def process_file( input_path: Path, scheme: RuleSet, save_recode: bool, + parser: LabelSetParser = None, output_file: str = None, recode_stem:str = "_recoded", target_tier: str = "Phone" ): atg = validate_input_file(input_path) - run_recode(atg, scheme, target_tier) + if not parser: + parser = LabelSetParser() + run_recode(atg, parser, scheme, target_tier) if output_file and save_recode: output_path = make_output_path( @@ -227,12 +254,15 @@ def make_output_path( def run_recode( atg: AlignedTextGrid, + parser: LabelSetParser, scheme: RuleSet, target_tier: str ): all_targets = [t for tgr in atg for t in tgr if t.entry_class.__name__ == target_tier] + for tier in all_targets: + parser.map_parser(tier) for tier in all_targets: scheme.map_ruleset(tier) diff --git 
a/src/fave_recode/labelset_parser.py b/src/fave_recode/labelset_parser.py new file mode 100644 index 0000000..4c4b9ab --- /dev/null +++ b/src/fave_recode/labelset_parser.py @@ -0,0 +1,127 @@ +from aligned_textgrid.sequences.sequences import SequenceInterval +from aligned_textgrid.sequences.tiers import SequenceTier +from fave_recode.ruleschema import rule_validator, \ + condition_validator, \ + label_parser_validator,\ + parser_property_validator +from fave_recode.rule_classes import RuleSet +from fave_recode.relations import relation_dict +from collections.abc import Callable +from pathlib import Path +import functools +import yaml + + +class LabelSetParser(): + """A labelset parser object + + Args: + parser (dict, optional): + A dictionary defining the parser rules. Defaults to None. + parser_path (Path, optional): + A path to a yaml file definition of the parser. Defaults to None. + """ + + def __init__(self, parser: dict = None, parser_path: Path = None): + if parser: + self.validate_parser(parser) + self.name = parser["parser"] + self.properties = [ + LabelSetParserProperties(property) + for property in parser["properties"] + ] + elif parser_path: + self.read_parser(parser_path) + else: + self.properties = [LabelSetParserProperties()] + + + def apply_parser(self, obj: SequenceInterval): + """Apply the parser to a single interval + + Args: + obj (SequenceInterval): A SequenceInterval + """ + for property in self.properties: + application = property.rules.apply_ruleset(obj) + if not application: + obj.set_feature( + property.updates, + property.default + ) + + def map_parser(self, obj: SequenceTier): + """Map the parser to an entire sequence tier. + + Args: + obj (SequenceTier): A SequenceTier + """ + for seq in obj: + self.apply_parser(seq) + + def read_parser(self, path: Path): + """Read in a yaml file defining the parser + + Args: + path (Path): + Path to the yaml file definition. 
+ """ + with path.open("r") as f: + parser = yaml.safe_load(f) + + self.validate_parser(parser) + self.name = parser["parser"] + self.properties = [ + LabelSetParserProperties(property) + for property in parser["properties"] + ] + + + def validate_parser(self, parser: dict): + """Validate wellformedness of parser + Args: + parser (dict): parser dictionary + + Raises: + Exception: Any errors raised by the validator + """ + if not label_parser_validator(parser): + errors = label_parser_validator.errors + raise Exception(repr(errors)) + + + +class LabelSetParserProperties(): + """A property of the labelset, including rules that + ought to be applied and the SequenceInterval property to update. + + Args: + property (dict, optional): + A dictionary defining the property. Defaults to None. + """ + + def __init__(self, property: dict = None): + if property: + self.validate_property(property) + self.rules = RuleSet(property["rules"]) + self.updates = property["updates"] + for rule in self.rules.rules: + rule.updates = self.updates + self.default = property["default"] + else: + self.rules = RuleSet() + self.updates = "_" + self.default = None + + def validate_property(self, property: dict): + """Validate wellformedness of parser property + Args: + property (dict): property dictionary + + Raises: + Exception: Any errors raised by the validator + """ + if not parser_property_validator(property): + errors = parser_property_validator.errors + raise Exception(repr(errors)) + diff --git a/src/fave_recode/resources/cmu2labov.yml b/src/fave_recode/resources/cmu2labov.yml index 2e9134a..c10b09b 100644 --- a/src/fave_recode/resources/cmu2labov.yml +++ b/src/fave_recode/resources/cmu2labov.yml @@ -3,7 +3,7 @@ - attribute: label relation: == set: AH0 - return: "@" + return: "@_{stress}" - rule: eyf conditions: - attribute: label @@ -12,7 +12,7 @@ - attribute: fol.label relation: == set: "#" - return: eyF + return: eyF_{stress} - rule: iyF conditions: - attribute: label @@ -21,7 +21,7 @@ - 
attribute: fol.label relation: == set: "#" - return: iyF + return: iyF_{stress} - rule: owF conditions: - attribute: label @@ -30,7 +30,7 @@ - attribute: fol.label relation: == set: "#" - return: owF + return: owF_{stress} - rule: ay0 conditions: - attribute: label @@ -48,7 +48,7 @@ - SH - T - TH - return: ay0 + return: ay0_{stress} - rule: ah conditions: - attribute: label @@ -97,7 +97,7 @@ - NIRVANA - KARATE - AH - return: ah + return: ah_{stress} - rule: Tuw conditions: - attribute: label @@ -117,7 +117,7 @@ - S - T - Z - return: Tuw + return: Tuw_{stress} - rule: iyr conditions: - attribute: label @@ -134,7 +134,7 @@ set: - AXR - R - return: iyr + return: iyr_{stress} - rule: eyr conditions: - attribute: label @@ -145,7 +145,7 @@ set: - AXR - R - return: eyr + return: eyr_{stress} - rule: ahr conditions: - attribute: label @@ -156,7 +156,7 @@ set: - AXR - R - return: ahr + return: ahr_{stress} - rule: owr conditions: - attribute: label @@ -173,7 +173,7 @@ set: - AXR - R - return: owr + return: owr_{stress} - rule: uwr conditions: - attribute: label @@ -190,19 +190,19 @@ set: - AXR - R - return: uwr + return: uwr_{stress} - rule: o conditions: - attribute: label relation: contains set: AA - return: o + return: o_{stress} - rule: ae conditions: - attribute: label relation: contains set: AE - return: ae + return: ae_{stress} - rule: wedge conditions: - attribute: label @@ -210,76 +210,76 @@ set: - AH1 - AH2 - return: uh + return: uh_{stress} - rule: oh conditions: - attribute: label relation: contains set: AO - return: oh + return: oh_{stress} - rule: aw conditions: - attribute: label relation: contains set: AW - return: aw + return: aw_{stress} - rule: ay conditions: - attribute: label relation: contains set: AY - return: ay + return: ay_{stress} - rule: e conditions: - attribute: label relation: contains set: "EH" - return: e -- rule: "*hr" + return: e_{stress} +- rule: "*hr_{stress}" conditions: - attribute: label relation: contains set: ER - return: "*hr" + 
return: "*hr_{stress}" - rule: ey conditions: - attribute: label relation: contains set: EY - return: ey + return: ey_{stress} - rule: i conditions: - attribute: label relation: contains set: IH - return: i + return: i_{stress} - rule: iy conditions: - attribute: label relation: contains set: IY - return: iy + return: iy_{stress} - rule: ow conditions: - attribute: label relation: contains set: OW - return: ow + return: ow_{stress} - rule: oy conditions: - attribute: label relation: contains set: OY - return: oy + return: oy_{stress} - rule: u conditions: - attribute: label relation: contains set: UH - return: u + return: u_{stress} - rule: uw conditions: - attribute: label relation: contains set: UW - return: uw \ No newline at end of file + return: uw_{stress} \ No newline at end of file diff --git a/src/fave_recode/resources/cmu2phila.yml b/src/fave_recode/resources/cmu2phila.yml index c543708..88dc4ea 100644 --- a/src/fave_recode/resources/cmu2phila.yml +++ b/src/fave_recode/resources/cmu2phila.yml @@ -8,7 +8,7 @@ set: - KEPT - CATCH - return: e + return: e_{stress} - rule: a-foreign-a conditions: @@ -18,7 +18,7 @@ - attribute: inword.label relation: contains set: LANZA - return: o + return: o_{stress} # foriegn A-ae - rule: a-foreign-ae @@ -29,7 +29,7 @@ - attribute: inword.label relation: contains set: MARIO - return: ae + return: ae_{stress} # marry class - rule: marry @@ -40,7 +40,7 @@ - attribute: inword.label relation: rematches set: "ARRY$" - return: ae + return: ae_{stress} # tense lexical exceptions - rule: aeh-lex @@ -51,7 +51,7 @@ - attribute: inword.label relation: rematches set: "(MAD|BAD|GLAD)(LY|DER|DEST|NESS)?$" - return: aeh + return: aeh_{stress} # lax lexical exceptions - rule: ae-lex @@ -92,7 +92,7 @@ - MATH - EXAM - AND - return: aeBR + return: aeBR_{stress} # SKV words - rule: ae-skv @@ -115,7 +115,7 @@ - attribute: inword.label relation: reunmatches set: "ING?$" - return: aeBR + return: aeBR_{stress} # aeL - rule: aeL @@ -126,7 +126,7 @@ 
- attribute: fol.label relation: == set: L - return: aeBR + return: aeBR_{stress} # ae followed by trigger followed by end of word - rule: aeh-c-endword @@ -145,7 +145,7 @@ - attribute: fol.fol.label relation: == set: "#" - return: aeh + return: aeh_{stress} # ae followed by trigger followed by consonant - rule: aeh-c-X @@ -187,7 +187,7 @@ - W - Z - ZH - return: aeh + return: aeh_{stress} # ae followed by trigger and inflection - rule: aeh-ing-or-es @@ -220,7 +220,7 @@ - attribute: inword.label relation: rematches set: "(ING?|ES)$" - return: "aeh" + return: aeh_{stress} # oh-fix - rule: oh-fix @@ -272,7 +272,7 @@ - COUGHS - COUGHED - COUGHING - return: oh + return: oh_{stress} # o-fix - rule: o-fix @@ -309,7 +309,7 @@ - ORANGE - HORRIBLE - MAJORITY - return: o + return: o_{stress} # iw - rule: iw-1 @@ -320,7 +320,7 @@ - attribute: prev.label relation: == set: Y - return: "iw" + return: iw_{stress} - rule: iw-2 conditions: - attribute: label @@ -329,7 +329,7 @@ - attribute: inword.label relation: contains set: EW - return: "iw" + return: iw_{stress} - rule: iw-2 conditions: - attribute: label @@ -338,14 +338,14 @@ - attribute: inword.label relation: rematches set: "[TDNLS]U" - return: iw + return: iw_{stress} - rule: schwa conditions: - attribute: label relation: == set: AH0 - return: "@" + return: "@_{stress}" - rule: eyf conditions: - attribute: label @@ -354,7 +354,7 @@ - attribute: fol.label relation: == set: "#" - return: eyF + return: eyF_{stress} - rule: iyF conditions: - attribute: label @@ -363,7 +363,7 @@ - attribute: fol.label relation: == set: "#" - return: iyF + return: iyF_{stress} - rule: owF conditions: - attribute: label @@ -372,7 +372,7 @@ - attribute: fol.label relation: == set: "#" - return: owF + return: owF_{stress} - rule: ay0 conditions: - attribute: label @@ -390,7 +390,7 @@ - SH - T - TH - return: ay0 + return: ay0_{stress} - rule: ah conditions: - attribute: label @@ -439,7 +439,7 @@ - NIRVANA - KARATE - AH - return: ah + return: 
ah_{stress} - rule: Tuw conditions: - attribute: label @@ -459,7 +459,7 @@ - S - T - Z - return: Tuw + return: Tuw_{stress} - rule: iyr conditions: - attribute: label @@ -476,7 +476,7 @@ set: - AXR - R - return: iyr + return: iyr_{stress} - rule: eyr conditions: - attribute: label @@ -487,7 +487,7 @@ set: - AXR - R - return: eyr + return: eyr_{stress} - rule: ahr conditions: - attribute: label @@ -498,7 +498,7 @@ set: - AXR - R - return: ahr + return: ahr_{stress} - rule: owr conditions: - attribute: label @@ -515,7 +515,7 @@ set: - AXR - R - return: owr + return: owr_{stress} - rule: uwr conditions: - attribute: label @@ -532,19 +532,19 @@ set: - AXR - R - return: uwr + return: uwr_{stress} - rule: o conditions: - attribute: label relation: contains set: AA - return: o + return: o_{stress} - rule: ae conditions: - attribute: label relation: contains set: AE - return: ae + return: ae_{stress} - rule: wedge conditions: - attribute: label @@ -552,76 +552,76 @@ set: - AH1 - AH2 - return: uh + return: uh_{stress} - rule: oh conditions: - attribute: label relation: contains set: AO - return: oh + return: oh_{stress} - rule: aw conditions: - attribute: label relation: contains set: AW - return: aw + return: aw_{stress} - rule: ay conditions: - attribute: label relation: contains set: AY - return: ay + return: ay_{stress} - rule: e conditions: - attribute: label relation: contains set: "EH" - return: e -- rule: "*hr" + return: e_{stress} +- rule: "*hr_{stress}" conditions: - attribute: label relation: contains set: ER - return: "*hr" + return: "*hr_{stress}" - rule: ey conditions: - attribute: label relation: contains set: EY - return: ey + return: ey_{stress} - rule: i conditions: - attribute: label relation: contains set: IH - return: i + return: i_{stress} - rule: iy conditions: - attribute: label relation: contains set: IY - return: iy + return: iy_{stress} - rule: ow conditions: - attribute: label relation: contains set: OW - return: ow + return: ow_{stress} - rule: 
# Label set parser for CMU dictionary phone labels.
#
# Each property writes one feature onto an interval. Its rules are tried in
# order and the first rule that applies wins; when none applies, the
# property's `default` is written instead.
parser: "CMU"

properties:

  # Broad segment class, stored in the "VC" feature.
  # NOTE(review): assumes `rematches` succeeds when the pattern is found in
  # the label -- confirm against fave_recode.relations.
  - name: "class"
    updates: "VC"
    default: ""
    rules:

      - rule: "vowel"
        conditions:
          - attribute: label
            relation: "rematches"
            set: "[AEIOU]"
        return: "vowel"

      # Only reached when the vowel rule above did not apply.
      - rule: "consonant"
        conditions:
          - attribute: label
            relation: "rematches"
            set: "[A-Z]"
        return: "consonant"

  # Stress digit of the label, stored in the "stress" feature.
  # Labels without a stress digit get the default "".
  - name: "stress"
    updates: "stress"
    default: ""
    rules:

      - rule: "1"
        conditions:
          - attribute: label
            relation: contains
            set: "1"
        return: "1"

      - rule: "2"
        conditions:
          - attribute: label
            relation: contains
            set: "2"
        return: "2"

      # Must test for "0": testing for "2" here would be shadowed by the
      # rule above and unstressed vowels would never be marked.
      - rule: "0"
        conditions:
          - attribute: label
            relation: contains
            set: "0"
        return: "0"
def parse_output(self, obj):
    """Fill the ``{feature}`` placeholders in this rule's output template.

    Every ``{...}`` span in ``self.output`` is replaced by the value that
    ``rgetattr`` returns for that name on ``obj``. Each placeholder is
    substituted individually rather than through ``str.format``, because
    ``str.format`` would read a dotted name such as ``{fol.label}`` as
    attribute access on a ``fol`` keyword and raise ``KeyError``.

    Args:
        obj: The interval the rule is being applied to.

    Returns:
        str: ``self.output`` with every placeholder filled in. A template
            without placeholders is returned unchanged.
    """
    def _fill(match):
        # str() mirrors what str.format does for a bare `{name}` field.
        return str(rgetattr(obj, match.group(1)))

    return re.sub(r"\{(.*?)\}", _fill, self.output)
# A single well-formed property: marks labels matching [AEIOU] as "vowel"
# in the "V" feature and writes "" otherwise.
example_property = {
    "name": "vowel",
    "updates": "V",
    "default": "",
    "rules": [
        {
            "rule": "vowel",
            "updates": "V",
            "conditions": [
                {
                    "attribute": "label",
                    "relation": "rematches",
                    "set": "[AEIOU]"
                }
            ],
            "return": "vowel"
        }
    ]
}

# A minimal well-formed parser wrapping the property above.
example_parser = {
    "parser": "test",
    "properties": [example_property]
}


class TestProperties:
    """Construction of a single parser property."""

    def test_creation(self):
        prop = LabelSetParserProperties(example_property)
        assert isinstance(prop, LabelSetParserProperties)


class TestParser:
    """Construction and application of a LabelSetParser."""

    # Class-level fixtures: shared by every test in this class.
    seq1 = Phone(Interval(0, 1, "AH1"))
    seq2 = Phone(Interval(2, 3, "HH"))

    tier = SequenceTier(
        [
            Interval(0, 1, "AH1"),
            Interval(1, 3, "HH")
        ]
    )

    def test_creation(self):
        parser = LabelSetParser(example_parser)
        assert isinstance(parser, LabelSetParser)

    def test_apply(self):
        parser = LabelSetParser(example_parser)
        parser.apply_parser(self.seq1)
        parser.apply_parser(self.seq2)

        assert self.seq1.V == "vowel"
        assert self.seq2.V == ""

    def test_map(self):
        parser = LabelSetParser(example_parser)
        parser.map_parser(self.tier)

        assert self.tier[0].V == "vowel"
        assert self.tier[1].V == ""

    def test_default(self):
        parser = LabelSetParser()
        parser.map_parser(self.tier)
        # The blank parser has one property that updates "_" with a default
        # of None and no rules, so the default is what gets written.
        assert getattr(self.tier[0], "_") is None

    def test_read_parser(self):
        parser = LabelSetParser(parser_path=Path(cmu_parser_path))
        parser.map_parser(self.tier)

        assert self.tier[0].stress == "1"
+203,7 @@ def test_map_ruleset(self): tier = SequenceTier(tier = [AE, N]) cmu2labov_rules.map_ruleset(tier) - assert tier.first.label == "ae" + assert tier.first.label == "ae_" assert tier.first.fol.label == "N" def test_empty_ruleset(self):