Skip to content

Commit 5624857

Browse files
ilslv and tyranron authored
Add special naming for every regex capturing group in Parameter (#8, #7)
Co-authored-by: Kai Ren <tyranron@gmail.com>
1 parent d22b143 commit 5624857

File tree

7 files changed

+506
-106
lines changed

7 files changed

+506
-106
lines changed

CHANGELOG.md

+19
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,25 @@ All user visible changes to `cucumber-expressions` crate will be documented in t
66

77

88

9+
## [0.2.0] · 2022-??-??
10+
[0.2.0]: /../../tree/v0.2.0
11+
12+
[Diff](/../../compare/v0.1.2...v0.2.0) | [Milestone](/../../milestone/4)
13+
14+
### BC Breaks
15+
16+
- Added `id` field to `Parameter` AST struct. ([#8], [#7])
17+
18+
### Added
19+
20+
- Support of capturing groups inside `Parameter` regex. ([#8], [#7])
21+
22+
[#7]: /../../issues/7
23+
[#8]: /../../pull/8
24+
25+
26+
27+
928
## [0.1.2] · 2022-01-11
1029
[0.1.2]: /../../tree/v0.1.2
1130

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"]
2323

2424
[features]
2525
# Enables ability to expand AST into regex.
26-
into-regex = ["either", "regex"]
26+
into-regex = ["either", "regex", "regex-syntax"]
2727

2828
[dependencies]
2929
derive_more = { version = "0.99.17", features = ["as_ref", "deref", "deref_mut", "display", "error", "from", "into"], default_features = false }
@@ -33,6 +33,7 @@ nom_locate = "4.0"
3333
# "into-regex" feature dependencies
3434
either = { version = "1.6", optional = true }
3535
regex = { version = "1.5", optional = true }
36+
regex-syntax = { version = "0.6", optional = true }
3637

3738
[workspace]
3839
members = ["fuzz"]

fuzz/fuzz_targets/parameter.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@ use cucumber_expressions::parse;
44
use libfuzzer_sys::fuzz_target;
55

66
fuzz_target!(|data: &str| {
7-
let _ = parse::parameter(data);
7+
let _ = parse::parameter(data, &mut 0);
88
});

src/ast.rs

+10-1
Original file line numberDiff line numberDiff line change
@@ -170,4 +170,13 @@ pub struct Optional<Input>(pub Input);
170170
///
171171
/// [0]: crate#grammar
172172
#[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)]
173-
pub struct Parameter<Input>(pub Input);
173+
pub struct Parameter<Input> {
174+
/// Inner `Input`.
175+
#[deref]
176+
#[deref_mut]
177+
pub input: Input,
178+
179+
/// Unique ID of this [`Parameter`] in the parsed [`Expression`].
180+
#[as_ref(ignore)]
181+
pub id: usize,
182+
}

src/expand/mod.rs

+123-37
Original file line numberDiff line numberDiff line change
@@ -150,23 +150,44 @@ where
150150
Parsing(parse::Error<Input>),
151151

152152
/// Expansion error.
153-
#[display(fmt = "Regex expansion failed: {}", _0)]
154-
Expansion(UnknownParameterError<Input>),
153+
#[display(fmt = "Failed to expand regex: {}", _0)]
154+
Expansion(ParameterError<Input>),
155155

156156
/// [`Regex`] creation error.
157157
#[display(fmt = "Regex creation failed: {}", _0)]
158158
Regex(regex::Error),
159159
}
160160

161-
/// Error of an unknown [`Parameter`] being used in an [`Expression`].
162-
#[derive(Clone, Copy, Debug, Display, Error)]
163-
#[display(fmt = "Parameter '{}' not found.", not_found)]
164-
pub struct UnknownParameterError<Input>
161+
/// Possible errors of a [`Parameter`] being used in an [`Expression`].
162+
#[derive(Clone, Debug, Display, Error)]
163+
pub enum ParameterError<Input>
165164
where
166165
Input: fmt::Display,
167166
{
168167
/// [`Parameter`] not found.
169-
pub not_found: Input,
168+
#[display(fmt = "Parameter `{}` not found.", _0)]
169+
NotFound(Input),
170+
171+
/// Failed to rename [`Regex`] capturing group.
172+
#[display(
173+
fmt = "Failed to rename capturing groups in regex `{}` of \
174+
parameter `{}`: {}",
175+
re,
176+
parameter,
177+
err
178+
)]
179+
RenameRegexGroup {
180+
/// [`Parameter`] name.
181+
parameter: Input,
182+
183+
/// [`Regex`] of the [`Parameter`].
184+
re: String,
185+
186+
/// [`Error`] of parsing the [`Regex`] with renamed capturing groups.
187+
///
188+
/// [`Error`]: regex_syntax::Error
189+
err: regex_syntax::Error,
190+
},
170191
}
171192

172193
/// Expansion of a [Cucumber Expressions][0] [AST] element into a [`Regex`] by
@@ -177,7 +198,7 @@ where
177198
/// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
178199
pub trait IntoRegexCharIter<Input: fmt::Display> {
179200
/// Type of an [`Iterator`] performing the expansion.
180-
type Iter: Iterator<Item = Result<char, UnknownParameterError<Input>>>;
201+
type Iter: Iterator<Item = Result<char, ParameterError<Input>>>;
181202

182203
/// Consumes this [AST] element returning an [`Iterator`] over [`char`]s
183204
/// transformable into a [`Regex`].
@@ -208,7 +229,7 @@ where
208229
/// [`IntoRegexCharIter::Iter`] for an [`Expression`].
209230
type ExpressionIter<Input> = iter::Chain<
210231
iter::Chain<
211-
iter::Once<Result<char, UnknownParameterError<Input>>>,
232+
iter::Once<Result<char, ParameterError<Input>>>,
212233
iter::FlatMap<
213234
vec::IntoIter<SingleExpression<Input>>,
214235
<SingleExpression<Input> as IntoRegexCharIter<Input>>::Iter,
@@ -218,7 +239,7 @@ type ExpressionIter<Input> = iter::Chain<
218239
-> <SingleExpression<Input> as IntoRegexCharIter<Input>>::Iter,
219240
>,
220241
>,
221-
iter::Once<Result<char, UnknownParameterError<Input>>>,
242+
iter::Once<Result<char, ParameterError<Input>>>,
222243
>;
223244

224245
impl<Input> IntoRegexCharIter<Input> for SingleExpression<Input>
@@ -307,7 +328,7 @@ type AlternationIter<I> = iter::Chain<
307328
>,
308329
>,
309330
>,
310-
iter::Once<Result<char, UnknownParameterError<I>>>,
331+
iter::Once<Result<char, ParameterError<I>>>,
311332
>;
312333

313334
// TODO: Replace with TAIT, once stabilized:
@@ -319,7 +340,7 @@ type AlternationIterInner<I> = iter::Chain<
319340
<Alternative<I> as IntoRegexCharIter<I>>::Iter,
320341
fn(Alternative<I>) -> <Alternative<I> as IntoRegexCharIter<I>>::Iter,
321342
>,
322-
iter::Once<Result<char, UnknownParameterError<I>>>,
343+
iter::Once<Result<char, ParameterError<I>>>,
323344
>;
324345

325346
impl<Input> IntoRegexCharIter<Input> for Alternative<Input>
@@ -397,7 +418,7 @@ type OptionalIter<Input> = iter::Map<
397418
>;
398419

399420
/// Function pointer describing [`Ok`].
400-
type MapOkChar<Input> = fn(char) -> Result<char, UnknownParameterError<Input>>;
421+
type MapOkChar<Input> = fn(char) -> Result<char, ParameterError<Input>>;
401422

402423
impl<Input> IntoRegexCharIter<Input> for Parameter<Input>
403424
where
@@ -413,36 +434,41 @@ where
413434
i.iter_elements().map(AsChar::as_char).eq(str.chars())
414435
};
415436

416-
if eq(&self.0, "int") {
417-
Left(r#"((?:-?\d+)|(?:\d+))"#.chars().map(Ok))
418-
} else if eq(&self.0, "float") {
437+
if eq(&self.input, "int") {
438+
Left(Left(r#"((?:-?\d+)|(?:\d+))"#.chars().map(Ok)))
439+
} else if eq(&self.input, "float") {
419440
// Regex in other implementations has lookaheads. As `regex` crate
420441
// doesn't support them, we use `f32`/`f64` grammar instead:
421442
// https://doc.rust-lang.org/stable/std/primitive.f64.html#grammar
422443
// Provided grammar is a superset of the original one:
423444
// - supports `e` as exponent in addition to `E`
424445
// - supports trailing dot: `1.`
425446
// - supports `inf` and `NaN`
426-
Left(
447+
Left(Left(
427448
"([+-]?(?:inf\
428449
|NaN\
429450
|(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
430451
))"
431452
.chars()
432453
.map(Ok),
433-
)
434-
} else if eq(&self.0, "word") {
435-
Left(r#"([^\s]+)"#.chars().map(Ok))
436-
} else if eq(&self.0, "string") {
437-
Left(
438-
r#"("(?:[^"\\]*(?:\\.[^"\\]*)*)"|'(?:[^'\\]*(?:\\.[^'\\]*)*)')"#
439-
.chars()
440-
.map(Ok),
441-
)
442-
} else if eq(&self.0, "") {
443-
Left(r#"(.*)"#.chars().map(Ok))
454+
))
455+
} else if eq(&self.input, "word") {
456+
Left(Left(r#"([^\s]+)"#.chars().map(Ok)))
457+
} else if eq(&self.input, "string") {
458+
Left(Right(
459+
OwnedChars::new(format!(
460+
"(?:\
461+
\"(?P<__{id}_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
462+
|'(?P<__{id}_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
463+
)",
464+
id = self.id,
465+
))
466+
.map(Ok),
467+
))
468+
} else if eq(&self.input, "") {
469+
Left(Left(r#"(.*)"#.chars().map(Ok)))
444470
} else {
445-
Right(iter::once(Err(UnknownParameterError { not_found: self.0 })))
471+
Right(iter::once(Err(ParameterError::NotFound(self.input))))
446472
}
447473
}
448474
}
@@ -451,11 +477,14 @@ where
451477
// https://github.com/rust-lang/rust/issues/63063
452478
/// [`IntoRegexCharIter::Iter`] for a [`Parameter`].
453479
type ParameterIter<Input> = Either<
454-
iter::Map<
455-
str::Chars<'static>,
456-
fn(char) -> Result<char, UnknownParameterError<Input>>,
480+
Either<
481+
iter::Map<
482+
str::Chars<'static>,
483+
fn(char) -> Result<char, ParameterError<Input>>,
484+
>,
485+
iter::Map<OwnedChars, fn(char) -> Result<char, ParameterError<Input>>>,
457486
>,
458-
iter::Once<Result<char, UnknownParameterError<Input>>>,
487+
iter::Once<Result<char, ParameterError<Input>>>,
459488
>;
460489

461490
/// [`Iterator`] for skipping a last [`Item`].
@@ -513,6 +542,36 @@ where
513542
}
514543
}
515544

545+
// TODO: Make private, once TAIT stabilized:
//       https://github.com/rust-lang/rust/issues/63063
/// Like [`str::Chars`] [`Iterator`], but owns its [`String`].
///
/// Yields the [`char`]s of the owned [`String`] in order, each exactly once.
#[derive(Clone, Debug)]
pub struct OwnedChars {
    /// Iterated [`String`].
    str: String,

    /// Byte offset inside the iterated [`String`] of the next [`char`] to be
    /// yielded.
    ///
    /// Only ever advanced by whole [`char`] widths, so it always lies on a
    /// [`char`] boundary (or equals the [`String`] length once exhausted).
    cur: usize,
}

impl OwnedChars {
    /// Creates a new [`OwnedChars`] [`Iterator`] positioned at the start of
    /// the provided [`String`].
    #[must_use]
    pub const fn new(str: String) -> Self {
        Self { str, cur: 0 }
    }
}

impl Iterator for OwnedChars {
    type Item = char;

    /// Returns the next [`char`] of the owned [`String`], or [`None`] once
    /// all of them have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // Decode only the `char` at the current byte offset, rather than
        // re-walking the whole `String` from its start on every call (which
        // made a full iteration quadratic in the `String` length).
        let ch = self.str.get(self.cur..)?.chars().next()?;
        self.cur += ch.len_utf8();
        Some(ch)
    }

    /// Bounds the number of remaining [`char`]s by the number of remaining
    /// bytes: every [`char`] occupies between 1 and 4 bytes in UTF-8.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.str.len().saturating_sub(self.cur);
        ((remaining + 3) / 4, Some(remaining))
    }
}
574+
516575
/// [`Iterator`] for escaping `^`, `$`, `[`, `]`, `(`, `)`, `{`, `}`, `.`, `|`,
517576
/// `?`, `*`, `+` with `\`, and removing it for other [`char`]s.
518577
///
@@ -586,7 +645,7 @@ where
586645
// Naming of test cases is preserved.
587646
#[cfg(test)]
588647
mod spec {
589-
use super::{Error, Expression, UnknownParameterError};
648+
use super::{Error, Expression, ParameterError};
590649

591650
#[test]
592651
fn alternation_with_optional() {
@@ -699,7 +758,10 @@ mod spec {
699758

700759
assert_eq!(
701760
expr.as_str(),
702-
r#"^("(?:[^"\\]*(?:\\.[^"\\]*)*)"|'(?:[^'\\]*(?:\\.[^'\\]*)*)')$"#,
761+
"^(?:\
762+
\"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
763+
|'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
764+
)$",
703765
);
704766
assert!(expr.is_match("\"\""));
705767
assert!(expr.is_match("''"));
@@ -710,6 +772,30 @@ mod spec {
710772
assert!(!expr.is_match("word"));
711773
}
712774

775+
#[test]
776+
fn multiple_string_parameters() {
777+
// TODO: Use "{e}" syntax once MSRV bumps above 1.58.
778+
let expr = Expression::regex("{string} {string}")
779+
.unwrap_or_else(|e| panic!("failed: {}", e));
780+
781+
assert_eq!(
782+
expr.as_str(),
783+
"^(?:\
784+
\"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
785+
|'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
786+
) (?:\
787+
\"(?P<__1_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
788+
|'(?P<__1_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
789+
)$",
790+
);
791+
assert!(expr.is_match("\"\" ''"));
792+
assert!(expr.is_match("'' \"\""));
793+
assert!(expr.is_match("'with \"' \"\""));
794+
assert!(expr.is_match("\"with '\" '\"'"));
795+
assert!(expr.is_match("\"with \\\" escaped\" 'with \\' escaped'"));
796+
assert!(expr.is_match("'with \\' escaped' \"with \\\" escaped\""));
797+
}
798+
713799
#[test]
714800
fn parameter_all() {
715801
// TODO: Use "{e}" syntax once MSRV bumps above 1.58.
@@ -747,10 +833,10 @@ mod spec {
747833
#[test]
748834
fn unknown_parameter() {
749835
match Expression::regex("{custom}").unwrap_err() {
750-
Error::Expansion(UnknownParameterError { not_found }) => {
836+
Error::Expansion(ParameterError::NotFound(not_found)) => {
751837
assert_eq!(*not_found, "custom");
752838
}
753-
e @ (Error::Parsing(_) | Error::Regex(_)) => {
839+
e @ (Error::Parsing(_) | Error::Regex(_) | Error::Expansion(_)) => {
754840
// TODO: Use "{e}" syntax once MSRV bumps above 1.58.
755841
panic!("wrong err: {}", e);
756842
}

0 commit comments

Comments
 (0)