From 1de678078ef15d93086993f77ea1029e4afa5755 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 10:37:23 +0100 Subject: [PATCH 1/9] Overhauled Input trait to improve versatility, added IterInput --- src/combinator.rs | 128 +++-- src/extension.rs | 8 +- src/input.rs | 1344 +++++++++++++++++++++++---------------------- src/inspector.rs | 40 +- src/label.rs | 18 +- src/lib.rs | 10 +- src/number.rs | 9 +- src/pratt.rs | 16 +- src/primitive.rs | 115 ++-- src/recovery.rs | 10 +- src/regex.rs | 17 +- src/span.rs | 2 +- src/stream.rs | 121 +++- 13 files changed, 987 insertions(+), 851 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index 3e074e43..0b7b4ab1 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -173,9 +173,10 @@ where let cfg = (self.cfg)( A::Config::default(), inp.ctx(), - inp.span_since(inp.offset()), + inp.span_since(&inp.cursor()), ) - .map_err(|e| inp.add_alt_err(inp.offset, e))?; + // TODO: Don't clone + .map_err(|e| inp.add_alt_err(&inp.cursor().inner, e))?; Ok((A::make_iter(&self.parser, inp)?, cfg)) } @@ -217,10 +218,10 @@ where where Self: Sized, { - let before = inp.offset(); + let before = inp.cursor(); self.parser.go::(inp)?; - Ok(M::bind(|| inp.slice_since(before..))) + Ok(M::bind(|| inp.slice_since(&before..))) } go_extra!(I::Slice); @@ -251,13 +252,13 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); self.parser.go::(inp).and_then(|out| { if (self.filter)(&out) { Ok(M::bind(|| out)) } else { - let err_span = inp.span_since(before); - inp.add_alt(inp.offset().offset, None, None, err_span); + let err_span = inp.span_since(&before); + inp.add_alt(None, None, err_span); Err(()) } }) @@ -363,10 +364,10 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); let out = self.parser.go::(inp)?; Ok(M::map(out, |out| { - 
(self.mapper)(out, &mut MapExtra::new(before, inp)) + (self.mapper)(out, &mut MapExtra::new(&before, inp)) })) } @@ -399,10 +400,10 @@ where inp: &mut InputRef<'a, '_, I, E>, state: &mut Self::IterState, ) -> IPResult { - let before = inp.offset(); + let before = inp.cursor(); match self.parser.next::(inp, state) { Ok(Some(o)) => Ok(Some(M::map(o, |o| { - (self.mapper)(o, &mut MapExtra::new(before, inp)) + (self.mapper)(o, &mut MapExtra::new(&before, inp)) }))), Ok(None) => Ok(None), Err(()) => Err(()), @@ -511,9 +512,9 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); self.parser.go::(inp)?; - Ok(M::bind(|| inp.span_since(before))) + Ok(M::bind(|| inp.span_since(&before))) } go_extra!(I::Span); @@ -547,13 +548,13 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); let out = self.parser.go::(inp)?; - let span = inp.span_since(before); + let span = inp.span_since(&before); match (self.mapper)(out, span) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { - inp.add_alt_err(before.offset, err); + inp.add_alt_err(&before.inner, err); Err(()) } } @@ -590,12 +591,13 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); let out = self.parser.go::(inp)?; - match (self.mapper)(out, &mut MapExtra::new(before, inp)) { + match (self.mapper)(out, &mut MapExtra::new(&before, inp)) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { - inp.add_alt_err(inp.offset().offset, err); + // TODO: Don't clone + inp.add_alt_err(&inp.cursor().inner, err); Err(()) } } @@ -799,10 +801,10 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); // TODO: Don't use address, since this might not be constant? 
let key = ( - before.offset, + I::cursor_location(&before.inner), &self.parser as *const _ as *const () as usize, ); @@ -810,10 +812,10 @@ where hashbrown::hash_map::Entry::Occupied(o) => { if let Some(err) = o.get() { let err = err.clone(); - inp.add_alt_err(err.pos, err.err); + inp.add_alt_err(&before.inner /*&err.pos*/, err.err); } else { - let err_span = inp.span_since(before); - inp.add_alt(key.0, None, None, err_span); + let err_span = inp.span_since(&before); + inp.add_alt(None, None, err_span); } return Err(()); } @@ -979,8 +981,10 @@ where #[cfg(feature = "memoization")] let mut memos = HashMap::default(); + let (start, mut cache) = inp2.begin(); let res = inp.with_input( - &inp2, + start, + &mut cache, |inp| (&self.parser_a).then_ignore(end()).go::(inp), #[cfg(feature = "memoization")] &mut memos, @@ -990,7 +994,8 @@ where let new_alt = inp.errors.alt.take(); inp.errors.alt = alt; if let Some(new_alt) = new_alt { - inp.add_alt_err(inp.offset().offset, new_alt.err); + // TODO: Don't clone + inp.add_alt_err(&inp.cursor().inner, new_alt.err); } res @@ -1450,7 +1455,7 @@ where } #[cfg(debug_assertions)] debug_assert!( - before.offset() != inp.offset(), + *before.cursor() != inp.cursor(), "found Repeated combinator making no progress at {}", self.location, ); @@ -1459,7 +1464,7 @@ where let mut state = self.make_iter::(inp)?; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.next::(inp, &mut state) { Ok(Some(())) => {} Ok(None) => break Ok(M::bind(|| ())), @@ -1470,7 +1475,7 @@ where } #[cfg(debug_assertions)] debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found Repeated combinator making no progress at {}", self.location, ); @@ -1767,7 +1772,7 @@ where let before_separator = inp.save(); if *state == 0 && self.allow_leading { if self.separator.go::(inp).is_err() { - inp.rewind(before_separator); + inp.rewind(before_separator.clone()); } } else if *state > 0 { match self.separator.go::(inp) 
{ @@ -1827,7 +1832,7 @@ where let mut state = self.make_iter::(inp)?; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.next::(inp, &mut state) { Ok(Some(())) => {} Ok(None) => break Ok(M::bind(|| ())), @@ -1838,7 +1843,7 @@ where } #[cfg(debug_assertions)] debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found SeparatedBy combinator making no progress at {}", self.location, ); @@ -1935,7 +1940,7 @@ where let mut i = 0; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.parser.next::(inp, &mut iter_state) { Ok(Some(out)) => { M::combine_mut(&mut output, out, |output: &mut C, item| output.push(item)); @@ -1949,7 +1954,7 @@ where if !A::NONCONSUMPTION_IS_OK { if i >= 1 { debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found Collect combinator making no progress at {}", self.location, ); @@ -1988,7 +1993,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); let mut output = M::bind(|| C::uninit()); let mut iter_state = self.parser.make_iter::(inp)?; for idx in 0..C::LEN { @@ -1997,7 +2002,7 @@ where M::combine_mut(&mut output, out, |c, out| C::write(c, idx, out)); } Ok(None) => { - inp.add_alt(inp.offset, None, None, inp.span_since(before)); + inp.add_alt(None, None, inp.span_since(&before)); // SAFETY: We're guaranteed to have initialized up to `idx` values M::map(output, |mut output| unsafe { C::drop_before(&mut output, idx) @@ -2118,15 +2123,15 @@ where let alt = inp.errors.alt.take(); let result = self.parser.go::(inp); - let result_span = inp.span_since(before.offset()); + let result_span = inp.span_since(before.cursor()); inp.rewind(before); inp.errors.alt = alt; match result { Ok(()) => { - let (at, found) = inp.next_inner(); - inp.add_alt(at, None, found.map(|f| f.into()), result_span); + let found = inp.next_inner(); + inp.add_alt(None, 
found.map(|f| f.into()), result_span); Err(()) } Err(()) => Ok(M::bind(|| ())), @@ -2240,7 +2245,7 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save(); + let before = inp.save().clone(); match self.parser_a.go::(inp) { Ok(out) => { // A succeeded -- go back to the beginning and try B @@ -2255,7 +2260,6 @@ where } Err(()) => { // B failed -- go back to the beginning and fail - inp.rewind(before); Err(()) } } @@ -2313,7 +2317,7 @@ where let mut iter_state = self.parser_a.make_iter::(inp)?; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.parser_a.next::(inp, &mut iter_state) { Ok(Some(out)) => { M::combine_mut(&mut a_out, out, |a_out, item| a_out.push(item)); @@ -2324,7 +2328,7 @@ where #[cfg(debug_assertions)] if !A::NONCONSUMPTION_IS_OK { debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found Foldr combinator making no progress at {}", self.location, ); @@ -2382,10 +2386,12 @@ where let mut a_out = M::bind(Vec::new); let mut iter_state = self.parser_a.make_iter::(inp)?; loop { - let before = inp.offset(); + let before = inp.cursor(); match self.parser_a.next::(inp, &mut iter_state) { Ok(Some(out)) => { - M::combine_mut(&mut a_out, out, |a_out, item| a_out.push((item, before))); + M::combine_mut(&mut a_out, out, |a_out, item| { + a_out.push((item, before.clone())) + }); } Ok(None) => break, Err(()) => return Err(()), @@ -2393,7 +2399,7 @@ where #[cfg(debug_assertions)] if !A::NONCONSUMPTION_IS_OK { debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found FoldrWithState combinator making no progress at {}", self.location, ); @@ -2404,7 +2410,7 @@ where Ok(M::combine(a_out, b_out, |a_out, b_out| { a_out.into_iter().rfold(b_out, |b, (a, before)| { - (self.folder)(a, b, &mut MapExtra::new(before, inp)) + (self.folder)(a, b, &mut MapExtra::new(&before, inp)) }) })) } @@ -2454,7 +2460,7 @@ where let mut iter_state = 
self.parser_b.make_iter::(inp)?; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.parser_b.next::(inp, &mut iter_state) { Ok(Some(b_out)) => { out = M::combine(out, b_out, |out, b_out| (self.folder)(out, b_out)); @@ -2465,7 +2471,7 @@ where #[cfg(debug_assertions)] if !B::NONCONSUMPTION_IS_OK { debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found Foldl combinator making no progress at {}", self.location, ); @@ -2514,16 +2520,16 @@ where where Self: Sized, { - let before_all = inp.offset(); + let before_all = inp.cursor(); let mut out = self.parser_a.go::(inp)?; let mut iter_state = self.parser_b.make_iter::(inp)?; loop { #[cfg(debug_assertions)] - let before = inp.offset(); + let before = inp.cursor(); match self.parser_b.next::(inp, &mut iter_state) { Ok(Some(b_out)) => { out = M::combine(out, b_out, |out, b_out| { - (self.folder)(out, b_out, &mut MapExtra::new(before_all, inp)) + (self.folder)(out, b_out, &mut MapExtra::new(&before_all, inp)) }) } Ok(None) => break Ok(out), @@ -2532,7 +2538,7 @@ where #[cfg(debug_assertions)] if !B::NONCONSUMPTION_IS_OK { debug_assert!( - before != inp.offset(), + before != inp.cursor(), "found FoldlWithState combinator making no progress at {}", self.location, ); @@ -2558,7 +2564,7 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save(); + let before = inp.save().clone(); match self.parser.go::(inp) { Ok(out) => { inp.rewind(before); @@ -2623,7 +2629,7 @@ where // where // Self: Sized, // { -// let start = inp.offset(); +// let start = inp.cursor(); // let res = self.parser.go::(inp); // if res.is_err() { @@ -2658,12 +2664,12 @@ where where Self: Sized, { - let start = inp.offset(); + let start = inp.cursor(); let res = self.parser.go::(inp); if res.is_err() { let mut e = inp.take_alt(); - let span = inp.span_since(start); + let span = inp.span_since(&start); e.err = (self.mapper)(e.err, span, 
inp.state()); inp.errors.alt = Some(e); } @@ -2705,13 +2711,13 @@ where where Self: Sized, { - let before = inp.offset(); + let before = inp.cursor(); let out = self.parser.go::(inp)?; let mut emitter = Emitter::new(); - let out = (self.validator)(out, &mut MapExtra::new(before, inp), &mut emitter); + let out = (self.validator)(out, &mut MapExtra::new(&before, inp), &mut emitter); for err in emitter.errors() { - inp.emit(inp.offset, err); + inp.emit(err); } Ok(M::bind(|| out)) } @@ -2738,7 +2744,7 @@ where // where // Self: Sized, // { -// let before = inp.save(); +// let before = inp.save().clone(); // match self.parser.go::(inp) { // Ok(out) => Ok(out), // Err(()) => { diff --git a/src/extension.rs b/src/extension.rs index 07e82677..eea632aa 100644 --- a/src/extension.rs +++ b/src/extension.rs @@ -27,7 +27,7 @@ //! E: extra::ParserExtra<'a, I>, //! { //! fn parse(&self, inp: &mut InputRef<'a, '_, I, E>) -> Result<(), E::Error> { -//! let before = inp.offset(); +//! let before = inp.cursor(); //! match inp.next_maybe().as_deref() { //! // The next token was a null byte, meaning that parsing was successful //! Some(b'\0') => Ok(()), @@ -38,7 +38,7 @@ //! // Found whatever the token was instead //! found.copied().map(Into::into), //! // The span of the error is the span of the token that was found instead -//! inp.span_since(before), +//! inp.span_since(&before), //! )), //! } //! 
} @@ -153,11 +153,11 @@ mod current { { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); match M::choose(&mut *inp, |inp| self.0.parse(inp), |inp| self.0.check(inp)) { Ok(out) => Ok(out), Err(err) => { - inp.add_alt_err(before.offset, err); + inp.add_alt_err(&before.inner, err); Err(()) } } diff --git a/src/input.rs b/src/input.rs index cc864fec..8b017ae6 100644 --- a/src/input.rs +++ b/src/input.rs @@ -7,12 +7,52 @@ use inspector::Inspector; -pub use crate::stream::{BoxedExactSizeStream, BoxedStream, Stream}; +pub use crate::stream::{BoxedExactSizeStream, BoxedStream, IterInput, Stream}; use super::*; #[cfg(feature = "std")] use std::io::{BufReader, Read, Seek}; +mod sealed { + pub trait Sealed {} +} +#[doc(hidden)] +pub trait MaybeOwned<'src, T: 'src>: + sealed::Sealed + Borrow + Into> +{ + type Proj: MaybeOwned<'src, U>; + #[doc(hidden)] + fn choose( + self, + f: impl FnOnce(&'src T) -> &'src R, + g: impl FnOnce(T) -> R, + ) -> Self::Proj; +} + +impl<'src, T> sealed::Sealed for &'src T {} +impl<'src, T> MaybeOwned<'src, T> for &'src T { + type Proj = &'src U; + fn choose( + self, + f: impl FnOnce(&'src T) -> &'src R, + _g: impl FnOnce(T) -> R, + ) -> Self::Proj { + f(self) + } +} + +impl sealed::Sealed for T {} +impl<'src, T: 'src> MaybeOwned<'src, T> for T { + type Proj = U; + fn choose( + self, + _f: impl FnOnce(&'src T) -> &'src R, + g: impl FnOnce(T) -> R, + ) -> Self::Proj { + g(self) + } +} + /// A trait for types that represents a stream of input tokens. Unlike [`Iterator`], this type /// supports backtracking and a few other features required by the crate. 
/// @@ -25,53 +65,51 @@ use std::io::{BufReader, Read, Seek}; /// - `&str`: [`SliceInput`], [`StrInput`], [`ValueInput`], [`ExactSizeInput`] /// - `&[T]`: [`SliceInput`], [`ValueInput`], [`BorrowInput`], [`ExactSizeInput`] /// - `Stream`: [`ValueInput`], [`ExactSizeInput`] if `I: ExactSizeIterator` -/// -/// This trait is sealed and so cannot be implemented by other crates because it has an unstable API. This may -/// eventually change. For now, if you wish to use a type that chumsky does not know about as an input, consider using -/// [`Stream`] or [opening an issue/PR](https://github.com/zesterer/chumsky/issues/new). -pub trait Input<'a>: Sealed + 'a { - /// The type used to keep track of the current location in the stream - #[doc(hidden)] - type Offset: Copy + Hash + Ord + Into; - - /// The type of singular items read from the stream - type Token; - +pub trait Input<'src>: 'src { /// The type of a span on this input - to provide custom span context see [`Input::spanned`]. type Span: Span; - /// Get the offset representing the start of this stream - #[doc(hidden)] - fn start(&self) -> Self::Offset; + /// The type of singular items read from the stream + type Token: 'src; /// The token type returned by [`Input::next_maybe`], allows abstracting over by-value and by-reference inputs. - #[doc(hidden)] - type TokenMaybe: Borrow + Into>; + type TokenMaybe: MaybeOwned<'src, Self::Token>; // Must be `&'src Self::Token` or `Self::Token` + + /// The type used to keep track of the current location in the stream. + /// + /// Cursors can be used to perform stream rewinding. + type Cursor: Clone; + + /// A type that contains cached or constant data pertaining to an input. + /// + /// If in doubt, `()` can be used. + type Cache; - /// Get the next offset from the provided one, and the next token if it exists + /// Create an initial cursor and cache at the start of the input. 
+ fn begin(self) -> (Self::Cursor, Self::Cache); + + /// TODO + fn cursor_location(cursor: &Self::Cursor) -> usize; + + /// Pull the next token, if any, from the input. /// - /// The token is effectively self-owning (even if it refers to the underlying input) so as to abstract over - /// by-value and by-reference inputs. For alternatives with stronger guarantees, see [`ValueInput::next`] and - /// `BorrowInput::next_ref`. + /// For alternatives with stronger guarantees, see [`ValueInput::next`] and `BorrowInput::next_ref`. /// /// # Safety /// - /// `offset` must be generated by either `Input::start` or a previous call to this function. - #[doc(hidden)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option); + /// `cursor` must be generated by `Input::begin`, and must not be shared between multiple inputs. + unsafe fn next_maybe( + cache: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option; - /// Create a span from a start and end offset. + /// Create a span going from the start cursor to the end cursor (exclusive). /// /// # Safety /// - /// As with [`Input::next_maybe`], the offsets passed to this function must be generated by either [`Input::start`] - /// or [`Input::next_maybe`]. - #[doc(hidden)] - unsafe fn span(&self, range: Range) -> Self::Span; - - // Get the previous offset, saturating at zero - #[doc(hidden)] - fn prev(offs: Self::Offset) -> Self::Offset; + /// As with [`Input::next_maybe`], the cursors passed to this function must be generated by [`Input::begin`] and + /// must not be shared between multiple inputs. + unsafe fn span(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span; /// Split an input that produces tokens of type `(T, S)` into one that produces tokens of type `T` and spans of /// type `S`. @@ -82,7 +120,7 @@ pub trait Input<'a>: Sealed + 'a { /// /// Also required is an 'End Of Input' (EoI) span. 
This span is arbitrary, but is used by the input to produce /// sensible spans that extend to the end of the input or are zero-width. Most implementations simply use some - /// equivalent of `len..len` (i.e: a span where both the start and end offsets are set to the end of the input). + /// equivalent of `len..len` (i.e: a span where both the start and end cursors are set to the end of the input). /// However, what you choose for this span is up to you: but consider that the context, start, and end of the span /// will be recombined to create new spans as required by the parser. /// @@ -91,9 +129,9 @@ pub trait Input<'a>: Sealed + 'a { /// around sections of the input. fn spanned(self, eoi: S) -> SpannedInput where - Self: Input<'a, Token = (T, S)> + Sized, - T: 'a, - S: Span + Clone + 'a, + Self: Input<'src, Token = (T, S)> + Sized, + T: 'src, + S: Span + Clone + 'src, { SpannedInput { input: self, @@ -125,7 +163,7 @@ pub trait Input<'a>: Sealed + 'a { /// an identifier that corresponds to the file the spans originated from. fn map_span(self, map_fn: F) -> MappedSpan where - Self: Input<'a> + Sized, + Self: Input<'src> + Sized, F: Fn(Self::Span) -> S, { MappedSpan { @@ -137,292 +175,297 @@ pub trait Input<'a>: Sealed + 'a { } /// Implement by inputs that have a known size (including spans) -pub trait ExactSizeInput<'a>: Input<'a> { - /// Get a span from a start offset to the end of the input. - #[doc(hidden)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span; +pub trait ExactSizeInput<'src>: Input<'src> { + /// Get a span from a start cursor to the end of the input. + unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span; } /// Implemented by inputs that represent slice-like streams of input tokens. -pub trait SliceInput<'a>: ExactSizeInput<'a> { +pub trait SliceInput<'src>: ExactSizeInput<'src> { /// The unsized slice type of this input. For [`&str`] it's `&str`, and for [`&[T]`] it will be `&[T]`. 
type Slice; /// Get the full slice of the input - #[doc(hidden)] - fn full_slice(&self) -> Self::Slice; + fn full_slice(cache: &Self::Cache) -> Self::Slice; - /// Get a slice from a start and end offset + /// Get a slice from a start and end cursor // TODO: Make unsafe - #[doc(hidden)] - fn slice(&self, range: Range) -> Self::Slice; + unsafe fn slice(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice; - /// Get a slice from a start offset till the end of the input + /// Get a slice from a start cursor to the end of the input // TODO: Make unsafe - #[doc(hidden)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice; + unsafe fn slice_from(cache: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice; } -// Implemented by inputs that reference a string slice and use byte indices as their offset. -/// A trait for types that represent string-like streams of input tokens -pub trait StrInput<'a, C: Char>: - ValueInput<'a, Offset = usize, Token = C> + SliceInput<'a, Slice = &'a C::Str> +// Implemented by inputs that reference a string slice and use byte indices as their cursor. This trait is sealed right +// now because `StrInput` places additional requirements on its cursor semantics. +/// A trait for types that represent string-like streams of input tokens. +pub trait StrInput<'src, C: Char>: + Sealed + ValueInput<'src, Cursor = usize, Token = C> + SliceInput<'src, Slice = &'src C::Str> { } /// Implemented by inputs that can have tokens borrowed from them. -pub trait ValueInput<'a>: Input<'a> { - /// Get the next offset from the provided one, and the next token if it exists +pub trait ValueInput<'src>: Input<'src> { + /// Get the next cursor from the provided one, and the next token if it exists /// /// # Safety /// - /// `offset` must be generated by either `Input::start` or a previous call to this function. 
- #[doc(hidden)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option); + /// `cursor` must be generated by either `Input::start` or a previous call to this function. + unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option; } /// Implemented by inputs that can have tokens borrowed from them. -pub trait BorrowInput<'a>: Input<'a> { +pub trait BorrowInput<'src>: Input<'src> { /// Borrowed version of [`ValueInput::next`] with the same safety requirements. /// /// # Safety /// /// Same as [`ValueInput::next`] - #[doc(hidden)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>); + unsafe fn next_ref(cache: &Self::Cache, cursor: &mut Self::Cursor) + -> Option<&'src Self::Token>; } -impl Sealed for &str {} -impl<'a> Input<'a> for &'a str { - type Offset = usize; - type Token = char; +impl<'src> Input<'src> for &'src str { + type Cursor = usize; type Span = SimpleSpan; - #[inline] - fn start(&self) -> Self::Offset { - 0 - } - + type Token = char; type TokenMaybe = char; - #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.next(offset) + type Cache = Self; + + #[inline] + fn begin(self) -> (Self::Cursor, Self::Cache) { + (0, self) } - #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - range.into() + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + *cursor + } + + #[inline(always)] + unsafe fn next_maybe( + this: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + if *cursor < this.len() { + // SAFETY: `cursor < self.len()` above guarantees cursor is in-bounds + // We only ever return cursors that are at a character boundary + let c = this + .get_unchecked(*cursor..) 
+ .chars() + .next() + .unwrap_unchecked(); + *cursor += c.len_utf8(); + Some(c) + } else { + None + } } #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - offs.saturating_sub(1) + unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + (*range.start..*range.end).into() } } -impl<'a> ExactSizeInput<'a> for &'a str { +impl<'src> ExactSizeInput<'src> for &'src str { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - (range.start..self.len()).into() + unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + (*range.start..this.len()).into() } } -impl<'a> ValueInput<'a> for &'a str { +impl<'src> ValueInput<'src> for &'src str { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - if offset < self.len() { - // SAFETY: `offset < self.len()` above guarantees offset is in-bounds - // We only ever return offsets that are at a character boundary - let c = unsafe { - self.get_unchecked(offset..) 
- .chars() - .next() - .unwrap_unchecked() - }; - (offset + c.len_utf8(), Some(c)) - } else { - (offset, None) - } + unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + Self::next_maybe(this, cursor) } } -impl<'a> StrInput<'a, char> for &'a str {} +impl<'src> Sealed for &'src str {} +impl<'src> StrInput<'src, char> for &'src str {} -impl<'a> SliceInput<'a> for &'a str { - type Slice = &'a str; +impl<'src> SliceInput<'src> for &'src str { + type Slice = &'src str; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - *self + fn full_slice(this: &Self::Cache) -> Self::Slice { + *this } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - &self[range] + unsafe fn slice(this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + &this[*range.start..*range.end] } #[inline(always)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice { - &self[from] + unsafe fn slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + &this[*from.start..] 
} } -impl Sealed for &[T] {} -impl<'a, T> Input<'a> for &'a [T] { - type Offset = usize; - type Token = T; +impl<'src, T> Input<'src> for &'src [T] { + type Cursor = usize; type Span = SimpleSpan; - #[inline(always)] - fn start(&self) -> Self::Offset { - 0 - } + type Token = T; + type TokenMaybe = &'src T; - type TokenMaybe = &'a T; + type Cache = Self; - #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.next_ref(offset) + #[inline] + fn begin(self) -> (Self::Cursor, Self::Cache) { + (0, self) + } + + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + *cursor } #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - range.into() + unsafe fn next_maybe( + this: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + if let Some(tok) = this.get(*cursor) { + *cursor += 1; + Some(tok) + } else { + None + } } #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - offs.saturating_sub(1) + unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + (*range.start..*range.end).into() } } -impl<'a, T> ExactSizeInput<'a> for &'a [T] { +impl<'src, T> ExactSizeInput<'src> for &'src [T] { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - (range.start..self.len()).into() + unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + (*range.start..this.len()).into() } } -impl<'a> StrInput<'a, u8> for &'a [u8] {} +impl<'src> Sealed for &'src [u8] {} +impl<'src> StrInput<'src, u8> for &'src [u8] {} -impl<'a, T> SliceInput<'a> for &'a [T] { - type Slice = &'a [T]; +impl<'src, T> SliceInput<'src> for &'src [T] { + type Slice = &'src [T]; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - *self + fn full_slice(this: &Self::Cache) -> Self::Slice { + *this } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - &self[range] + unsafe fn slice(this: &Self::Cache, range: 
Range<&Self::Cursor>) -> Self::Slice { + &this[*range.start..*range.end] } #[inline(always)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice { - &self[from] + unsafe fn slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + &this[*from.start..] } } -impl<'a, T: Clone> ValueInput<'a> for &'a [T] { +impl<'src, T: Clone> ValueInput<'src> for &'src [T] { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - if let Some(tok) = self.get(offset) { - (offset + 1, Some(tok.clone())) - } else { - (offset, None) - } + unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + Self::next_maybe(this, cursor).cloned() } } -impl<'a, T> BorrowInput<'a> for &'a [T] { +impl<'src, T> BorrowInput<'src> for &'src [T] { #[inline(always)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { - if let Some(tok) = self.get(offset) { - (offset + 1, Some(tok)) - } else { - (offset, None) - } + unsafe fn next_ref(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option<&'src Self::Token> { + Self::next_maybe(this, cursor) } } -impl<'a, T: 'a, const N: usize> Sealed for &'a [T; N] {} -impl<'a, T: 'a, const N: usize> Input<'a> for &'a [T; N] { - type Offset = usize; - type Token = T; +impl<'src, T: 'src, const N: usize> Input<'src> for &'src [T; N] { + type Cursor = usize; type Span = SimpleSpan; - #[inline(always)] - fn start(&self) -> Self::Offset { - 0 - } + type Token = T; + type TokenMaybe = &'src T; - type TokenMaybe = &'a T; + type Cache = Self; - #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.next_ref(offset) + #[inline] + fn begin(self) -> (Self::Cursor, Self::Cache) { + (0, self) + } + + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + *cursor } #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - range.into() + unsafe fn next_maybe( + this: &Self::Cache, + 
cursor: &mut Self::Cursor, + ) -> Option { + if let Some(tok) = this.get(*cursor) { + *cursor += 1; + Some(tok) + } else { + None + } } #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - offs.saturating_sub(1) + unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + (*range.start..*range.end).into() } } -impl<'a, T: 'a, const N: usize> ExactSizeInput<'a> for &'a [T; N] { +impl<'src, T: 'src, const N: usize> ExactSizeInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - (range.start..N).into() + unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + (*range.start..this.len()).into() } } -impl<'a, const N: usize> StrInput<'a, u8> for &'a [u8; N] {} +impl<'src, const N: usize> Sealed for &'src [u8; N] {} +impl<'src, const N: usize> StrInput<'src, u8> for &'src [u8; N] {} -impl<'a, T: 'a, const N: usize> SliceInput<'a> for &'a [T; N] { - type Slice = &'a [T]; +impl<'src, T: 'src, const N: usize> SliceInput<'src> for &'src [T; N] { + type Slice = &'src [T]; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - *self + fn full_slice(this: &Self::Cache) -> Self::Slice { + *this } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - &self[range] + unsafe fn slice(this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + &this[*range.start..*range.end] } #[inline(always)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice { - &self[from] + unsafe fn slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + &this[*from.start..] 
} } -impl<'a, T: Clone + 'a, const N: usize> ValueInput<'a> for &'a [T; N] { +impl<'src, T: Clone + 'src, const N: usize> ValueInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - if let Some(tok) = self.get(offset) { - (offset + 1, Some(tok.clone())) - } else { - (offset, None) - } + unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + Self::next_maybe(this, cursor).cloned() } } -impl<'a, T: 'a, const N: usize> BorrowInput<'a> for &'a [T; N] { +impl<'src, T: 'src, const N: usize> BorrowInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { - if let Some(tok) = self.get(offset) { - (offset + 1, Some(tok)) - } else { - (offset, None) - } + unsafe fn next_ref(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option<&'src Self::Token> { + Self::next_maybe(this, cursor) } } @@ -434,140 +477,123 @@ pub struct SpannedInput { phantom: PhantomData, } -/// Utility type required to allow [`SpannedInput`] to implement [`Input`]. 
-#[doc(hidden)] -pub struct SpannedTokenMaybe<'a, I: Input<'a>, T, S>(I::TokenMaybe, PhantomData<(T, S)>); - -impl<'a, I: Input<'a, Token = (T, S)>, T, S> Borrow for SpannedTokenMaybe<'a, I, T, S> { - #[inline(always)] - fn borrow(&self) -> &T { - &self.0.borrow().0 - } -} - -impl<'a, I: Input<'a, Token = (T, S)>, T, S: 'a> From> - for MaybeRef<'a, T> -{ - #[inline(always)] - fn from(st: SpannedTokenMaybe<'a, I, T, S>) -> MaybeRef<'a, T> { - match st.0.into() { - MaybeRef::Ref((tok, _)) => MaybeRef::Ref(tok), - MaybeRef::Val((tok, _)) => MaybeRef::Val(tok), - } - } -} - -impl<'a, T, S, I: Input<'a>> Sealed for SpannedInput {} -impl<'a, T, S, I> Input<'a> for SpannedInput +impl<'src, T, S, I> Input<'src> for SpannedInput where - I: Input<'a, Token = (T, S)>, - T: 'a, - S: Span + Clone + 'a, + I: Input<'src, Token = (T, S)>, + T: 'src, + S: Span + Clone + 'src, { - type Offset = I::Offset; - type Token = T; + type Cursor = (I::Cursor, Option); type Span = S; - #[inline(always)] - fn start(&self) -> Self::Offset { - self.input.start() - } + type Token = T; + type TokenMaybe = >::Proj; - type TokenMaybe = SpannedTokenMaybe<'a, I, T, S>; + type Cache = (I::Cache, S); - #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - let (offset, tok) = self.input.next_maybe(offset); - (offset, tok.map(|tok| SpannedTokenMaybe(tok, PhantomData))) + #[inline] + fn begin(self) -> (Self::Cursor, Self::Cache) { + let (cursor, cache) = self.input.begin(); + ((cursor, None), (cache, self.eoi)) } - #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - let start = self - .input - .next_maybe(range.start) - .1 - .or_else(|| self.input.next_maybe(self.input.start()).1) - // TODO: Should EOI actually be 'full input'? 
- .map_or(self.eoi.start(), |tok| tok.borrow().1.start()); - let end = self - .input - .next_maybe(I::prev(range.end)) - .1 - .map_or(self.eoi.start(), |tok| tok.borrow().1.end()); - S::new(self.eoi.context(), start..end) + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + I::cursor_location(&cursor.0) } #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - I::prev(offs) + unsafe fn next_maybe( + (cache, _): &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + I::next_maybe(cache, &mut cursor.0).map(|tok| { + cursor.1 = Some(tok.borrow().1.end()); + tok.choose(|(tok, _)| tok, |(tok, _)| tok) + }) + } + + #[inline] + unsafe fn span((cache, eoi): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + let start = I::next_maybe(cache, &mut range.start.0.clone()) + .map(|tok| tok.borrow().1.start()) + // .or_else(|| self.input.next_maybe(self.input.start()).1) + .unwrap_or_else(|| eoi.start()); + let end = range.end.1.clone().unwrap_or_else(|| eoi.end()); + S::new(eoi.context(), start..end) } } -impl<'a, T, S, I> ExactSizeInput<'a> for SpannedInput +impl<'src, T, S, I> ExactSizeInput<'src> for SpannedInput where - I: ExactSizeInput<'a, Token = (T, S)>, - T: 'a, - S: Span + Clone + 'a, + I: ExactSizeInput<'src, Token = (T, S)>, + T: 'src, + S: Span + Clone + 'src, { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - let start = self - .input - .next_maybe(range.start) - .1 - .map_or(self.eoi.start(), |tok| tok.borrow().1.start()); - S::new(self.eoi.context(), start..self.eoi.start()) + unsafe fn span_from((cache, eoi): &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + let start = I::next_maybe(cache, &mut range.start.0.clone()) + .map(|tok| tok.borrow().1.start()) + // .or_else(|| self.input.next_maybe(self.input.start()).1) + .unwrap_or_else(|| eoi.start()); + S::new(eoi.context(), start..eoi.end()) } } -impl<'a, T, S, I> ValueInput<'a> for SpannedInput +impl<'src, T, S, I> 
ValueInput<'src> for SpannedInput where - I: ValueInput<'a, Token = (T, S)>, - T: 'a, - S: Span + Clone + 'a, + I: ValueInput<'src, Token = (T, S)>, + T: 'src, + S: Span + Clone + 'src, { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - let (offs, tok) = self.input.next(offset); - (offs, tok.map(|(tok, _)| tok)) + unsafe fn next((cache, _): &Self::Cache, cursor: &mut Self::Cursor) -> Option { + I::next(cache, &mut cursor.0).map(|tok| { + cursor.1 = Some(tok.1.end()); + tok.0 + }) } } -impl<'a, T, S, I> BorrowInput<'a> for SpannedInput +impl<'src, T, S, I> BorrowInput<'src> for SpannedInput where - I: Input<'a> + BorrowInput<'a, Token = (T, S)>, - T: 'a, - S: Span + Clone + 'a, + I: Input<'src> + BorrowInput<'src, Token = (T, S)>, + T: 'src, + S: Span + Clone + 'src, { #[inline(always)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { - let (offs, tok) = self.input.next_ref(offset); - (offs, tok.map(|(tok, _)| tok)) + unsafe fn next_ref( + (cache, _): &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token> { + I::next_ref(cache, &mut cursor.0).map(|tok| { + cursor.1 = Some(tok.1.end()); + &tok.0 + }) } } -impl<'a, T, S, I> SliceInput<'a> for SpannedInput +impl<'src, T, S, I> SliceInput<'src> for SpannedInput where - I: Input<'a> + SliceInput<'a, Token = (T, S)>, - T: 'a, - S: Span + Clone + 'a, + I: Input<'src> + SliceInput<'src, Token = (T, S)>, + T: 'src, + S: Span + Clone + 'src, { type Slice = I::Slice; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - ::full_slice(&self.input) + fn full_slice((cache, _): &Self::Cache) -> Self::Slice { + I::full_slice(cache) } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - ::slice(&self.input, range) + unsafe fn slice((cache, _): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + I::slice(cache, &range.start.0..&range.end.0) } #[inline(always)] - fn slice_from(&self, from: 
RangeFrom) -> Self::Slice { - ::slice_from(&self.input, from) + unsafe fn slice_from((cache, _): &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + I::slice_from(cache, &from.start.0..) } } @@ -580,114 +606,130 @@ pub struct WithContext { phantom: PhantomData, } -impl Sealed for WithContext {} -impl<'a, S, I: Input<'a>> Input<'a> for WithContext +impl<'src, S, I: Input<'src>> Input<'src> for WithContext where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, { - type Offset = I::Offset; - type Token = I::Token; + type Cursor = I::Cursor; type Span = S; + type Token = I::Token; + type TokenMaybe = I::TokenMaybe; + + type Cache = (I::Cache, S::Context); + #[inline(always)] - fn start(&self) -> Self::Offset { - self.input.start() + fn begin(self) -> (Self::Cursor, Self::Cache) { + let (cursor, cache) = self.input.begin(); + (cursor, (cache, self.context)) } - type TokenMaybe = I::TokenMaybe; + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + I::cursor_location(cursor) + } #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.input.next_maybe(offset) + unsafe fn next_maybe( + (cache, _): &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + I::next_maybe(cache, cursor) } - #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - let inner_span = self.input.span(range); - Span::new( - self.context.clone(), + #[inline] + unsafe fn span((cache, ctx): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + let inner_span = I::span(cache, range); + S::new( + ctx.clone(), inner_span.start().into()..inner_span.end().into(), ) } - - #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - I::prev(offs) - } } -impl<'a, S, I: Input<'a>> ExactSizeInput<'a> for WithContext +impl<'src, S, I: Input<'src>> ExactSizeInput<'src> for WithContext where - I: ExactSizeInput<'a>, - S: Span 
+ Clone + 'a, - S::Context: Clone + 'a, + I: ExactSizeInput<'src>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, { - #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - let inner_span = self.input.span_from(range); - Span::new( - self.context.clone(), + #[inline] + unsafe fn span_from((cache, ctx): &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + let inner_span = I::span_from(cache, range); + S::new( + ctx.clone(), inner_span.start().into()..inner_span.end().into(), ) } } -impl<'a, S, I: ValueInput<'a>> ValueInput<'a> for WithContext +impl<'src, S, I: ValueInput<'src>> ValueInput<'src> for WithContext where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.input.next(offset) + unsafe fn next((cache, _): &Self::Cache, cursor: &mut Self::Cursor) -> Option { + I::next(cache, cursor) } } -impl<'a, S, I: BorrowInput<'a>> BorrowInput<'a> for WithContext +impl<'src, S, I: BorrowInput<'src>> BorrowInput<'src> for WithContext where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, { #[inline(always)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { - self.input.next_ref(offset) + unsafe fn next_ref( + (cache, _): &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token> { + I::next_ref(cache, cursor) } } -impl<'a, S, I: SliceInput<'a>> SliceInput<'a> for WithContext +impl<'src, S, I: SliceInput<'src>> SliceInput<'src> for WithContext where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, { type Slice = I::Slice; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - 
::full_slice(&self.input) + fn full_slice((cache, _): &Self::Cache) -> Self::Slice { + I::full_slice(cache) } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - ::slice(&self.input, range) + unsafe fn slice((cache, _): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + I::slice(cache, range) } #[inline(always)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice { - ::slice_from(&self.input, from) + unsafe fn slice_from((cache, _): &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + I::slice_from(cache, from) } } -impl<'a, C, S, I> StrInput<'a, C> for WithContext +impl<'src, S, I> Sealed for WithContext +where + I: Input<'src>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, + S::Offset: From<::Offset>, +{ +} +impl<'src, C, S, I> StrInput<'src, C> for WithContext where - I: StrInput<'a, C>, - S: Span + Clone + 'a, - S::Context: Clone + 'a, + I: StrInput<'src, C>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, C: Char, { @@ -702,113 +744,130 @@ pub struct MappedSpan { phantom: PhantomData, } -impl<'a, S: Span, I: Input<'a>, F> Sealed for MappedSpan {} -impl<'a, S, I: Input<'a>, F: 'a> Input<'a> for MappedSpan +impl<'src, S, I: Input<'src>, F: 'src> Input<'src> for MappedSpan where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { - type Offset = I::Offset; - type Token = I::Token; + type Cursor = I::Cursor; type Span = S; - #[inline(always)] - fn start(&self) -> Self::Offset { - self.input.start() - } - + type Token = I::Token; type TokenMaybe = I::TokenMaybe; + type Cache = (I::Cache, F); + #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.input.next_maybe(offset) + fn begin(self) -> (Self::Cursor, Self::Cache) { + let (cursor, cache) = self.input.begin(); + (cursor, (cache, self.map_fn)) } - #[inline(always)] - unsafe fn 
span(&self, range: Range) -> Self::Span { - let inner_span = self.input.span(range); - (self.map_fn)(inner_span) + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + I::cursor_location(cursor) } #[inline(always)] - fn prev(offs: Self::Offset) -> Self::Offset { - I::prev(offs) + unsafe fn next_maybe( + cache: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + I::next_maybe(&cache.0, cursor) + } + + #[inline] + unsafe fn span(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + let inner_span = I::span(&cache.0, range); + (cache.1)(inner_span) } } -impl<'a, S, I: Input<'a>, F: 'a> ExactSizeInput<'a> for MappedSpan +impl<'src, S, I: Input<'src>, F: 'src> ExactSizeInput<'src> for MappedSpan where - I: ExactSizeInput<'a>, - S: Span + Clone + 'a, - S::Context: Clone + 'a, + I: ExactSizeInput<'src>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { - let inner_span = self.input.span_from(range); - (self.map_fn)(inner_span) + unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + let inner_span = I::span_from(&cache.0, range); + (cache.1)(inner_span) } } -impl<'a, S, I: ValueInput<'a>, F: 'a> ValueInput<'a> for MappedSpan +impl<'src, S, I: ValueInput<'src>, F: 'src> ValueInput<'src> for MappedSpan where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { #[inline(always)] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.input.next(offset) + unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + I::next(&cache.0, cursor) } } -impl<'a, S, I: BorrowInput<'a>, F: 'a> BorrowInput<'a> for MappedSpan +impl<'src, S, I: BorrowInput<'src>, F: 'src> BorrowInput<'src> for MappedSpan where - S: Span + Clone + 'a, - 
S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { #[inline(always)] - unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { - self.input.next_ref(offset) + unsafe fn next_ref( + cache: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token> { + I::next_ref(&cache.0, cursor) } } -impl<'a, S, I: SliceInput<'a>, F: 'a> SliceInput<'a> for MappedSpan +impl<'src, S, I: SliceInput<'src>, F: 'src> SliceInput<'src> for MappedSpan where - S: Span + Clone + 'a, - S::Context: Clone + 'a, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, { type Slice = I::Slice; #[inline(always)] - fn full_slice(&self) -> Self::Slice { - ::full_slice(&self.input) + fn full_slice(cache: &Self::Cache) -> Self::Slice { + I::full_slice(&cache.0) } #[inline(always)] - fn slice(&self, range: Range) -> Self::Slice { - ::slice(&self.input, range) + unsafe fn slice(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + I::slice(&cache.0, range) } #[inline(always)] - fn slice_from(&self, from: RangeFrom) -> Self::Slice { - ::slice_from(&self.input, from) + unsafe fn slice_from(cache: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + I::slice_from(&cache.0, from) } } -impl<'a, C, S, I, F: 'a> StrInput<'a, C> for MappedSpan +impl<'src, S, I, F: 'src> Sealed for MappedSpan where - I: StrInput<'a, C>, - S: Span + Clone + 'a, - S::Context: Clone + 'a, + I: Input<'src>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, + S::Offset: From<::Offset>, + F: Fn(I::Span) -> S, +{ +} +impl<'src, C, S, I, F: 'src> StrInput<'src, C> for MappedSpan +where + I: StrInput<'src, C>, + S: Span + Clone + 'src, + S::Context: Clone + 'src, S::Offset: From<::Offset>, F: Fn(I::Span) -> S, C: Char, @@ -818,7 +877,7 @@ where #[cfg(feature = "std")] struct IoInner { reader: BufReader, - last_offset: usize, + 
last_cursor: usize, } /// Input type which supports seekable readers. Uses a [`BufReader`] internally to buffer input and @@ -834,49 +893,55 @@ impl IoInput { pub fn new(reader: R) -> IoInput { IoInput(RefCell::new(IoInner { reader: BufReader::new(reader), - last_offset: 0, + last_cursor: 0, })) } } #[cfg(feature = "std")] -impl Sealed for IoInput {} -#[cfg(feature = "std")] -impl<'a, R: Read + Seek + 'a> Input<'a> for IoInput { - type Offset = usize; - type Token = u8; +impl<'src, R: Read + Seek + 'src> Input<'src> for IoInput { + type Cursor = usize; type Span = SimpleSpan; - fn start(&self) -> Self::Offset { - 0 - } - + type Token = u8; type TokenMaybe = u8; - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - Self::next(self, offset) + type Cache = Self; + + fn begin(self) -> (Self::Cursor, Self::Cache) { + (0, self) + } + + #[inline(always)] + fn cursor_location(cursor: &Self::Cursor) -> usize { + *cursor } - unsafe fn span(&self, range: Range) -> Self::Span { - SimpleSpan::from(range) + #[inline(always)] + unsafe fn next_maybe( + cache: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + Self::next(cache, cursor) } - fn prev(offs: Self::Offset) -> Self::Offset { - offs.saturating_sub(1) + #[inline] + unsafe fn span(_cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + (*range.start..*range.end).into() } } #[cfg(feature = "std")] -impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput { - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { - let mut inner = self.0.borrow_mut(); +impl<'src, R: Read + Seek + 'src> ValueInput<'src> for IoInput { + unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + let mut inner = cache.0.borrow_mut(); - if offset != inner.last_offset { - let seek = offset as i64 - inner.last_offset as i64; + if *cursor != inner.last_cursor { + let seek = *cursor as i64 - inner.last_cursor as i64; inner.reader.seek_relative(seek).unwrap(); - 
inner.last_offset = offset; + inner.last_cursor = *cursor; } let mut out = 0; @@ -885,47 +950,44 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput { match r { Ok(()) => { - inner.last_offset += 1; - (offset + 1, Some(out)) + inner.last_cursor += 1; + *cursor += 1; + Some(out) } - Err(_) => (offset, None), + Err(_) => None, } } } /// Represents a location in an input that can be rewound to. /// -/// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`]. -pub struct Marker<'a, 'parse, I: Input<'a>, C> { - pub(crate) offset: I::Offset, +/// Checkpoints can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`]. +pub struct Checkpoint<'src, 'parse, I: Input<'src>, C> { + cursor: Cursor<'src, 'parse, I>, pub(crate) err_count: usize, - pub(crate) user_checkpoint: C, + pub(crate) inspector: C, phantom: PhantomData &'parse ()>, // Invariance } -impl<'a, 'parse, I: Input<'a>, C> Marker<'a, 'parse, I, C> { - /// Get the [`Offset`] that this marker corresponds to. - pub fn offset(self) -> Offset<'a, 'parse, I> { - Offset { - offset: self.offset, - phantom: PhantomData, - } +impl<'src, 'parse, I: Input<'src>, C> Checkpoint<'src, 'parse, I, C> { + /// Get the [`Cursor`] that this checkpoint corresponds to. + pub fn cursor(&self) -> &Cursor<'src, 'parse, I> { + &self.cursor } - /// Get the [`SaveMarker`][Inspector::SaveMarker] that this marker corresponds to. - pub fn ext_checkpoint(self) -> C { - self.user_checkpoint + /// Get the [`Checkpoint`][Inspector::Checkpoint] that this marker corresponds to. 
+ pub fn inspector(&self) -> &C { + &self.inspector } } -impl<'a, I: Input<'a>, C: Copy> Copy for Marker<'a, '_, I, C> {} -impl<'a, I: Input<'a>, C: Clone> Clone for Marker<'a, '_, I, C> { +impl<'src, I: Input<'src>, C: Clone> Clone for Checkpoint<'src, '_, I, C> { #[inline(always)] fn clone(&self) -> Self { Self { - user_checkpoint: self.user_checkpoint.clone(), - offset: self.offset, + cursor: self.cursor.clone(), err_count: self.err_count, + inspector: self.inspector.clone(), phantom: PhantomData, } } @@ -933,36 +995,48 @@ impl<'a, I: Input<'a>, C: Clone> Clone for Marker<'a, '_, I, C> { /// Represents a location in an input. /// -/// If you to rewind to an old input location, see [`Marker`]. -pub struct Offset<'a, 'parse, I: Input<'a>> { - pub(crate) offset: I::Offset, +/// If you to rewind to an old input location, see [`Checkpoint`]. +#[repr(transparent)] +pub struct Cursor<'src, 'parse, I: Input<'src>> { + pub(crate) inner: I::Cursor, phantom: PhantomData &'parse ()>, // Invariance } -impl<'a, I: Input<'a>> Copy for Offset<'a, '_, I> {} -impl<'a, I: Input<'a>> Clone for Offset<'a, '_, I> { +impl<'src, 'parse, I: Input<'src>> Cursor<'src, 'parse, I> { + /// Get the input's internal cursor. 
+ pub fn inner(&self) -> &I::Cursor { + &self.inner + } +} + +impl<'src, I: Input<'src>> Clone for Cursor<'src, '_, I> { #[inline(always)] fn clone(&self) -> Self { - *self + Self { + inner: self.inner.clone(), + phantom: PhantomData, + } } } -impl<'a, I: Input<'a>> Eq for Offset<'a, '_, I> {} -impl<'a, I: Input<'a>> PartialEq for Offset<'a, '_, I> { +impl<'src, I: Input<'src>> Eq for Cursor<'src, '_, I> {} +impl<'src, I: Input<'src>> PartialEq for Cursor<'src, '_, I> { fn eq(&self, other: &Self) -> bool { - self.offset == other.offset + I::cursor_location(&self.inner) + .cmp(&I::cursor_location(&other.inner)) + .is_eq() } } -impl<'a, I: Input<'a>> PartialOrd for Offset<'a, '_, I> { +impl<'src, I: Input<'src>> PartialOrd for Cursor<'src, '_, I> { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + Some(I::cursor_location(&self.inner).cmp(&I::cursor_location(&other.inner))) } } -impl<'a, I: Input<'a>> Ord for Offset<'a, '_, I> { +impl<'src, I: Input<'src>> Ord for Cursor<'src, '_, I> { fn cmp(&self, other: &Self) -> Ordering { - self.offset.cmp(&other.offset) + I::cursor_location(&self.inner).cmp(&I::cursor_location(&other.inner)) } } @@ -972,7 +1046,7 @@ pub(crate) struct Errors { } impl Errors { - /// Returns a slice of the secondary errors (if any) have been emitted since the given marker was created. + /// Returns a slice of the secondary errors (if any) have been emitted since the given checkpoint was created. #[inline] pub(crate) fn secondary_errors_since(&mut self, err_count: usize) -> &mut [Located] { self.secondary.get_mut(err_count..).unwrap_or(&mut []) @@ -990,28 +1064,31 @@ impl Default for Errors { /// Internal type representing the owned parts of an input - used at the top level by a call to /// `parse`. 
-pub(crate) struct InputOwn<'a, 's, I: Input<'a>, E: ParserExtra<'a, I>> { - pub(crate) input: I, - pub(crate) errors: Errors, +pub(crate) struct InputOwn<'src, 's, I: Input<'src>, E: ParserExtra<'src, I>> { + pub(crate) start: I::Cursor, + pub(crate) cache: I::Cache, + pub(crate) errors: Errors, pub(crate) state: MaybeMut<'s, E::State>, pub(crate) ctx: E::Context, #[cfg(feature = "memoization")] - pub(crate) memos: HashMap<(I::Offset, usize), Option>>, + pub(crate) memos: HashMap<(usize, usize), Option>>, } -impl<'a, 's, I, E> InputOwn<'a, 's, I, E> +impl<'src, 's, I, E> InputOwn<'src, 's, I, E> where - I: Input<'a>, - E: ParserExtra<'a, I>, + I: Input<'src>, + E: ParserExtra<'src, I>, { #[cfg_attr(not(test), allow(dead_code))] - pub(crate) fn new(input: I) -> InputOwn<'a, 's, I, E> + pub(crate) fn new(input: I) -> InputOwn<'src, 's, I, E> where E::State: Default, E::Context: Default, { + let (start, cache) = input.begin(); InputOwn { - input, + start, + cache, errors: Errors::default(), state: MaybeMut::Val(E::State::default()), ctx: E::Context::default(), @@ -1020,12 +1097,14 @@ where } } - pub(crate) fn new_state(input: I, state: &'s mut E::State) -> InputOwn<'a, 's, I, E> + pub(crate) fn new_state(input: I, state: &'s mut E::State) -> InputOwn<'src, 's, I, E> where E::Context: Default, { + let (start, cache) = input.begin(); InputOwn { - input, + start, + cache, errors: Errors::default(), state: MaybeMut::Ref(state), ctx: E::Context::default(), @@ -1034,26 +1113,10 @@ where } } - pub(crate) fn as_ref_start<'parse>(&'parse mut self) -> InputRef<'a, 'parse, I, E> { + pub(crate) fn as_ref_start<'parse>(&'parse mut self) -> InputRef<'src, 'parse, I, E> { InputRef { - offset: self.input.start(), - input: &self.input, - errors: &mut self.errors, - state: &mut self.state, - ctx: &self.ctx, - #[cfg(feature = "memoization")] - memos: &mut self.memos, - } - } - - #[cfg(test)] - pub(crate) fn as_ref_at<'parse>( - &'parse mut self, - offset: I::Offset, - ) -> InputRef<'a, 
'parse, I, E> { - InputRef { - offset, - input: &self.input, + cursor: self.start.clone(), + cache: &mut self.cache, errors: &mut self.errors, state: &mut self.state, ctx: &self.ctx, @@ -1072,30 +1135,30 @@ where } /// Internal type representing an input as well as all the necessary context for parsing. -pub struct InputRef<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> { - pub(crate) offset: I::Offset, - pub(crate) input: &'parse I, - pub(crate) errors: &'parse mut Errors, +pub struct InputRef<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> { + cursor: I::Cursor, + pub(crate) cache: &'parse I::Cache, + pub(crate) errors: &'parse mut Errors, pub(crate) state: &'parse mut E::State, pub(crate) ctx: &'parse E::Context, #[cfg(feature = "memoization")] - pub(crate) memos: &'parse mut HashMap<(I::Offset, usize), Option>>, + pub(crate) memos: &'parse mut HashMap<(usize, usize), Option>>, } -impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> { +impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'parse, I, E> { #[inline] pub(crate) fn with_ctx<'sub_parse, EM, O>( &'sub_parse mut self, new_ctx: &'sub_parse EM::Context, - f: impl FnOnce(&mut InputRef<'a, 'sub_parse, I, EM>) -> O, + f: impl FnOnce(&mut InputRef<'src, 'sub_parse, I, EM>) -> O, ) -> O where 'parse: 'sub_parse, - EM: ParserExtra<'a, I, Error = E::Error, State = E::State>, + EM: ParserExtra<'src, I, Error = E::Error, State = E::State>, { let mut new_inp = InputRef { - input: self.input, - offset: self.offset, + cursor: self.cursor.clone(), + cache: self.cache, state: self.state, ctx: new_ctx, errors: self.errors, @@ -1103,7 +1166,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> memos: self.memos, }; let res = f(&mut new_inp); - self.offset = new_inp.offset; + self.cursor = new_inp.cursor; res } @@ -1111,15 +1174,15 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> pub(crate) fn 
with_state<'sub_parse, S, O>( &'sub_parse mut self, new_state: &'sub_parse mut S, - f: impl FnOnce(&mut InputRef<'a, 'sub_parse, I, extra::Full>) -> O, + f: impl FnOnce(&mut InputRef<'src, 'sub_parse, I, extra::Full>) -> O, ) -> O where 'parse: 'sub_parse, - S: 'a + Inspector<'a, I>, + S: 'src + Inspector<'src, I>, { let mut new_inp = InputRef { - input: self.input, - offset: self.offset, + cursor: self.cursor.clone(), + cache: self.cache, state: new_state, ctx: self.ctx, errors: self.errors, @@ -1127,26 +1190,27 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> memos: self.memos, }; let res = f(&mut new_inp); - self.offset = new_inp.offset; + self.cursor = new_inp.cursor; res } #[inline] pub(crate) fn with_input<'sub_parse, O>( &'sub_parse mut self, - new_input: &'sub_parse I, - f: impl FnOnce(&mut InputRef<'a, 'sub_parse, I, E>) -> O, + start: I::Cursor, + cache: &'sub_parse I::Cache, + f: impl FnOnce(&mut InputRef<'src, 'sub_parse, I, E>) -> O, #[cfg(feature = "memoization")] memos: &'sub_parse mut HashMap< - (I::Offset, usize), - Option>, + (usize, usize), + Option>, >, ) -> O where 'parse: 'sub_parse, { let mut new_inp = InputRef { - offset: new_input.start(), - input: new_input, + cursor: start, + cache, state: self.state, ctx: self.ctx, errors: self.errors, @@ -1156,41 +1220,41 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> f(&mut new_inp) } - /// Get the internal offset of the input at this moment in time. + /// Get the internal cursor of the input at this moment in time. /// /// Can be used for generating spans or slices. See [`InputRef::span`] and [`InputRef::slice`]. 
#[inline(always)] - pub fn offset(&self) -> Offset<'a, 'parse, I> { - Offset { - offset: self.offset, + pub fn cursor(&self) -> Cursor<'src, 'parse, I> { + Cursor { + // TODO: Find ways to avoid this clone, if possible + inner: self.cursor.clone(), phantom: PhantomData, } } - /// Save the current parse state as a [`Marker`]. + /// Save the current parse state as a [`Checkpoint`]. /// /// You can rewind back to this state later with [`InputRef::rewind`]. #[inline(always)] - pub fn save(&self) -> Marker<'a, 'parse, I, >::SaveMarker> { - Marker { - offset: self.offset, + pub fn save(&self) -> Checkpoint<'src, 'parse, I, >::Checkpoint> { + let cursor = self.cursor(); + let inspector = self.state.on_save(&cursor); + Checkpoint { + cursor, err_count: self.errors.secondary.len(), - user_checkpoint: self.state.on_save(self.offset), + inspector, phantom: PhantomData, } } - /// Reset the parse state to that represented by the given [`Marker`]. + /// Reset the parse state to that represented by the given [`Checkpoint`]. /// - /// You can create a marker with which to perform rewinding using [`InputRef::save`]. + /// You can create a checkpoint with which to perform rewinding using [`InputRef::save`]. #[inline(always)] - pub fn rewind( - &mut self, - marker: Marker<'a, 'parse, I, >::SaveMarker>, - ) { - self.errors.secondary.truncate(marker.err_count); - self.offset = marker.offset; - self.state.on_rewind(marker); + pub fn rewind(&mut self, checkpoint: Checkpoint<'src, 'parse, I, >::Checkpoint>) { + self.errors.secondary.truncate(checkpoint.err_count); + self.state.on_rewind(&checkpoint); + self.cursor = checkpoint.cursor.inner; } /// Get a mutable reference to the state associated with the current parse. 
@@ -1212,60 +1276,57 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> #[inline] pub(crate) fn skip_while bool>(&mut self, mut f: F) where - I: ValueInput<'a>, + I: Input<'src>, { loop { - // SAFETY: offset was generated by previous call to `Input::next` - let (offset, token) = unsafe { self.input.next(self.offset) }; - if token.as_ref().filter(|&t| f(t)).is_none() { + let mut cursor = self.cursor.clone(); + // SAFETY: cursor was generated by previous call to `Input::next` + let token = unsafe { I::next_maybe(&mut self.cache, &mut cursor) }; + if token.as_ref().filter(|tok| f((*tok).borrow())).is_none() { break; } else { if let Some(t) = &token { - self.state.on_token(t); + self.state.on_token(t.borrow()); } - self.offset = offset; + self.cursor = cursor; } } } #[inline(always)] - pub(crate) fn next_inner(&mut self) -> (I::Offset, Option) + pub(crate) fn next_inner(&mut self) -> Option where - I: ValueInput<'a>, + I: ValueInput<'src>, { - // SAFETY: offset was generated by previous call to `Input::next` - let (offset, token) = unsafe { self.input.next(self.offset) }; + // SAFETY: cursor was generated by previous call to `Input::next` + let token = unsafe { I::next(&mut self.cache, &mut self.cursor) }; if let Some(t) = &token { self.state.on_token(t); } - self.offset = offset; - (self.offset, token) + token } #[inline(always)] - pub(crate) fn next_maybe_inner(&mut self) -> (I::Offset, Option) { - // SAFETY: offset was generated by previous call to `Input::next` - let (offset, token) = unsafe { self.input.next_maybe(self.offset) }; + pub(crate) fn next_maybe_inner(&mut self) -> Option { + // SAFETY: cursor was generated by previous call to `Input::next` + let token = unsafe { I::next_maybe(&mut self.cache, &mut self.cursor) }; if let Some(t) = &token { - self.state.on_token(Borrow::borrow(t)); + self.state.on_token(t.borrow()); } - let r = (self.offset, token); - self.offset = offset; - r + token } #[inline(always)] - pub(crate) fn 
next_ref_inner(&mut self) -> (I::Offset, Option<&'a I::Token>) + pub(crate) fn next_ref_inner(&mut self) -> Option<&'src I::Token> where - I: BorrowInput<'a>, + I: BorrowInput<'src>, { - // SAFETY: offset was generated by previous call to `Input::next` - let (offset, token) = unsafe { self.input.next_ref(self.offset) }; + // SAFETY: cursor was generated by previous call to `Input::next` + let token = unsafe { I::next_ref(&mut self.cache, &mut self.cursor) }; if let Some(t) = &token { self.state.on_token(t); } - self.offset = offset; - (self.offset, token) + token } /// Attempt to parse this input using the given parser. @@ -1276,14 +1337,14 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// [undefined](https://en.wikipedia.org/wiki/Undefined_behavior)) state. /// /// The only well-specified action you are permitted to perform on the input after an error has occurred is - /// rewinding to a marker created *before* the error occurred via [`InputRef::rewind`]. + /// rewinding to a checkpoint created *before* the error occurred via [`InputRef::rewind`]. /// /// This state is not consistent between releases of chumsky, compilations of the final binary, or even invocations /// of the parser. You should not rely on this state for anything, and choosing to rely on it means that your /// parser may break in unexpected ways at any time. /// /// You have been warned. - pub fn parse>(&mut self, parser: P) -> Result { + pub fn parse>(&mut self, parser: P) -> Result { match parser.go::(self) { Ok(out) => Ok(out), Err(()) => Err(self.take_alt().err), @@ -1295,7 +1356,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// # Import Notice /// /// See [`InputRef::parse`] about unspecified behavior associated with this function. 
- pub fn check>(&mut self, parser: P) -> Result<(), E::Error> { + pub fn check>(&mut self, parser: P) -> Result<(), E::Error> { match parser.go::(self) { Ok(()) => Ok(()), Err(()) => Err(self.take_alt().err), @@ -1312,8 +1373,8 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// /// See [`InputRef::next_ref`] if you want get a reference to the next token instead. #[inline(always)] - pub fn next_maybe(&mut self) -> Option> { - self.next_maybe_inner().1.map(Into::into) + pub fn next_maybe(&mut self) -> Option> { + self.next_maybe_inner().map(Into::into) } /// Get the next token in the input by value. Returns `None` if the end of the input has been reached. @@ -1322,56 +1383,56 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> #[inline(always)] pub fn next(&mut self) -> Option where - I: ValueInput<'a>, + I: ValueInput<'src>, { - self.next_inner().1 + self.next_inner() } /// Get a reference to the next token in the input. Returns `None` if the end of the input has been reached. /// /// See [`InputRef::next`] if you want get the next token by value instead. #[inline(always)] - pub fn next_ref(&mut self) -> Option<&'a I::Token> + pub fn next_ref(&mut self) -> Option<&'src I::Token> where - I: BorrowInput<'a>, + I: BorrowInput<'src>, { - self.next_ref_inner().1 + self.next_ref_inner() } /// Peek the next token in the input. Returns `None` if the end of the input has been reached. /// /// See [`InputRef::next_maybe`] for more information about what this function guarantees. #[inline(always)] - pub fn peek_maybe(&self) -> Option> { - // SAFETY: offset was generated by previous call to `Input::next` - unsafe { self.input.next_maybe(self.offset).1.map(Into::into) } + pub fn peek_maybe(&self) -> Option> { + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::next_maybe(self.cache, &mut self.cursor.clone()).map(Into::into) } } /// Peek the next token in the input. 
Returns `None` if the end of the input has been reached. #[inline(always)] pub fn peek(&self) -> Option where - I: ValueInput<'a>, + I: ValueInput<'src>, { - // SAFETY: offset was generated by previous call to `Input::next` - unsafe { self.input.next(self.offset).1 } + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::next(self.cache, &mut self.cursor.clone()).map(Into::into) } } /// Peek the next token in the input. Returns `None` if the end of the input has been reached. #[inline(always)] - pub fn peek_ref(&self) -> Option<&'a I::Token> + pub fn peek_ref(&self) -> Option<&'src I::Token> where - I: BorrowInput<'a>, + I: BorrowInput<'src>, { - // SAFETY: offset was generated by previous call to `Input::next` - unsafe { self.input.next_ref(self.offset).1 } + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::next_ref(self.cache, &mut self.cursor.clone()).map(Into::into) } } /// Skip the next token in the input. #[inline(always)] pub fn skip(&mut self) where - I: ValueInput<'a>, + I: ValueInput<'src>, { let _ = self.next_inner(); } @@ -1380,156 +1441,150 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> #[inline] pub(crate) fn full_slice(&self) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.input.full_slice() + I::full_slice(self.cache) } - /// Get a slice of the input that covers the given offset range. + /// Get a slice of the input that covers the given cursor range. #[inline] - pub fn slice(&self, range: Range>) -> I::Slice + pub fn slice(&self, range: Range<&Cursor<'src, 'parse, I>>) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.slice_inner(range.start.offset..range.end.offset) + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::slice(self.cache, &range.start.inner..&range.end.inner) } } - /// Get a slice of the input that covers the given offset range. 
+ /// Get a slice of the input that covers the given cursor range. #[inline] - pub fn slice_from(&self, range: RangeFrom>) -> I::Slice + pub fn slice_from(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.slice_from_inner(range.start.offset..) + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::slice_from(self.cache, &range.start.inner..) } } - /// Get a slice of the input that covers the given offset range. + /// Get a slice of the input that covers the given cursor range. #[inline] - pub fn slice_since(&self, range: RangeFrom>) -> I::Slice - where - I: SliceInput<'a>, - { - self.slice_inner(range.start.offset..self.offset) - } - - // TODO: Unofy with `InputRef::slice` - #[inline(always)] - pub(crate) fn slice_inner(&self, range: Range) -> I::Slice + pub fn slice_since(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.input.slice(range) - } - - #[allow(dead_code)] - #[inline(always)] - pub(crate) fn slice_from_inner(&self, range: RangeFrom) -> I::Slice - where - I: SliceInput<'a>, - { - self.input.slice_from(range) + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::slice(self.cache, &range.start.inner..&self.cursor) } } #[cfg_attr(not(feature = "lexical-numbers"), allow(dead_code))] #[inline(always)] pub(crate) fn slice_trailing_inner(&self) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.input.slice_from(self.offset..) + // SAFETY: cursor was generated by previous call to `Input::next` + unsafe { I::slice_from(self.cache, &self.cursor..) } } - /// Get a span over the input that covers the given offset range. 
- #[inline(always)] - pub fn span(&self, range: Range>) -> I::Span { - // SAFETY: `Offset` is invariant over 'parse, so we know that this offset came from the same input - // See `https://plv.mpi-sws.org/rustbelt/ghostcell/` - unsafe { self.input.span(range.start.offset..range.end.offset) } - } + // /// Get a span over the input that covers the given cursor range. + // #[inline(always)] + // pub fn span(&self, range: Range<&Cursor<'src, 'parse, I>>) -> I::Span { + // // SAFETY: `Cursor` is invariant over 'parse, so we know that this cursor came from the same input + // // See `https://plv.mpi-sws.org/rustbelt/ghostcell/` + // unsafe { I::span(self.cache, &range.start.inner..&range.end.inner) } + // } - /// Get a span over the input that covers the given offset range. + /// Get a span over the input that goes from the given cursor to the end of the input. // TODO: Unofy with `InputRef::span` #[inline(always)] - pub fn span_from(&self, range: RangeFrom>) -> I::Span + pub fn span_from(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Span where - I: ExactSizeInput<'a>, + I: ExactSizeInput<'src>, { - // SAFETY: `Offset` is invariant over 'parse, so we know that this offset came from the same input + // SAFETY: `Cursor` is invariant over 'parse, so we know that this cursor came from the same input // See `https://plv.mpi-sws.org/rustbelt/ghostcell/` - unsafe { self.input.span_from(range.start.offset..) } + unsafe { I::span_from(self.cache, &range.start.inner..) } } - /// Generate a span that extends from the provided [`Offset`] to the current input position. + /// Generate a span that extends from the provided [`Cursor`] to the current input position. 
#[inline(always)] - pub fn span_since(&self, before: Offset<'a, 'parse, I>) -> I::Span { - // SAFETY: `Offset` is invariant over 'parse, so we know that this offset came from the same input + pub fn span_since(&self, before: &Cursor<'src, 'parse, I>) -> I::Span { + // SAFETY: `Cursor` is invariant over 'parse, so we know that this cursor came from the same input // See `https://plv.mpi-sws.org/rustbelt/ghostcell/` - unsafe { self.input.span(before.offset..self.offset) } + unsafe { I::span(self.cache, &before.inner..&self.cursor) } } + /// SAFETY: Previous cursor + skip must not exceed length #[inline(always)] #[cfg(any(feature = "regex", feature = "lexical-numbers"))] - pub(crate) fn skip_bytes(&mut self, skip: usize) + pub(crate) unsafe fn skip_bytes(&mut self, skip: usize) where - I: SliceInput<'a, Offset = usize>, + I: SliceInput<'src, Cursor = usize>, { - self.offset += skip; + self.cursor += skip; } #[inline] - pub(crate) fn emit(&mut self, pos: I::Offset, error: E::Error) { - self.errors.secondary.push(Located::at(pos, error)); + pub(crate) fn emit(&mut self, error: E::Error) { + self.errors + .secondary + .push(Located::at(self.cursor.clone(), error)); } #[inline] - pub(crate) fn add_alt>>>( + pub(crate) fn add_alt>>>( &mut self, - at: I::Offset, expected: Exp, - found: Option>, + found: Option>, span: I::Span, ) { if core::mem::size_of::() == 0 { return; } + + let at = &self.cursor.clone(); + // Prioritize errors before choosing whether to generate the alt (avoids unnecessary error creation) self.errors.alt = Some(match self.errors.alt.take() { - Some(alt) => match alt.pos.into().cmp(&at.into()) { + Some(alt) => match I::cursor_location(&alt.pos).cmp(&I::cursor_location(at)) { Ordering::Equal => { Located::at(alt.pos, alt.err.merge_expected_found(expected, found, span)) } Ordering::Greater => alt, - Ordering::Less => { - Located::at(at, alt.err.replace_expected_found(expected, found, span)) - } + Ordering::Less => Located::at( + at.clone(), + 
alt.err.replace_expected_found(expected, found, span), + ), }, - None => Located::at(at, Error::expected_found(expected, found, span)), + None => Located::at(at.clone(), Error::expected_found(expected, found, span)), }); } #[inline] - pub(crate) fn add_alt_err(&mut self, at: I::Offset, err: E::Error) { + pub(crate) fn add_alt_err(&mut self, at: &I::Cursor, err: E::Error) { if core::mem::size_of::() == 0 { return; } // Prioritize errors self.errors.alt = Some(match self.errors.alt.take() { - Some(alt) => match alt.pos.into().cmp(&at.into()) { + Some(alt) => match I::cursor_location(&alt.pos).cmp(&I::cursor_location(at)) { Ordering::Equal => Located::at(alt.pos, alt.err.merge(err)), Ordering::Greater => alt, - Ordering::Less => Located::at(at, err), + Ordering::Less => Located::at(at.clone(), err), }, - None => Located::at(at, err), + None => Located::at(at.clone(), err), }); } // Take the alt error. If one doesn't exist, generate a fake one. - pub(crate) fn take_alt(&mut self) -> Located { - let start = self.offset(); - let fake_span = self.span_since(start); + pub(crate) fn take_alt(&mut self) -> Located { + let fake_span = self.span_since(&self.cursor()); self.errors.alt.take().unwrap_or_else(|| { - Located::at(start.offset, E::Error::expected_found([], None, fake_span)) + Located::at( + self.cursor.clone(), + E::Error::expected_found([], None, fake_span), + ) }) } } @@ -1560,45 +1615,46 @@ impl Emitter { } /// See [`Parser::map_with`]. 
-pub struct MapExtra<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> { - before: I::Offset, - after: I::Offset, - inp: &'b I, +pub struct MapExtra<'src, 'b, I: Input<'src>, E: ParserExtra<'src, I>> { + before: &'b I::Cursor, + after: &'b I::Cursor, + cache: &'b I::Cache, state: &'b mut E::State, ctx: &'b E::Context, } -impl<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, I, E> { +impl<'src, 'b, I: Input<'src>, E: ParserExtra<'src, I>> MapExtra<'src, 'b, I, E> { #[inline(always)] pub(crate) fn new<'parse>( - before: Offset<'a, 'parse, I>, - inp: &'b mut InputRef<'a, 'parse, I, E>, + before: &'b Cursor<'src, 'parse, I>, + inp: &'b mut InputRef<'src, 'parse, I, E>, ) -> Self { Self { - before: before.offset, - after: inp.offset, + before: &before.inner, + after: &inp.cursor, + cache: inp.cache, ctx: inp.ctx, state: inp.state, - inp: inp.input, } } /// Get the span corresponding to the output. #[inline(always)] pub fn span(&self) -> I::Span { - // SAFETY: The offsets both came from the same input + // SAFETY: The cursors both came from the same input // TODO: Should this make `MapExtra::new` unsafe? Probably, but it's an internal API and we simply wouldn't // ever abuse it in this way, even accidentally. - unsafe { self.inp.span(self.before..self.after) } + unsafe { I::span(self.cache, self.before..self.after) } } /// Get the slice corresponding to the output. #[inline(always)] pub fn slice(&self) -> I::Slice where - I: SliceInput<'a>, + I: SliceInput<'src>, { - self.inp.slice(self.before..self.after) + // SAFETY: The cursors both came from the same input + unsafe { I::slice(self.cache, self.before..self.after) } } /// Get the parser state. diff --git a/src/inspector.rs b/src/inspector.rs index 3dc3f7db..871c4fed 100644 --- a/src/inspector.rs +++ b/src/inspector.rs @@ -4,7 +4,7 @@ //! and rubber bungs in his ears. He knew exactly what was happening and had known //! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night //! 
beside his pillar and woken him with a start."* -use crate::{input::Marker, Input}; +use crate::{input::{Checkpoint, Cursor}, Input}; use core::ops::{Deref, DerefMut}; #[allow(unused)] // for intra-doc links @@ -13,40 +13,46 @@ use crate::Parser; /// A type that receives event hooks when certain parsing actions occur. /// /// If you don't need to receive event hooks, use [`SimpleState`]. -pub trait Inspector<'a, I: Input<'a>> { +pub trait Inspector<'src, I: Input<'src>> { /// A type the Inspector can use to revert to a previous state. /// /// For implementation reasons, this is required to be `Copy + Clone`. - type SaveMarker: Copy + Clone; + type Checkpoint: Copy + Clone; /// This function is called when a new token is read from the input stream. - // impl note: this should be called only when `self.offset` is updated, not when we only peek at the next token. + // impl note: this should be called only when `self.cursor` is updated, not when we only peek at the next token. fn on_token(&mut self, token: &I::Token); /// This function is called when a combinator saves the current state of the parse. - fn on_save(&self, offset: I::Offset) -> Self::SaveMarker; + fn on_save<'parse>(&self, cursor: &Cursor<'src, 'parse, I>) -> Self::Checkpoint; /// This function is called when a combinator rewinds to an earlier state of the parser. /// - /// You can use [`Marker::ext_checkpoint`] to get back the [`SaveMarker`][Self::SaveMarker] + /// You can use [`Checkpoint::inspector`] to get back the [`Checkpoint`][Self::Checkpoint] /// you originally created in [`on_save`][Self::on_save]. 
- fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>); + fn on_rewind<'parse>(&mut self, marker: &Checkpoint<'src, 'parse, I, Self::Checkpoint>); } -impl<'a, I: Input<'a>> Inspector<'a, I> for () { - type SaveMarker = (); - fn on_token(&mut self, _: &>::Token) {} - fn on_save(&self, _: >::Offset) -> Self::SaveMarker {} - fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self>) {} +impl<'src, I: Input<'src>> Inspector<'src, I> for () { + type Checkpoint = (); + #[inline(always)] + fn on_token(&mut self, _: &>::Token) {} + #[inline(always)] + fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} + #[inline(always)] + fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self>) {} } /// A state type that should be accessible directly from `parser.state()` and has no special behavior. /// /// This wrapper implements the [`Inspector`] trait for you so you don't have to. pub struct SimpleState(pub T); -impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState { - type SaveMarker = (); - fn on_token(&mut self, _: &>::Token) {} - fn on_save<'parse>(&self, _: >::Offset) -> Self::SaveMarker {} - fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self::SaveMarker>) {} +impl<'src, T, I: Input<'src>> Inspector<'src, I> for SimpleState { + type Checkpoint = (); + #[inline(always)] + fn on_token(&mut self, _: &>::Token) {} + #[inline(always)] + fn on_save<'parse>(&self, _: &Cursor<'src, 'parse, I>) -> Self::Checkpoint {} + #[inline(always)] + fn on_rewind<'parse>(&mut self, _: &Checkpoint<'src, 'parse, I, Self::Checkpoint>) {} } impl Deref for SimpleState { diff --git a/src/label.rs b/src/label.rs index 4ba4d351..15083bbb 100644 --- a/src/label.rs +++ b/src/label.rs @@ -57,22 +57,22 @@ where inp.errors.alt = old_alt; if let Some(mut new_alt) = new_alt { - let before_next = before.offset.into(); - if new_alt.pos.into() == before_next { + let before_loc = I::cursor_location(&before.cursor().inner); + let 
new_alt_loc = I::cursor_location(&new_alt.pos); + if new_alt_loc == before_loc { new_alt.err.label_with(self.label.clone()); - } else if self.is_context && new_alt.pos.into() > before_next { - // SAFETY: offsets generated by previous call to `InputRef::next` (or similar). - let span = unsafe { inp.input.span(before.offset..new_alt.pos) }; + } else if self.is_context && new_alt_loc > before_loc { + // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). + let span = unsafe { I::span(inp.cache, &before.cursor().inner..&new_alt.pos) }; new_alt.err.in_context(self.label.clone(), span); } - inp.add_alt_err(new_alt.pos, new_alt.err); + inp.add_alt_err(&new_alt.pos, new_alt.err); } if self.is_context { - let before_offset = before.offset; for err in inp.errors.secondary_errors_since(before.err_count) { - // SAFETY: offsets generated by previous call to `InputRef::next` (or similar). - let span = unsafe { inp.input.span(before_offset..err.pos) }; + // SAFETY: cursors generated by previous call to `InputRef::next` (or similar). 
+ let span = unsafe { I::span(inp.cache, &before.cursor().inner..&err.pos) }; err.err.in_context(self.label.clone(), span); } } diff --git a/src/lib.rs b/src/lib.rs index 2e5a6a41..f5952fdb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -117,7 +117,7 @@ use core::marker::Tuple; use core::{ borrow::Borrow, cell::{Cell, RefCell}, - cmp::{Eq, Ordering}, + cmp::{Eq, Ord, Ordering}, fmt, hash::Hash, marker::PhantomData, @@ -2246,7 +2246,6 @@ where pub struct ParserIter<'a, 'iter, P: IterParser<'a, I, O, E>, I: Input<'a>, O, E: ParserExtra<'a, I>> { parser: P, - offset: I::Offset, own: InputOwn<'a, 'iter, I, E>, iter_state: Option>, #[allow(dead_code)] @@ -2261,7 +2260,7 @@ where type Item = O; fn next(&mut self) -> Option { - let mut inp = self.own.as_ref_at(self.offset); + let mut inp = self.own.as_ref_start(); let parser = &self.parser; let iter_state = match &mut self.iter_state { @@ -2274,7 +2273,8 @@ where }; let res = parser.next::(&mut inp, iter_state); - self.offset = inp.offset; + // TODO: Avoid clone + self.own.start = inp.cursor().inner; res.ok().and_then(|res| res) } } @@ -2511,7 +2511,6 @@ where ParseResult::new( Some(ParserIter { parser: self, - offset: input.start(), own: InputOwn::new(input), iter_state: None, phantom: EmptyPhantom::new(), @@ -2537,7 +2536,6 @@ where ParseResult::new( Some(ParserIter { parser: self, - offset: input.start(), own: InputOwn::new_state(input, state), iter_state: None, phantom: EmptyPhantom::new(), diff --git a/src/number.rs b/src/number.rs index f13503ad..d219da72 100644 --- a/src/number.rs +++ b/src/number.rs @@ -33,21 +33,22 @@ pub const fn number() -> Number { impl<'a, const F: u128, I, O, E> ParserSealed<'a, I, O, E> for Number where O: FromLexical, - I: SliceInput<'a, Offset = usize>, + I: SliceInput<'a, Cursor = usize>, >::Slice: AsRef<[u8]>, E: ParserExtra<'a, I>, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); match 
parse_partial(inp.slice_trailing_inner().as_ref()) { Ok((out, skip)) => { - inp.skip_bytes(skip); + // SAFETY: `skip` is no longer than the trailing input's byte length + unsafe { inp.skip_bytes(skip) }; Ok(M::bind(|| out)) } Err(_err) => { // TODO: Improve error - inp.add_alt(inp.offset().offset, None, None, inp.span_since(before)); + inp.add_alt(None, None, inp.span_since(&before)); Err(()) } } diff --git a/src/pratt.rs b/src/pratt.rs index cc617cf6..1c630c70 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -430,12 +430,12 @@ macro_rules! impl_pratt_for_tuple { Ok(op) => { match recursive::recurse(|| self.pratt_go::(inp, $X.associativity().left_power())) { Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { - $X.fold_prefix(op, rhs, &mut MapExtra::new(pre_expr.offset(), inp)) + $X.fold_prefix(op, rhs, &mut MapExtra::new(pre_expr.cursor(), inp)) }), - Err(()) => inp.rewind(pre_expr), + Err(()) => inp.rewind(pre_expr.clone()), } }, - Err(()) => inp.rewind(pre_expr), + Err(()) => inp.rewind(pre_expr.clone()), } } )* @@ -455,11 +455,11 @@ macro_rules! impl_pratt_for_tuple { match $X.op_parser().go::(inp) { Ok(op) => { lhs = M::combine(lhs, op, |lhs, op| { - $X.fold_postfix(lhs, op, &mut MapExtra::new(pre_expr.offset(), inp)) + $X.fold_postfix(lhs, op, &mut MapExtra::new(pre_expr.cursor(), inp)) }); continue }, - Err(()) => inp.rewind(pre_op), + Err(()) => inp.rewind(pre_op.clone()), } } )* @@ -475,14 +475,14 @@ macro_rules! 
impl_pratt_for_tuple { M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), op, |(lhs, rhs), op| { - $X.fold_infix(lhs, op, rhs, &mut MapExtra::new(pre_expr.offset(), inp)) + $X.fold_infix(lhs, op, rhs, &mut MapExtra::new(pre_expr.cursor(), inp)) }, ); continue }, - Err(()) => inp.rewind(pre_op), + Err(()) => inp.rewind(pre_op.clone()), }, - Err(()) => inp.rewind(pre_op), + Err(()) => inp.rewind(pre_op.clone()), } } )* diff --git a/src/primitive.rs b/src/primitive.rs index fc223393..d9359ab8 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -41,11 +41,11 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_maybe_inner() { - (_, None) => Ok(M::bind(|| ())), - (at, Some(tok)) => { - inp.add_alt(at, Some(None), Some(tok.into()), inp.span_since(before)); + None => Ok(M::bind(|| ())), + Some(tok) => { + inp.add_alt(Some(None), Some(tok.into()), inp.span_since(&before)); Err(()) } } @@ -184,15 +184,14 @@ where let seq = cfg.seq.as_ref().unwrap_or(&self.seq); if let Some(()) = seq.seq_iter().find_map(|next| { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_maybe_inner() { - (_, Some(tok)) if next.borrow() == tok.borrow() => None, - (at, found) => { + Some(tok) if next.borrow() == tok.borrow() => None, + found => { inp.add_alt( - at, Some(Some(T::to_maybe_ref(next))), found.map(|f| f.into()), - inp.span_since(before), + inp.span_since(&before), ); Some(()) } @@ -262,14 +261,13 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_inner() { #[allow(suspicious_double_ref_op)] // Is this a clippy bug? 
- (_, Some(tok)) if self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), - (at, found) => { - let err_span = inp.span_since(before); + Some(tok) if self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), + found => { + let err_span = inp.span_since(&before); inp.add_alt( - at, self.seq.seq_iter().map(|e| Some(T::to_maybe_ref(e))), found.map(|f| f.into()), err_span, @@ -337,13 +335,13 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_inner() { - #[allow(suspicious_double_ref_op)] // Is this a clippy bug? - (_, Some(tok)) if !self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), - (at, found) => { - let err_span = inp.span_since(before); - inp.add_alt(at, None, found.map(|f| f.into()), err_span); + // #[allow(suspicious_double_ref_op)] // Is this a clippy bug? + Some(tok) if !self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), + found => { + let err_span = inp.span_since(&before); + inp.add_alt(None, found.map(|f| f.into()), err_span); Err(()) } } @@ -403,11 +401,11 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match (self.f)(inp) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { - inp.add_alt_err(before.offset, err); + inp.add_alt_err(&before.inner, err); Err(()) } } @@ -456,17 +454,17 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); let next = inp.next_inner(); - let err_span = inp.span_since(before); - let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok.clone(), &mut MapExtra::new(before, inp)) { + let err_span = inp.span_since(&before); + let found = match next { + Some(tok) => match (self.filter)(tok.clone(), &mut MapExtra::new(&before, inp)) { Some(out) => return Ok(M::bind(|| out)), - None => (at, Some(tok.into())), + None 
=> Some(tok.into()), }, - (at, found) => (at, found.map(|f| f.into())), + found => found.map(|f| f.into()), }; - inp.add_alt(at, None, found, err_span); + inp.add_alt(None, found, err_span); Err(()) } @@ -513,17 +511,17 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); let next = inp.next_ref_inner(); - let err_span = inp.span_since(before); - let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok, &mut MapExtra::new(before, inp)) { + let found = match next { + Some(tok) => match (self.filter)(tok, &mut MapExtra::new(&before, inp)) { Some(out) => return Ok(M::bind(|| out)), - None => (at, Some(tok.into())), + None => Some(tok.into()), }, - (at, found) => (at, found.map(|f| f.into())), + found => found.map(|f| f.into()), }; - inp.add_alt(at, None, found, err_span); + let err_span = inp.span_since(&before); + inp.add_alt(None, found, err_span); Err(()) } @@ -550,12 +548,12 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_inner() { - (_, Some(tok)) => Ok(M::bind(|| tok)), - (at, found) => { - let err_span = inp.span_since(before); - inp.add_alt(at, None, found.map(|f| f.into()), err_span); + Some(tok) => Ok(M::bind(|| tok)), + found => { + let err_span = inp.span_since(&before); + inp.add_alt(None, found.map(|f| f.into()), err_span); Err(()) } } @@ -605,12 +603,12 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor().clone(); match inp.next_ref_inner() { - (_, Some(tok)) => Ok(M::bind(|| tok)), - (at, found) => { - let err_span = inp.span_since(before); - inp.add_alt(at, None, found.map(|f| f.into()), err_span); + Some(tok) => Ok(M::bind(|| tok)), + found => { + let err_span = inp.span_since(&before); + inp.add_alt(None, found.map(|f| f.into()), err_span); 
Err(()) } } @@ -885,19 +883,19 @@ macro_rules! impl_choice_for_tuple { { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save(); + let before = inp.save().clone(); let Choice { parsers: ($Head, $($X,)*), .. } = self; match $Head.go::(inp) { Ok(out) => return Ok(out), - Err(()) => inp.rewind(before), + Err(()) => inp.rewind(before.clone()), } $( match $X.go::(inp) { Ok(out) => return Ok(out), - Err(()) => inp.rewind(before), + Err(()) => inp.rewind(before.clone()), } )* @@ -935,22 +933,19 @@ where #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { if self.parsers.is_empty() { - let offs = inp.offset(); - let err_span = inp.span_since(offs); - inp.add_alt(offs.offset, None, None, err_span); + let offs = inp.cursor(); + let err_span = inp.span_since(&offs); + inp.add_alt(None, None, err_span); Err(()) } else { let before = inp.save(); - match self.parsers.iter().find_map(|parser| { - inp.rewind(before); - match parser.go::(inp) { - Ok(out) => Some(out), - Err(()) => None, + for parser in self.parsers.iter() { + inp.rewind(before.clone()); + if let Ok(out) = parser.go::(inp) { + return Ok(out); } - }) { - Some(out) => Ok(out), - None => Err(()), } + Err(()) } } diff --git a/src/recovery.rs b/src/recovery.rs index 0c214646..8f60bf67 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -47,7 +47,7 @@ where return Err(()); } }; - inp.emit(inp.offset, alt.err); + inp.emit(alt.err); Ok(out) } } @@ -67,11 +67,11 @@ where S: Strategy<'a, I, O, E>, { fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save(); + let before = inp.save().clone(); match self.parser.go::(inp) { Ok(out) => Ok(out), Err(()) => { - inp.rewind(before); + inp.rewind(before.clone()); match self.strategy.recover::(inp, &self.parser) { Ok(out) => Ok(out), Err(()) => { @@ -130,7 +130,7 @@ where .secondary_errors_since(before.err_count) .is_empty() }) { - inp.emit(inp.offset, alt.err); + inp.emit(alt.err); break Ok(out); 
} else { inp.errors.alt.take(); @@ -172,7 +172,7 @@ where loop { let before = inp.save(); if let Ok(()) = self.until.go::(inp) { - inp.emit(inp.offset, alt.err); + inp.emit(alt.err); break Ok(M::bind(|| (self.fallback)())); } inp.rewind(before); diff --git a/src/regex.rs b/src/regex.rs index b72bf20b..d5378880 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -35,24 +35,27 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); + let before = inp.cursor(); let re_in = ReInput::new(inp.full_slice()) .anchored(Anchored::Yes) - .range(before.offset..); + .range(before.inner..); let res = self.regex.find(re_in).map(|m| m.len()); match res { Some(len) => { - let before = inp.offset(); - inp.skip_bytes(len); - let after = inp.offset(); - Ok(M::bind(|| inp.slice_inner(before.offset..after.offset))) + let before = inp.cursor(); + // SAFETY: `len` *must* be no greater than the byte length of the remaining string + unsafe { + inp.skip_bytes(len); + } + let after = inp.cursor(); + Ok(M::bind(|| inp.slice(&before..&after))) } None => { // TODO: Improve error - inp.add_alt(inp.offset().offset, None, None, inp.span_since(before)); + inp.add_alt(None, None, inp.span_since(&before)); Err(()) } } diff --git a/src/span.rs b/src/span.rs index 3b73cf2e..c36af703 100644 --- a/src/span.rs +++ b/src/span.rs @@ -30,7 +30,7 @@ pub trait Span { /// means that it's perfectly fine for tokens to have non-continuous spans that bear no relation to their actual /// location in the input stream. This is useful for languages with an AST-level macro system that need to /// correctly point to symbols in the macro input when producing errors. - type Offset; + type Offset: Clone; /// Create a new span given a context and an offset range. 
fn new(context: Self::Context, range: Range) -> Self; diff --git a/src/stream.rs b/src/stream.rs index 2a242456..88019e4e 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -62,30 +62,36 @@ impl<'a, I: Iterator + 'a> Input<'a> for Stream where I::Item: Clone, { - type Offset = usize; - type Token = I::Item; type Span = SimpleSpan; - #[inline(always)] - fn start(&self) -> Self::Offset { - 0 - } - + type Token = I::Item; type TokenMaybe = I::Item; + type Cursor = usize; + + type Cache = Self; + #[inline(always)] - unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { - self.next(offset) + fn begin(self) -> (Self::Cursor, Self::Cache) { + (0, self) + } + + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + *cursor } #[inline(always)] - unsafe fn span(&self, range: Range) -> Self::Span { - range.into() + unsafe fn next_maybe( + cache: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + Self::next(cache, cursor) } - #[inline] - fn prev(offs: Self::Offset) -> Self::Offset { - offs.saturating_sub(1) + #[inline(always)] + unsafe fn span(_cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + (*range.start..*range.end).into() } } @@ -94,12 +100,12 @@ where I::Item: Clone, { #[inline(always)] - unsafe fn span_from(&self, range: RangeFrom) -> Self::Span { + unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { let mut other = Cell::new((Vec::new(), None)); - self.tokens.swap(&other); + cache.tokens.swap(&other); let len = other.get_mut().1.as_ref().expect("no iterator?!").len(); - self.tokens.swap(&other); - (range.start..len).into() + cache.tokens.swap(&other); + (*range.start..len).into() } } @@ -108,23 +114,88 @@ where I::Item: Clone, { #[inline] - unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { + unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { let mut other = Cell::new((Vec::new(), None)); - self.tokens.swap(&other); + 
cache.tokens.swap(&other); let (vec, iter) = other.get_mut(); // Pull new items into the vector if we need them - if vec.len() <= offset { + if vec.len() <= *cursor { vec.extend(iter.as_mut().expect("no iterator?!").take(500)); } - // Get the token at the given offset - let tok = vec.get(offset).cloned(); + // Get the token at the given cursor + let tok = vec.get(*cursor).cloned(); + + cache.tokens.swap(&other); + + *cursor += tok.is_some() as usize; - self.tokens.swap(&other); + tok + } +} + +/// An input type that uses an iterator to generate tokens. +/// +/// This input type supports backtracking by duplicating the iterator. It is recommended that your iterator is very +/// cheap to copy/clone. +pub struct IterInput { + iter: I, + eoi: S, +} + +impl IterInput { + /// Create a new [`IterInput`] with the given iterator, and end of input span. + pub fn new(iter: I, eoi: S) -> Self { + Self { iter, eoi } + } +} + +impl<'src, I, T: 'src, S> Input<'src> for IterInput +where + I: Iterator + Clone + 'src, + S: Span + 'src, +{ + type Cursor = (I, usize, Option); + type Span = S; + + type Token = T; + type TokenMaybe = T; + + type Cache = S; // eoi + + #[inline] + fn begin(self) -> (Self::Cursor, Self::Cache) { + ((self.iter, 0, None), self.eoi) + } + + #[inline] + fn cursor_location(cursor: &Self::Cursor) -> usize { + cursor.1 + } + + unsafe fn next_maybe( + _eoi: &Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option { + cursor.0.next().map(|(tok, span)| { + cursor.1 += 1; + cursor.2 = Some(span.end()); + tok + }) + } - (offset + tok.is_some() as usize, tok) + unsafe fn span(eoi: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + let start = range + .start + .0 + .clone() + .next() + .map(|(_, s)| s.start()) + .unwrap_or_else(|| eoi.start()); + let end = range.end.2.clone().unwrap_or_else(|| eoi.end()); + S::new(eoi.context(), start..end) } } From 61cfa74be199f7b767de4663334725032dff132d Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 
11:07:14 +0100 Subject: [PATCH 2/9] Fixed tests --- src/lib.rs | 4 ++-- src/primitive.rs | 19 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f5952fdb..06cc5366 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3502,11 +3502,11 @@ mod tests { let mut err = as crate::Error<&str>>::expected_found( Some(Some('h'.into())), - Some('g'.into()), + Some('b'.into()), (0..1).into(), ); as LabelError<&str, _>>::label_with(&mut err, "greeting"); - assert_eq!(parser().parse("goodbye").into_errors(), vec![err]); + assert_eq!(parser().parse("bye").into_errors(), vec![err]); let mut err = as crate::Error<&str>>::expected_found( Some(Some('l'.into())), diff --git a/src/primitive.rs b/src/primitive.rs index d9359ab8..c1e5fbd6 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -182,25 +182,24 @@ where cfg: Self::Config, ) -> PResult { let seq = cfg.seq.as_ref().unwrap_or(&self.seq); - - if let Some(()) = seq.seq_iter().find_map(|next| { - let before = inp.cursor().clone(); + for next in seq.seq_iter() { + let before = inp.save(); match inp.next_maybe_inner() { - Some(tok) if next.borrow() == tok.borrow() => None, + Some(tok) if next.borrow() == tok.borrow() => {} found => { + let span = inp.span_since(before.cursor()); + inp.rewind(before); inp.add_alt( Some(Some(T::to_maybe_ref(next))), found.map(|f| f.into()), - inp.span_since(&before), + span, ); - Some(()) + return Err(()); } } - }) { - Err(()) - } else { - Ok(M::bind(|| seq.clone())) } + + Ok(M::bind(|| seq.clone())) } go_cfg_extra!(T); From a2ee755cfb740b36afd5da4bb919b780cf51c3d1 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 11:13:30 +0100 Subject: [PATCH 3/9] Added safety section to docs --- src/combinator.rs | 3 --- src/input.rs | 30 ++++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index 0b7b4ab1..e9aa81ca 100644 --- a/src/combinator.rs +++ 
b/src/combinator.rs @@ -175,7 +175,6 @@ where inp.ctx(), inp.span_since(&inp.cursor()), ) - // TODO: Don't clone .map_err(|e| inp.add_alt_err(&inp.cursor().inner, e))?; Ok((A::make_iter(&self.parser, inp)?, cfg)) @@ -596,7 +595,6 @@ where match (self.mapper)(out, &mut MapExtra::new(&before, inp)) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { - // TODO: Don't clone inp.add_alt_err(&inp.cursor().inner, err); Err(()) } @@ -994,7 +992,6 @@ where let new_alt = inp.errors.alt.take(); inp.errors.alt = alt; if let Some(new_alt) = new_alt { - // TODO: Don't clone inp.add_alt_err(&inp.cursor().inner, new_alt.err); } diff --git a/src/input.rs b/src/input.rs index 8b017ae6..b176375e 100644 --- a/src/input.rs +++ b/src/input.rs @@ -29,7 +29,7 @@ pub trait MaybeOwned<'src, T: 'src>: ) -> Self::Proj; } -impl<'src, T> sealed::Sealed for &'src T {} +impl sealed::Sealed for &T {} impl<'src, T> MaybeOwned<'src, T> for &'src T { type Proj = &'src U; fn choose( @@ -177,6 +177,10 @@ pub trait Input<'src>: 'src { /// Implement by inputs that have a known size (including spans) pub trait ExactSizeInput<'src>: Input<'src> { /// Get a span from a start cursor to the end of the input. + /// + /// # Safety + /// + /// As with functions on [`Input`], the cursors provided must be generated by this input. unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span; } @@ -186,14 +190,24 @@ pub trait SliceInput<'src>: ExactSizeInput<'src> { type Slice; /// Get the full slice of the input + /// + /// # Safety + /// + /// As with functions on [`Input`], the cursors provided must be generated by this input. fn full_slice(cache: &Self::Cache) -> Self::Slice; /// Get a slice from a start and end cursor - // TODO: Make unsafe + /// + /// # Safety + /// + /// As with functions on [`Input`], the cursors provided must be generated by this input. 
unsafe fn slice(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice; /// Get a slice from a start cursor to the end of the input - // TODO: Make unsafe + /// + /// # Safety + /// + /// As with functions on [`Input`], the cursors provided must be generated by this input. unsafe fn slice_from(cache: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice; } @@ -211,7 +225,7 @@ pub trait ValueInput<'src>: Input<'src> { /// /// # Safety /// - /// `cursor` must be generated by either `Input::start` or a previous call to this function. + /// As with functions on [`Input`], the cursors provided must be generated by this input. unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option; } @@ -221,7 +235,7 @@ pub trait BorrowInput<'src>: Input<'src> { /// /// # Safety /// - /// Same as [`ValueInput::next`] + /// As with functions on [`Input`], the cursors provided must be generated by this input. unsafe fn next_ref(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option<&'src Self::Token>; } @@ -285,7 +299,7 @@ impl<'src> ValueInput<'src> for &'src str { } } -impl<'src> Sealed for &'src str {} +impl Sealed for &str {} impl<'src> StrInput<'src, char> for &'src str {} impl<'src> SliceInput<'src> for &'src str { @@ -352,7 +366,7 @@ impl<'src, T> ExactSizeInput<'src> for &'src [T] { } } -impl<'src> Sealed for &'src [u8] {} +impl Sealed for &[u8] {} impl<'src> StrInput<'src, u8> for &'src [u8] {} impl<'src, T> SliceInput<'src> for &'src [T] { @@ -433,7 +447,7 @@ impl<'src, T: 'src, const N: usize> ExactSizeInput<'src> for &'src [T; N] { } } -impl<'src, const N: usize> Sealed for &'src [u8; N] {} +impl Sealed for &[u8; N] {} impl<'src, const N: usize> StrInput<'src, u8> for &'src [u8; N] {} impl<'src, T: 'src, const N: usize> SliceInput<'src> for &'src [T; N] { From da4f6fc7822326a3d6acec2cfa0a8e918e419787 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 11:29:24 +0100 Subject: [PATCH 4/9] Allow input caches to be mutable --- 
src/combinator.rs | 12 +-- src/input.rs | 233 +++++++++++++++++++++++++--------------------- src/number.rs | 3 +- src/primitive.rs | 3 +- src/regex.rs | 3 +- src/stream.rs | 54 +++++------ 6 files changed, 160 insertions(+), 148 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index e9aa81ca..c1aaf8e4 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -170,12 +170,9 @@ where &self, inp: &mut InputRef<'a, '_, I, E>, ) -> PResult> { - let cfg = (self.cfg)( - A::Config::default(), - inp.ctx(), - inp.span_since(&inp.cursor()), - ) - .map_err(|e| inp.add_alt_err(&inp.cursor().inner, e))?; + let span = inp.span_since(&inp.cursor()); + let cfg = (self.cfg)(A::Config::default(), inp.ctx(), span) + .map_err(|e| inp.add_alt_err(&inp.cursor().inner, e))?; Ok((A::make_iter(&self.parser, inp)?, cfg)) } @@ -1999,7 +1996,8 @@ where M::combine_mut(&mut output, out, |c, out| C::write(c, idx, out)); } Ok(None) => { - inp.add_alt(None, None, inp.span_since(&before)); + let span = inp.span_since(&before); + inp.add_alt(None, None, span); // SAFETY: We're guaranteed to have initialized up to `idx` values M::map(output, |mut output| unsafe { C::drop_before(&mut output, idx) diff --git a/src/input.rs b/src/input.rs index b176375e..c999827e 100644 --- a/src/input.rs +++ b/src/input.rs @@ -99,7 +99,7 @@ pub trait Input<'src>: 'src { /// /// `cursor` must be generated by `Input::begin`, and must not be shared between multiple inputs. unsafe fn next_maybe( - cache: &Self::Cache, + cache: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option; @@ -109,7 +109,7 @@ pub trait Input<'src>: 'src { /// /// As with [`Input::next_maybe`], the cursors passed to this function must be generated by [`Input::begin`] and /// must not be shared between multiple inputs. 
- unsafe fn span(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span; + unsafe fn span(cache: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span; /// Split an input that produces tokens of type `(T, S)` into one that produces tokens of type `T` and spans of /// type `S`. @@ -181,7 +181,7 @@ pub trait ExactSizeInput<'src>: Input<'src> { /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. - unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span; + unsafe fn span_from(cache: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span; } /// Implemented by inputs that represent slice-like streams of input tokens. @@ -194,21 +194,21 @@ pub trait SliceInput<'src>: ExactSizeInput<'src> { /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. - fn full_slice(cache: &Self::Cache) -> Self::Slice; + fn full_slice(cache: &mut Self::Cache) -> Self::Slice; /// Get a slice from a start and end cursor /// /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. - unsafe fn slice(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice; + unsafe fn slice(cache: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice; /// Get a slice from a start cursor to the end of the input /// /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. - unsafe fn slice_from(cache: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice; + unsafe fn slice_from(cache: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice; } // Implemented by inputs that reference a string slice and use byte indices as their cursor. This trait is sealed right @@ -226,7 +226,7 @@ pub trait ValueInput<'src>: Input<'src> { /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. 
- unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option; + unsafe fn next(cache: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option; } /// Implemented by inputs that can have tokens borrowed from them. @@ -236,8 +236,10 @@ pub trait BorrowInput<'src>: Input<'src> { /// # Safety /// /// As with functions on [`Input`], the cursors provided must be generated by this input. - unsafe fn next_ref(cache: &Self::Cache, cursor: &mut Self::Cursor) - -> Option<&'src Self::Token>; + unsafe fn next_ref( + cache: &mut Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token>; } impl<'src> Input<'src> for &'src str { @@ -261,7 +263,7 @@ impl<'src> Input<'src> for &'src str { #[inline(always)] unsafe fn next_maybe( - this: &Self::Cache, + this: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { if *cursor < this.len() { @@ -280,21 +282,21 @@ impl<'src> Input<'src> for &'src str { } #[inline(always)] - unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { (*range.start..*range.end).into() } } impl<'src> ExactSizeInput<'src> for &'src str { #[inline(always)] - unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { (*range.start..this.len()).into() } } impl<'src> ValueInput<'src> for &'src str { #[inline(always)] - unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { Self::next_maybe(this, cursor) } } @@ -306,17 +308,17 @@ impl<'src> SliceInput<'src> for &'src str { type Slice = &'src str; #[inline(always)] - fn full_slice(this: &Self::Cache) -> Self::Slice { + fn full_slice(this: &mut Self::Cache) -> Self::Slice { *this } #[inline(always)] - unsafe fn slice(this: &Self::Cache, range: 
Range<&Self::Cursor>) -> Self::Slice { + unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { &this[*range.start..*range.end] } #[inline(always)] - unsafe fn slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + unsafe fn slice_from(this: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { &this[*from.start..] } } @@ -342,7 +344,7 @@ impl<'src, T> Input<'src> for &'src [T] { #[inline(always)] unsafe fn next_maybe( - this: &Self::Cache, + this: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { if let Some(tok) = this.get(*cursor) { @@ -354,14 +356,14 @@ impl<'src, T> Input<'src> for &'src [T] { } #[inline(always)] - unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { (*range.start..*range.end).into() } } impl<'src, T> ExactSizeInput<'src> for &'src [T] { #[inline(always)] - unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { (*range.start..this.len()).into() } } @@ -373,31 +375,34 @@ impl<'src, T> SliceInput<'src> for &'src [T] { type Slice = &'src [T]; #[inline(always)] - fn full_slice(this: &Self::Cache) -> Self::Slice { + fn full_slice(this: &mut Self::Cache) -> Self::Slice { *this } #[inline(always)] - unsafe fn slice(this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { &this[*range.start..*range.end] } #[inline(always)] - unsafe fn slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + unsafe fn slice_from(this: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { &this[*from.start..] 
} } impl<'src, T: Clone> ValueInput<'src> for &'src [T] { #[inline(always)] - unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { Self::next_maybe(this, cursor).cloned() } } impl<'src, T> BorrowInput<'src> for &'src [T] { #[inline(always)] - unsafe fn next_ref(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option<&'src Self::Token> { + unsafe fn next_ref( + this: &mut Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token> { Self::next_maybe(this, cursor) } } @@ -423,7 +428,7 @@ impl<'src, T: 'src, const N: usize> Input<'src> for &'src [T; N] { #[inline(always)] unsafe fn next_maybe( - this: &Self::Cache, + this: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { if let Some(tok) = this.get(*cursor) { @@ -435,14 +440,14 @@ impl<'src, T: 'src, const N: usize> Input<'src> for &'src [T; N] { } #[inline(always)] - unsafe fn span(_this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { (*range.start..*range.end).into() } } impl<'src, T: 'src, const N: usize> ExactSizeInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn span_from(this: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { (*range.start..this.len()).into() } } @@ -454,31 +459,34 @@ impl<'src, T: 'src, const N: usize> SliceInput<'src> for &'src [T; N] { type Slice = &'src [T]; #[inline(always)] - fn full_slice(this: &Self::Cache) -> Self::Slice { + fn full_slice(this: &mut Self::Cache) -> Self::Slice { *this } #[inline(always)] - unsafe fn slice(this: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { &this[*range.start..*range.end] } #[inline(always)] - unsafe fn 
slice_from(this: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + unsafe fn slice_from(this: &mut Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { &this[*from.start..] } } impl<'src, T: Clone + 'src, const N: usize> ValueInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn next(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option { + unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { Self::next_maybe(this, cursor).cloned() } } impl<'src, T: 'src, const N: usize> BorrowInput<'src> for &'src [T; N] { #[inline(always)] - unsafe fn next_ref(this: &Self::Cache, cursor: &mut Self::Cursor) -> Option<&'src Self::Token> { + unsafe fn next_ref( + this: &mut Self::Cache, + cursor: &mut Self::Cursor, + ) -> Option<&'src Self::Token> { Self::next_maybe(this, cursor) } } @@ -518,7 +526,7 @@ where #[inline(always)] unsafe fn next_maybe( - (cache, _): &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { I::next_maybe(cache, &mut cursor.0).map(|tok| { @@ -528,7 +536,7 @@ where } #[inline] - unsafe fn span((cache, eoi): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span((cache, eoi): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { let start = I::next_maybe(cache, &mut range.start.0.clone()) .map(|tok| tok.borrow().1.start()) // .or_else(|| self.input.next_maybe(self.input.start()).1) @@ -545,7 +553,10 @@ where S: Span + Clone + 'src, { #[inline(always)] - unsafe fn span_from((cache, eoi): &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + unsafe fn span_from( + (cache, eoi): &mut Self::Cache, + range: RangeFrom<&Self::Cursor>, + ) -> Self::Span { let start = I::next_maybe(cache, &mut range.start.0.clone()) .map(|tok| tok.borrow().1.start()) // .or_else(|| self.input.next_maybe(self.input.start()).1) @@ -561,7 +572,7 @@ where S: Span + Clone + 'src, { #[inline(always)] - unsafe fn next((cache, _): &Self::Cache, cursor: &mut 
Self::Cursor) -> Option { + unsafe fn next((cache, _): &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { I::next(cache, &mut cursor.0).map(|tok| { cursor.1 = Some(tok.1.end()); tok.0 @@ -577,7 +588,7 @@ where { #[inline(always)] unsafe fn next_ref( - (cache, _): &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option<&'src Self::Token> { I::next_ref(cache, &mut cursor.0).map(|tok| { @@ -596,17 +607,20 @@ where type Slice = I::Slice; #[inline(always)] - fn full_slice((cache, _): &Self::Cache) -> Self::Slice { + fn full_slice((cache, _): &mut Self::Cache) -> Self::Slice { I::full_slice(cache) } #[inline(always)] - unsafe fn slice((cache, _): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + unsafe fn slice((cache, _): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { I::slice(cache, &range.start.0..&range.end.0) } #[inline(always)] - unsafe fn slice_from((cache, _): &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + unsafe fn slice_from( + (cache, _): &mut Self::Cache, + from: RangeFrom<&Self::Cursor>, + ) -> Self::Slice { I::slice_from(cache, &from.start.0..) 
} } @@ -647,14 +661,14 @@ where #[inline(always)] unsafe fn next_maybe( - (cache, _): &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { I::next_maybe(cache, cursor) } #[inline] - unsafe fn span((cache, ctx): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span((cache, ctx): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { let inner_span = I::span(cache, range); S::new( ctx.clone(), @@ -671,7 +685,10 @@ where S::Offset: From<::Offset>, { #[inline] - unsafe fn span_from((cache, ctx): &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + unsafe fn span_from( + (cache, ctx): &mut Self::Cache, + range: RangeFrom<&Self::Cursor>, + ) -> Self::Span { let inner_span = I::span_from(cache, range); S::new( ctx.clone(), @@ -687,7 +704,7 @@ where S::Offset: From<::Offset>, { #[inline(always)] - unsafe fn next((cache, _): &Self::Cache, cursor: &mut Self::Cursor) -> Option { + unsafe fn next((cache, _): &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { I::next(cache, cursor) } } @@ -700,7 +717,7 @@ where { #[inline(always)] unsafe fn next_ref( - (cache, _): &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option<&'src Self::Token> { I::next_ref(cache, cursor) @@ -716,17 +733,20 @@ where type Slice = I::Slice; #[inline(always)] - fn full_slice((cache, _): &Self::Cache) -> Self::Slice { + fn full_slice((cache, _): &mut Self::Cache) -> Self::Slice { I::full_slice(cache) } #[inline(always)] - unsafe fn slice((cache, _): &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + unsafe fn slice((cache, _): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { I::slice(cache, range) } #[inline(always)] - unsafe fn slice_from((cache, _): &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { + unsafe fn slice_from( + (cache, _): &mut Self::Cache, + from: RangeFrom<&Self::Cursor>, + ) -> Self::Slice { I::slice_from(cache, from) } } @@ -786,16 
+806,16 @@ where #[inline(always)] unsafe fn next_maybe( - cache: &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { - I::next_maybe(&cache.0, cursor) + I::next_maybe(cache, cursor) } #[inline] - unsafe fn span(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { - let inner_span = I::span(&cache.0, range); - (cache.1)(inner_span) + unsafe fn span((cache, mapper): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + let inner_span = I::span(cache, range); + (mapper)(inner_span) } } @@ -808,9 +828,12 @@ where F: Fn(I::Span) -> S, { #[inline(always)] - unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { - let inner_span = I::span_from(&cache.0, range); - (cache.1)(inner_span) + unsafe fn span_from( + (cache, mapper): &mut Self::Cache, + range: RangeFrom<&Self::Cursor>, + ) -> Self::Span { + let inner_span = I::span_from(cache, range); + (mapper)(inner_span) } } @@ -822,8 +845,8 @@ where F: Fn(I::Span) -> S, { #[inline(always)] - unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { - I::next(&cache.0, cursor) + unsafe fn next((cache, _): &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { + I::next(cache, cursor) } } @@ -836,10 +859,10 @@ where { #[inline(always)] unsafe fn next_ref( - cache: &Self::Cache, + (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option<&'src Self::Token> { - I::next_ref(&cache.0, cursor) + I::next_ref(cache, cursor) } } @@ -853,18 +876,21 @@ where type Slice = I::Slice; #[inline(always)] - fn full_slice(cache: &Self::Cache) -> Self::Slice { - I::full_slice(&cache.0) + fn full_slice((cache, _): &mut Self::Cache) -> Self::Slice { + I::full_slice(cache) } #[inline(always)] - unsafe fn slice(cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { - I::slice(&cache.0, range) + unsafe fn slice((cache, _): &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice { + I::slice(cache, range) } 
#[inline(always)] - unsafe fn slice_from(cache: &Self::Cache, from: RangeFrom<&Self::Cursor>) -> Self::Slice { - I::slice_from(&cache.0, from) + unsafe fn slice_from( + (cache, _): &mut Self::Cache, + from: RangeFrom<&Self::Cursor>, + ) -> Self::Slice { + I::slice_from(cache, from) } } @@ -888,27 +914,24 @@ where { } -#[cfg(feature = "std")] -struct IoInner { - reader: BufReader, - last_cursor: usize, -} - /// Input type which supports seekable readers. Uses a [`BufReader`] internally to buffer input and /// avoid unnecessary IO calls. /// /// Only available with the `std` feature #[cfg(feature = "std")] -pub struct IoInput(RefCell>); +pub struct IoInput { + reader: BufReader, + last_cursor: usize, +} #[cfg(feature = "std")] impl IoInput { /// Create a new `IoReader` from a seekable reader. pub fn new(reader: R) -> IoInput { - IoInput(RefCell::new(IoInner { + IoInput { reader: BufReader::new(reader), last_cursor: 0, - })) + } } } @@ -933,38 +956,36 @@ impl<'src, R: Read + Seek + 'src> Input<'src> for IoInput { #[inline(always)] unsafe fn next_maybe( - cache: &Self::Cache, + this: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { - Self::next(cache, cursor) + Self::next(this, cursor) } #[inline] - unsafe fn span(_cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { (*range.start..*range.end).into() } } #[cfg(feature = "std")] impl<'src, R: Read + Seek + 'src> ValueInput<'src> for IoInput { - unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { - let mut inner = cache.0.borrow_mut(); - - if *cursor != inner.last_cursor { - let seek = *cursor as i64 - inner.last_cursor as i64; + unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { + if *cursor != this.last_cursor { + let seek = *cursor as i64 - this.last_cursor as i64; - inner.reader.seek_relative(seek).unwrap(); + this.reader.seek_relative(seek).unwrap(); - 
inner.last_cursor = *cursor; + this.last_cursor = *cursor; } let mut out = 0; - let r = inner.reader.read_exact(std::slice::from_mut(&mut out)); + let r = this.reader.read_exact(std::slice::from_mut(&mut out)); match r { Ok(()) => { - inner.last_cursor += 1; + this.last_cursor += 1; *cursor += 1; Some(out) } @@ -1151,7 +1172,7 @@ where /// Internal type representing an input as well as all the necessary context for parsing. pub struct InputRef<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> { cursor: I::Cursor, - pub(crate) cache: &'parse I::Cache, + pub(crate) cache: &'parse mut I::Cache, pub(crate) errors: &'parse mut Errors, pub(crate) state: &'parse mut E::State, pub(crate) ctx: &'parse E::Context, @@ -1212,7 +1233,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars pub(crate) fn with_input<'sub_parse, O>( &'sub_parse mut self, start: I::Cursor, - cache: &'sub_parse I::Cache, + cache: &'sub_parse mut I::Cache, f: impl FnOnce(&mut InputRef<'src, 'sub_parse, I, E>) -> O, #[cfg(feature = "memoization")] memos: &'sub_parse mut HashMap< (usize, usize), @@ -1295,7 +1316,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars loop { let mut cursor = self.cursor.clone(); // SAFETY: cursor was generated by previous call to `Input::next` - let token = unsafe { I::next_maybe(&mut self.cache, &mut cursor) }; + let token = unsafe { I::next_maybe(self.cache, &mut cursor) }; if token.as_ref().filter(|tok| f((*tok).borrow())).is_none() { break; } else { @@ -1313,7 +1334,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars I: ValueInput<'src>, { // SAFETY: cursor was generated by previous call to `Input::next` - let token = unsafe { I::next(&mut self.cache, &mut self.cursor) }; + let token = unsafe { I::next(self.cache, &mut self.cursor) }; if let Some(t) = &token { self.state.on_token(t); } @@ -1323,7 +1344,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> 
InputRef<'src, 'pars #[inline(always)] pub(crate) fn next_maybe_inner(&mut self) -> Option { // SAFETY: cursor was generated by previous call to `Input::next` - let token = unsafe { I::next_maybe(&mut self.cache, &mut self.cursor) }; + let token = unsafe { I::next_maybe(self.cache, &mut self.cursor) }; if let Some(t) = &token { self.state.on_token(t.borrow()); } @@ -1336,7 +1357,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars I: BorrowInput<'src>, { // SAFETY: cursor was generated by previous call to `Input::next` - let token = unsafe { I::next_ref(&mut self.cache, &mut self.cursor) }; + let token = unsafe { I::next_ref(self.cache, &mut self.cursor) }; if let Some(t) = &token { self.state.on_token(t); } @@ -1417,14 +1438,14 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// /// See [`InputRef::next_maybe`] for more information about what this function guarantees. #[inline(always)] - pub fn peek_maybe(&self) -> Option> { + pub fn peek_maybe(&mut self) -> Option> { // SAFETY: cursor was generated by previous call to `Input::next` unsafe { I::next_maybe(self.cache, &mut self.cursor.clone()).map(Into::into) } } /// Peek the next token in the input. Returns `None` if the end of the input has been reached. #[inline(always)] - pub fn peek(&self) -> Option + pub fn peek(&mut self) -> Option where I: ValueInput<'src>, { @@ -1434,7 +1455,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Peek the next token in the input. Returns `None` if the end of the input has been reached. 
#[inline(always)] - pub fn peek_ref(&self) -> Option<&'src I::Token> + pub fn peek_ref(&mut self) -> Option<&'src I::Token> where I: BorrowInput<'src>, { @@ -1453,7 +1474,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars #[cfg_attr(not(feature = "regex"), allow(dead_code))] #[inline] - pub(crate) fn full_slice(&self) -> I::Slice + pub(crate) fn full_slice(&mut self) -> I::Slice where I: SliceInput<'src>, { @@ -1462,7 +1483,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Get a slice of the input that covers the given cursor range. #[inline] - pub fn slice(&self, range: Range<&Cursor<'src, 'parse, I>>) -> I::Slice + pub fn slice(&mut self, range: Range<&Cursor<'src, 'parse, I>>) -> I::Slice where I: SliceInput<'src>, { @@ -1472,7 +1493,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Get a slice of the input that covers the given cursor range. #[inline] - pub fn slice_from(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice + pub fn slice_from(&mut self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice where I: SliceInput<'src>, { @@ -1482,7 +1503,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Get a slice of the input that covers the given cursor range. 
#[inline] - pub fn slice_since(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice + pub fn slice_since(&mut self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Slice where I: SliceInput<'src>, { @@ -1492,7 +1513,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars #[cfg_attr(not(feature = "lexical-numbers"), allow(dead_code))] #[inline(always)] - pub(crate) fn slice_trailing_inner(&self) -> I::Slice + pub(crate) fn slice_trailing_inner(&mut self) -> I::Slice where I: SliceInput<'src>, { @@ -1511,7 +1532,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Get a span over the input that goes from the given cursor to the end of the input. // TODO: Unofy with `InputRef::span` #[inline(always)] - pub fn span_from(&self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Span + pub fn span_from(&mut self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Span where I: ExactSizeInput<'src>, { @@ -1522,7 +1543,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Generate a span that extends from the provided [`Cursor`] to the current input position. #[inline(always)] - pub fn span_since(&self, before: &Cursor<'src, 'parse, I>) -> I::Span { + pub fn span_since(&mut self, before: &Cursor<'src, 'parse, I>) -> I::Span { // SAFETY: `Cursor` is invariant over 'parse, so we know that this cursor came from the same input // See `https://plv.mpi-sws.org/rustbelt/ghostcell/` unsafe { I::span(self.cache, &before.inner..&self.cursor) } @@ -1632,7 +1653,7 @@ impl Emitter { pub struct MapExtra<'src, 'b, I: Input<'src>, E: ParserExtra<'src, I>> { before: &'b I::Cursor, after: &'b I::Cursor, - cache: &'b I::Cache, + cache: &'b mut I::Cache, state: &'b mut E::State, ctx: &'b E::Context, } @@ -1654,7 +1675,7 @@ impl<'src, 'b, I: Input<'src>, E: ParserExtra<'src, I>> MapExtra<'src, 'b, I, E> /// Get the span corresponding to the output. 
#[inline(always)] - pub fn span(&self) -> I::Span { + pub fn span(&mut self) -> I::Span { // SAFETY: The cursors both came from the same input // TODO: Should this make `MapExtra::new` unsafe? Probably, but it's an internal API and we simply wouldn't // ever abuse it in this way, even accidentally. @@ -1663,7 +1684,7 @@ impl<'src, 'b, I: Input<'src>, E: ParserExtra<'src, I>> MapExtra<'src, 'b, I, E> /// Get the slice corresponding to the output. #[inline(always)] - pub fn slice(&self) -> I::Slice + pub fn slice(&mut self) -> I::Slice where I: SliceInput<'src>, { diff --git a/src/number.rs b/src/number.rs index d219da72..5672dd0b 100644 --- a/src/number.rs +++ b/src/number.rs @@ -48,7 +48,8 @@ where } Err(_err) => { // TODO: Improve error - inp.add_alt(None, None, inp.span_since(&before)); + let span = inp.span_since(&before); + inp.add_alt(None, None, span); Err(()) } } diff --git a/src/primitive.rs b/src/primitive.rs index c1e5fbd6..0fd4f924 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -45,7 +45,8 @@ where match inp.next_maybe_inner() { None => Ok(M::bind(|| ())), Some(tok) => { - inp.add_alt(Some(None), Some(tok.into()), inp.span_since(&before)); + let span = inp.span_since(&before); + inp.add_alt(Some(None), Some(tok.into()), span); Err(()) } } diff --git a/src/regex.rs b/src/regex.rs index d5378880..d4744064 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -55,7 +55,8 @@ where } None => { // TODO: Improve error - inp.add_alt(None, None, inp.span_since(&before)); + let span = inp.span_since(&before); + inp.add_alt(None, None, span); Err(()) } } diff --git a/src/stream.rs b/src/stream.rs index 88019e4e..c7568058 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -5,7 +5,8 @@ use super::*; /// Internally, the stream will pull tokens in batches so as to avoid invoking the iterator every time a new token is /// required. 
pub struct Stream { - tokens: Cell<(Vec, Option)>, + tokens: Vec, + iter: I, } impl Stream { @@ -23,7 +24,8 @@ impl Stream { /// ``` pub fn from_iter>(iter: J) -> Self { Self { - tokens: Cell::new((Vec::new(), Some(iter.into_iter()))), + tokens: Vec::new(), + iter: iter.into_iter(), } } @@ -33,9 +35,9 @@ impl Stream { where I: 'a, { - let (vec, iter) = self.tokens.into_inner(); Stream { - tokens: Cell::new((vec, Some(Box::new(iter.expect("no iterator?!"))))), + tokens: self.tokens, + iter: Box::new(self.iter), } } @@ -44,9 +46,9 @@ impl Stream { where I: ExactSizeIterator + 'a, { - let (vec, iter) = self.tokens.into_inner(); Stream { - tokens: Cell::new((vec, Some(Box::new(iter.expect("no iterator?!"))))), + tokens: self.tokens, + iter: Box::new(self.iter), } } } @@ -83,14 +85,14 @@ where #[inline(always)] unsafe fn next_maybe( - cache: &Self::Cache, + this: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { - Self::next(cache, cursor) + Self::next(this, cursor) } #[inline(always)] - unsafe fn span(_cache: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { (*range.start..*range.end).into() } } @@ -100,12 +102,8 @@ where I::Item: Clone, { #[inline(always)] - unsafe fn span_from(cache: &Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { - let mut other = Cell::new((Vec::new(), None)); - cache.tokens.swap(&other); - let len = other.get_mut().1.as_ref().expect("no iterator?!").len(); - cache.tokens.swap(&other); - (*range.start..len).into() + unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span { + (*range.start..this.tokens.len() + this.iter.len()).into() } } @@ -114,25 +112,17 @@ where I::Item: Clone, { #[inline] - unsafe fn next(cache: &Self::Cache, cursor: &mut Self::Cursor) -> Option { - let mut other = Cell::new((Vec::new(), None)); - cache.tokens.swap(&other); - - let (vec, iter) = other.get_mut(); - + unsafe fn 
next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option { // Pull new items into the vector if we need them - if vec.len() <= *cursor { - vec.extend(iter.as_mut().expect("no iterator?!").take(500)); + if this.tokens.len() <= *cursor { + this.tokens.extend((&mut this.iter).take(512)); } // Get the token at the given cursor - let tok = vec.get(*cursor).cloned(); - - cache.tokens.swap(&other); - - *cursor += tok.is_some() as usize; - - tok + this.tokens.get(*cursor).map(|tok| { + *cursor += 1; + tok.clone() + }) } } @@ -176,7 +166,7 @@ where } unsafe fn next_maybe( - _eoi: &Self::Cache, + _eoi: &mut Self::Cache, cursor: &mut Self::Cursor, ) -> Option { cursor.0.next().map(|(tok, span)| { @@ -186,7 +176,7 @@ where }) } - unsafe fn span(eoi: &Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { + unsafe fn span(eoi: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span { let start = range .start .0 From 3195eaffcf1dba4248a95f1b61aac87312e78d8b Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 11:36:38 +0100 Subject: [PATCH 5/9] Fixed doc link --- src/input.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/input.rs b/src/input.rs index c999827e..7f0ac191 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1257,7 +1257,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// Get the internal cursor of the input at this moment in time. /// - /// Can be used for generating spans or slices. See [`InputRef::span`] and [`InputRef::slice`]. + /// Can be used for generating spans or slices. See [`InputRef::span_from`] and [`InputRef::slice`]. #[inline(always)] pub fn cursor(&self) -> Cursor<'src, 'parse, I> { Cursor { @@ -1530,7 +1530,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars // } /// Get a span over the input that goes from the given cursor to the end of the input. 
- // TODO: Unofy with `InputRef::span` + // TODO: Unify with `InputRef::span` #[inline(always)] pub fn span_from(&mut self, range: RangeFrom<&Cursor<'src, 'parse, I>>) -> I::Span where From 8e7ae47876432ebd1da33f3169afbdda58c135f2 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 12:03:09 +0100 Subject: [PATCH 6/9] fmt --- src/input.rs | 9 +++++++-- src/inspector.rs | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/input.rs b/src/input.rs index 7f0ac191..383c784c 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1271,7 +1271,9 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// /// You can rewind back to this state later with [`InputRef::rewind`]. #[inline(always)] - pub fn save(&self) -> Checkpoint<'src, 'parse, I, >::Checkpoint> { + pub fn save( + &self, + ) -> Checkpoint<'src, 'parse, I, >::Checkpoint> { let cursor = self.cursor(); let inspector = self.state.on_save(&cursor); Checkpoint { @@ -1286,7 +1288,10 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars /// /// You can create a checkpoint with which to perform rewinding using [`InputRef::save`]. #[inline(always)] - pub fn rewind(&mut self, checkpoint: Checkpoint<'src, 'parse, I, >::Checkpoint>) { + pub fn rewind( + &mut self, + checkpoint: Checkpoint<'src, 'parse, I, >::Checkpoint>, + ) { self.errors.secondary.truncate(checkpoint.err_count); self.state.on_rewind(&checkpoint); self.cursor = checkpoint.cursor.inner; diff --git a/src/inspector.rs b/src/inspector.rs index 871c4fed..a84f33ba 100644 --- a/src/inspector.rs +++ b/src/inspector.rs @@ -4,7 +4,10 @@ //! and rubber bungs in his ears. He knew exactly what was happening and had known //! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night //! 
beside his pillar and woken him with a start."* -use crate::{input::{Checkpoint, Cursor}, Input}; +use crate::{ + input::{Checkpoint, Cursor}, + Input, +}; use core::ops::{Deref, DerefMut}; #[allow(unused)] // for intra-doc links From ba52de963499cf554fd7812aeb97db5f4ff4f1fb Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 12:05:49 +0100 Subject: [PATCH 7/9] Removed redundant lifetime parameter --- src/input.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/input.rs b/src/input.rs index 383c784c..8fdd7908 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1037,7 +1037,7 @@ pub struct Cursor<'src, 'parse, I: Input<'src>> { phantom: PhantomData &'parse ()>, // Invariance } -impl<'src, 'parse, I: Input<'src>> Cursor<'src, 'parse, I> { +impl<'src, I: Input<'src>> Cursor<'src, '_, I> { /// Get the input's internal cursor. pub fn inner(&self) -> &I::Cursor { &self.inner From d532d92b36c316a34863bf2387aeb112a96a75c6 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 12:09:38 +0100 Subject: [PATCH 8/9] Removed unnecessary clones --- src/combinator.rs | 4 ++-- src/primitive.rs | 18 +++++++++--------- src/recovery.rs | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index c1aaf8e4..3f35a7de 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -2559,7 +2559,7 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save().clone(); + let before = inp.save(); match self.parser.go::(inp) { Ok(out) => { inp.rewind(before); @@ -2739,7 +2739,7 @@ where // where // Self: Sized, // { -// let before = inp.save().clone(); +// let before = inp.save(); // match self.parser.go::(inp) { // Ok(out) => Ok(out), // Err(()) => { diff --git a/src/primitive.rs b/src/primitive.rs index 0fd4f924..f92ec1ff 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -41,7 +41,7 @@ where { #[inline] fn go(&self, inp: &mut 
InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); match inp.next_maybe_inner() { None => Ok(M::bind(|| ())), Some(tok) => { @@ -261,7 +261,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); match inp.next_inner() { #[allow(suspicious_double_ref_op)] // Is this a clippy bug? Some(tok) if self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), @@ -335,7 +335,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); match inp.next_inner() { // #[allow(suspicious_double_ref_op)] // Is this a clippy bug? Some(tok) if !self.seq.contains(tok.borrow()) => Ok(M::bind(|| tok)), @@ -401,7 +401,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); match (self.f)(inp) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { @@ -454,7 +454,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); let next = inp.next_inner(); let err_span = inp.span_since(&before); let found = match next { @@ -511,7 +511,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); let next = inp.next_ref_inner(); let found = match next { Some(tok) => match (self.filter)(tok, &mut MapExtra::new(&before, inp)) { @@ -548,7 +548,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = inp.cursor(); match inp.next_inner() { Some(tok) => Ok(M::bind(|| tok)), found => { @@ -603,7 +603,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.cursor().clone(); + let before = 
inp.cursor(); match inp.next_ref_inner() { Some(tok) => Ok(M::bind(|| tok)), found => { @@ -883,7 +883,7 @@ macro_rules! impl_choice_for_tuple { { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save().clone(); + let before = inp.save(); let Choice { parsers: ($Head, $($X,)*), .. } = self; diff --git a/src/recovery.rs b/src/recovery.rs index 8f60bf67..fcf771b0 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -67,7 +67,7 @@ where S: Strategy<'a, I, O, E>, { fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.save().clone(); + let before = inp.save(); match self.parser.go::(inp) { Ok(out) => Ok(out), Err(()) => { From 9d3165493ec6256dfb7031b8ddf669089d9c5909 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Oct 2024 17:33:05 +0100 Subject: [PATCH 9/9] Renamed TokenMaybe to MaybeToken, tightened up some API corners --- src/input.rs | 76 ++++++++++++--------------------------------------- src/lib.rs | 2 +- src/stream.rs | 8 +++--- src/text.rs | 4 +-- src/util.rs | 51 ++++++++++++++++++++++++++++++++++ 5 files changed, 76 insertions(+), 65 deletions(-) diff --git a/src/input.rs b/src/input.rs index 8fdd7908..52e92d75 100644 --- a/src/input.rs +++ b/src/input.rs @@ -13,46 +13,6 @@ use super::*; #[cfg(feature = "std")] use std::io::{BufReader, Read, Seek}; -mod sealed { - pub trait Sealed {} -} -#[doc(hidden)] -pub trait MaybeOwned<'src, T: 'src>: - sealed::Sealed + Borrow + Into> -{ - type Proj: MaybeOwned<'src, U>; - #[doc(hidden)] - fn choose( - self, - f: impl FnOnce(&'src T) -> &'src R, - g: impl FnOnce(T) -> R, - ) -> Self::Proj; -} - -impl sealed::Sealed for &T {} -impl<'src, T> MaybeOwned<'src, T> for &'src T { - type Proj = &'src U; - fn choose( - self, - f: impl FnOnce(&'src T) -> &'src R, - _g: impl FnOnce(T) -> R, - ) -> Self::Proj { - f(self) - } -} - -impl sealed::Sealed for T {} -impl<'src, T: 'src> MaybeOwned<'src, T> for T { - type Proj = U; - fn choose( - self, - _f: impl 
FnOnce(&'src T) -> &'src R, - g: impl FnOnce(T) -> R, - ) -> Self::Proj { - g(self) - } -} - /// A trait for types that represents a stream of input tokens. Unlike [`Iterator`], this type /// supports backtracking and a few other features required by the crate. /// @@ -73,7 +33,7 @@ pub trait Input<'src>: 'src { type Token: 'src; /// The token type returned by [`Input::next_maybe`], allows abstracting over by-value and by-reference inputs. - type TokenMaybe: MaybeOwned<'src, Self::Token>; // Must be `&'src Self::Token` or `Self::Token` + type MaybeToken: IntoMaybe<'src, Self::Token>; // Must be `&'src Self::Token` or `Self::Token` /// The type used to keep track of the current location in the stream. /// @@ -101,7 +61,7 @@ pub trait Input<'src>: 'src { unsafe fn next_maybe( cache: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option; + ) -> Option; /// Create a span going from the start cursor to the end cursor (exclusive). /// @@ -247,7 +207,7 @@ impl<'src> Input<'src> for &'src str { type Span = SimpleSpan; type Token = char; - type TokenMaybe = char; + type MaybeToken = char; type Cache = Self; @@ -265,7 +225,7 @@ impl<'src> Input<'src> for &'src str { unsafe fn next_maybe( this: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { if *cursor < this.len() { // SAFETY: `cursor < self.len()` above guarantees cursor is in-bounds // We only ever return cursors that are at a character boundary @@ -328,7 +288,7 @@ impl<'src, T> Input<'src> for &'src [T] { type Span = SimpleSpan; type Token = T; - type TokenMaybe = &'src T; + type MaybeToken = &'src T; type Cache = Self; @@ -346,7 +306,7 @@ impl<'src, T> Input<'src> for &'src [T] { unsafe fn next_maybe( this: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { if let Some(tok) = this.get(*cursor) { *cursor += 1; Some(tok) @@ -412,7 +372,7 @@ impl<'src, T: 'src, const N: usize> Input<'src> for &'src [T; N] { type Span = SimpleSpan; type Token = T; - type TokenMaybe 
= &'src T; + type MaybeToken = &'src T; type Cache = Self; @@ -430,7 +390,7 @@ impl<'src, T: 'src, const N: usize> Input<'src> for &'src [T; N] { unsafe fn next_maybe( this: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { if let Some(tok) = this.get(*cursor) { *cursor += 1; Some(tok) @@ -509,7 +469,7 @@ where type Span = S; type Token = T; - type TokenMaybe = >::Proj; + type MaybeToken = >::Proj; type Cache = (I::Cache, S); @@ -528,10 +488,10 @@ where unsafe fn next_maybe( (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { I::next_maybe(cache, &mut cursor.0).map(|tok| { cursor.1 = Some(tok.borrow().1.end()); - tok.choose(|(tok, _)| tok, |(tok, _)| tok) + tok.map_maybe(|(tok, _)| tok, |(tok, _)| tok) }) } @@ -644,7 +604,7 @@ where type Span = S; type Token = I::Token; - type TokenMaybe = I::TokenMaybe; + type MaybeToken = I::MaybeToken; type Cache = (I::Cache, S::Context); @@ -663,7 +623,7 @@ where unsafe fn next_maybe( (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { I::next_maybe(cache, cursor) } @@ -789,7 +749,7 @@ where type Span = S; type Token = I::Token; - type TokenMaybe = I::TokenMaybe; + type MaybeToken = I::MaybeToken; type Cache = (I::Cache, F); @@ -808,7 +768,7 @@ where unsafe fn next_maybe( (cache, _): &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { I::next_maybe(cache, cursor) } @@ -941,7 +901,7 @@ impl<'src, R: Read + Seek + 'src> Input<'src> for IoInput { type Span = SimpleSpan; type Token = u8; - type TokenMaybe = u8; + type MaybeToken = u8; type Cache = Self; @@ -958,7 +918,7 @@ impl<'src, R: Read + Seek + 'src> Input<'src> for IoInput { unsafe fn next_maybe( this: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { Self::next(this, cursor) } @@ -1347,7 +1307,7 @@ impl<'src, 'parse, I: Input<'src>, E: ParserExtra<'src, I>> InputRef<'src, 'pars } #[inline(always)] - pub(crate) fn 
next_maybe_inner(&mut self) -> Option { + pub(crate) fn next_maybe_inner(&mut self) -> Option { // SAFETY: cursor was generated by previous call to `Input::next` let token = unsafe { I::next_maybe(self.cache, &mut self.cursor) }; if let Some(t) = &token { diff --git a/src/lib.rs b/src/lib.rs index 06cc5366..9132b385 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -149,7 +149,7 @@ use self::{ recovery::{RecoverWith, Strategy}, span::Span, text::*, - util::{MaybeMut, MaybeRef}, + util::{IntoMaybe, MaybeMut, MaybeRef}, }; #[cfg(all(feature = "extension", doc))] use self::{extension::v1::*, primitive::custom, stream::Stream}; diff --git a/src/stream.rs b/src/stream.rs index c7568058..c271caff 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -67,7 +67,7 @@ where type Span = SimpleSpan; type Token = I::Item; - type TokenMaybe = I::Item; + type MaybeToken = I::Item; type Cursor = usize; @@ -87,7 +87,7 @@ where unsafe fn next_maybe( this: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { Self::next(this, cursor) } @@ -151,7 +151,7 @@ where type Span = S; type Token = T; - type TokenMaybe = T; + type MaybeToken = T; type Cache = S; // eoi @@ -168,7 +168,7 @@ where unsafe fn next_maybe( _eoi: &mut Self::Cache, cursor: &mut Self::Cursor, - ) -> Option { + ) -> Option { cursor.0.next().map(|(tok, span)| { cursor.1 += 1; cursor.2 = Some(span.end()); diff --git a/src/text.rs b/src/text.rs index eed6ce5d..a0e560ea 100644 --- a/src/text.rs +++ b/src/text.rs @@ -281,7 +281,7 @@ where pub fn digits<'a, C, I, E>(radix: u32) -> Repeated + Copy, C, I, E> where C: Char, - I: ValueInput<'a> + Input<'a, Token = C>, + I: ValueInput<'a, Token = C>, E: ParserExtra<'a, I>, { any() @@ -328,7 +328,7 @@ where /// ``` /// #[must_use] -pub fn int<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( +pub fn int<'a, I: StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( radix: u32, ) -> impl Parser<'a, I, &'a C::Str, E> + Copy { any() diff --git 
a/src/util.rs b/src/util.rs index e7ecf7c1..45d839d3 100644 --- a/src/util.rs +++ b/src/util.rs @@ -173,3 +173,54 @@ impl<'de, T: Deserialize<'de>, R: Deref> Deserialize<'de> for Maybe< deserializer.deserialize_newtype_struct("Maybe", MaybeVisitor(PhantomData)) } } + +mod ref_or_val_sealed { + pub trait Sealed {} +} + +/// A trait that allows abstracting over values of, or references to, a `T`. +/// +/// Some [`Input`]s can only generate tokens by-reference (like `&[T]` -> `&T`), and some can only generate tokens +/// by-value (like `&str` -> `char`). This trait allows chumsky to handle both kinds of input. +/// +/// The trait is sealed: you cannot implement it yourself. +pub trait IntoMaybe<'src, T: 'src>: + ref_or_val_sealed::Sealed + Borrow + Into> +{ + /// Project the referential properties of this type on to another type. + /// + /// For example, `<&Foo>::Proj = &Bar` but `::Proj = Bar`. + #[doc(hidden)] + type Proj: IntoMaybe<'src, U>; + + #[doc(hidden)] + fn map_maybe( + self, + f: impl FnOnce(&'src T) -> &'src R, + g: impl FnOnce(T) -> R, + ) -> Self::Proj; +} + +impl ref_or_val_sealed::Sealed for &T {} +impl<'src, T> IntoMaybe<'src, T> for &'src T { + type Proj = &'src U; + fn map_maybe( + self, + f: impl FnOnce(&'src T) -> &'src R, + _g: impl FnOnce(T) -> R, + ) -> Self::Proj { + f(self) + } +} + +impl ref_or_val_sealed::Sealed for T {} +impl<'src, T: 'src> IntoMaybe<'src, T> for T { + type Proj = U; + fn map_maybe( + self, + _f: impl FnOnce(&'src T) -> &'src R, + g: impl FnOnce(T) -> R, + ) -> Self::Proj { + g(self) + } +}