From eda5e51d4732598592ddc291de7204d023a54a3f Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 9 Oct 2023 00:55:02 +0100 Subject: [PATCH] First attempt at filter labels --- Cargo.toml | 5 +- examples/io.rs | 4 +- examples/zero-copy.rs | 4 +- src/combinator.rs | 16 ++++-- src/either.rs | 2 +- src/error.rs | 81 +++++++++++++++++++++----- src/input.rs | 7 ++- src/lib.rs | 71 +++++++++++------------ src/text.rs | 129 +++++++++++++++++++++++------------------- 9 files changed, 194 insertions(+), 125 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 35c1968e..051f3e10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,9 +35,6 @@ memoization = [] # Allows extending chumsky by writing your own parser implementations. extension = [] -# Enable support for parser labelling -label = [] - # Make builtin parsers such as `Boxed` use atomic instead of non-atomic internals. sync = ["spin"] @@ -62,7 +59,7 @@ serde = ["dep:serde"] # An alias of all features that work with the stable compiler. # Do not use this feature, its removal is not considered a breaking change and its behaviour may change. # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list. -_test_stable = ["std", "spill-stack", "memoization", "extension", "label", "sync"] +_test_stable = ["std", "spill-stack", "memoization", "extension", "sync"] [package.metadata.docs.rs] all-features = true diff --git a/examples/io.rs b/examples/io.rs index 403743c0..bec9eb1b 100644 --- a/examples/io.rs +++ b/examples/io.rs @@ -13,7 +13,7 @@ struct Foo { fn ident<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { any() - .filter(u8::is_ascii_alphabetic) + .filter(u8::is_ascii_alphabetic, "alphabetic") .repeated() .at_least(1) .collect::>() @@ -22,7 +22,7 @@ fn ident<'a, E: ParserExtra<'a, IoInput>>() -> impl Parser<'a, IoInput>>() -> impl Parser<'a, IoInput, String, E> { any() - .filter(u8::is_ascii_digit) + .filter(u8::is_ascii_digit, "digit") .repeated() .at_least(1) .collect::>() diff --git a/examples/zero-copy.rs b/examples/zero-copy.rs index dc9a8ea1..2dc9ea2e 100644 --- a/examples/zero-copy.rs +++ b/examples/zero-copy.rs @@ -9,14 +9,14 @@ enum Token<'a> { // This parser is guaranteed to never allocate! fn parser<'a>() -> impl Parser<'a, &'a str, [(SimpleSpan, Token<'a>); 6]> { let ident = any() - .filter(|c: &char| c.is_alphanumeric()) + .filter(|c: &char| c.is_alphanumeric(), "alphanumeric") .repeated() .at_least(1) .to_slice() .map(Token::Ident); let string = just('"') - .then(any().filter(|c: &char| *c != '"').repeated()) + .then(any().filter(|c: &char| *c != '"', "non-quote").repeated()) .then(just('"')) .to_slice() .map(Token::String); diff --git a/src/combinator.rs b/src/combinator.rs index 79182573..464989dc 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -223,27 +223,31 @@ where } /// See [`Parser::filter`]. -pub struct Filter { +pub struct Filter { pub(crate) parser: A, pub(crate) filter: F, + pub(crate) label: L, } -impl Copy for Filter {} -impl Clone for Filter { +impl Copy for Filter {} +impl Clone for Filter { fn clone(&self) -> Self { Self { parser: self.parser.clone(), filter: self.filter.clone(), + label: self.label.clone(), } } } -impl<'a, A, I, O, E, F> ParserSealed<'a, I, O, E> for Filter +impl<'a, A, I, O, E, F, L> ParserSealed<'a, I, O, E> for Filter where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, O, E>, F: Fn(&O) -> bool, + E::Error: LabelError<'a, I, L>, + L: Clone, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { @@ -253,7 +257,9 @@ where Ok(M::bind(|| out)) } else { let err_span = inp.span_since(before); - inp.add_alt(inp.offset().offset, None, None, err_span); + inp + .add_alt(inp.offset().offset, None, None, err_span) + .label_with(self.label.clone()); Err(()) } }) diff --git a/src/either.rs b/src/either.rs index a24f91fc..b52c78cb 100644 --- a/src/either.rs +++ b/src/either.rs @@ -41,7 +41,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .at_most(3) diff --git a/src/error.rs b/src/error.rs index b08584c3..ba794ac6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -131,6 +131,18 @@ impl fmt::Display for EmptyErr { } } +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for EmptyErr { + #[inline] + fn label_with(&mut self, _label: L) { + // Stub + } + + #[inline] + fn in_context(&mut self, _label: L, _span: I::Span) { + // Stub + } +} + /// A very cheap error type that tracks only the error span. This type is most useful when you want fast parsing but do /// not particularly care about the quality of error messages. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -169,6 +181,18 @@ where } } +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Cheap { + #[inline] + fn label_with(&mut self, _label: L) { + // Stub + } + + #[inline] + fn in_context(&mut self, _label: L, _span: I::Span) { + // Stub + } +} + /// A simple error type that tracks the error span and found token. This type is most useful when you want fast parsing /// but do not particularly care about the quality of error messages. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -228,10 +252,22 @@ where } } +impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Simple<'a, I::Token, I::Span> { + #[inline] + fn label_with(&mut self, _label: L) { + // Stub + } + + #[inline] + fn in_context(&mut self, _label: L, _span: I::Span) { + // Stub + } +} + /// An expected pattern for a [`Rich`] error. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum RichPattern<'a, T, L = &'static str> { +pub enum RichPattern<'a, T, L = RichLabel> { /// A specific token was expected. Token(MaybeRef<'a, T>), /// A labelled pattern was expected. @@ -318,7 +354,7 @@ where /// The reason for a [`Rich`] error. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum RichReason<'a, T, L = &'static str> { +pub enum RichReason<'a, T, L = RichLabel> { /// An unexpected input was found ExpectedFound { /// The tokens expected @@ -361,7 +397,6 @@ impl<'a, T, L> RichReason<'a, T, L> { } } - #[cfg(feature = "label")] fn take_found(&mut self) -> Option> { match self { RichReason::ExpectedFound { found, .. } => found.take(), @@ -516,10 +551,9 @@ where /// Please note that it uses a [`Vec`] to remember expected symbols. If you find this to be too slow, you can /// implement [`Error`] for your own error type or use [`Simple`] instead. #[derive(Clone, PartialEq, Eq, Hash)] -pub struct Rich<'a, T, S = SimpleSpan, L = &'static str> { +pub struct Rich<'a, T, S = SimpleSpan, L = RichLabel> { span: S, reason: Box>, - #[cfg(feature = "label")] context: Vec<(L, S)>, } @@ -549,7 +583,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { Rich { span, reason: Box::new(RichReason::Custom(msg.to_string())), - #[cfg(feature = "label")] context: Vec::new(), } } @@ -578,7 +611,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { /// /// 'Context' here means parser patterns that the parser was in the process of parsing when the error occurred. To /// add labelled contexts, see [`Parser::labelled`]. - #[cfg(feature = "label")] pub fn contexts(&self) -> impl Iterator { self.context.iter().map(|(l, s)| (l, s)) } @@ -622,7 +654,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> { Rich { span: self.span, reason: Box::new(self.reason.map_token(f)), - #[cfg(feature = "label")] context: self.context, } } @@ -651,7 +682,6 @@ where .collect(), found, }), - #[cfg(feature = "label")] context: Vec::new(), } } @@ -662,7 +692,6 @@ where Self { span: self.span, reason: Box::new(new_reason), - #[cfg(feature = "label")] context: self.context, // TOOD: Merge contexts } } @@ -746,20 +775,41 @@ where }); } } - #[cfg(feature = "label")] self.context.clear(); self } } -#[cfg(feature = "label")] -impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Rich<'a, I::Token, I::Span, L> +/// A rich label representing some input pattern. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum RichLabel { + /// A text-specific label. + Text(text::TextLabel), + /// A label named by a string. + Named(&'static str), +} + +impl From for RichLabel { fn from(l: text::TextLabel) -> Self { Self::Text(l) } } +impl From<&'static str> for RichLabel { fn from(s: &'static str) -> Self { Self::Named(s) } } + +impl fmt::Display for RichLabel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Text(t) => t.fmt(f), + Self::Named(n) => write!(f, "{n}"), + } + } +} + +impl<'a, I: Input<'a>, L, L2> LabelError<'a, I, L2> for Rich<'a, I::Token, I::Span, L> where I::Token: PartialEq, + L2: Into, L: PartialEq, { #[inline] - fn label_with(&mut self, label: L) { + fn label_with(&mut self, label: L2) { + let label = label.into(); // Opportunistically attempt to reuse allocations if we can match &mut *self.reason { RichReason::ExpectedFound { expected, found: _ } => { @@ -776,7 +826,8 @@ where } #[inline] - fn in_context(&mut self, label: L, span: I::Span) { + fn in_context(&mut self, label: L2, span: I::Span) { + let label = label.into(); if self.context.iter().all(|(l, _)| l != &label) { self.context.push((label, span)); } diff --git a/src/input.rs b/src/input.rs index f02c4543..71fdaa2c 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1458,9 +1458,10 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> expected: Exp, found: Option>, span: I::Span, - ) { + ) -> &mut E::Error { // Prioritize errors before choosing whether to generate the alt (avoids unnecessary error creation) - self.errors.alt = Some(match self.errors.alt.take() { + let alt = self.errors.alt.take(); + &mut self.errors.alt.insert(match alt { Some(alt) => match alt.pos.into().cmp(&at.into()) { Ordering::Equal => { Located::at(alt.pos, alt.err.merge_expected_found(expected, found, span)) @@ -1471,7 +1472,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> } }, None => Located::at(at, Error::expected_found(expected, found, span)), - }); + }).err } #[inline] diff --git a/src/lib.rs b/src/lib.rs index 01668b31..4d118108 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,7 +82,6 @@ pub mod extra; #[cfg(docsrs)] pub mod guide; pub mod input; -#[cfg(feature = "label")] pub mod label; #[cfg(feature = "lexical-numbers")] pub mod number; @@ -143,7 +142,6 @@ use hashbrown::HashMap; #[cfg(feature = "serde")] use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer}; -#[cfg(feature = "label")] use self::label::{LabelError, Labelled}; use self::{ combinator::*, @@ -459,7 +457,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ``` /// # use chumsky::{prelude::*, error::Simple}; /// let lowercase = any::<_, extra::Err>>() - /// .filter(char::is_ascii_lowercase) + /// .filter(char::is_ascii_lowercase, "lowercase") /// .repeated() /// .at_least(1) /// .collect::(); @@ -467,13 +465,15 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// assert_eq!(lowercase.parse("hello").into_result(), Ok("hello".to_string())); /// assert!(lowercase.parse("Hello").has_errors()); /// ``` - fn filter bool>(self, f: F) -> Filter + fn filter bool, L>(self, f: F, label: L) -> Filter where Self: Sized, + E::Error: LabelError<'a, I, L>, { Filter { parser: self, filter: f, + label, } } @@ -489,13 +489,13 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// enum Token { Word(String), Num(u64) } /// /// let word = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_alphabetic()) + /// .filter(|c: &char| c.is_alphabetic(), "alphabetic") /// .repeated().at_least(1) /// .collect::() /// .map(Token::Word); /// /// let num = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_ascii_digit()) + /// .filter(|c: &char| c.is_ascii_digit(), "digit") /// .repeated().at_least(1) /// .collect::() /// .map(|s| Token::Num(s.parse().unwrap())); @@ -786,7 +786,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// # use chumsky::{prelude::*, error::Simple}; /// // A parser that parses any number of whitespace characters without allocating /// let whitespace = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_whitespace()) + /// .filter(|c: &char| c.is_whitespace(), "whitespace") /// .ignored() /// .repeated() /// .collect::>(); @@ -858,7 +858,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// within the parser. For example, labelling a parser for an expression would yield "expected expression" errors /// rather than "expected integer, string, binary op, etc." errors. // TODO: Example - #[cfg(feature = "label")] fn labelled(self, label: L) -> Labelled where Self: Sized, @@ -880,7 +879,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ``` /// # use chumsky::{prelude::*, error::Simple}; /// let word = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_alphabetic()) + /// .filter(|c: &char| c.is_alphabetic(), "alphabetic") /// .repeated() /// .at_least(1) /// .collect::(); @@ -908,12 +907,14 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; - /// let zeroes = any::<_, extra::Err>>().filter(|c: &char| *c == '0').ignored().repeated().collect::>(); - /// let digits = any().filter(|c: &char| c.is_ascii_digit()) - /// .repeated() - /// .collect::(); + /// let zeroes = any::<_, extra::Err>>() + /// .filter(|c: &char| *c == '0', "zero") + /// .repeated(); + /// let digits = any() + /// .filter(|c: &char| c.is_ascii_digit(), "digit") + /// .repeated(); /// let integer = zeroes - /// .ignore_then(digits) + /// .ignore_then(digits.to_slice()) /// .from_str() /// .unwrapped(); /// @@ -940,7 +941,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ``` /// # use chumsky::{prelude::*, error::Simple}; /// let word = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_alphabetic()) + /// .filter(|c: &char| c.is_alphabetic(), "alphabetic") /// .repeated() /// .at_least(1) /// .collect::(); @@ -1202,7 +1203,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// List(Vec), /// } /// - /// let ident = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic()) + /// let ident = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic(), "alphabetic") /// .repeated() /// .at_least(1) /// .collect::(); @@ -1326,7 +1327,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; - /// let word = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic()) + /// let word = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic(), "alphabetic") /// .repeated() /// .at_least(1) /// .collect::(); @@ -1426,7 +1427,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ``` /// # use chumsky::{prelude::*, error::Simple}; /// let num = any::<_, extra::Err>>() - /// .filter(|c: &char| c.is_ascii_digit()) + /// .filter(|c: &char| c.is_ascii_digit(), "digit") /// .repeated() /// .at_least(1) /// .collect::() @@ -2302,7 +2303,7 @@ where /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; - /// let word = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic()) // This parser produces an output of `char` + /// let word = any::<_, extra::Err>>().filter(|c: &char| c.is_alphabetic(), "alphabetic") // This parser produces an output of `char` /// .repeated() // This parser is iterable (i.e: implements `IterParser`) /// .collect::(); // We collect the `char`s into a `String` /// @@ -2332,7 +2333,7 @@ where /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; - /// let three_digit = any::<_, extra::Err>>().filter(|c: &char| c.is_numeric()) + /// let three_digit = any::<_, extra::Err>>().filter(|c: &char| c.is_numeric(), "digit") /// .repeated() /// .collect_exactly::<[_; 3]>(); /// @@ -2816,14 +2817,14 @@ mod tests { fn parser<'a>() -> impl Parser<'a, WithContext, [(Span, Token<'a>); 6]> { let ident = any() - .filter(|c: &char| c.is_alphanumeric()) + .filter(|c: &char| c.is_alphanumeric(), "alphanumeric") .repeated() .at_least(1) .to_slice() .map(Token::Ident); let string = just('"') - .then(any().filter(|c: &char| *c != '"').repeated()) + .then(any().filter(|c: &char| *c != '"', "non-quote").repeated()) .then(just('"')) .to_slice() .map(Token::String); @@ -2868,14 +2869,14 @@ mod tests { fn parser<'a, F: Fn(SimpleSpan) -> Span<'a> + 'a>( ) -> impl Parser<'a, MappedSpan, &'a str, F>, [(Span<'a>, Token<'a>); 6]> { let ident = any() - .filter(|c: &char| c.is_alphanumeric()) + .filter(|c: &char| c.is_alphanumeric(), "alphanumeric") .repeated() .at_least(1) .to_slice() .map(Token::Ident); let string = just('"') - .then(any().filter(|c: &char| *c != '"').repeated()) + .then(any().filter(|c: &char| *c != '"', "non-quote").repeated()) .then(just('"')) .to_slice() .map(Token::String); @@ -2915,7 +2916,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .at_most(3) @@ -2953,19 +2954,19 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, (&'a str, u64, char)> { group(( any() - .filter(|c: &char| c.is_ascii_alphabetic()) + .filter(|c: &char| c.is_ascii_alphabetic(), "alphabetic") .repeated() .at_least(1) .to_slice() .padded(), any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .to_slice() .map(|s: &str| s.parse::().unwrap()) .padded(), - any().filter(|c: &char| !c.is_whitespace()).padded(), + any().filter(|c: &char| !c.is_whitespace(), "non-whitespace").padded(), )) } @@ -3032,7 +3033,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, String> { recursive(|expr| { let atom = any() - .filter(|c: &char| c.is_alphabetic()) + .filter(|c: &char| c.is_alphabetic(), "alphabetic") .repeated() .at_least(1) .collect() @@ -3062,7 +3063,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, String> { recursive(|expr| { let atom = any() - .filter(|c: &char| c.is_alphabetic()) + .filter(|c: &char| c.is_alphabetic(), "alphabetic") .repeated() .at_least(1) .collect(); @@ -3092,7 +3093,7 @@ mod tests { // fn debug_assert_left_recursive() { // recursive(|expr| { // let atom = any::<&str, extra::Default>() - // .filter(|c: &char| c.is_alphabetic()) + // .filter(|c: &char| c.is_alphabetic(), "alphabetic") // .repeated() // .at_least(1) // .collect(); @@ -3210,7 +3211,7 @@ mod tests { let mut expr = Recursive::declare(); expr.define({ let atom = any::<&str, extra::Default>() - .filter(|c: &char| c.is_alphabetic()) + .filter(|c: &char| c.is_alphabetic(), "alphabetic") .repeated() .at_least(1) .collect(); @@ -3241,7 +3242,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { Arc::new( any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .at_most(3) @@ -3274,7 +3275,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { Box::new( any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .at_most(3) @@ -3309,7 +3310,7 @@ mod tests { fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { Rc::new( any() - .filter(|c: &char| c.is_ascii_digit()) + .filter(|c: &char| c.is_ascii_digit(), "digit") .repeated() .at_least(1) .at_most(3) diff --git a/src/text.rs b/src/text.rs index 91d32179..ddd8c582 100644 --- a/src/text.rs +++ b/src/text.rs @@ -10,6 +10,45 @@ use crate::prelude::*; use super::*; +/// A label referring to some text pattern. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum TextLabel { + /// Any whitespace character. + Whitespace, + /// Any inline whitespace character. + InlineWhitespace, + /// Any newline sequence. + Newline, + /// Any digit. + Digit, + /// Any non-zero digit. + NonZeroDigit, + /// Any alphabetic or underscore character. + AlphabeticOrUnderscore, + /// Any alphanumeric or underscore character. + AlphanumericOrUnderscore, + /// Any unicode XID_Start character. + IdentStart, + /// Any unicode XID_Continue character. + IdentContinue, +} + +impl fmt::Display for TextLabel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Whitespace => write!(f, "whitespace"), + Self::InlineWhitespace => write!(f, "inline whitespace"), + Self::Newline => write!(f, "newline"), + Self::Digit => write!(f, "digit"), + Self::NonZeroDigit => write!(f, "non-zero digit"), + Self::AlphabeticOrUnderscore => write!(f, "alphabetic or underscore"), + Self::AlphanumericOrUnderscore => write!(f, "alphanumeric or underscore"), + Self::IdentStart => write!(f, "identifier start"), + Self::IdentContinue => write!(f, "identifier continue"), + } + } +} + /// A trait implemented by textual character types (currently, [`u8`] and [`char`]). /// /// This trait is currently sealed to minimize the impact of breaking changes. If you find a type that you think should @@ -169,9 +208,10 @@ pub fn whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: ParserExt ) -> Repeated + Copy + Clone, (), I, E> where I::Token: Char, + E::Error: LabelError<'a, I, TextLabel>, { any() - .filter(|c: &I::Token| c.is_whitespace()) + .filter(|c: &I::Token| c.is_whitespace(), TextLabel::Whitespace) .ignored() .repeated() } @@ -199,9 +239,10 @@ pub fn inline_whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: Pa ) -> Repeated + Copy + Clone, (), I, E> where I::Token: Char, + E::Error: LabelError<'a, I, TextLabel>, { any() - .filter(|c: &I::Token| c.is_inline_whitespace()) + .filter(|c: &I::Token| c.is_inline_whitespace(), TextLabel::InlineWhitespace) .ignored() .repeated() } @@ -241,6 +282,7 @@ pub fn newline<'a, I: ValueInput<'a>, E: ParserExtra<'a, I>>( ) -> impl Parser<'a, I, (), E> + Copy + Clone where I::Token: Char, + E::Error: LabelError<'a, I, TextLabel>, { just(I::Token::from_ascii(b'\r')) .or_not() @@ -255,7 +297,7 @@ where '\u{2029}', // Paragraph separator ] .contains(&c.to_char()) - })) + }, TextLabel::Newline)) .ignored() } @@ -286,16 +328,10 @@ where C: Char, I: ValueInput<'a> + Input<'a, Token = C>, E: ParserExtra<'a, I>, + E::Error: LabelError<'a, I, TextLabel>, { any() - // Use try_map over filter to get a better error on failure - .try_map(move |c: C, span| { - if c.is_digit(radix) { - Ok(c) - } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) - } - }) + .filter(move |c: &C| c.is_digit(radix), TextLabel::Digit) .repeated() .at_least(1) } @@ -333,18 +369,13 @@ where #[must_use] pub fn int<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( radix: u32, -) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { +) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone +where + E::Error: LabelError<'a, I, TextLabel>, +{ any() - // Use try_map over filter to get a better error on failure - .try_map(move |c: C, span| { - if c.is_digit(radix) && c != C::digit_zero() { - Ok(c) - } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) - } - }) - // This error never appears due to `repeated` so can use `filter` - .then(any().filter(move |c: &C| c.is_digit(radix)).repeated()) + .filter(move |c: &C| c.is_digit(radix) && *c != C::digit_zero(), TextLabel::NonZeroDigit) + .then(any().filter(move |c: &C| c.is_digit(radix), TextLabel::Digit).repeated()) .ignored() .or(just(C::digit_zero()).ignored()) .to_slice() @@ -363,20 +394,15 @@ pub mod ascii { /// characters or underscores. The regex pattern for it is `[a-zA-Z_][a-zA-Z0-9_]*`. #[must_use] pub fn ident<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( - ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { + ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone + where + E::Error: LabelError<'a, I, TextLabel>, + { any() - // Use try_map over filter to get a better error on failure - .try_map(|c: C, span| { - if c.to_char().is_ascii_alphabetic() || c.to_char() == '_' { - Ok(c) - } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) - } - }) + .filter(|c: &C| c.to_char().is_ascii_alphabetic() || c.to_char() == '_', TextLabel::AlphabeticOrUnderscore) .then( any() - // This error never appears due to `repeated` so can use `filter` - .filter(|c: &C| c.to_char().is_ascii_alphanumeric() || c.to_char() == '_') + .filter(|c: &C| c.to_char().is_ascii_alphanumeric() || c.to_char() == '_', TextLabel::AlphanumericOrUnderscore) .repeated(), ) .to_slice() @@ -413,6 +439,8 @@ pub mod ascii { ) -> impl Parser<'a, I, &'a C::Str, E> + Clone + 'a where C::Str: PartialEq, + E::Error: LabelError<'a, I, TextLabel>, + E::Error: LabelError<'a, I, Str>, { #[cfg(debug_assertions)] { @@ -427,13 +455,7 @@ pub mod ascii { } } ident() - .try_map(move |s: &C::Str, span| { - if s == keyword.as_ref() { - Ok(()) - } else { - Err(Error::expected_found(None, None, span)) - } - }) + .filter({ let keyword = keyword.clone(); move |s: &&C::Str| *s == keyword.as_ref() }, keyword) .to_slice() } } @@ -453,20 +475,15 @@ pub mod unicode { /// An identifier is defined as per "Default Identifiers" in [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/). #[must_use] pub fn ident<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( - ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { + ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone + where + E::Error: LabelError<'a, I, TextLabel>, + { any() - // Use try_map over filter to get a better error on failure - .try_map(|c: C, span| { - if c.is_ident_start() { - Ok(c) - } else { - Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) - } - }) + .filter(|c: &C| c.is_ident_start(), TextLabel::IdentStart) .then( any() - // This error never appears due to `repeated` so can use `filter` - .filter(|c: &C| c.is_ident_continue()) + .filter(|c: &C| c.is_ident_continue(), TextLabel::IdentContinue) .repeated(), ) .to_slice() @@ -503,6 +520,8 @@ pub mod unicode { ) -> impl Parser<'a, I, &'a C::Str, E> + Clone + 'a where C::Str: PartialEq, + E::Error: LabelError<'a, I, TextLabel>, + E::Error: LabelError<'a, I, Str>, { #[cfg(debug_assertions)] { @@ -521,13 +540,7 @@ pub mod unicode { } } ident() - .try_map(move |s: &C::Str, span| { - if s == keyword.as_ref() { - Ok(()) - } else { - Err(Error::expected_found(None, None, span)) - } - }) + .filter({ let keyword = keyword.clone(); move |s: &&C::Str| *s == keyword.as_ref() }, keyword) .to_slice() } }