Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First attempt at filter labels #538

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ memoization = []
# Allows extending chumsky by writing your own parser implementations.
extension = []

# Enable support for parser labelling
label = []

# Make builtin parsers such as `Boxed` use atomic instead of non-atomic internals.
sync = ["spin"]

Expand All @@ -62,7 +59,7 @@ serde = ["dep:serde"]
# An alias of all features that work with the stable compiler.
# Do not use this feature, its removal is not considered a breaking change and its behaviour may change.
# If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list.
_test_stable = ["std", "spill-stack", "memoization", "extension", "label", "sync"]
_test_stable = ["std", "spill-stack", "memoization", "extension", "sync"]

[package.metadata.docs.rs]
all-features = true
Expand Down
4 changes: 2 additions & 2 deletions examples/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct Foo {

fn ident<'a, E: ParserExtra<'a, IoInput<File>>>() -> impl Parser<'a, IoInput<File>, String, E> {
any()
.filter(u8::is_ascii_alphabetic)
.filter(u8::is_ascii_alphabetic, "alphabetic")
.repeated()
.at_least(1)
.collect::<Vec<_>>()
Expand All @@ -22,7 +22,7 @@ fn ident<'a, E: ParserExtra<'a, IoInput<File>>>() -> impl Parser<'a, IoInput<Fil

fn digits<'a, E: ParserExtra<'a, IoInput<File>>>() -> impl Parser<'a, IoInput<File>, String, E> {
any()
.filter(u8::is_ascii_digit)
.filter(u8::is_ascii_digit, "digit")
.repeated()
.at_least(1)
.collect::<Vec<_>>()
Expand Down
4 changes: 2 additions & 2 deletions examples/zero-copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ enum Token<'a> {
// This parser is guaranteed to never allocate!
fn parser<'a>() -> impl Parser<'a, &'a str, [(SimpleSpan<usize>, Token<'a>); 6]> {
let ident = any()
.filter(|c: &char| c.is_alphanumeric())
.filter(|c: &char| c.is_alphanumeric(), "alphanumeric")
.repeated()
.at_least(1)
.to_slice()
.map(Token::Ident);

let string = just('"')
.then(any().filter(|c: &char| *c != '"').repeated())
.then(any().filter(|c: &char| *c != '"', "non-quote").repeated())
.then(just('"'))
.to_slice()
.map(Token::String);
Expand Down
16 changes: 11 additions & 5 deletions src/combinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,27 +223,31 @@ where
}

/// See [`Parser::filter`].
pub struct Filter<A, F> {
pub struct Filter<A, F, L> {
pub(crate) parser: A,
pub(crate) filter: F,
pub(crate) label: L,
}

impl<A: Copy, F: Copy> Copy for Filter<A, F> {}
impl<A: Clone, F: Clone> Clone for Filter<A, F> {
impl<A: Copy, F: Copy, L: Copy> Copy for Filter<A, F, L> {}
impl<A: Clone, F: Clone, L: Clone> Clone for Filter<A, F, L> {
fn clone(&self) -> Self {
Self {
parser: self.parser.clone(),
filter: self.filter.clone(),
label: self.label.clone(),
}
}
}

impl<'a, A, I, O, E, F> ParserSealed<'a, I, O, E> for Filter<A, F>
impl<'a, A, I, O, E, F, L> ParserSealed<'a, I, O, E> for Filter<A, F, L>
where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, O, E>,
F: Fn(&O) -> bool,
E::Error: LabelError<'a, I, L>,
L: Clone,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
Expand All @@ -253,7 +257,9 @@ where
Ok(M::bind(|| out))
} else {
let err_span = inp.span_since(before);
inp.add_alt(inp.offset().offset, None, None, err_span);
inp
.add_alt(inp.offset().offset, None, None, err_span)
.label_with(self.label.clone());
Err(())
}
})
Expand Down
2 changes: 1 addition & 1 deletion src/either.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ mod tests {

fn parser<'a>() -> impl Parser<'a, &'a str, Vec<u64>> {
any()
.filter(|c: &char| c.is_ascii_digit())
.filter(|c: &char| c.is_ascii_digit(), "digit")
.repeated()
.at_least(1)
.at_most(3)
Expand Down
81 changes: 66 additions & 15 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,18 @@ impl fmt::Display for EmptyErr {
}
}

/// Placeholder label support for [`EmptyErr`]: both hooks accept their arguments and ignore them.
impl<'a, I, L> LabelError<'a, I, L> for EmptyErr
where
    I: Input<'a>,
{
    /// Stub: the supplied label is dropped and the error is left untouched.
    #[inline]
    fn label_with(&mut self, _label: L) {}

    /// Stub: the supplied label and span are dropped and the error is left untouched.
    #[inline]
    fn in_context(&mut self, _label: L, _span: I::Span) {}
}

/// A very cheap error type that tracks only the error span. This type is most useful when you want fast parsing but do
/// not particularly care about the quality of error messages.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -169,6 +181,18 @@ where
}
}

/// Placeholder label support for [`Cheap`]: both hooks accept their arguments and ignore them.
impl<'a, I, L> LabelError<'a, I, L> for Cheap<I::Span>
where
    I: Input<'a>,
{
    /// Stub: the supplied label is dropped and the error is left untouched.
    #[inline]
    fn label_with(&mut self, _label: L) {}

    /// Stub: the supplied label and span are dropped and the error is left untouched.
    #[inline]
    fn in_context(&mut self, _label: L, _span: I::Span) {}
}

/// A simple error type that tracks the error span and found token. This type is most useful when you want fast parsing
/// but do not particularly care about the quality of error messages.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -228,10 +252,22 @@ where
}
}

/// Placeholder label support for [`Simple`]: both hooks accept their arguments and ignore them.
impl<'a, I, L> LabelError<'a, I, L> for Simple<'a, I::Token, I::Span>
where
    I: Input<'a>,
{
    /// Stub: the supplied label is dropped and the error is left untouched.
    #[inline]
    fn label_with(&mut self, _label: L) {}

    /// Stub: the supplied label and span are dropped and the error is left untouched.
    #[inline]
    fn in_context(&mut self, _label: L, _span: I::Span) {}
}

/// An expected pattern for a [`Rich`] error.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum RichPattern<'a, T, L = &'static str> {
pub enum RichPattern<'a, T, L = RichLabel> {
/// A specific token was expected.
Token(MaybeRef<'a, T>),
/// A labelled pattern was expected.
Expand Down Expand Up @@ -318,7 +354,7 @@ where
/// The reason for a [`Rich`] error.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum RichReason<'a, T, L = &'static str> {
pub enum RichReason<'a, T, L = RichLabel> {
/// An unexpected input was found
ExpectedFound {
/// The tokens expected
Expand Down Expand Up @@ -361,7 +397,6 @@ impl<'a, T, L> RichReason<'a, T, L> {
}
}

#[cfg(feature = "label")]
fn take_found(&mut self) -> Option<MaybeRef<'a, T>> {
match self {
RichReason::ExpectedFound { found, .. } => found.take(),
Expand Down Expand Up @@ -516,10 +551,9 @@ where
/// Please note that it uses a [`Vec`] to remember expected symbols. If you find this to be too slow, you can
/// implement [`Error`] for your own error type or use [`Simple`] instead.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Rich<'a, T, S = SimpleSpan<usize>, L = &'static str> {
pub struct Rich<'a, T, S = SimpleSpan<usize>, L = RichLabel> {
span: S,
reason: Box<RichReason<'a, T, L>>,
#[cfg(feature = "label")]
context: Vec<(L, S)>,
}

Expand Down Expand Up @@ -549,7 +583,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> {
Rich {
span,
reason: Box::new(RichReason::Custom(msg.to_string())),
#[cfg(feature = "label")]
context: Vec::new(),
}
}
Expand Down Expand Up @@ -578,7 +611,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> {
///
/// 'Context' here means parser patterns that the parser was in the process of parsing when the error occurred. To
/// add labelled contexts, see [`Parser::labelled`].
#[cfg(feature = "label")]
pub fn contexts(&self) -> impl Iterator<Item = (&L, &S)> {
self.context.iter().map(|(l, s)| (l, s))
}
Expand Down Expand Up @@ -622,7 +654,6 @@ impl<'a, T, S, L> Rich<'a, T, S, L> {
Rich {
span: self.span,
reason: Box::new(self.reason.map_token(f)),
#[cfg(feature = "label")]
context: self.context,
}
}
Expand Down Expand Up @@ -651,7 +682,6 @@ where
.collect(),
found,
}),
#[cfg(feature = "label")]
context: Vec::new(),
}
}
Expand All @@ -662,7 +692,6 @@ where
Self {
span: self.span,
reason: Box::new(new_reason),
#[cfg(feature = "label")]
context: self.context, // TODO: Merge contexts
}
}
Expand Down Expand Up @@ -746,20 +775,41 @@ where
});
}
}
#[cfg(feature = "label")]
self.context.clear();
self
}
}

#[cfg(feature = "label")]
impl<'a, I: Input<'a>, L> LabelError<'a, I, L> for Rich<'a, I::Token, I::Span, L>
/// A rich label representing some input pattern.
///
/// This is the default label type parameter (`L`) of [`Rich`], [`RichPattern`] and [`RichReason`].
/// A value can be produced from either a [`text::TextLabel`] or a `&'static str` via `From`/`Into`.
//
// NOTE(review): only `Clone`, `Debug`, `PartialEq` and `Eq` are derived here, while `RichPattern` and
// `RichReason` also derive `Hash`, `PartialOrd` and `Ord` (and `Rich` derives `Hash`). Derived impls are
// bounded on `L`, so those traits would presumably be unavailable for the default instantiation -
// confirm whether that is intended and whether `text::TextLabel` could support the extra derives.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum RichLabel {
/// A text-specific label, wrapping a [`text::TextLabel`].
Text(text::TextLabel),
/// A label named by a static string.
Named(&'static str),
}

impl From<text::TextLabel> for RichLabel { fn from(l: text::TextLabel) -> Self { Self::Text(l) } }
impl From<&'static str> for RichLabel { fn from(s: &'static str) -> Self { Self::Named(s) } }

impl fmt::Display for RichLabel {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Text(t) => t.fmt(f),
Self::Named(n) => write!(f, "{n}"),
}
}
}

impl<'a, I: Input<'a>, L, L2> LabelError<'a, I, L2> for Rich<'a, I::Token, I::Span, L>
where
I::Token: PartialEq,
L2: Into<L>,
L: PartialEq,
{
#[inline]
fn label_with(&mut self, label: L) {
fn label_with(&mut self, label: L2) {
let label = label.into();
// Opportunistically attempt to reuse allocations if we can
match &mut *self.reason {
RichReason::ExpectedFound { expected, found: _ } => {
Expand All @@ -776,7 +826,8 @@ where
}

#[inline]
fn in_context(&mut self, label: L, span: I::Span) {
fn in_context(&mut self, label: L2, span: I::Span) {
let label = label.into();
if self.context.iter().all(|(l, _)| l != &label) {
self.context.push((label, span));
}
Expand Down
7 changes: 4 additions & 3 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1458,9 +1458,10 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
expected: Exp,
found: Option<MaybeRef<'a, I::Token>>,
span: I::Span,
) {
) -> &mut E::Error {
// Prioritize errors before choosing whether to generate the alt (avoids unnecessary error creation)
self.errors.alt = Some(match self.errors.alt.take() {
let alt = self.errors.alt.take();
&mut self.errors.alt.insert(match alt {
Some(alt) => match alt.pos.into().cmp(&at.into()) {
Ordering::Equal => {
Located::at(alt.pos, alt.err.merge_expected_found(expected, found, span))
Expand All @@ -1471,7 +1472,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
}
},
None => Located::at(at, Error::expected_found(expected, found, span)),
});
}).err
}

#[inline]
Expand Down
Loading