blob: 4b7e38c1246705ce58d20a816c446af8347d7d95 [file] [log] [blame] [edit]
// Part of the Crubit project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//! A finite state automaton based approach for matching token streams.
//!
//! This is significantly faster than recursive backtracking, but the error
//! messages are not as good, so this is only used as a fast path.
//! When a failure message is required, we rerun the match with a recursive
//! backtracking implementation.
#![allow(clippy::result_unit_err)]
#![deny(missing_docs)]
use proc_macro2::Delimiter;
use proc_macro2::TokenStream;
use proc_macro2::TokenTree;
use std::rc::Rc;
/// Matches the pattern against the input, returning Ok if and only if the pattern matches.
///
/// Patterns follow the syntax described in token_stream_matchers.rs. For example,
/// `quote!{a ... b}` matches any sequence of tokens that contains an `a` somewhere before a `b`.
///
/// This doesn't return a failure message, because the failure messages it could generate are not
/// very good.
pub fn match_tokens_fast(input: &TokenStream, pattern: &TokenStream) -> Result<(), ()> {
Pattern::from_tokenstream(pattern.clone(), true).recursive_glob_match(input.clone())
}
/// Returns true if the token represents whitespace in Crubit's token stringification algorithm.
#[must_use]
pub fn is_whitespace_token(token: &TokenTree) -> bool {
matches!(token, TokenTree::Ident(id) if id == "__NEWLINE__" || id == "__SPACE__")
}
/// A pattern that can exist within a Group or at the top level.
#[derive(Clone, Debug)]
struct Pattern(Rc<[Atom]>);
#[derive(Clone, Debug)]
enum Atom {
/// Any sequence of token trees.
DotStar,
/// Any sequence of token trees -- implicitly added to the start/end of
/// the pattern.
ImplicitDotStar,
/// A single TokenTree
TokenTree(TokenTree),
/// A Group token tree (e.g. [], (), {}), with a pattern inside of it.
Group(Delimiter, Pattern),
}
impl Pattern {
fn from_tokenstream(tokens: TokenStream, is_root: bool) -> Self {
let mut pattern = vec![];
// Patterns implicitly start and end with a `...`.
if is_root {
pattern.push(Atom::ImplicitDotStar);
}
fn extract_triple_dot(pattern: &mut Vec<Atom>) {
if pattern.len() < 3 {
return;
}
for tt_pattern in &pattern[pattern.len() - 3..] {
let Atom::TokenTree(TokenTree::Punct(x)) = tt_pattern else {
return;
};
if x.as_char() != '.' {
return;
}
}
pattern.truncate(pattern.len() - 3);
pattern.push(Atom::DotStar);
}
for token in tokens {
match token {
TokenTree::Group(g) => pattern
.push(Atom::Group(g.delimiter(), Self::from_tokenstream(g.stream(), false))),
token_tree => pattern.push(Atom::TokenTree(token_tree)),
}
extract_triple_dot(&mut pattern);
}
if is_root {
pattern.push(Atom::ImplicitDotStar);
}
Pattern(pattern.into())
}
/// Finite state glob algorithm, as described in e.g. https://research.swtch.com/glob.
///
/// Note that patterns are globs, or close enough.
/// The pattern `{... a ...}` only matches a token tree of length 1 (a
/// Group), and whether it matches does not depend on context.
///
/// So the glob algorithm works here as well. We can take the leftmost
/// match, and give maximum freedom to subsequent match
/// attempts.
fn glob_match(&self, input: TokenStream) -> Result<(), ()> {
#[derive(Copy, Clone, Debug)]
struct State {
/// px/nextPx from https://research.swtch.com/glob
pattern: usize,
/// nx/nextNx
input: usize,
}
// TODO(jeanpierreda): Can we push this upwards into recursive_glob_match?
let input: Vec<TokenTree> =
input.into_iter().filter(|token| !is_whitespace_token(token)).collect();
let mut state = State { pattern: 0, input: 0 };
let mut backtrack = state;
let can_backtrack =
|backtrack: State| -> bool { backtrack.input > 0 && backtrack.input <= input.len() };
while state.pattern < self.0.len() || state.input < input.len() {
match self.0.get(state.pattern) {
Some(Atom::TokenTree(tt)) => {
if let Some(next) = input.get(state.input) {
match match_tree(next, tt) {
Ok(()) => {
state.pattern += 1;
state.input += 1;
continue;
}
// we won't backtrack, so let's report this error up.
Err(e) if !can_backtrack(backtrack) => return Err(e),
// fall back to restart logic below.
Err(_e) => {}
}
}
}
Some(Atom::Group(delimiter, nested_pattern)) => {
if let Some(TokenTree::Group(g)) = input.get(state.input) {
if g.delimiter() == *delimiter {
match nested_pattern.glob_match(g.stream()) {
Ok(()) => {
state.pattern += 1;
state.input += 1;
continue;
}
// we won't backtrack, so let's report this error up.
Err(e) if !can_backtrack(backtrack) => return Err(e),
// fall back to restart logic below.
Err(_e) => {}
}
}
}
}
Some(Atom::DotStar | Atom::ImplicitDotStar) => {
// Next time, restart here, but consuming an extra thing into the wildcard.
// (This guarantees forward progress!)
backtrack = state;
backtrack.input += 1;
state.pattern += 1;
continue;
}
None => {}
}
// restart, we failed.
if can_backtrack(backtrack) {
state = backtrack;
continue;
}
if state.pattern < self.0.len() {
// could not find self.0[state.pattern..]
return Err(());
} else {
// pattern did not match against input[state.input..].
return Err(());
}
}
Ok(())
}
/// Like glob_match, but searches into groups.
fn recursive_glob_match(&self, input: TokenStream) -> Result<(), ()> {
if let Ok(()) = self.glob_match(input.clone()) {
return Ok(());
}
let mut stack = vec![input];
while let Some(input) = stack.pop() {
for tt in input {
if let TokenTree::Group(g) = tt {
if let Ok(()) = self.glob_match(g.stream()) {
return Ok(());
}
stack.push(g.stream());
}
}
}
Err(())
}
}
/// Returns Ok if the two token trees are equal, otherwise returns Err.
fn match_tree(actual: &TokenTree, pattern: &TokenTree) -> Result<(), ()> {
// Note: this is only called if the `actual` is a Group, or neither are groups,
// so it should hopefully be pretty fast most of the time.
let actual_src = format!("{}", actual);
let pattern_src = format!("{}", pattern);
if actual_src == pattern_src {
Ok(())
} else {
Err(())
}
}