Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions shared/tree-sitter-extractor/src/extractor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
}

/// Extracts the source file at `path`, which is assumed to be canonicalized.
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
/// through the supplied yeast `Runner` before TRAP extraction. Building the
/// `Runner` (which parses YAML and constructs the schema) is the caller's
/// responsibility, allowing it to be done once and shared across files.
/// When `desugarer` is `Some`, the parsed tree is first transformed
/// through the supplied yeast desugarer before TRAP extraction. Building
/// the desugarer (which parses YAML and constructs the schema) is the
/// caller's responsibility, allowing it to be done once and shared across
/// files.
#[allow(clippy::too_many_arguments)]
pub fn extract(
language: &Language,
Expand All @@ -295,7 +296,7 @@ pub fn extract(
path: &Path,
source: &[u8],
ranges: &[Range],
yeast_runner: Option<&yeast::Runner<'_>>,
desugarer: Option<&dyn yeast::Desugarer>,
) {
let path_str = file_paths::normalize_and_transform_path(path, transformer);
let source_root = std::env::current_dir()
Expand Down Expand Up @@ -328,8 +329,8 @@ pub fn extract(
schema,
);

if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
if let Some(desugarer) = desugarer {
let ast = desugarer
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);
Expand Down
48 changes: 19 additions & 29 deletions shared/tree-sitter-extractor/src/extractor/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@ pub struct LanguageSpec {
pub prefix: &'static str,
pub ts_language: tree_sitter::Language,
pub node_types: &'static str,
/// Optional yeast desugaring configuration. When set, the parsed
/// tree is rewritten through yeast before TRAP extraction. The
/// config's `output_node_types_yaml` (if set) provides the schema
/// used both at runtime (for the rewriter) and for TRAP validation.
pub desugar: Option<yeast::DesugaringConfig>,
/// Optional desugarer. When set, the parsed tree is rewritten through
/// the desugarer before TRAP extraction. The desugarer's
/// `output_node_types_yaml()` (when it returns `Some`) provides the schema
/// used both at runtime (for the rewriter) and for TRAP validation.
///
/// Stored as `Box<dyn yeast::Desugarer>` so that the shared extractor is
/// agnostic to the user-defined context type the desugarer uses internally.
pub desugar: Option<Box<dyn yeast::Desugarer>>,
pub file_globs: Vec<String>,
}

Expand Down Expand Up @@ -91,35 +94,22 @@ impl Extractor {
.collect();

let mut schemas = vec![];
let mut yeast_runners = Vec::new();
for lang in &self.languages {
let effective_node_types: String =
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);

// Build the yeast runner once per language so the YAML schema
// isn't re-parsed for every file.
let yeast_runner = lang
let effective_node_types: String = match lang
.desugar
.as_ref()
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
.transpose()
.map_err(|e| {
.and_then(|d| d.output_node_types_yaml())
{
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to build desugaring runner for {}: {e}",
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?;
yeast_runners.push(yeast_runner);
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);
}

// Construct a single globset containing all language globs,
Expand Down Expand Up @@ -194,7 +184,7 @@ impl Extractor {
&path,
&source,
&[],
yeast_runners[i].as_ref(),
lang.desugar.as_deref(),
);
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;
Expand Down
34 changes: 34 additions & 0 deletions shared/yeast-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
Err(err) => err.to_compile_error().into(),
}
}

/// Define a desugaring rule whose transform is a hand-written Rust block.
///
/// Use `manual_rule!` when the transform needs control over capture
/// translation timing — for example, when an outer rule needs to set
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
/// translation reaches inner rules that read that state.
///
/// ```text
/// manual_rule!(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the name 👍

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think I just had a good idea (dangerous, I know). What if instead of having a separate macro that auto-translates nothing (and one that auto-translates everything), we add a way to annotate a capture as "raw" or "untranslated"?

Then we could -- at the cost of relatively little syntax -- get the best of both worlds. Non-raw captures get auto-translated before the body of the rule target is executed, everything else is up to the user.

As for the syntax, I'm thinking either @!foo or perhaps @@foo -- something that stands out sufficiently to remind the user that there's a raw capture that needs to be dealt with.

Also, in that case I would auto-wrap the entire body of (the result of expanding) manual_rule in Ok to avoid having to write Ok(...) at the end. This is what rule does currently anyway, and it wouldn't change the expressivity (since we need to return Result anyway).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I worry about the syntax becoming a soup of symbols tbh. Can we keep manual_rule as it is for now?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. I'll postpone those changes for a different PR.

/// (query_pattern field: (_) @name)
/// {
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
/// // (`name: NodeRef`, etc.) are bound from the query.
/// let translated = ctx.translate(name)?;
/// Ok(translated)
/// }
/// )
/// ```
///
/// Differences from [`rule!`]:
/// - Captures are **not** auto-translated before the body runs; they
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
/// tree template, and no automatic `Ok(...)` wrapping (write it yourself).
#[proc_macro]
pub fn manual_rule(input: TokenStream) -> TokenStream {
let input2: TokenStream2 = input.into();
match parse::parse_manual_rule_top(input2) {
Ok(output) => output.into(),
Err(err) => err.to_compile_error().into(),
}
}
Loading
Loading