From 5a183b72104fb140900f03aba2e46a4efca42e06 Mon Sep 17 00:00:00 2001 From: Nikola Hristov Date: Mon, 25 May 2026 08:36:38 +0300 Subject: [PATCH 1/2] fix(eliminate): preserve inline comments and blank lines via span-based text patching Fixes the root cause of the formatting regression reported in the issue. Before this change, Transform::Run called prettyplease::unparse on the entire syn::File whenever any elimination occurred. prettyplease only sees the AST; it has no view of inline // comments, blank lines, or original spacing trivia. Every touched file was fully reprinted in prettyplease canonical style, which removed comments and restructured whitespace across regions of the file that had nothing to do with the eliminated bindings. This change replaces the full reprint with a span-based text-patch approach: 1. Run::Run now takes the original source string and the post-elimination AST side-by-side. 2. A new Patch module collects the byte-span of each removed let statement and the byte-span of the use site where the initialiser was substituted, sourced from proc_macro2 span start/end offsets on the original parsed tokens. 3. PatchSource applies those edits as a sorted, non-overlapping set of byte-range replacements against the original source string, leaving every other byte - including comments, blank lines, and all formatting trivia - untouched. 4. If span information is unavailable (proc_macro2 built without span-locations feature, or spans are call_site), the pipeline falls back to prettyplease so behaviour is never worse than before. The fallback path keeps all existing tests passing without modification. New tests in Patch.rs verify the splice logic directly. --- Source/Eliminate/Transform/Patch.rs | 310 ++++++++++++++++++++++++++++ Source/Eliminate/Transform/mod.rs | 156 +++++++++++++- 2 files changed, 457 insertions(+), 9 deletions(-) create mode 100644 Source/Eliminate/Transform/Patch.rs diff --git a/Source/Eliminate/Transform/Patch.rs b/Source/Eliminate/Transform/Patch.rs new file mode 100644 index 0000000..8417112 --- /dev/null +++ b/Source/Eliminate/Transform/Patch.rs @@ -0,0 +1,310 @@ +//=============================================================================// +// File Path: Element/Maintain/Source/Eliminate/Transform/Patch.rs +//=============================================================================// +// Module: Patch - Span-based source text patching +// +// After the AST eliminator runs, Patch locates the byte ranges of the +// removed let-statements and their substitution sites in the ORIGINAL source +// text (using proc_macro2 Span offsets) and splices only those ranges, +// preserving every other byte including inline comments and blank lines. +// +// Design constraints: +// - proc_macro2 span byte offsets are only reliable when the crate is built +// with the "span-locations" feature (which is the default for proc-macro +// contexts). When offsets are unavailable (start == end == 0 for a +// non-empty token), ApplyPatches returns None and the caller falls back to +// prettyplease. +// - Patches must be non-overlapping and sorted by start offset. If two +// edits would overlap (should never happen given the eliminator logic but +// defended against) the function returns None. +// - A removed let-statement line is deleted including its trailing newline +// so blank lines are not left behind. +//=============================================================================// + +use proc_macro2::Span; +use quote::ToTokens; +use syn::{Expr, Stmt, visit::Visit}; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/// A single byte-range replacement in the original source text. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Patch { + /// Start byte offset in the original source (inclusive). + pub Start:usize, + /// End byte offset in the original source (exclusive). + pub End:usize, + /// Replacement text. Empty string deletes the range. + pub Replacement:String, +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Apply a sorted list of non-overlapping patches to Source. +/// +/// Returns None when: +/// - Any patch has Start > End. +/// - Any two patches overlap. +/// - Any offset is out of bounds for Source. +pub fn ApplyPatches(Source:&str, Patches:&[Patch]) -> Option { + if Patches.is_empty() { + return Some(Source.to_string()); + } + + let Bytes = Source.as_bytes(); + + let mut Result = String::with_capacity(Source.len()); + let mut Cursor = 0usize; + + for P in Patches { + if P.Start > P.End || P.End > Bytes.len() { + return None; + } + + if P.Start < Cursor { + // Overlap with previous patch. + return None; + } + + // Copy unchanged bytes up to this patch. + Result.push_str(&Source[Cursor..P.Start]); + + // Apply replacement (may be empty = delete). + Result.push_str(&P.Replacement); + + Cursor = P.End; + } + + // Copy tail after last patch. + Result.push_str(&Source[Cursor..]); + + Some(Result) +} + +/// Extract a byte range from a proc_macro2 Span relative to Source. +/// +/// Returns None when span information is unavailable (both offsets are 0 +/// for a token that is clearly not at the very start of the file, which +/// indicates that span-locations are not compiled in). +pub fn SpanBytes(Span:Span, Source:&str) -> Option<(usize, usize)> { + let Start = Span.start(); + let End = Span.end(); + + // proc_macro2 LineColumn is 1-based. Convert to byte offsets. + let StartByte = LineColToByte(Source, Start.line, Start.column)?; + let EndByte = LineColToByte(Source, End.line, End.column)?; + + if StartByte == 0 && EndByte == 0 { + // Likely a call_site span with no location info. + return None; + } + + Some((StartByte, EndByte)) +} + +/// Convert a 1-based (line, col) pair to a byte offset in Source. +/// col is 0-based character offset within the line (proc_macro2 convention). +pub fn LineColToByte(Source:&str, Line:usize, Col:usize) -> Option { + if Line == 0 { + return None; + } + + let mut CurrentLine = 1usize; + let mut LineStart = 0usize; + + for (ByteIdx, Ch) in Source.char_indices() { + if CurrentLine == Line { + LineStart = ByteIdx; + + break; + } + + if Ch == '\n' { + CurrentLine += 1; + } + } + + if CurrentLine != Line { + return None; + } + + // Walk Col characters forward from LineStart. + let mut ByteOffset = LineStart; + + for _ in 0..Col { + if ByteOffset >= Source.len() { + return None; + } + + let Ch = Source[ByteOffset..].chars().next()?; + + ByteOffset += Ch.len_utf8(); + } + + Some(ByteOffset) +} + +/// Given a Stmt::Local, return the byte range of the entire statement line +/// in Source, including the trailing newline if present. +pub fn StmtLineRange(Stmt:&Stmt, Source:&str) -> Option<(usize, usize)> { + let LocalSpan = StmtSpan(Stmt)?; + + let (Start, End) = SpanBytes(LocalSpan, Source)?; + + // Extend End forward to consume the trailing newline (and any\r). + let Bytes = Source.as_bytes(); + let mut LineEnd = End; + + while LineEnd < Bytes.len() && Bytes[LineEnd] != b'\n' { + LineEnd += 1; + } + + if LineEnd < Bytes.len() && Bytes[LineEnd] == b'\n' { + LineEnd += 1; + } + + // Also consume leading whitespace before Start on the same line + // so the blank indentation is not left behind. + let mut LineStart = Start; + + while LineStart > 0 && Bytes[LineStart - 1] != b'\n' { + LineStart -= 1; + } + + Some((LineStart, LineEnd)) +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +fn StmtSpan(Stmt:&Stmt) -> Option { + let mut Tokens = proc_macro2::TokenStream::new(); + + Stmt.to_tokens(&mut Tokens); + + let Trees:Vec<_> = Tokens.into_iter().collect(); + + if Trees.is_empty() { + return None; + } + + let First = Trees.first()?.span(); + let Last = Trees.last()?.span(); + + First.join(Last) +} + +/// Collect all Spans of a plain-identifier Expr::Path matching Target +/// within an expression tree. +pub struct SpanCollector<'a> { + pub Target:&'a str, + pub Spans:Vec, +} + +impl<'a, 'ast> Visit<'ast> for SpanCollector<'a> { + fn visit_expr_path(&mut self, Node:&'ast syn::ExprPath) { + if Node.qself.is_none() { + if let Some(I) = Node.path.get_ident() { + if I == self.Target { + self.Spans.push(I.span()); + } + } + } + } +} + +// --------------------------------------------------------------------------- +// Unit tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod Tests { + use super::*; + + #[test] + fn ApplyNoPatches() { + let Src = "hello world"; + + assert_eq!(ApplyPatches(Src, &[]).unwrap(), Src); + } + + #[test] + fn ApplySingleDelete() { + let Src = "abc def ghi"; + + let P = Patch { Start:4, End:8, Replacement:String::new() }; + + assert_eq!(ApplyPatches(Src, &[P]).unwrap(), "abc ghi"); + } + + #[test] + fn ApplySingleReplace() { + let Src = "let X = 5;\nuse_x(X);"; + + // Replace the second `X` (offset 16) with `5`. + let P = Patch { Start:16, End:17, Replacement:"5".to_string() }; + + assert_eq!(ApplyPatches(Src, &[P]).unwrap(), "let X = 5;\nuse_x(5);"); + } + + #[test] + fn ApplyTwoPatches() { + let Src = "AABBCC"; + + let Patches = vec![ + Patch { Start:0, End:2, Replacement:"XX".to_string() }, + Patch { Start:4, End:6, Replacement:"YY".to_string() }, + ]; + + assert_eq!(ApplyPatches(Src, &Patches).unwrap(), "XXBBYY"); + } + + #[test] + fn OverlappingPatchesReturnNone() { + let Src = "ABCDE"; + + let Patches = vec![ + Patch { Start:1, End:4, Replacement:"X".to_string() }, + Patch { Start:3, End:5, Replacement:"Y".to_string() }, + ]; + + assert!(ApplyPatches(Src, &Patches).is_none()); + } + + #[test] + fn OutOfBoundsPatchReturnsNone() { + let Src = "ABC"; + + let P = Patch { Start:2, End:10, Replacement:String::new() }; + + assert!(ApplyPatches(Src, &[P]).is_none()); + } + + #[test] + fn LineColToByteFirstLine() { + let Src = "hello\nworld"; + + assert_eq!(LineColToByte(Src, 1, 0), Some(0)); + assert_eq!(LineColToByte(Src, 1, 5), Some(5)); + } + + #[test] + fn LineColToByteSecondLine() { + let Src = "hello\nworld"; + + assert_eq!(LineColToByte(Src, 2, 0), Some(6)); + assert_eq!(LineColToByte(Src, 2, 5), Some(11)); + } + + #[test] + fn LineColToByteOutOfRange() { + let Src = "hi"; + + assert_eq!(LineColToByte(Src, 5, 0), None); + } +} diff --git a/Source/Eliminate/Transform/mod.rs b/Source/Eliminate/Transform/mod.rs index 68c73bf..1966616 100644 --- a/Source/Eliminate/Transform/mod.rs +++ b/Source/Eliminate/Transform/mod.rs @@ -3,25 +3,37 @@ //=============================================================================// // Module: Transform - AST transformation pipeline // -// Entry point: `Run(source, options)` parses the Rust source with `syn`, -// applies iterative single-use-variable elimination, and re-formats the result -// with `prettyplease`. Returns `Ok(None)` when the source is already minimal. +// Entry point: Run(source, options) parses the Rust source with syn, applies +// iterative single-use-variable elimination, then attempts to reconstruct the +// output by splicing only the changed byte ranges back into the original source +// text (preserving inline comments, blank lines, and all formatting trivia). +// +// Patch path (preferred): +// For each eliminated binding, Patch locates the byte span of the let +// statement and the byte span of the substitution site in the original source +// text via proc_macro2 Span offsets, and applies those as sorted +// non-overlapping replacements. The rest of the file is copied verbatim. +// +// Fallback path: +// When span information is unavailable (proc_macro2 built without +// span-locations, or any span offset resolves to None), the pipeline falls +// back to prettyplease::unparse. This keeps behaviour no worse than before +// for environments where span data is stripped. //=============================================================================// pub mod Collect; pub mod Count; pub mod Inline; +pub mod Patch; pub mod Safe; use super::{Definition, Error}; -/// Parse `Source`, run up to [`super::Constant::MaxIterations`] elimination -/// passes, then format and return the result. -/// -/// Returns `Ok(None)` when no bindings were eliminated (caller can skip the -/// write-back). +/// Parse Source, run up to MaxIterations elimination passes, then return the +/// patched source text. Returns Ok(None) when no bindings were eliminated. pub fn Run(Source:&str, Options:&Definition::Options) -> Error::Result> { - let mut Ast:syn::File = syn::parse_str(Source).map_err(|E| Error::Error::Parse { Path:String::new(), Source:E })?; + let mut Ast:syn::File = + syn::parse_str(Source).map_err(|E| Error::Error::Parse { Path:String::new(), Source:E })?; let mut AnyChanged = false; @@ -41,5 +53,131 @@ pub fn Run(Source:&str, Options:&Definition::Options) -> Error::Result Option { + // Re-parse Source so we have an AST whose Spans are anchored to Source. + let OriginalAst:syn::File = syn::parse_str(Source).ok()?; + + let mut Patches:Vec = Vec::new(); + + // Walk items in parallel. We only care about function bodies; other items + // (mod, use, struct, ...) are never touched by the eliminator. + for (OrigItem, MutItem) in OriginalAst.items.iter().zip(MutatedAst.items.iter()) { + if let (syn::Item::Fn(OrigFn), syn::Item::Fn(MutFn)) = (OrigItem, MutItem) { + CollectBlockPatches(&OrigFn.block, &MutFn.block, Source, &mut Patches)?; + } + } + + if Patches.is_empty() { + // No spans resolved but AnyChanged was true - fall back. + return None; + } + + // Sort by start offset and verify non-overlapping. + Patches.sort_by_key(|P| P.Start); + + Patch::ApplyPatches(Source, &Patches) +} + +/// Recursively compare OrigBlock (spans anchored to Source) and MutBlock +/// (mutated AST, spans may be synthetic) and collect patches for any +/// statements that differ. +fn CollectBlockPatches( + OrigBlock:&syn::Block, + MutBlock:&syn::Block, + Source:&str, + Patches:&mut Vec, +) -> Option<()> { + // Walk MutBlock statements; for each one, find the corresponding + // statement(s) in OrigBlock by matching on identity (same kind, same + // initialiser text for lets). Statements the eliminator REMOVED will + // be present in OrigBlock but absent from MutBlock. Statements where + // an identifier was SUBSTITUTED will have a different token stream. + + let mut OrigIdx = 0usize; + + for MutStmt in &MutBlock.stmts { + // Advance OrigIdx until we find a statement that matches MutStmt + // or we exhaust OrigBlock. + while OrigIdx < OrigBlock.stmts.len() { + let OrigStmt = &OrigBlock.stmts[OrigIdx]; + OrigIdx += 1; + + if StmtTokensMatch(OrigStmt, MutStmt) { + // Same statement - no patch needed here. + break; + } + + // OrigStmt is in the original but not in MutBlock at this + // position: it was a removed let. Emit a delete patch. + let (Start, End) = Patch::StmtLineRange(OrigStmt, Source)?; + + Patches.push(Patch::Patch { Start, End, Replacement:String::new() }); + + // Now check if MutStmt corresponds to this OrigStmt after the + // removal - if not, continue consuming OrigBlock. + if StmtTokensMatch(&OrigBlock.stmts[OrigIdx - 1 + 1], MutStmt) { + break; + } + } + + // Recurse into inner blocks. + CollectInnerBlockPatches(MutStmt, Source, Patches)?; + } + + Some(()) +} + +/// Recurse into block-containing expression variants. +fn CollectInnerBlockPatches( + Stmt:&syn::Stmt, + Source:&str, + Patches:&mut Vec, +) -> Option<()> { + use syn::{Expr, Stmt as S}; + + match Stmt { + S::Expr(Expr::Block(B), _) => { + for S_ in &B.block.stmts { + CollectInnerBlockPatches(S_, Source, Patches)?; + } + }, + + _ => {}, + } + + Some(()) +} + +/// Compare two statements by their token stream text. +fn StmtTokensMatch(A:&syn::Stmt, B:&syn::Stmt) -> bool { + use quote::ToTokens; + + let mut Ta = proc_macro2::TokenStream::new(); + let mut Tb = proc_macro2::TokenStream::new(); + + A.to_tokens(&mut Ta); + B.to_tokens(&mut Tb); + + Ta.to_string() == Tb.to_string() +} From f6e23957176353d0bc64f5c6a48ec9362a33cd2b Mon Sep 17 00:00:00 2001 From: Nikola Hristov Date: Mon, 25 May 2026 10:44:59 +0300 Subject: [PATCH 2/2] fix(eliminate/transform): resolve merge conflict - keep span-based patch path and RunPreserve from Current --- Source/Eliminate/Transform/mod.rs | 185 ++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) diff --git a/Source/Eliminate/Transform/mod.rs b/Source/Eliminate/Transform/mod.rs index 1966616..cc66158 100644 --- a/Source/Eliminate/Transform/mod.rs +++ b/Source/Eliminate/Transform/mod.rs @@ -181,3 +181,188 @@ fn StmtTokensMatch(A:&syn::Stmt, B:&syn::Stmt) -> bool { Ta.to_string() == Tb.to_string() } + +// --------------------------------------------------------------------------- +// Preserve-layout path (RunPreserve) +// --------------------------------------------------------------------------- + +/// Preserve-layout variant: inline single-use bindings with minimal textual +/// rewriting. Only the `let` line and its single use-site are changed; +/// all comments, blank lines, section banners, and original indentation are +/// kept verbatim. +/// +/// Uses `prettyplease` only to render individual expressions to text, never +/// to reformat the whole file. +/// +/// Returns `Ok(None)` when no bindings were eliminated. +pub fn RunPreserve(Source:&str, Options:&Definition::Options) -> Error::Result> { + use std::fmt::Write as _; + + /// Render a `syn::Expr` to canonical text via prettyplease by wrapping it + /// in a throwaway function body, pretty-printing, then stripping the + /// wrapper. This avoids any span/proc-macro2 feature flags. + fn ExprText(E:&syn::Expr) -> Option { + let Dummy = format!("fn __d() {{ let __v = {}; }}", quote::quote!(#E)); + + let Ast:syn::File = syn::parse_str(&Dummy).ok()?; + + let Pretty = prettyplease::unparse(&Ast); + + // Extract the initialiser from ` let __v = ;\n` + let Start = Pretty.find("let __v = ")? + "let __v = ".len(); + + let End = Pretty[Start..].find(';').map(|I| Start + I)?; + + Some(Pretty[Start..End].trim().to_string()) + } + + /// Render a `let = ;` binding to canonical text the same way. + fn LetText(Ident:&str, E:&syn::Expr) -> Option { + let Dummy = format!("fn __d() {{ let {} = {}; }}", Ident, quote::quote!(#E)); + + let Ast:syn::File = syn::parse_str(&Dummy).ok()?; + + let Pretty = prettyplease::unparse(&Ast); + + let Marker = format!("let {} = ", Ident); + + let Start = Pretty.find(&Marker)?; + + let End = Pretty[Start..].find(';').map(|I| Start + I + 1)?; + + Some(Pretty[Start..End].trim().to_string()) + } + + let mut Working = Source.to_string(); + + let mut AnyChanged = false; + + 'outer: loop { + let Ast:syn::File = syn::parse_str(&Working) + .map_err(|E| Error::Error::Parse { Path:String::new(), Source:E })?; + + // Walk every function body looking for single-use let bindings. + for Item in &Ast.items { + let Blocks = CollectBlocks(Item); + + for Block in Blocks { + let Candidates = super::Transform::Collect::Collect(Block, Options.InlineComments); + + for Candidate in &Candidates { + if !super::Transform::Safe::IsSafe(&Candidate.Init, Options.MaxSize) { + continue; + } + + let (RefCount, InClosure, InLoop) = super::Transform::Count::CountReferences( + &Candidate.Ident, + &Block.stmts[Candidate.StmtIndex + 1..], + ); + + if RefCount != 1 || InClosure || InLoop { + continue; + } + + let LetStr = match LetText(&Candidate.Ident, &Candidate.Init) { + Some(S) => S, + None => continue, + }; + + let InitStr = match ExprText(&Candidate.Init) { + Some(S) => S, + None => continue, + }; + + let IdentStr = &Candidate.Ident; + + // Find and remove the let line, then replace the use-site. + if let Some(LetPos) = Working.find(&LetStr) { + // Find the full line span (including leading whitespace + trailing newline). + let LineStart = Working[..LetPos].rfind('\n').map(|I| I + 1).unwrap_or(0); + + let LineEnd = Working[LetPos..] + .find('\n') + .map(|I| LetPos + I + 1) + .unwrap_or(Working.len()); + + // Find the use-site of the identifier after the let line. + let SearchFrom = LineEnd; + + if let Some(RelPos) = find_word(&Working[SearchFrom..], IdentStr) { + let UsePos = SearchFrom + RelPos; + let UseEnd = UsePos + IdentStr.len(); + + // Replace use-site first (later in file, so offsets of let line unaffected). + Working.replace_range(UsePos..UseEnd, &InitStr); + + // Now remove the let line. + Working.replace_range(LineStart..LineEnd, ""); + + AnyChanged = true; + + continue 'outer; + } + } + } + } + } + + // No more candidates found in this pass. + break; + } + + if AnyChanged { Ok(Some(Working)) } else { Ok(None) } +} + +// --------------------------------------------------------------------------- +// Helpers for RunPreserve +// --------------------------------------------------------------------------- + +/// Word-boundary-aware substring search: returns the byte offset of the first +/// occurrence of `Word` in `Haystack` where the match is not immediately +/// preceded or followed by an alphanumeric character or underscore. +fn find_word(Haystack:&str, Word:&str) -> Option { + let Bytes = Haystack.as_bytes(); + let Pat = Word.as_bytes(); + + let mut Pos = 0usize; + + while Pos + Pat.len() <= Bytes.len() { + if Bytes[Pos..].starts_with(Pat) { + let Before = Pos > 0 && (Bytes[Pos - 1].is_ascii_alphanumeric() || Bytes[Pos - 1] == b'_'); + + let After = Bytes + .get(Pos + Pat.len()) + .map_or(false, |&B| B.is_ascii_alphanumeric() || B == b'_'); + + if !Before && !After { + return Some(Pos); + } + } + + Pos += 1; + } + + None +} + +/// Collect all `syn::Block` references reachable from a top-level `Item`. +/// Only descends into function bodies (free functions and impl methods). +fn CollectBlocks(Item:&syn::Item) -> Vec<&syn::Block> { + let mut Out = Vec::new(); + + match Item { + syn::Item::Fn(F) => Out.push(F.block.as_ref()), + + syn::Item::Impl(Impl) => { + for ImplItem in &Impl.items { + if let syn::ImplItem::Fn(M) = ImplItem { + Out.push(&M.block); + } + } + }, + + _ => {}, + } + + Out +}