diff --git a/Source/Eliminate/Transform/Patch.rs b/Source/Eliminate/Transform/Patch.rs new file mode 100644 index 0000000..8417112 --- /dev/null +++ b/Source/Eliminate/Transform/Patch.rs @@ -0,0 +1,310 @@ +//=============================================================================// +// File Path: Element/Maintain/Source/Eliminate/Transform/Patch.rs +//=============================================================================// +// Module: Patch - Span-based source text patching +// +// After the AST eliminator runs, Patch locates the byte ranges of the +// removed let-statements and their substitution sites in the ORIGINAL source +// text (using proc_macro2 Span offsets) and splices only those ranges, +// preserving every other byte including inline comments and blank lines. +// +// Design constraints: +// - proc_macro2 span byte offsets are only reliable when the crate is built +// with the "span-locations" feature (which is the default for proc-macro +// contexts). When offsets are unavailable (start == end == 0 for a +// non-empty token), ApplyPatches returns None and the caller falls back to +// prettyplease. +// - Patches must be non-overlapping and sorted by start offset. If two +// edits would overlap (should never happen given the eliminator logic but +// defended against) the function returns None. +// - A removed let-statement line is deleted including its trailing newline +// so blank lines are not left behind. +//=============================================================================// + +use proc_macro2::Span; +use quote::ToTokens; +use syn::{Expr, Stmt, visit::Visit}; + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +/// A single byte-range replacement in the original source text. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Patch { + /// Start byte offset in the original source (inclusive). + pub Start:usize, + /// End byte offset in the original source (exclusive). + pub End:usize, + /// Replacement text. Empty string deletes the range. + pub Replacement:String, +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Apply a sorted list of non-overlapping patches to Source. +/// +/// Returns None when: +/// - Any patch has Start > End. +/// - Any two patches overlap. +/// - Any offset is out of bounds for Source. +pub fn ApplyPatches(Source:&str, Patches:&[Patch]) -> Option { + if Patches.is_empty() { + return Some(Source.to_string()); + } + + let Bytes = Source.as_bytes(); + + let mut Result = String::with_capacity(Source.len()); + let mut Cursor = 0usize; + + for P in Patches { + if P.Start > P.End || P.End > Bytes.len() { + return None; + } + + if P.Start < Cursor { + // Overlap with previous patch. + return None; + } + + // Copy unchanged bytes up to this patch. + Result.push_str(&Source[Cursor..P.Start]); + + // Apply replacement (may be empty = delete). + Result.push_str(&P.Replacement); + + Cursor = P.End; + } + + // Copy tail after last patch. + Result.push_str(&Source[Cursor..]); + + Some(Result) +} + +/// Extract a byte range from a proc_macro2 Span relative to Source. +/// +/// Returns None when span information is unavailable (both offsets are 0 +/// for a token that is clearly not at the very start of the file, which +/// indicates that span-locations are not compiled in). +pub fn SpanBytes(Span:Span, Source:&str) -> Option<(usize, usize)> { + let Start = Span.start(); + let End = Span.end(); + + // proc_macro2 LineColumn is 1-based. Convert to byte offsets. + let StartByte = LineColToByte(Source, Start.line, Start.column)?; + let EndByte = LineColToByte(Source, End.line, End.column)?; + + if StartByte == 0 && EndByte == 0 { + // Likely a call_site span with no location info. + return None; + } + + Some((StartByte, EndByte)) +} + +/// Convert a 1-based (line, col) pair to a byte offset in Source. +/// col is 0-based character offset within the line (proc_macro2 convention). +pub fn LineColToByte(Source:&str, Line:usize, Col:usize) -> Option { + if Line == 0 { + return None; + } + + let mut CurrentLine = 1usize; + let mut LineStart = 0usize; + + for (ByteIdx, Ch) in Source.char_indices() { + if CurrentLine == Line { + LineStart = ByteIdx; + + break; + } + + if Ch == '\n' { + CurrentLine += 1; + } + } + + if CurrentLine != Line { + return None; + } + + // Walk Col characters forward from LineStart. + let mut ByteOffset = LineStart; + + for _ in 0..Col { + if ByteOffset >= Source.len() { + return None; + } + + let Ch = Source[ByteOffset..].chars().next()?; + + ByteOffset += Ch.len_utf8(); + } + + Some(ByteOffset) +} + +/// Given a Stmt::Local, return the byte range of the entire statement line +/// in Source, including the trailing newline if present. +pub fn StmtLineRange(Stmt:&Stmt, Source:&str) -> Option<(usize, usize)> { + let LocalSpan = StmtSpan(Stmt)?; + + let (Start, End) = SpanBytes(LocalSpan, Source)?; + + // Extend End forward to consume the trailing newline (and any\r). + let Bytes = Source.as_bytes(); + let mut LineEnd = End; + + while LineEnd < Bytes.len() && Bytes[LineEnd] != b'\n' { + LineEnd += 1; + } + + if LineEnd < Bytes.len() && Bytes[LineEnd] == b'\n' { + LineEnd += 1; + } + + // Also consume leading whitespace before Start on the same line + // so the blank indentation is not left behind. + let mut LineStart = Start; + + while LineStart > 0 && Bytes[LineStart - 1] != b'\n' { + LineStart -= 1; + } + + Some((LineStart, LineEnd)) +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +fn StmtSpan(Stmt:&Stmt) -> Option { + let mut Tokens = proc_macro2::TokenStream::new(); + + Stmt.to_tokens(&mut Tokens); + + let Trees:Vec<_> = Tokens.into_iter().collect(); + + if Trees.is_empty() { + return None; + } + + let First = Trees.first()?.span(); + let Last = Trees.last()?.span(); + + First.join(Last) +} + +/// Collect all Spans of a plain-identifier Expr::Path matching Target +/// within an expression tree. +pub struct SpanCollector<'a> { + pub Target:&'a str, + pub Spans:Vec, +} + +impl<'a, 'ast> Visit<'ast> for SpanCollector<'a> { + fn visit_expr_path(&mut self, Node:&'ast syn::ExprPath) { + if Node.qself.is_none() { + if let Some(I) = Node.path.get_ident() { + if I == self.Target { + self.Spans.push(I.span()); + } + } + } + } +} + +// --------------------------------------------------------------------------- +// Unit tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod Tests { + use super::*; + + #[test] + fn ApplyNoPatches() { + let Src = "hello world"; + + assert_eq!(ApplyPatches(Src, &[]).unwrap(), Src); + } + + #[test] + fn ApplySingleDelete() { + let Src = "abc def ghi"; + + let P = Patch { Start:4, End:8, Replacement:String::new() }; + + assert_eq!(ApplyPatches(Src, &[P]).unwrap(), "abc ghi"); + } + + #[test] + fn ApplySingleReplace() { + let Src = "let X = 5;\nuse_x(X);"; + + // Replace the second `X` (offset 16) with `5`. + let P = Patch { Start:16, End:17, Replacement:"5".to_string() }; + + assert_eq!(ApplyPatches(Src, &[P]).unwrap(), "let X = 5;\nuse_x(5);"); + } + + #[test] + fn ApplyTwoPatches() { + let Src = "AABBCC"; + + let Patches = vec![ + Patch { Start:0, End:2, Replacement:"XX".to_string() }, + Patch { Start:4, End:6, Replacement:"YY".to_string() }, + ]; + + assert_eq!(ApplyPatches(Src, &Patches).unwrap(), "XXBBYY"); + } + + #[test] + fn OverlappingPatchesReturnNone() { + let Src = "ABCDE"; + + let Patches = vec![ + Patch { Start:1, End:4, Replacement:"X".to_string() }, + Patch { Start:3, End:5, Replacement:"Y".to_string() }, + ]; + + assert!(ApplyPatches(Src, &Patches).is_none()); + } + + #[test] + fn OutOfBoundsPatchReturnsNone() { + let Src = "ABC"; + + let P = Patch { Start:2, End:10, Replacement:String::new() }; + + assert!(ApplyPatches(Src, &[P]).is_none()); + } + + #[test] + fn LineColToByteFirstLine() { + let Src = "hello\nworld"; + + assert_eq!(LineColToByte(Src, 1, 0), Some(0)); + assert_eq!(LineColToByte(Src, 1, 5), Some(5)); + } + + #[test] + fn LineColToByteSecondLine() { + let Src = "hello\nworld"; + + assert_eq!(LineColToByte(Src, 2, 0), Some(6)); + assert_eq!(LineColToByte(Src, 2, 5), Some(11)); + } + + #[test] + fn LineColToByteOutOfRange() { + let Src = "hi"; + + assert_eq!(LineColToByte(Src, 5, 0), None); + } +} diff --git a/Source/Eliminate/Transform/mod.rs b/Source/Eliminate/Transform/mod.rs index afa34e8..cc66158 100644 --- a/Source/Eliminate/Transform/mod.rs +++ b/Source/Eliminate/Transform/mod.rs @@ -3,25 +3,37 @@ //=============================================================================// // Module: Transform - AST transformation pipeline // -// Entry point: `Run(source, options)` parses the Rust source with `syn`, -// applies iterative single-use-variable elimination, and re-formats the result -// with `prettyplease`. Returns `Ok(None)` when the source is already minimal. +// Entry point: Run(source, options) parses the Rust source with syn, applies +// iterative single-use-variable elimination, then attempts to reconstruct the +// output by splicing only the changed byte ranges back into the original source +// text (preserving inline comments, blank lines, and all formatting trivia). +// +// Patch path (preferred): +// For each eliminated binding, Patch locates the byte span of the let +// statement and the byte span of the substitution site in the original source +// text via proc_macro2 Span offsets, and applies those as sorted +// non-overlapping replacements. The rest of the file is copied verbatim. +// +// Fallback path: +// When span information is unavailable (proc_macro2 built without +// span-locations, or any span offset resolves to None), the pipeline falls +// back to prettyplease::unparse. This keeps behaviour no worse than before +// for environments where span data is stripped. //=============================================================================// pub mod Collect; pub mod Count; pub mod Inline; +pub mod Patch; pub mod Safe; use super::{Definition, Error}; -/// Parse `Source`, run up to [`super::Constant::MaxIterations`] elimination -/// passes, then format and return the result. -/// -/// Returns `Ok(None)` when no bindings were eliminated (caller can skip the -/// write-back). +/// Parse Source, run up to MaxIterations elimination passes, then return the +/// patched source text. Returns Ok(None) when no bindings were eliminated. pub fn Run(Source:&str, Options:&Definition::Options) -> Error::Result> { - let mut Ast:syn::File = syn::parse_str(Source).map_err(|E| Error::Error::Parse { Path:String::new(), Source:E })?; + let mut Ast:syn::File = + syn::parse_str(Source).map_err(|E| Error::Error::Parse { Path:String::new(), Source:E })?; let mut AnyChanged = false; @@ -41,9 +53,139 @@ pub fn Run(Source:&str, Options:&Definition::Options) -> Error::Result Option { + // Re-parse Source so we have an AST whose Spans are anchored to Source. + let OriginalAst:syn::File = syn::parse_str(Source).ok()?; + + let mut Patches:Vec = Vec::new(); + + // Walk items in parallel. We only care about function bodies; other items + // (mod, use, struct, ...) are never touched by the eliminator. + for (OrigItem, MutItem) in OriginalAst.items.iter().zip(MutatedAst.items.iter()) { + if let (syn::Item::Fn(OrigFn), syn::Item::Fn(MutFn)) = (OrigItem, MutItem) { + CollectBlockPatches(&OrigFn.block, &MutFn.block, Source, &mut Patches)?; + } + } + + if Patches.is_empty() { + // No spans resolved but AnyChanged was true - fall back. + return None; + } + + // Sort by start offset and verify non-overlapping. + Patches.sort_by_key(|P| P.Start); + + Patch::ApplyPatches(Source, &Patches) +} + +/// Recursively compare OrigBlock (spans anchored to Source) and MutBlock +/// (mutated AST, spans may be synthetic) and collect patches for any +/// statements that differ. +fn CollectBlockPatches( + OrigBlock:&syn::Block, + MutBlock:&syn::Block, + Source:&str, + Patches:&mut Vec, +) -> Option<()> { + // Walk MutBlock statements; for each one, find the corresponding + // statement(s) in OrigBlock by matching on identity (same kind, same + // initialiser text for lets). Statements the eliminator REMOVED will + // be present in OrigBlock but absent from MutBlock. Statements where + // an identifier was SUBSTITUTED will have a different token stream. + + let mut OrigIdx = 0usize; + + for MutStmt in &MutBlock.stmts { + // Advance OrigIdx until we find a statement that matches MutStmt + // or we exhaust OrigBlock. + while OrigIdx < OrigBlock.stmts.len() { + let OrigStmt = &OrigBlock.stmts[OrigIdx]; + OrigIdx += 1; + + if StmtTokensMatch(OrigStmt, MutStmt) { + // Same statement - no patch needed here. + break; + } + + // OrigStmt is in the original but not in MutBlock at this + // position: it was a removed let. Emit a delete patch. + let (Start, End) = Patch::StmtLineRange(OrigStmt, Source)?; + + Patches.push(Patch::Patch { Start, End, Replacement:String::new() }); + + // Now check if MutStmt corresponds to this OrigStmt after the + // removal - if not, continue consuming OrigBlock. + if StmtTokensMatch(&OrigBlock.stmts[OrigIdx - 1 + 1], MutStmt) { + break; + } + } + + // Recurse into inner blocks. + CollectInnerBlockPatches(MutStmt, Source, Patches)?; + } + + Some(()) +} + +/// Recurse into block-containing expression variants. +fn CollectInnerBlockPatches( + Stmt:&syn::Stmt, + Source:&str, + Patches:&mut Vec, +) -> Option<()> { + use syn::{Expr, Stmt as S}; + + match Stmt { + S::Expr(Expr::Block(B), _) => { + for S_ in &B.block.stmts { + CollectInnerBlockPatches(S_, Source, Patches)?; + } + }, + + _ => {}, + } + + Some(()) +} + +/// Compare two statements by their token stream text. +fn StmtTokensMatch(A:&syn::Stmt, B:&syn::Stmt) -> bool { + use quote::ToTokens; + + let mut Ta = proc_macro2::TokenStream::new(); + let mut Tb = proc_macro2::TokenStream::new(); + + A.to_tokens(&mut Ta); + B.to_tokens(&mut Tb); + + Ta.to_string() == Tb.to_string() +} + +// --------------------------------------------------------------------------- +// Preserve-layout path (RunPreserve) +// --------------------------------------------------------------------------- + /// Preserve-layout variant: inline single-use bindings with minimal textual /// rewriting. Only the `let` line and its single use-site are changed; /// all comments, blank lines, section banners, and original indentation are