diff --git a/reference/PFS-MS-v1.0/README.md b/reference/PFS-MS-v1.0/README.md index 9350ebd..1a8e1ba 100644 --- a/reference/PFS-MS-v1.0/README.md +++ b/reference/PFS-MS-v1.0/README.md @@ -131,6 +131,20 @@ cargo run --bin pfs -- extract backup.pfs ./restore --at 2 # by session cargo run --bin pfs -- extract backup.pfs ./restore --at-time 1700000000000 ``` +### Compaction + +`pfs compact` rebuilds a multi-session file into a single fresh session holding +the current tree, **discarding history** (Section 15): deleted nodes are gone, +superseded versions and delta chains collapse to the newest full content, and +abandoned tails are reclaimed. The output is a fully valid, verifiable PFS-MS +file. (Generic `pcf-compact` must *not* be used on a PFS-MS file — it would +corrupt the session chain.) + +``` +cargo run --bin pfs -- compact fs.pfs # in place +cargo run --bin pfs -- compact fs.pfs out.pfs # to a new file +``` + POSIX permission bits and modification time are captured on import and restored on extract; pass `--no-metadata` (on either side) to skip this, and `--store` to disable compression. 
Symlinks and other non-regular files are skipped with a @@ -184,12 +198,14 @@ reference/PFS-MS-v1.0/ │ ├── tree.rs # liveness, tree, reconstruction (Sections 9.3, 10) │ ├── fs.rs # high-level FsReader │ ├── dirsync.rs # directory <-> archive tooling (create/update/extract) +│ ├── compact.rs # single-session compaction (Section 15) │ ├── vector.rs # canonical Section 17 reference vector │ └── bin/pfs.rs # demo CLI ├── tests/ │ ├── roundtrip.rs # end-to-end black-box tests │ ├── coverage.rs # targeted error-path / edge-case tests │ ├── dirsync.rs # directory create/update/extract round-trips +│ ├── compact.rs # single-session compaction round-trips │ └── spec_compliance.rs # one test per normative MUST (R1..R8, W2/W3) └── examples/ └── gen_testvector.rs # writes pfs_ms_testvector.bin + hex dumps diff --git a/reference/PFS-MS-v1.0/src/bin/pfs.rs b/reference/PFS-MS-v1.0/src/bin/pfs.rs index 740519d..cd55fbe 100644 --- a/reference/PFS-MS-v1.0/src/bin/pfs.rs +++ b/reference/PFS-MS-v1.0/src/bin/pfs.rs @@ -17,6 +17,7 @@ //! pfs create [--store] [--no-metadata] //! pfs update [--delete] [--store] [--no-metadata] //! pfs extract [--at ] [--at-time ] [--no-metadata] +//! pfs compact [] # rebuild as one fresh session (discards history) //! pfs keygen //! pfs sign --key [--resign] //! 
pfs verify-sig [--key ] [--no-recheck] @@ -70,6 +71,7 @@ fn run(args: &[String]) -> CliResult { "create" => cmd_create(rest), "update" => cmd_update(rest), "extract" => cmd_extract(rest), + "compact" => cmd_compact(rest), "keygen" => cmd_keygen(rest), "sign" => cmd_sign(rest), "verify-sig" => cmd_verify_sig(rest), @@ -83,7 +85,7 @@ fn run(args: &[String]) -> CliResult { fn print_usage() { eprintln!( - "usage:\n pfs mkfs [--key ]\n pfs mkdir [--key ]\n pfs put [] [--store] [--key ]\n pfs mv [--key ]\n pfs rm [--key ]\n pfs ls []\n pfs cat \n pfs get \n pfs log \n pfs verify \n pfs create [--store] [--no-metadata] [--key ]\n pfs update [--delete] [--store] [--no-metadata] [--key ]\n pfs extract [--at ] [--at-time ] [--no-metadata]\n pfs keygen \n pfs sign --key [--resign]\n pfs verify-sig [--key ] [--no-recheck]\n\nmutating commands accept --key to auto-sign after the commit." + "usage:\n pfs mkfs [--key ]\n pfs mkdir [--key ]\n pfs put [] [--store] [--key ]\n pfs mv [--key ]\n pfs rm [--key ]\n pfs ls []\n pfs cat \n pfs get \n pfs log \n pfs verify \n pfs create [--store] [--no-metadata] [--key ]\n pfs update [--delete] [--store] [--no-metadata] [--key ]\n pfs extract [--at ] [--at-time ] [--no-metadata]\n pfs compact []\n pfs keygen \n pfs sign --key [--resign]\n pfs verify-sig [--key ] [--no-recheck]\n\nmutating commands accept --key to auto-sign after the commit." ); } @@ -321,6 +323,14 @@ fn cmd_update(a: &[String]) -> CliResult { maybe_autosign(archive, p.values.get("key")) } +fn cmd_compact(a: &[String]) -> CliResult { + let p = parse_flags(a, &[])?; + let file = pos(&p, 0, "")?; + // In-place when is omitted; otherwise write a fresh file. 
+ let out = p.positional.get(1).map(String::as_str).unwrap_or(file); + pfs_ms::compact_archive(Path::new(file), Path::new(out)).map_err(|e| e.to_string()) +} + fn cmd_extract(a: &[String]) -> CliResult { let p = parse_flags(a, &["at", "at-time"])?; let archive = p.positional.first().ok_or("missing argument: ")?; diff --git a/reference/PFS-MS-v1.0/src/compact.rs b/reference/PFS-MS-v1.0/src/compact.rs new file mode 100644 index 0000000..1d6e1fb --- /dev/null +++ b/reference/PFS-MS-v1.0/src/compact.rs @@ -0,0 +1,178 @@ +//! PFS-MS-aware compaction: rebuild a multi-session file into a fresh, +//! single-session snapshot of its current state (spec Section 15). +//! +//! Generic PCF compaction (PCF Section 11.5, [`pcf::Container::compacted_image`]) +//! MUST NOT be used on a PFS-MS file: it repacks entries into shared blocks and +//! rewrites every `table_hash`, which destroys the one-`PFS_SESSION`-per-HEAD +//! invariant and the inter-session hash commitments (`member_blocks_digest`, +//! `prev_session_hash`). The result no longer scans or verifies as PFS-MS. +//! +//! Compaction here is therefore profile-aware. It resolves the live tree at the +//! head and re-emits it as **one** session (`session_seq = 1`, +//! `prev_session_hash = 0`). This is a full rewrite that *discards history*: +//! +//! * deleted nodes are gone — only live nodes are re-emitted; +//! * every file is stored as fresh `Direct` (or `Empty`) content, collapsing +//! any delta chain to the newest full version; +//! * superseded versions and abandoned tails are reclaimed. +//! +//! The output is a fully valid, verifiable PFS-MS file. 
+
+use std::fs::{self, OpenOptions};
+use std::io::{Read, Seek, Write};
+use std::path::{Path, PathBuf};
+
+use pcf::HashAlgo;
+
+use crate::error::{Error, Result};
+use crate::fs::FsReader;
+use crate::tree::Tree;
+use crate::writer::{Change, FsWriter};
+use crate::ROOT_NODE_ID;
+
+/// Rebuild the PFS-MS file in `src` into a fresh, single-session image written
+/// to `dst`, returning the destination handle.
+///
+/// The resolved current tree of `src` becomes session 1 (`session_seq = 1`,
+/// `prev_session_hash = 0`); history is discarded (Section 15). The source is
+/// verified before any output is produced, so a corrupt input is rejected
+/// rather than propagated. `dst` must be a fresh, writable, empty handle.
+///
+/// The whole source tree (every live file's content) is materialised in memory
+/// before `dst` is touched, so `src` and `dst` may be distinct handles to the
+/// same logical data without interfering.
+///
+/// # Errors
+///
+/// Returns the first error raised while opening, verifying, scanning or
+/// reading `src`, or while creating the writer on `dst` and committing the
+/// rebuilt change set.
+pub fn compact<R, W>(src: R, mut dst: W) -> Result<W>
+where
+    R: Read + Write + Seek,
+    W: Read + Write + Seek,
+{
+    let mut r = FsReader::open(src)?;
+    // Refuse to compact a corrupt source (mirrors pcf-compact's verify-before).
+    r.verify()?;
+
+    // Everything needed from the source is gathered here, before the writer
+    // on `dst` is created.
+    let algo = source_hash_algo(&mut r)?;
+    let tree = r.tree()?;
+    let changes = collect_changes(&mut r, &tree)?;
+
+    let mut w = FsWriter::create(&mut dst, algo)?;
+    w.set_writer_id(b"pfs-compact");
+    w.set_compression(true);
+    // An empty source tree yields no changes; `commit_changes` then commits
+    // nothing and `dst` stays at the valid empty-table state from `create`.
+    w.commit_changes(&changes)?;
+    // End the writer's mutable borrow of `dst` so the handle can be returned.
+    drop(w);
+
+    Ok(dst)
+}
+
+/// Compact the PFS-MS file at `src` into `dst` on the host filesystem.
+///
+/// When `dst == src` the file is compacted in place. Output is written to a
+/// sibling temp file, fsynced, and atomically renamed into place, so a crash
+/// leaves either the original or the fully written replacement.
+pub fn compact_archive(src: &Path, dst: &Path) -> Result<()> { + // Build the compacted image in memory from the source. + let image = { + let in_file = OpenOptions::new() + .read(true) + .write(true) + .open(src) + .map_err(Error::Io)?; + let out = compact(in_file, std::io::Cursor::new(Vec::new()))?; + out.into_inner() + }; + + // Write to a sibling temp file, fsync, then atomically rename into place. + let dir = dst.parent().filter(|p| !p.as_os_str().is_empty()); + let tmp: PathBuf = { + let name = dst + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "pfs".into()); + let pid = std::process::id(); + let tmp_name = format!(".{name}.pfs-compact.tmp.{pid}"); + match dir { + Some(d) => d.join(tmp_name), + None => PathBuf::from(tmp_name), + } + }; + + let mut f = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&tmp) + .map_err(Error::Io)?; + f.write_all(&image).map_err(Error::Io)?; + f.sync_all().map_err(Error::Io)?; + drop(f); + + fs::rename(&tmp, dst).map_err(|e| { + let _ = fs::remove_file(&tmp); + Error::Io(e) + })?; + Ok(()) +} + +/// The table-hash algorithm of the source's head session (`Sha256` if empty). +fn source_hash_algo(r: &mut FsReader) -> Result { + let scan = r.scan()?; + Ok(scan + .sessions + .first() + .map(|s| s.block_hashes[0].2) + .unwrap_or(HashAlgo::Sha256)) +} + +/// Build the change set re-creating the whole live tree in one session. 
+fn collect_changes( + r: &mut FsReader, + tree: &Tree, +) -> Result> { + let mut out = Vec::new(); + walk(r, tree, ROOT_NODE_ID, "", &mut out)?; + Ok(out) +} + +fn walk( + r: &mut FsReader, + tree: &Tree, + node: [u8; 16], + prefix: &str, + out: &mut Vec, +) -> Result<()> { + let kids = match tree.children.get(&node) { + Some(k) => k.clone(), + None => return Ok(()), + }; + for cid in kids { + let rec = tree.nodes.get(&cid).ok_or(Error::NotFound)?; + let name = rec.name_str(); + let rel = if prefix.is_empty() { + name + } else { + format!("{prefix}/{name}") + }; + if rec.is_dir() { + // Emit every directory (preserving empty ones), then recurse. + out.push(Change::Mkdir { + path: rel.clone(), + mode: rec.mode, + mtime_unix_ms: rec.mtime_unix_ms, + }); + walk(r, tree, cid, &rel, out)?; + } else { + // Reconstruct the full current content; re-emitted as Direct/Empty. + let (mode, mtime) = (rec.mode, rec.mtime_unix_ms); + let content = r.read_path(&rel)?; + out.push(Change::PutFile { + path: rel, + content, + mode, + mtime_unix_ms: mtime, + }); + } + } + Ok(()) +} diff --git a/reference/PFS-MS-v1.0/src/lib.rs b/reference/PFS-MS-v1.0/src/lib.rs index 9e0bab8..b6eddd7 100644 --- a/reference/PFS-MS-v1.0/src/lib.rs +++ b/reference/PFS-MS-v1.0/src/lib.rs @@ -38,6 +38,7 @@ //! assert_eq!(r.read_path("docs/hello.txt").unwrap(), b"Hello, world\n"); //! ``` +mod compact; mod compress; pub mod consts; mod delta; @@ -52,6 +53,7 @@ mod tree; mod vector; mod writer; +pub use compact::{compact, compact_archive}; pub use compress::{compress_deflate, decompress}; pub use consts::*; pub use dirsync::{create_archive, extract_archive, session_at_time, update_archive, SyncOptions}; diff --git a/reference/PFS-MS-v1.0/tests/compact.rs b/reference/PFS-MS-v1.0/tests/compact.rs new file mode 100644 index 0000000..c0908f7 --- /dev/null +++ b/reference/PFS-MS-v1.0/tests/compact.rs @@ -0,0 +1,271 @@ +//! Tests for PFS-MS-aware compaction (`pfs_ms::compact`): a multi-session file +//! 
is rebuilt into a single fresh session that still reconstructs the same live +//! tree, while history (deleted nodes, superseded versions, deltas) is dropped. + +use std::io::Cursor; + +use pcf::HashAlgo; +use pfs_ms::{compact, Change, FsReader, FsWriter, ROOT_NODE_ID}; + +/// Build a multi-session source exercising: a delta (file written twice), an +/// empty directory, an empty file, nested directories, explicit mode/mtime, +/// and a deleted subtree. Returns the file bytes. +fn build_source() -> Vec { + let mut w = FsWriter::mkfs(Cursor::new(Vec::new()), HashAlgo::Sha256).unwrap(); + w.mkdir("docs").unwrap(); // session 2 + w.put_file("docs/f.txt", b"version one\n").unwrap(); // session 3 + // Larger second version so the writer stores it as a DELTA against v1. + let v2 = b"version two, substantially longer to invite a delta encode\n".repeat(8); + w.put_file("docs/f.txt", &v2).unwrap(); // session 4 + + // One session carrying explicit metadata, an empty dir, an empty file, and + // a nested directory. + w.commit_changes(&[ + Change::Mkdir { + path: "empty".into(), + mode: 0o755, + mtime_unix_ms: 1111, + }, + Change::Mkdir { + path: "a".into(), + mode: 0o700, + mtime_unix_ms: 2000, + }, + Change::Mkdir { + path: "a/b".into(), + mode: 0o700, + mtime_unix_ms: 2100, + }, + Change::Mkdir { + path: "a/b/c".into(), + mode: 0o700, + mtime_unix_ms: 2222, + }, + Change::PutFile { + path: "a/b/note.txt".into(), + content: b"deep".to_vec(), + mode: 0o640, + mtime_unix_ms: 3333, + }, + Change::PutFile { + path: "blank.txt".into(), + content: Vec::new(), + mode: 0o600, + mtime_unix_ms: 4444, + }, + ]) + .unwrap(); // session 5 + + // A subtree that is later deleted; it must not survive compaction. + w.mkdir("trash").unwrap(); // session 6 + w.put_file("trash/junk.txt", b"goodbye").unwrap(); // session 7 + w.rm("trash").unwrap(); // session 8 + + w.into_storage().into_inner() +} + +/// All live paths in a file, sorted (directories and files, root excluded). 
+fn live_paths(bytes: &[u8]) -> Vec { + let mut r = FsReader::open(Cursor::new(bytes.to_vec())).unwrap(); + let tree = r.tree().unwrap(); + let mut out = Vec::new(); + fn walk(tree: &pfs_ms::Tree, node: [u8; 16], prefix: &str, out: &mut Vec) { + if let Some(kids) = tree.children.get(&node) { + for &cid in kids { + let rec = &tree.nodes[&cid]; + let name = rec.name_str(); + let rel = if prefix.is_empty() { + name + } else { + format!("{prefix}/{name}") + }; + let tag = if rec.is_dir() { "d" } else { "f" }; + out.push(format!("{tag} {rel}")); + if rec.is_dir() { + walk(tree, cid, &rel, out); + } + } + } + } + walk(&tree, ROOT_NODE_ID, "", &mut out); + out.sort(); + out +} + +#[test] +fn compact_multi_session_roundtrip() { + let src = build_source(); + let out = compact(Cursor::new(src.clone()), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + + // The output is a valid PFS-MS file with exactly one session. + let mut r = FsReader::open(Cursor::new(out.clone())).unwrap(); + r.verify().unwrap(); + let sessions = r.list_sessions().unwrap(); + assert_eq!(sessions.len(), 1, "compaction must yield a single session"); + let s = &sessions[0]; + assert_eq!(s.session_seq, 1); + assert_eq!(s.prev_session_hash, [0u8; 64]); + assert_eq!(s.prev_session_hash_algo, HashAlgo::None); + + // Same live tree as the source (deleted `trash` is gone). + assert_eq!(live_paths(&out), live_paths(&src)); + assert!( + live_paths(&out).iter().all(|p| !p.contains("trash")), + "deleted subtree must not survive" + ); + + // File contents match the latest source versions; empty file stays empty. 
+ let v2 = b"version two, substantially longer to invite a delta encode\n".repeat(8); + assert_eq!(r.read_path("docs/f.txt").unwrap(), v2); + assert_eq!(r.read_path("a/b/note.txt").unwrap(), b"deep"); + assert_eq!(r.read_path("blank.txt").unwrap(), b""); +} + +#[test] +fn compact_preserves_mode_and_mtime() { + let src = build_source(); + let out = compact(Cursor::new(src), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + + let mut r = FsReader::open(Cursor::new(out)).unwrap(); + let tree = r.tree().unwrap(); + let by_path = |path: &str| -> pfs_ms::NodeRecord { + let id = pfs_ms::resolve_path(&tree, path).unwrap(); + tree.nodes[&id].clone() + }; + assert_eq!(by_path("empty").mode, 0o755); + assert_eq!(by_path("empty").mtime_unix_ms, 1111); + assert_eq!(by_path("a/b/c").mode, 0o700); + assert_eq!(by_path("a/b/note.txt").mode, 0o640); + assert_eq!(by_path("a/b/note.txt").mtime_unix_ms, 3333); + assert_eq!(by_path("blank.txt").mode, 0o600); +} + +#[test] +fn compact_preserves_hash_algo() { + // Source built with a non-default algo; the compacted file must keep it. + let mut w = FsWriter::mkfs(Cursor::new(Vec::new()), HashAlgo::Blake3).unwrap(); + w.mkdir("d").unwrap(); + w.put_file("d/f", b"hi").unwrap(); + let src = w.into_storage().into_inner(); + + let out = compact(Cursor::new(src), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + + // Inspect the head block's table_hash_algo via PCF. + let mut c = pcf::Container::open(Cursor::new(out)).unwrap(); + let head = c.table_head(); + let algo = c.read_block_at(head).unwrap().header.table_hash_algo; + assert_eq!(algo, HashAlgo::Blake3); +} + +#[test] +fn compact_empty_tree() { + // A file whose only content has been removed compacts to an empty tree. 
+ let mut w = FsWriter::mkfs(Cursor::new(Vec::new()), HashAlgo::Sha256).unwrap(); + w.put_file("x.txt", b"data").unwrap(); + w.rm("x.txt").unwrap(); + let src = w.into_storage().into_inner(); + + let out = compact(Cursor::new(src), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + + FsReader::open(Cursor::new(out.clone())) + .unwrap() + .verify() + .unwrap(); + assert!(live_paths(&out).is_empty()); +} + +#[test] +fn compact_is_idempotent() { + let src = build_source(); + let once = compact(Cursor::new(src), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + let twice = compact(Cursor::new(once.clone()), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + + FsReader::open(Cursor::new(twice.clone())) + .unwrap() + .verify() + .unwrap(); + assert_eq!(live_paths(&twice), live_paths(&once)); +} + +#[test] +fn compact_reclaims_space() { + let src = build_source(); + let out = compact(Cursor::new(src.clone()), Cursor::new(Vec::new())) + .unwrap() + .into_inner(); + assert!( + out.len() < src.len(), + "compacted ({}) should be smaller than source ({})", + out.len(), + src.len() + ); +} + +#[test] +fn compact_rejects_corrupt_source() { + let mut src = build_source(); + // Flip a byte in the middle (data region) to break a data_hash; the + // trailing bytes are the PCF trailer, so corrupt those would not trip + // hash verification. 
+ let mid = src.len() / 2; + src[mid] ^= 0xFF; + let err = compact(Cursor::new(src), Cursor::new(Vec::new())); + assert!(err.is_err(), "a corrupt source must be rejected"); +} + +#[test] +fn compact_archive_in_place() { + let dir = std::env::temp_dir().join(format!("pfs-compact-test-{}", std::process::id())); + std::fs::create_dir_all(&dir).unwrap(); + let path = dir.join("a.pfs"); + std::fs::write(&path, build_source()).unwrap(); + + pfs_ms::compact_archive(&path, &path).unwrap(); + + let bytes = std::fs::read(&path).unwrap(); + let mut r = FsReader::open(Cursor::new(bytes.clone())).unwrap(); + r.verify().unwrap(); + assert_eq!(r.list_sessions().unwrap().len(), 1); + assert!(live_paths(&bytes).contains(&"f docs/f.txt".to_string())); + + std::fs::remove_dir_all(&dir).ok(); +} + +#[test] +fn compact_archive_to_new_file() { + let dir = std::env::temp_dir().join(format!("pfs-compact-out-{}", std::process::id())); + std::fs::create_dir_all(&dir).unwrap(); + let src = dir.join("src.pfs"); + let dst = dir.join("dst.pfs"); + std::fs::write(&src, build_source()).unwrap(); + + pfs_ms::compact_archive(&src, &dst).unwrap(); + + // Source is untouched (still multi-session); destination is compacted. 
+ assert!( + FsReader::open(Cursor::new(std::fs::read(&src).unwrap())) + .unwrap() + .list_sessions() + .unwrap() + .len() + > 1 + ); + let out = std::fs::read(&dst).unwrap(); + let mut r = FsReader::open(Cursor::new(out)).unwrap(); + r.verify().unwrap(); + assert_eq!(r.list_sessions().unwrap().len(), 1); + + std::fs::remove_dir_all(&dir).ok(); +} diff --git a/specs/PCF-spec-v1.0.txt b/specs/PCF-spec-v1.0.txt index 5017663..f535867 100644 --- a/specs/PCF-spec-v1.0.txt +++ b/specs/PCF-spec-v1.0.txt @@ -711,6 +711,17 @@ Table of Contents recompute every table_hash set header.partition_table_offset to the first block's offset + // NOTE: this operation is content-preserving at the PCF layer (each + // data_hash is unchanged) but it is NOT structure-preserving across + // blocks: it re-packs entries into fresh blocks and recomputes every + // table_hash and next_table_offset. A profile that imposes structure + // spanning blocks -- e.g. a backward-linked chain grouped into logical + // units, or inter-block hash commitments such as PFS-MS's + // member_blocks_digest / prev_session_hash -- is therefore broken by + // generic compaction. Such profiles MUST define and use their own + // profile-aware compaction; generic PCF compaction MUST NOT be applied to + // their files. + ------------------------------------------------------------------------------- 12. Conformance and Validation diff --git a/specs/PFS-MS-spec-v1.0.txt b/specs/PFS-MS-spec-v1.0.txt index 6c17b5b..89edefc 100644 --- a/specs/PFS-MS-spec-v1.0.txt +++ b/specs/PFS-MS-spec-v1.0.txt @@ -1021,10 +1021,18 @@ Table of Contents can reconstruct the tree "as of" any session_seq by ignoring records with a higher seq -- a natural time-travel facility that needs no extra storage. - Finalization. 
A finalized PFS-MS file MAY be compacted with the PCF - compaction operation (PCF Section 11.5), which is a full rewrite into a new - file; this reclaims superseded versions and abandoned tails at the cost of - discarding history. + Finalization. A finalized PFS-MS file MAY be compacted to reclaim superseded + versions and abandoned tails, at the cost of discarding history. This is a + PROFILE-AWARE rewrite, NOT the generic PCF compaction operation (PCF Section + 11.5): the generic operation repacks entries into shared Table Blocks and + rewrites every table_hash, which destroys the single-PFS_SESSION-per-HEAD + invariant (Section 6) and the inter-session commitments member_blocks_digest + and prev_session_hash (Section 8), so the result no longer scans or verifies + as PFS-MS. A conforming implementation MUST NOT apply generic PCF compaction + to a PFS-MS file. Instead, PFS-MS compaction resolves the live tree at the + head (Section 11) and re-emits it as a single fresh session (session_seq = 1, + prev_session_hash all-zero), storing every file as DIRECT content. The + reference implementation exposes this as `pfs compact`. ------------------------------------------------------------------------------- diff --git a/tools/pcf-compact/README.md b/tools/pcf-compact/README.md index 9f11c78..beb82ff 100644 --- a/tools/pcf-compact/README.md +++ b/tools/pcf-compact/README.md @@ -34,10 +34,22 @@ FLAGS: --no-verify skip integrity verification before and after compaction (default: verify both) --force overwrite an existing --output path + --allow-pfs compact a PFS-MS file anyway (produces a plain PCF; + DISCARDS the multi-session filesystem structure) -q, --quiet suppress the savings report on stderr -h, --help show help ``` +### PFS-MS files are refused by default + +Generic PCF compaction repacks entries into shared table blocks and rewrites +every `table_hash`, which destroys a PFS-MS file's session chain (the result no +longer scans or verifies as PFS-MS). 
`pcf-compact` therefore **refuses** an +input that carries a `PFS_SESSION` partition (type `0xAAAA0002`) and points you +at `pfs compact`, which rebuilds it as a fresh single-session snapshot. Pass +`--allow-pfs` to force a plain-PCF compaction that intentionally discards the +PFS structure. + ### Examples ```sh diff --git a/tools/pcf-compact/src/cli.rs b/tools/pcf-compact/src/cli.rs index b1fdcad..38ea40e 100644 --- a/tools/pcf-compact/src/cli.rs +++ b/tools/pcf-compact/src/cli.rs @@ -17,6 +17,7 @@ pub struct Args { pub verify: bool, pub quiet: bool, pub force: bool, + pub allow_pfs: bool, } #[derive(Debug)] @@ -38,6 +39,8 @@ FLAGS: --no-verify skip integrity verification before and after compaction (default: verify both) --force overwrite an existing --output path + --allow-pfs compact a PFS-MS file anyway (produces a plain PCF; + DISCARDS the multi-session filesystem structure) -q, --quiet suppress the savings report on stderr -h, --help show this help @@ -56,6 +59,7 @@ pub fn parse(argv: &[String]) -> Result { let mut verify = true; let mut quiet = false; let mut force = false; + let mut allow_pfs = false; fn value(argv: &[String], i: &mut usize, flag: &str) -> Result { *i += 1; @@ -72,6 +76,7 @@ pub fn parse(argv: &[String]) -> Result { "-o" | "--output" => output = Some(PathBuf::from(value(argv, &mut i, &a)?)), "--no-verify" => verify = false, "--force" => force = true, + "--allow-pfs" => allow_pfs = true, "-q" | "--quiet" => quiet = true, other if other.starts_with('-') => { return Err(format!("unknown flag: {other}")); @@ -98,5 +103,6 @@ pub fn parse(argv: &[String]) -> Result { verify, quiet, force, + allow_pfs, })) } diff --git a/tools/pcf-compact/src/lib.rs b/tools/pcf-compact/src/lib.rs index 86240fd..ab90cf2 100644 --- a/tools/pcf-compact/src/lib.rs +++ b/tools/pcf-compact/src/lib.rs @@ -11,6 +11,20 @@ use std::time::{SystemTime, UNIX_EPOCH}; pub mod cli; +/// PFS-MS Session Record partition type (PFS-MS spec Section 8). 
Detected so we +/// can refuse to corrupt multi-session PFS-MS files. Kept as a literal to avoid +/// a dependency on the `pfs-ms` crate. +const PFS_SESSION_TYPE: u32 = 0xAAAA_0002; + +/// True if the container holds any `PFS_SESSION` partition, i.e. it is a PFS-MS +/// (multi-session filesystem) file that generic PCF compaction would corrupt. +pub fn is_pfs_ms(input: &[u8]) -> Result { + let mut c = pcf::Container::open(Cursor::new(input.to_vec()))?; + Ok(c.entries()? + .iter() + .any(|e| e.partition_type == PFS_SESSION_TYPE)) +} + /// Errors surfaced by the `pcf-compact` CLI and its library back-end. #[derive(Debug)] pub enum CompactError { @@ -25,6 +39,9 @@ pub enum CompactError { Pcf(pcf::Error), OutputExists(PathBuf), SameInputOutput(PathBuf), + /// Input is a PFS-MS multi-session file; generic PCF compaction would + /// corrupt it. The user must use `pfs compact`, or opt in with `--allow-pfs`. + PfsMsInput(PathBuf), /// `rename(2)` returned EXDEV — temp file and target on different filesystems. CrossDevice { tmp: PathBuf, @@ -64,6 +81,16 @@ impl std::fmt::Display for CompactError { "--output {} is the same file as the input; omit --output for in-place compaction", p.display() ), + CompactError::PfsMsInput(p) => write!( + f, + "{} is a PFS-MS (multi-session filesystem) file; generic PCF compaction \ + would corrupt it (it merges sessions and rewrites table hashes). 
Use \ + `pfs compact {}` to rebuild it as a fresh single-session snapshot, or \ + pass --allow-pfs to force a plain-PCF compaction that DISCARDS the PFS \ + structure", + p.display(), + p.display() + ), CompactError::CrossDevice { tmp, target } => write!( f, "temp file {} and target {} are on different filesystems; atomic rename is not possible (write --output to a path on the same filesystem)", diff --git a/tools/pcf-compact/src/main.rs b/tools/pcf-compact/src/main.rs index 97a7fc3..6644e97 100644 --- a/tools/pcf-compact/src/main.rs +++ b/tools/pcf-compact/src/main.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use std::process::ExitCode; use pcf_compact::cli::{self, Args, Parsed}; -use pcf_compact::{atomic_write, compact_bytes, format_size, CompactError}; +use pcf_compact::{atomic_write, compact_bytes, format_size, is_pfs_ms, CompactError}; fn main() -> ExitCode { let argv: Vec = std::env::args().skip(1).collect(); @@ -34,6 +34,11 @@ fn run(args: Args) -> Result<(), CompactError> { })?; let input_len = input_bytes.len() as u64; + // Refuse to corrupt a PFS-MS file unless the user explicitly opts in. + if !args.allow_pfs && is_pfs_ms(&input_bytes)? 
{ + return Err(CompactError::PfsMsInput(args.file.clone())); + } + let compacted = compact_bytes(&input_bytes, args.verify, args.verify)?; let target: PathBuf = match &args.output { diff --git a/tools/pcf-compact/tests/integration.rs b/tools/pcf-compact/tests/integration.rs index 46d6828..730992f 100644 --- a/tools/pcf-compact/tests/integration.rs +++ b/tools/pcf-compact/tests/integration.rs @@ -10,7 +10,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; use pcf::{Container, HashAlgo}; -use pcf_compact::{atomic_write, compact_bytes, format_size, CompactError}; +use pcf_compact::{atomic_write, compact_bytes, format_size, is_pfs_ms, CompactError}; fn uid(n: u8) -> [u8; 16] { let mut u = [0u8; 16]; @@ -215,3 +215,63 @@ fn compacts_trailer_mode_input() { let mut c = Container::open(Cursor::new(compacted)).unwrap(); assert_eq!(c.entries().unwrap().len(), 3); } + +/// Build a PCF container that looks like a PFS-MS file: it carries a partition +/// of the PFS_SESSION application type (0xAAAA0002). Built with the `pcf` API +/// only — `pcf-compact` must not depend on the `pfs-ms` crate. +fn build_pfs_like() -> Vec { + let mut c = Container::create_with(Cursor::new(Vec::new()), 8, HashAlgo::Sha256).unwrap(); + c.add_partition(0x0000_0001, uid(1), "node", &[1; 16], 16, HashAlgo::Sha256) + .unwrap(); + // The PFS_SESSION partition that marks this as a PFS-MS file. 
+ c.add_partition( + 0xAAAA_0002, + uid(2), + "session", + &[2; 32], + 32, + HashAlgo::Sha256, + ) + .unwrap(); + c.into_storage().into_inner() +} + +#[test] +fn detects_pfs_ms_file() { + assert!(is_pfs_ms(&build_pfs_like()).unwrap()); +} + +#[test] +fn non_pfs_not_flagged() { + assert!(!is_pfs_ms(&build_sample(5, &[])).unwrap()); +} + +#[test] +fn pfs_ms_error_message_points_to_pfs_compact() { + let err = CompactError::PfsMsInput(PathBuf::from("/data/store.pfs")); + let msg = err.to_string(); + assert!(msg.contains("PFS-MS"), "message names the format: {msg}"); + assert!( + msg.contains("pfs compact"), + "message points to the fix: {msg}" + ); + assert!( + msg.contains("/data/store.pfs"), + "message names the file: {msg}" + ); + assert!( + msg.contains("--allow-pfs"), + "message names the override: {msg}" + ); +} + +#[test] +fn allow_pfs_bypasses_library_compaction() { + // The library `compact_bytes` is unguarded (the guard lives in the binary's + // `--allow-pfs` path): forcing compaction yields a still-valid plain PCF. + let bytes = build_pfs_like(); + let compacted = compact_bytes(&bytes, true, true).expect("forced compaction"); + let mut c = Container::open(Cursor::new(compacted)).unwrap(); + // Both partitions survive as a flat PCF set (the PFS structure is gone). + assert_eq!(c.entries().unwrap().len(), 2); +}