Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions reference/PFS-MS-v1.0/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ cargo run --bin pfs -- extract backup.pfs ./restore --at 2 # by session
cargo run --bin pfs -- extract backup.pfs ./restore --at-time 1700000000000
```

### Compaction

`pfs compact` rebuilds a multi-session file into a single fresh session holding
the current tree, **discarding history** (Section 15): deleted nodes are gone,
superseded versions and delta chains collapse to the newest full content, and
abandoned tails are reclaimed. The output is a fully valid, verifiable PFS-MS
file. (Generic `pcf-compact` must *not* be used on a PFS-MS file — it would
corrupt the session chain.)

```
cargo run --bin pfs -- compact fs.pfs # in place
cargo run --bin pfs -- compact fs.pfs out.pfs # to a new file
```

POSIX permission bits and modification time are captured on import and restored
on extract; pass `--no-metadata` (on either side) to skip this, and `--store` to
disable compression. Symlinks and other non-regular files are skipped with a
Expand Down Expand Up @@ -184,12 +198,14 @@ reference/PFS-MS-v1.0/
│ ├── tree.rs # liveness, tree, reconstruction (Sections 9.3, 10)
│ ├── fs.rs # high-level FsReader
│ ├── dirsync.rs # directory <-> archive tooling (create/update/extract)
│ ├── compact.rs # single-session compaction (Section 15)
│ ├── vector.rs # canonical Section 17 reference vector
│ └── bin/pfs.rs # demo CLI
├── tests/
│ ├── roundtrip.rs # end-to-end black-box tests
│ ├── coverage.rs # targeted error-path / edge-case tests
│ ├── dirsync.rs # directory create/update/extract round-trips
│ ├── compact.rs # single-session compaction round-trips
│ └── spec_compliance.rs # one test per normative MUST (R1..R8, W2/W3)
└── examples/
└── gen_testvector.rs # writes pfs_ms_testvector.bin + hex dumps
Expand Down
12 changes: 11 additions & 1 deletion reference/PFS-MS-v1.0/src/bin/pfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
//! pfs create <archive> <dir> [--store] [--no-metadata] [--key <priv>]
//! pfs update <archive> <dir> [--delete] [--store] [--no-metadata] [--key <priv>]
//! pfs extract <archive> <dir> [--at <seq>] [--at-time <unix_ms>] [--no-metadata]
//! pfs compact <file> [<out>] # rebuild as one fresh session (discards history)
//! pfs keygen <priv_out> <pub_out>
//! pfs sign <file> --key <priv> [--resign]
//! pfs verify-sig <file> [--key <trusted_pub>] [--no-recheck]
Expand Down Expand Up @@ -70,6 +71,7 @@ fn run(args: &[String]) -> CliResult {
"create" => cmd_create(rest),
"update" => cmd_update(rest),
"extract" => cmd_extract(rest),
"compact" => cmd_compact(rest),
"keygen" => cmd_keygen(rest),
"sign" => cmd_sign(rest),
"verify-sig" => cmd_verify_sig(rest),
Expand All @@ -83,7 +85,7 @@ fn run(args: &[String]) -> CliResult {

/// Print the command-line usage summary to standard error.
///
/// The text lists every subcommand with its positional arguments and flags,
/// followed by a note that mutating commands accept `--key <priv>` to
/// auto-sign after the commit.
fn print_usage() {
eprintln!(
"usage:\n pfs mkfs <file> [--key <priv>]\n pfs mkdir <file> <path> [--key <priv>]\n pfs put <file> <path> [<src|->] [--store] [--key <priv>]\n pfs mv <file> <src> <dst> [--key <priv>]\n pfs rm <file> <path> [--key <priv>]\n pfs ls <file> [<path>]\n pfs cat <file> <path>\n pfs get <file> <path> <out>\n pfs log <file>\n pfs verify <file>\n pfs create <archive> <dir> [--store] [--no-metadata] [--key <priv>]\n pfs update <archive> <dir> [--delete] [--store] [--no-metadata] [--key <priv>]\n pfs extract <archive> <dir> [--at <seq>] [--at-time <unix_ms>] [--no-metadata]\n pfs keygen <priv_out> <pub_out>\n pfs sign <file> --key <priv> [--resign]\n pfs verify-sig <file> [--key <trusted_pub>] [--no-recheck]\n\nmutating commands accept --key <priv> to auto-sign after the commit."
"usage:\n pfs mkfs <file> [--key <priv>]\n pfs mkdir <file> <path> [--key <priv>]\n pfs put <file> <path> [<src|->] [--store] [--key <priv>]\n pfs mv <file> <src> <dst> [--key <priv>]\n pfs rm <file> <path> [--key <priv>]\n pfs ls <file> [<path>]\n pfs cat <file> <path>\n pfs get <file> <path> <out>\n pfs log <file>\n pfs verify <file>\n pfs create <archive> <dir> [--store] [--no-metadata] [--key <priv>]\n pfs update <archive> <dir> [--delete] [--store] [--no-metadata] [--key <priv>]\n pfs extract <archive> <dir> [--at <seq>] [--at-time <unix_ms>] [--no-metadata]\n pfs compact <file> [<out>]\n pfs keygen <priv_out> <pub_out>\n pfs sign <file> --key <priv> [--resign]\n pfs verify-sig <file> [--key <trusted_pub>] [--no-recheck]\n\nmutating commands accept --key <priv> to auto-sign after the commit."
);
}

Expand Down Expand Up @@ -321,6 +323,14 @@ fn cmd_update(a: &[String]) -> CliResult {
maybe_autosign(archive, p.values.get("key"))
}

/// `pfs compact <file> [<out>]`: rebuild `<file>` as a single fresh session.
///
/// With only `<file>` given the archive is compacted in place; with `<out>`
/// the compacted image is written there instead. Library errors are rendered
/// to their string form for the CLI.
fn cmd_compact(a: &[String]) -> CliResult {
    let parsed = parse_flags(a, &[])?;
    let src = pos(&parsed, 0, "<file>")?;
    // No explicit <out> means the source path doubles as the destination.
    let explicit_out = parsed.positional.get(1).map(String::as_str);
    let dst = explicit_out.unwrap_or(src);
    pfs_ms::compact_archive(Path::new(src), Path::new(dst)).map_err(|e| e.to_string())
}

fn cmd_extract(a: &[String]) -> CliResult {
let p = parse_flags(a, &["at", "at-time"])?;
let archive = p.positional.first().ok_or("missing argument: <archive>")?;
Expand Down
178 changes: 178 additions & 0 deletions reference/PFS-MS-v1.0/src/compact.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
//! PFS-MS-aware compaction: rebuild a multi-session file into a fresh,
//! single-session snapshot of its current state (spec Section 15).
//!
//! Generic PCF compaction (PCF Section 11.5, [`pcf::Container::compacted_image`])
//! MUST NOT be used on a PFS-MS file: it repacks entries into shared blocks and
//! rewrites every `table_hash`, which destroys the one-`PFS_SESSION`-per-HEAD
//! invariant and the inter-session hash commitments (`member_blocks_digest`,
//! `prev_session_hash`). The result no longer scans or verifies as PFS-MS.
//!
//! Compaction here is therefore profile-aware. It resolves the live tree at the
//! head and re-emits it as **one** session (`session_seq = 1`,
//! `prev_session_hash = 0`). This is a full rewrite that *discards history*:
//!
//! * deleted nodes are gone — only live nodes are re-emitted;
//! * every file is stored as fresh `Direct` (or `Empty`) content, collapsing
//! any delta chain to the newest full version;
//! * superseded versions and abandoned tails are reclaimed.
//!
//! The output is a fully valid, verifiable PFS-MS file.

use std::fs::{self, OpenOptions};
use std::io::{Read, Seek, Write};
use std::path::{Path, PathBuf};

use pcf::HashAlgo;

use crate::error::{Error, Result};
use crate::fs::FsReader;
use crate::tree::Tree;
use crate::writer::{Change, FsWriter};
use crate::ROOT_NODE_ID;

/// Compact the PFS-MS image readable from `src` into a single-session image
/// written to `dst`, handing `dst` back to the caller on success.
///
/// The live tree resolved from `src` is re-emitted as one session, so history
/// is discarded (Section 15). `dst` must be a fresh, writable, empty handle.
///
/// Order of operations:
///
/// 1. `src` is opened and verified; a source that fails verification is
///    rejected before anything is written.
/// 2. The hash algorithm, the tree, and the full change set (including every
///    live file's content) are collected in memory.
/// 3. Only then is `dst` initialised and the change set committed.
///
/// Because step 2 finishes before `dst` is touched, `src` and `dst` may be
/// distinct handles to the same logical data without interfering.
///
/// # Errors
///
/// Propagates any error from opening, verifying, or reading the source and
/// from creating or committing to the destination.
pub fn compact<R, W>(src: R, mut dst: W) -> Result<W>
where
    R: Read + Write + Seek,
    W: Read + Write + Seek,
{
    let mut reader = FsReader::open(src)?;
    // Verify first so a corrupt source is refused rather than propagated
    // (mirrors pcf-compact's verify-before).
    reader.verify()?;

    let algo = source_hash_algo(&mut reader)?;
    let live_tree = reader.tree()?;
    let changes = collect_changes(&mut reader, &live_tree)?;

    {
        // The writer borrows `dst` mutably; the scope ends that borrow so
        // `dst` can be returned afterwards.
        let mut writer = FsWriter::create(&mut dst, algo)?;
        writer.set_writer_id(b"pfs-compact");
        writer.set_compression(true);
        // An empty source tree yields no changes; `commit_changes` then
        // commits nothing and `dst` stays at the valid empty-table state
        // left by `create`.
        writer.commit_changes(&changes)?;
    }

    Ok(dst)
}

/// Compact the PFS-MS file at `src` into `dst` on the host filesystem.
///
/// When `dst == src` the file is compacted in place. The compacted image is
/// built entirely in memory first, then written to a sibling temp file
/// (`.<name>.pfs-compact.tmp.<pid>` in `dst`'s directory), fsynced, and
/// renamed over `dst`, so `dst` is only ever replaced by a completely written
/// image.
///
/// # Errors
///
/// Returns whatever [`compact`] reports for the source, or [`Error::Io`] if
/// opening `src`, creating/writing/syncing the temp file, or the final rename
/// fails. On any failure after the temp file may have been created, it is
/// removed on a best-effort basis so no stray temp file is left behind.
pub fn compact_archive(src: &Path, dst: &Path) -> Result<()> {
    // Build the compacted image in memory from the source. The source handle
    // is closed at the end of this block, before `dst` is touched.
    let image = {
        let in_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(src)
            .map_err(Error::Io)?;
        compact(in_file, std::io::Cursor::new(Vec::new()))?.into_inner()
    };

    // The temp file lives next to `dst` so the rename stays on one filesystem.
    let dir = dst.parent().filter(|p| !p.as_os_str().is_empty());
    let tmp: PathBuf = {
        let name = dst
            .file_name()
            .map(|n| n.to_string_lossy().into_owned())
            .unwrap_or_else(|| "pfs".into());
        let pid = std::process::id();
        let tmp_name = format!(".{name}.pfs-compact.tmp.{pid}");
        match dir {
            Some(d) => d.join(tmp_name),
            None => PathBuf::from(tmp_name),
        }
    };

    // Write, fsync, close, then rename. Every step funnels into one error
    // path so the temp file is cleaned up no matter which step failed.
    let publish = || -> std::io::Result<()> {
        let mut f = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(true)
            .open(&tmp)?;
        f.write_all(&image)?;
        f.sync_all()?;
        // Close the handle before renaming.
        drop(f);
        fs::rename(&tmp, dst)
    };

    publish().map_err(|e| {
        // Best effort: the original I/O error is the one worth reporting.
        let _ = fs::remove_file(&tmp);
        Error::Io(e)
    })
}

/// The table-hash algorithm to use for the compacted output.
///
/// Taken from the first block-hash entry of the first session reported by
/// `scan()`. Falls back to `Sha256` when the source has no sessions, or when
/// that session carries no block hashes (checked lookup instead of a panicking
/// index).
///
/// NOTE(review): this was previously described as the *head* session's
/// algorithm, but the code reads `sessions.first()` — confirm the ordering of
/// `scan.sessions` if the distinction matters.
///
/// # Errors
///
/// Propagates any error from scanning the source.
fn source_hash_algo<S: Read + Write + Seek>(r: &mut FsReader<S>) -> Result<HashAlgo> {
    let scan = r.scan()?;
    Ok(scan
        .sessions
        .first()
        .and_then(|s| s.block_hashes.first())
        .map(|entry| entry.2)
        .unwrap_or(HashAlgo::Sha256))
}

/// Produce the list of changes that re-creates the entire live tree in a
/// single session, walking from the root node with an empty path prefix.
///
/// # Errors
///
/// Propagates the first error hit while walking the tree or reading content.
fn collect_changes<S: Read + Write + Seek>(
    r: &mut FsReader<S>,
    tree: &Tree,
) -> Result<Vec<Change>> {
    let mut changes = Vec::new();
    walk(r, tree, ROOT_NODE_ID, "", &mut changes).map(|()| changes)
}

/// Recursively append to `out` the changes that re-create everything below
/// `node`.
///
/// * `r` — reader used to reconstruct each file's current content.
/// * `tree` — resolved tree supplying the child lists and node records.
/// * `node` — id of the directory whose children are visited.
/// * `prefix` — path of `node` relative to the root (`""` for the root).
/// * `out` — accumulator; a directory's `Mkdir` is pushed before any of its
///   descendants, so parents always precede their contents.
///
/// A node with no entry in `tree.children` contributes nothing.
///
/// # Errors
///
/// Returns `Error::NotFound` if a listed child has no record in `tree.nodes`,
/// and propagates any error from reading a file's content.
fn walk<S: Read + Write + Seek>(
    r: &mut FsReader<S>,
    tree: &Tree,
    node: [u8; 16],
    prefix: &str,
    out: &mut Vec<Change>,
) -> Result<()> {
    // `tree` is only borrowed immutably and is disjoint from `r`, so the child
    // list can be iterated in place without cloning it.
    let kids = match tree.children.get(&node) {
        Some(k) => k,
        None => return Ok(()),
    };
    for &cid in kids {
        let rec = tree.nodes.get(&cid).ok_or(Error::NotFound)?;
        let name = rec.name_str();
        let rel = if prefix.is_empty() {
            name
        } else {
            format!("{prefix}/{name}")
        };
        if rec.is_dir() {
            // Emit every directory (preserving empty ones), then recurse.
            out.push(Change::Mkdir {
                path: rel.clone(),
                mode: rec.mode,
                mtime_unix_ms: rec.mtime_unix_ms,
            });
            walk(r, tree, cid, &rel, out)?;
        } else {
            // Reconstruct the full current content; re-emitted as Direct/Empty.
            let content = r.read_path(&rel)?;
            out.push(Change::PutFile {
                path: rel,
                content,
                mode: rec.mode,
                mtime_unix_ms: rec.mtime_unix_ms,
            });
        }
    }
    Ok(())
}
2 changes: 2 additions & 0 deletions reference/PFS-MS-v1.0/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
//! assert_eq!(r.read_path("docs/hello.txt").unwrap(), b"Hello, world\n");
//! ```

mod compact;
mod compress;
pub mod consts;
mod delta;
Expand All @@ -52,6 +53,7 @@ mod tree;
mod vector;
mod writer;

pub use compact::{compact, compact_archive};
pub use compress::{compress_deflate, decompress};
pub use consts::*;
pub use dirsync::{create_archive, extract_archive, session_at_time, update_archive, SyncOptions};
Expand Down
Loading
Loading