Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,30 @@ jobs:
- run: cargo clippy -p pcf-compact --all-targets -- -D warnings
- run: cargo build -p pcf-compact --verbose
- run: cargo test -p pcf-compact --verbose

# CI job for the pcf-dcp crate: format check, clippy with warnings denied,
# build, and test, followed by regenerating the spec test vector, checking its
# size, and uploading it as a build artifact.
pcf-dcp:
name: pcf-dcp profile
runs-on: ubuntu-latest
defaults:
run:
working-directory: .
steps:
- uses: actions/checkout@v4
# The rustfmt and clippy components are required by the fmt/clippy steps below.
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- run: cargo fmt -p pcf-dcp -- --check
- run: cargo clippy -p pcf-dcp --all-targets -- -D warnings
- run: cargo build -p pcf-dcp --verbose
- run: cargo test -p pcf-dcp --verbose
# Runs the gen_testvector example, writing pcf_dcp_testvector.bin into the
# working directory.
- name: Regenerate the spec test vector
run: cargo run -p pcf-dcp --example gen_testvector -- pcf_dcp_testvector.bin
# Fails the job unless the generated file is exactly 700 bytes long.
- name: Inspect generated test vector (spec Section 17 is 700 bytes)
run: |
ls -l pcf_dcp_testvector.bin
test "$(wc -c < pcf_dcp_testvector.bin)" = "700"
# Publishes the generated vector as the "pcf-dcp-testvector" artifact.
- uses: actions/upload-artifact@v4
with:
name: pcf-dcp-testvector
path: pcf_dcp_testvector.bin
3 changes: 3 additions & 0 deletions .github/workflows/release-prepare.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,16 @@ jobs:
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' reference/PCF-v1.0/Cargo.toml
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' reference/PFS-MS-v1.0/Cargo.toml
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' reference/PCF-SIG-v1.0/Cargo.toml
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' reference/PCF-DCP-v1.0/Cargo.toml
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' tools/pcf-debug/Cargo.toml
sed -i 's/^version = "[^"]*"/version = "'"$NEW"'"/' tools/pcf-compact/Cargo.toml
# path-dep version pins on pcf
sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PFS-MS-v1.0/Cargo.toml
sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PCF-SIG-v1.0/Cargo.toml
sed -i 's|pcf = { path = "\.\./PCF-v1.0", version = "[^"]*" }|pcf = { path = "../PCF-v1.0", version = "'"$NEW"'" }|' reference/PCF-DCP-v1.0/Cargo.toml
sed -i 's|pcf = { path = "\.\./\.\./reference/PCF-v1.0", version = "[^"]*" }|pcf = { path = "../../reference/PCF-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml
sed -i 's|pcf-sig = { path = "\.\./\.\./reference/PCF-SIG-v1.0", version = "[^"]*" }|pcf-sig = { path = "../../reference/PCF-SIG-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml
sed -i 's|pcf-dcp = { path = "\.\./\.\./reference/PCF-DCP-v1.0", version = "[^"]*" }|pcf-dcp = { path = "../../reference/PCF-DCP-v1.0", version = "'"$NEW"'" }|' tools/pcf-debug/Cargo.toml
sed -i 's|pcf = { path = "\.\./\.\./reference/PCF-v1.0", version = "[^"]*" }|pcf = { path = "../../reference/PCF-v1.0", version = "'"$NEW"'" }|' tools/pcf-compact/Cargo.toml

- name: Bump TypeScript packages
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ jobs:
if: needs.resolve.outputs.dry_run != 'true'
run: sleep 45

# Publishes the pcf-dcp crate. When the resolved dry_run output is 'true' the
# step only runs `cargo publish --dry-run` (no token is passed); otherwise it
# publishes using the token produced by the cargo-auth step.
- name: cargo publish pcf-dcp
shell: bash
run: |
if [ "${{ needs.resolve.outputs.dry_run }}" = "true" ]; then
cargo publish -p pcf-dcp --allow-dirty --dry-run
else
cargo publish -p pcf-dcp --allow-dirty --token "${{ steps.cargo-auth.outputs.token }}"
fi

# On real publishes, pause 45 seconds before the next publish step; skipped on
# dry runs. NOTE(review): presumably this lets the crates.io index pick up
# pcf-dcp before a dependent crate is published — confirm.
- name: Wait for crates.io index
if: needs.resolve.outputs.dry_run != 'true'
run: sleep 45

- name: cargo publish pcf-debug
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ members = [
"reference/PCF-v1.0",
"reference/PFS-MS-v1.0",
"reference/PCF-SIG-v1.0",
"reference/PCF-DCP-v1.0",
"tools/pcf-debug",
"tools/pcf-compact",
]
25 changes: 25 additions & 0 deletions reference/PCF-DCP-v1.0/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "pcf-dcp"
version = "0.0.8"
edition = "2021"
description = "Reference implementation of PCF-DCP v1.0, the PCF Dynamic Container Partition profile"
license = "MIT OR Apache-2.0"
repository = "https://github.com/kduma-OSS/Partitioned-Container-Format"
homepage = "https://github.com/kduma-OSS/Partitioned-Container-Format"
readme = "README.md"
keywords = ["pcf", "dcp", "container", "deduplication", "fragmentation"]
categories = ["encoding", "filesystem"]

# This crate is a *reference* implementation of the PCF-DCP profile. Like the
# `pcf` crate it builds on, it favours a direct, auditable mapping onto the
# written specification (`specs/PCF-DCP-spec-v1.0.txt`) over raw performance.

[[bin]]
name = "dcp"
path = "src/bin/dcp.rs"

[dependencies]
# The PCF-DCP profile is layered strictly above PCF v1.0; every byte container
# operation goes through the reference PCF crate. The arena reuses PCF's Table
# Block, Partition Entry, and table-hash primitives directly.
pcf = { path = "../PCF-v1.0", version = "0.0.8" }
120 changes: 120 additions & 0 deletions reference/PCF-DCP-v1.0/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# pcf-dcp — PCF Dynamic Container Partition (reference implementation)

Reference reader/writer for **PCF-DCP v1.0**, an application-level profile that
adds *dynamic*, fragmentable, dedup-friendly sub-partitions to the
[Partitioned Container Format](../PCF-v1.0) without modifying the PCF byte
container.

This crate mirrors the written specification (`specs/PCF-DCP-spec-v1.0.txt`)
field-for-field and is intended as the *normative* implementation against which
language ports are checked. It favours auditability over performance.

## Model at a glance

PCF-DCP defines one new PCF partition type:

| Type | Name | Holds |
|--------------|-----------------|----------------------------------------------------|
| `0xAAAC0001` | `DCP_CONTAINER` | An *arena*: a header, an inner partition table, fragment tables, and data extents |

A DCP container's bytes are an **arena** addressed by arena-relative offsets:

```
arena:
[ DCP Header (24 B) | data extents | Fragment Tables | Inner Table Block(s) ]
```

* **DCP Header** — `"PDCP"` magic, profile version, `inner_table_offset`,
`arena_used` (a bump pointer).
* **Inner Table Block** — a chain of reused PCF Table Blocks (74 B header +
141 B entries), byte-for-byte identical to the top-level table, listing the
*inner* partitions. Two entry fields are reinterpreted: `start_offset` points
at the partition's Fragment Table, and `max_length` equals `used_bytes`.
* **Fragment Table** — per inner partition, a chain of 9-byte block headers each
followed by 18-byte **Fragment Entries**. Each entry names one extent
`(offset, length, kind, flags)`. The logical content of an inner partition is
the concatenation of its DATA extents.

A generic PCF reader sees a DCP file as **one opaque partition**; only a
DCP-aware reader looks inside. A DCP file is always a conforming PCF v1.0 file.

## Why a profile

PCF stores each partition as a contiguous, statically-reserved region. PCF-DCP
lets each *inner* partition grow, shrink, and be edited in the middle without
relocating its neighbours, by describing it as a list of extents rather than one
range. This buys:

* **Fragmentation / random edits** — append, insert, overwrite, delete, and
truncate are edits of the Fragment Table (copy-on-write for shared bytes); no
data is moved.
* **Deduplication** — two extents may name the same arena bytes; identical
chunks are stored once. The per-extent `SHARED` flag makes safe in-place
editing explicit.
* **Hash / signature stability** — an inner partition's `data_hash` covers its
*logical content*, so fragmentation, dedup, compaction, and promotion all
leave the hash (and any PCF-SIG signature over it) unchanged.

## Library example

```rust
use std::io::Cursor;
use pcf_dcp::{Arena, Chunker, DcpReader, DcpWriter, HashAlgo};

let mut arena = Arena::new();
arena.add_inner(0x10, [0xA1; 16], "A", b"Hello, World!", HashAlgo::Sha256, Chunker::Fixed(7))?;
arena.add_inner(0x10, [0xB2; 16], "B", b"World!", HashAlgo::Sha256, Chunker::Whole)?;

let mut w = DcpWriter::new();
w.add_container([0xDC; 16], "dcp", arena)?;
let image = w.to_image()?;

let mut r = DcpReader::open(Cursor::new(image))?;
r.verify()?;
assert_eq!(r.read_inner(&[0xB2; 16])?, b"World!");
# Ok::<(), pcf_dcp::Error>(())
```

## Promotion / demotion

`DcpWriter::promote` moves an inner partition out to a top-level PCF partition
(dynamic → fixed); `demote` moves a top-level partition into a container
(fixed → dynamic). Both preserve `uid`, `partition_type`, `label`,
`data_hash_algo_id`, and `data_hash` — the **promotion invariant**, identical to
the set of fields a PCF-SIG signature protects.

## Command-line tool

The `dcp` binary inspects and rewrites DCP files; every mutating command
re-verifies before writing:

```
dcp info <file>
dcp dedup <file> [--fixed N] [--trailer]
dcp defrag <file> [--trailer]
dcp promote <file> <container-uid> <inner-uid> [--trailer]
dcp demote <file> <part-uid> <container-uid> [--trailer]
```

UIDs are 32 hex digits, or `0xNN` for a uid of 16 identical bytes (e.g. `0xDC`).

## Build & test

```
cargo test -p pcf-dcp
cargo run -p pcf-dcp --example gen_testvector -- /tmp/dcp.bin # the 700-byte vector
cargo run -p pcf-dcp --bin dcp -- info /tmp/dcp.bin
```

The example reproduces the byte-exact 700-byte test vector from Section 17 of
the specification.

## Relationship to `pcf`

This crate is layered strictly above [`pcf`](../PCF-v1.0): every container byte
operation goes through the reference PCF crate, and the arena reuses PCF's Table
Block, Partition Entry, and table-hash primitives directly.

## Licence

MIT OR Apache-2.0.
57 changes: 57 additions & 0 deletions reference/PCF-DCP-v1.0/examples/gen_testvector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//! Generates the canonical PCF-DCP v1.0 test-vector file used in spec
//! Section 17.
//!
//! Run with: `cargo run -p pcf-dcp --example gen_testvector -- <output-path>`
//! (defaults to ./pcf_dcp_testvector.bin). Everything is fixed and
//! deterministic so that ports can reproduce the file byte-for-byte.

use std::io::Cursor;

use pcf::Container;
use pcf_dcp::{build_reference_vector, DcpReader};

/// Builds the reference vector, writes it to disk, then re-opens the same
/// bytes as a PCF container and as a DCP file, verifying both, and prints a
/// per-container / per-inner-partition summary to stderr.
///
/// The output path is the first command-line argument, falling back to
/// `pcf_dcp_testvector.bin`. Any failure aborts via `expect`.
fn main() {
    let out_path = match std::env::args().nth(1) {
        Some(arg) => arg,
        None => String::from("pcf_dcp_testvector.bin"),
    };

    let bytes = build_reference_vector().expect("build reference vector");
    std::fs::write(&out_path, &bytes).expect("write file");

    // The written bytes must open and verify as a plain PCF v1.0 container ...
    let mut as_pcf = Container::open(Cursor::new(bytes.clone())).expect("pcf open");
    as_pcf.verify().expect("pcf verify");

    // ... and must also open and verify through the DCP reader.
    let mut reader = DcpReader::open(Cursor::new(bytes.clone())).expect("dcp open");
    reader.verify().expect("dcp verify");

    eprintln!("wrote {} ({} bytes)", out_path, bytes.len());

    for container in reader.containers().expect("containers") {
        let arena = reader.open_arena(&container).expect("arena");
        eprintln!(
            " container {:<6} type=0x{:08X} used={} inners={}",
            container.label_string().unwrap_or_default(),
            container.partition_type,
            container.used_bytes,
            arena.len()
        );

        for inner in arena.inners() {
            // Only the leading `digest_len` bytes of the hash field are printed.
            let digest_len = inner.data_hash_algo.digest_len();
            let digest_hex = inner.data_hash[..digest_len]
                .iter()
                .map(|byte| format!("{byte:02x}"))
                .collect::<String>();
            let shared_extents = inner.extents.iter().filter(|extent| extent.shared).count();
            eprintln!(
                " inner {:<3} type=0x{:08X} used={} extents={} shared={} data_hash={}",
                inner.label,
                inner.partition_type,
                inner.used_bytes,
                inner.extents.len(),
                shared_extents,
                digest_hex
            );
        }
    }
}
Loading
Loading