From da2a5f7b22da800c5029ccf1ec0c6a80718b6bd9 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Wed, 24 Jun 2026 16:40:03 +0200 Subject: [PATCH 1/2] fix(replication): offload responsible-chunk audit challenges off the receive loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Answering a responsible-chunk AuditChallenge digests the stored bytes of every challenged key, yet it was the one audit responder still handled inline in the serial replication receive loop — so a single challenge blocked all other replication traffic until its digests completed (head-of-line blocking). The ADR-0002 subtree/byte audits (PR #128) were already spawned off the loop under flood-fair admission. Route the AuditChallenge responder through the same admit_audit_responder admission (global MAX_CONCURRENT_AUDIT_RESPONSES cap + per-peer MAX_AUDIT_RESPONSES_PER_PEER cap) and a detached task, consistent with the subtree and byte challenge handlers. On admission failure the challenge is dropped, which an honest auditor graces as a timeout; a flooder is bounded to its per-peer share and cannot starve other peers. The cap is now a single shared ceiling across all three audit-responder types. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/replication/mod.rs | 53 +++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/replication/mod.rs b/src/replication/mod.rs index b3d827d..0f6394a 100644 --- a/src/replication/mod.rs +++ b/src/replication/mod.rs @@ -1691,22 +1691,53 @@ async fn handle_replication_message( ) .await } - ReplicationMessageBody::AuditChallenge(ref challenge) => { + ReplicationMessageBody::AuditChallenge(challenge) => { // Responsible-chunk audit (audit #2) responder: answer with per-key // possession digests. This same handler also answers the // prune-confirmation audit, which sends the same `AuditChallenge` // wire message. + // + // Answering digests the stored bytes of every challenged key, so — + // like the subtree/byte audits below — run it on a detached task off + // this serial message loop. Handling it inline lets one challenge + // block all other replication traffic until its digests complete + // (head-of-line blocking). The same flood-fair admission applies: a + // global ceiling AND a per-peer cap, dropping the challenge if either + // is hit (an honest auditor graces a non-response as a timeout, while + // a flooder is held to its per-peer share and cannot starve others). + let Some(guard) = + admit_audit_responder(audit_responder_semaphore, audit_responder_inflight, source) + .await + else { + warn!( + "Audit challenge reply not sent: kind=responsible response=dropped \ + source={source} (audit-responder capacity reached)" + ); + return Ok(()); + }; let bootstrapping = *is_bootstrapping.read().await; - handle_audit_challenge_msg( - source, - challenge, - storage, - p2p_node, - bootstrapping, - msg.request_id, - rr_message_id, - ) - .await + let storage = Arc::clone(storage); + let p2p_node = Arc::clone(p2p_node); + let source = *source; + let request_id = msg.request_id; + let rr_message_id = rr_message_id.map(ToOwned::to_owned); + tokio::spawn(async move { + let _guard = guard; // global permit + per-peer slot, held until done + if let Err(e) = handle_audit_challenge_msg( + &source, + &challenge, + &storage, + &p2p_node, + bootstrapping, + request_id, + rr_message_id.as_deref(), + ) + .await + { + debug!("Audit challenge from {source} error: {e}"); + } + }); + Ok(()) } ReplicationMessageBody::SubtreeAuditChallenge(challenge) => { // Gossip-triggered storage-bound subtree audit (ADR-0002). The From 5d82f5f3c4a41ebaff414d51665140ee783e4ae0 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Wed, 24 Jun 2026 16:50:21 +0200 Subject: [PATCH 2/2] perf(replication): raise concurrent audit-responder cap to 16 The audit-responder pool is now shared across all three responder types (responsible-chunk, subtree round 1, byte round 2) since the responsible- chunk AuditChallenge was moved onto the same admission. Raise the global ceiling from 8 to 16 so the three types have more combined headroom before challenges are dropped. The per-peer cap (MAX_AUDIT_RESPONSES_PER_PEER = 2) is unchanged, so a single flooder still cannot occupy more than its share. Doubles the worst-case concurrent get_raw reads / resident byte-serve memory; still bounded. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/replication/config.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/replication/config.rs b/src/replication/config.rs index e2aec73..571c934 100644 --- a/src/replication/config.rs +++ b/src/replication/config.rs @@ -103,17 +103,18 @@ pub const MAX_CONCURRENT_REPLICATION_SENDS: usize = 3; /// Maximum number of concurrent in-flight audit-responder tasks. /// -/// Subtree (round 1) and byte (round 2) challenge handlers are spawned off the -/// serial replication message loop so their disk reads don't stall replication. -/// This caps how many run at once across the engine, restoring backpressure: a -/// peer flooding audit challenges cannot fan out unbounded `get_raw` reads or -/// multi-MiB byte serves. When the cap is hit, the challenge is dropped — the -/// auditor graces a non-response as a timeout, so honest auditors are -/// unaffected and only a flooder is throttled. Sized to cover a handful of -/// concurrent honest auditors (the per-peer gossip-audit cooldown is 30 min, so -/// genuine concurrent audits are few) while bounding the byte round's worst-case -/// resident bytes (`N × MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE`). -pub const MAX_CONCURRENT_AUDIT_RESPONSES: usize = 8; +/// The responsible-chunk (audit #2), subtree (round 1), and byte (round 2) +/// challenge handlers are all spawned off the serial replication message loop so +/// their disk reads don't stall replication. This caps how many run at once +/// across the engine, restoring backpressure: a peer flooding audit challenges +/// cannot fan out unbounded `get_raw` reads or multi-MiB byte serves. When the +/// cap is hit, the challenge is dropped — the auditor graces a non-response as a +/// timeout, so honest auditors are unaffected and only a flooder is throttled. +/// Sized to cover a handful of concurrent honest auditors (the per-peer +/// gossip-audit cooldown is 30 min, so genuine concurrent audits are few) while +/// bounding the byte round's worst-case resident bytes +/// (`N × MAX_BYTE_CHALLENGE_KEYS × MAX_CHUNK_SIZE`). +pub const MAX_CONCURRENT_AUDIT_RESPONSES: usize = 16; /// Maximum concurrent in-flight audit-responder tasks from any SINGLE peer. ///