From 83909c8a65982f6f9392ff94f7453c3cf4c0274a Mon Sep 17 00:00:00 2001 From: Scott Young <29237040+resrever@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:12:07 -0400 Subject: [PATCH] fix(bedrock): clamp top_p >= 0.95 for adaptive thinking, not just legacy budget Anthropic-on-Bedrock enforces the top_p >= 0.95-or-unset constraint for both thinking.enabled (legacy budget) and thinking.adaptive modes. The previous code only applied the clamp for emits_legacy_thinking, leaving top_p unconstrained for Opus 4.6 / Sonnet 4.6 in adaptive mode. The direct Anthropic and Vertex AI paths were already correct (ReasoningTransform strips top_p to None whenever is_reasoning_supported()). Opus 4.7/4.8 on Bedrock were also fine (ModelSpecificReasoning strips top_p before conversion). The only broken combo was AdaptiveFriendly models (Opus 4.6, Sonnet 4.6) on Bedrock where top_p was passed through untouched alongside thinking.adaptive. Fix: change the adjusted_top_p guard from emits_legacy_thinking to reasoning_on. Update the test that previously asserted adaptive thinking left top_p untouched. --- crates/forge_repo/src/provider/bedrock.rs | 34 +++++++++-------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/crates/forge_repo/src/provider/bedrock.rs b/crates/forge_repo/src/provider/bedrock.rs index 7b3689c8e0..b03294a09f 100644 --- a/crates/forge_repo/src/provider/bedrock.rs +++ b/crates/forge_repo/src/provider/bedrock.rs @@ -455,17 +455,8 @@ impl FromDomain // Capture reasoning-related flags before `context.messages` / other fields // are consumed below. `ModelSpecificReasoning` runs earlier in the pipeline - // and has already normalized `reasoning` per model family, so here we just - // branch on the shape it produced: - // - `max_tokens.is_some()` -> legacy `thinking.enabled` budget shape - // - otherwise -> `thinking.adaptive` (Opus 4.7 / 4.6 / Sonnet 4.6) + // and has already normalized `reasoning` per model family. let reasoning_on = context.is_reasoning_supported(); - let emits_legacy_thinking = reasoning_on - && context - .reasoning - .as_ref() - .and_then(|r| r.max_tokens) - .is_some(); // Convert system messages let system: Vec = context @@ -550,12 +541,13 @@ impl FromDomain }; // Convert inference configuration - // When `thinking.enabled` (legacy budget shape) is being emitted below, - // Anthropic-on-Bedrock requires `top_p >= 0.95` or unset. `thinking.adaptive` - // (Opus 4.7 / Opus 4.6 / Sonnet 4.6) has no such constraint, and - // `ModelSpecificReasoning` already strips `top_p` entirely for Opus 4.7. - let adjusted_top_p = if emits_legacy_thinking { - // If legacy thinking is emitted and top_p is set, ensure it's at least 0.95 + // Anthropic-on-Bedrock requires `top_p >= 0.95` or unset whenever any + // thinking shape is active — both `thinking.enabled` (legacy budget) and + // `thinking.adaptive` (Opus 4.6 / Sonnet 4.6) enforce this constraint. + // `ModelSpecificReasoning` already strips `top_p` entirely for Opus 4.7/4.8 + // (AdaptiveOnly), so they arrive here with top_p == None already. + let adjusted_top_p = if reasoning_on { + // Clamp to minimum 0.95 when any thinking shape is active. context.top_p.map(|p| { let value = p.value(); if value < 0.95 { @@ -2054,10 +2046,10 @@ mod tests { ); } - /// Adaptive thinking must NOT trigger the legacy `top_p >= 0.95` clamp — - /// that constraint only applies to `thinking.enabled` (budget shape). + /// Adaptive thinking enforces the same `top_p >= 0.95` constraint as the + /// legacy budget shape — both modes reject values below 0.95. #[test] - fn test_from_domain_context_adaptive_thinking_does_not_clamp_top_p() { + fn test_from_domain_context_adaptive_thinking_clamps_top_p() { use aws_sdk_bedrockruntime::operation::converse_stream::ConverseStreamInput; use forge_domain::{Context, ReasoningConfig, TopP}; @@ -2084,8 +2076,8 @@ mod tests { let actual = ConverseStreamInput::from_domain(fixture).unwrap(); let top_p = actual.inference_config().unwrap().top_p().unwrap(); assert!( - (top_p - 0.5).abs() < f32::EPSILON, - "adaptive thinking must leave top_p untouched, got {top_p}" + top_p >= 0.95, + "adaptive thinking must clamp top_p to at least 0.95, got {top_p}" ); }