diff --git a/native/shared/src/renderer/froxel.rs b/native/shared/src/renderer/froxel.rs
new file mode 100644
index 0000000..943b726
--- /dev/null
+++ b/native/shared/src/renderer/froxel.rs
@@ -0,0 +1,421 @@
+//! Froxel light clustering — task #23 of the architecture audit.
+//!
+//! The 8+256 light-cap raise removed the capability ceiling but left
+//! the scene shader paying O(live point lights) per fragment. This
+//! module restores O(cluster lights): a compute pass assigns the point
+//! lights (read from the same lighting UBO the shaders already use) to
+//! a 16×9×24 view-frustum froxel grid each frame, and a clustered
+//! variant of the scene shader loops only its froxel's index list.
+//!
+//! Backend split, by capability rather than cfg: storage buffers in
+//! fragment shaders don't exist on WebGL2, so [`FroxelPass::supported`]
+//! gates on the device limits. Unsupported backends keep the plain
+//! count-driven loop (the semantic reference — the clustered path must
+//! match it exactly, which the many_point_lights golden enforces).
+//!
+//! Memory: counts 3456×4 B ≈ 14 KB; index list 3456×256×4 B ≈ 3.5 MB
+//! (256 = worst-case every light in one froxel — exact parity with the
+//! reference loop, no truncation).
+
+use wgpu::util::DeviceExt;
+
+pub(super) const GRID_X: u32 = 16;
+pub(super) const GRID_Y: u32 = 9;
+pub(super) const GRID_Z: u32 = 24;
+pub(super) const CLUSTER_COUNT: u32 = GRID_X * GRID_Y * GRID_Z;
+pub(super) const MAX_LIGHTS_PER_CLUSTER: u32 = 256;
+
+/// Uniform parameters shared by the assignment compute pass and the
+/// clustered fragment loop. Layout mirrored in WGSL below and in the
+/// fragment include.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub(super) struct FroxelParams {
+    /// View matrix (world → view) for light-position transform.
+    pub view: [[f32; 4]; 4],
+    /// x = grid_x, y = grid_y, z = grid_z, w = live point-light count.
+    pub grid: [u32; 4],
+    /// x = znear, y = zfar, z = log(zfar/znear), w = unused.
+    pub depth_range: [f32; 4],
+    /// x = 1/tile_w_px, y = 1/tile_h_px (fragment tile lookup),
+    /// z = p22, w = p32 (depth linearization, same convention as Hi-Z).
+    pub screen: [f32; 4],
+    /// Inverse projection — froxel corner reconstruction in the
+    /// assignment pass.
+    pub inv_proj: [[f32; 4]; 4],
+}
+
+const ASSIGN_SHADER: &str = "
+struct FroxelParams {
+    view: mat4x4<f32>,
+    grid: vec4<u32>,
+    depth_range: vec4<f32>,
+    screen: vec4<f32>,
+    inv_proj: mat4x4<f32>,
+};
+struct PointLight { position: vec4<f32>, color: vec4<f32> };
+struct Lights {
+    // Mirrors the tail of the Lighting UBO relevant here. The host
+    // binds a dedicated compact UBO (positions+ranges only would do,
+    // but reusing PointLight keeps one struct).
+    count: vec4<f32>,
+    lights: array<PointLight, 256>,
+};
+
+@group(0) @binding(0) var<uniform> p: FroxelParams;
+@group(0) @binding(1) var<uniform> l: Lights;
+@group(0) @binding(2) var<storage, read_write> cluster_counts: array<u32>;
+@group(0) @binding(3) var<storage, read_write> cluster_indices: array<u32>;
+
+// View-space Z of slice boundary k (logarithmic distribution).
+fn slice_z(k: u32) -> f32 {
+    let t = f32(k) / f32(p.grid.z);
+    return p.depth_range.x * exp(t * p.depth_range.z);
+}
+
+@compute @workgroup_size(4, 4, 4)
+fn cs_main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    if (gid.x >= p.grid.x || gid.y >= p.grid.y || gid.z >= p.grid.z) { return; }
+    let cluster = gid.x + gid.y * p.grid.x + gid.z * p.grid.x * p.grid.y;
+
+    // Froxel AABB in view space: reconstruct the tile's corner rays on
+    // the near plane of the projection and scale to the slice depths.
+    // NDC tile extents:
+    let x0 = (f32(gid.x)       / f32(p.grid.x)) * 2.0 - 1.0;
+    let x1 = (f32(gid.x + 1u)  / f32(p.grid.x)) * 2.0 - 1.0;
+    // NDC y is up; tile row 0 is the TOP of the screen.
+    let y1 = 1.0 - (f32(gid.y)      / f32(p.grid.y)) * 2.0;
+    let y0 = 1.0 - (f32(gid.y + 1u) / f32(p.grid.y)) * 2.0;
+
+    // Unproject the four corners at an arbitrary depth and normalize to
+    // rays through the camera (view space, looking down -Z).
+    var mn = vec3<f32>( 1e30,  1e30,  1e30);
+    var mx = vec3<f32>(-1e30, -1e30, -1e30);
+    let z_near_s = slice_z(gid.z);
+    let z_far_s  = slice_z(gid.z + 1u);
+    for (var cx = 0u; cx < 2u; cx++) {
+        for (var cy = 0u; cy < 2u; cy++) {
+            let nx = select(x0, x1, cx == 1u);
+            let ny = select(y0, y1, cy == 1u);
+            let h = p.inv_proj * vec4<f32>(nx, ny, 0.5, 1.0);
+            let dir = h.xyz / h.w;          // a point on the ray (view space)
+            let ray = dir / max(-dir.z, 1e-6); // scale so z == -1
+            // corner at both slice depths (view z is negative forward)
+            let a = ray * z_near_s;
+            let b = ray * z_far_s;
+            mn = min(mn, min(vec3<f32>(a.xy, -z_near_s), vec3<f32>(b.xy, -z_far_s)));
+            mx = max(mx, max(vec3<f32>(a.xy, -z_near_s), vec3<f32>(b.xy, -z_far_s)));
+        }
+    }
+
+    // Sphere/AABB tests against every live light.
+    var count = 0u;
+    let n = u32(l.count.x);
+    let base = cluster * 256u;
+    for (var i = 0u; i < n; i++) {
+        let pos_w = l.lights[i].position;
+        let pos_v = (p.view * vec4<f32>(pos_w.xyz, 1.0)).xyz;
+        let r = pos_w.w;
+        let closest = clamp(pos_v, mn, mx);
+        let d = pos_v - closest;
+        if (dot(d, d) <= r * r) {
+            cluster_indices[base + count] = i;
+            count++;
+        }
+    }
+    cluster_counts[cluster] = count;
+}
+";
+
+/// The fragment-side replacement for the plain point-light loop, plus
+/// the bindings it needs. Spliced into SCENE_SHADER between the
+/// BEGIN/END-POINT-LIGHT-LOOP markers by [`clustered_scene_shader`].
+const CLUSTERED_BINDINGS: &str = "
+struct FroxelParams {
+    view: mat4x4<f32>,
+    grid: vec4<u32>,
+    depth_range: vec4<f32>,
+    screen: vec4<f32>,
+    inv_proj: mat4x4<f32>,
+};
+@group(1) @binding(10) var<uniform> froxel: FroxelParams;
+@group(1) @binding(11) var<storage, read> cluster_counts: array<u32>;
+@group(1) @binding(12) var<storage, read> cluster_indices: array<u32>;
+";
+
+const CLUSTERED_LOOP: &str = "
+    // Froxel-clustered point lights: identical shading math to the
+    // reference loop, restricted to this fragment's cluster list.
+    let view_z = -froxel.screen.w / (in.clip_position.z + froxel.screen.z);
+    let slice = clamp(
+        u32(log(max(view_z, froxel.depth_range.x) / froxel.depth_range.x)
+            / froxel.depth_range.z * f32(froxel.grid.z)),
+        0u, froxel.grid.z - 1u);
+    let tile_x = min(u32(in.clip_position.x * froxel.screen.x), froxel.grid.x - 1u);
+    let tile_y = min(u32(in.clip_position.y * froxel.screen.y), froxel.grid.y - 1u);
+    let cluster = tile_x + tile_y * froxel.grid.x + slice * froxel.grid.x * froxel.grid.y;
+    let cl_count = cluster_counts[cluster];
+    let cl_base = cluster * 256u;
+    for (var ci = 0u; ci < cl_count; ci++) {
+        let pl = lighting.point_lights[cluster_indices[cl_base + ci]];
+        let to_light = pl.position.xyz - in.world_pos;
+        let dist = length(to_light);
+        let range = pl.position.w;
+        if (dist < range && dist > 0.0) {
+            let l = to_light / dist;
+            let atten = 1.0 - (dist / range);
+            let atten2 = atten * atten;
+            lit += shade_pbr(n, v, l, pl.color.rgb, pl.color.w * atten2,
+                             base_color, metallic, roughness);
+        }
+    }
+";
+
+/// Build the clustered SCENE_SHADER variant from the canonical source.
+pub(super) fn clustered_scene_shader(source: &str) -> String {
+    let begin = source
+        .find("// BEGIN-POINT-LIGHT-LOOP")
+        .expect("scene shader missing BEGIN-POINT-LIGHT-LOOP marker");
+    let end_marker = "// END-POINT-LIGHT-LOOP";
+    let end = source.find(end_marker).expect("scene shader missing END marker") + end_marker.len();
+    format!(
+        "{}{}{}{}",
+        CLUSTERED_BINDINGS,
+        &source[..begin],
+        CLUSTERED_LOOP,
+        &source[end..]
+    )
+}
+
+/// The three entries appended to `lighting_layout` (group 1) when the
+/// device supports the clustered path. Pipelines whose shaders don't
+/// reference them (SHADER_3D's pipeline_3d) are unaffected — extra
+/// layout entries are legal as long as the bind group provides them.
+pub(super) fn extra_lighting_layout_entries() -> [wgpu::BindGroupLayoutEntry; 3] {
+    let storage_ro = wgpu::BindingType::Buffer {
+        ty: wgpu::BufferBindingType::Storage { read_only: true },
+        has_dynamic_offset: false,
+        min_binding_size: None,
+    };
+    [
+        wgpu::BindGroupLayoutEntry {
+            binding: 10,
+            visibility: wgpu::ShaderStages::FRAGMENT,
+            ty: wgpu::BindingType::Buffer {
+                ty: wgpu::BufferBindingType::Uniform,
+                has_dynamic_offset: false,
+                min_binding_size: None,
+            },
+            count: None,
+        },
+        wgpu::BindGroupLayoutEntry { binding: 11, visibility: wgpu::ShaderStages::FRAGMENT, ty: storage_ro, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 12, visibility: wgpu::ShaderStages::FRAGMENT, ty: storage_ro, count: None },
+    ]
+}
+
+pub struct FroxelPass {
+    pub assign_pipeline: wgpu::ComputePipeline,
+    pub assign_layout: wgpu::BindGroupLayout,
+    pub params_buffer: wgpu::Buffer,
+    /// Compact point-light UBO for the compute pass (count + 256 lights).
+    pub lights_buffer: wgpu::Buffer,
+    pub counts_buffer: wgpu::Buffer,
+    pub indices_buffer: wgpu::Buffer,
+    assign_bg: wgpu::BindGroup,
+}
+
+impl FroxelPass {
+    /// Storage buffers must be available in BOTH compute and fragment
+    /// stages (WebGL2 has neither). `BLOOM_DISABLE_FROXEL=1` forces the
+    /// reference loop — used to (re)generate the clustered-parity
+    /// golden and to bisect suspected clustering bugs in the field.
+    pub fn supported(device: &wgpu::Device) -> bool {
+        if std::env::var_os("BLOOM_DISABLE_FROXEL").is_some_and(|v| v == "1") {
+            return false;
+        }
+        let l = device.limits();
+        l.max_storage_buffers_per_shader_stage >= 2
+            && l.max_storage_buffer_binding_size as u64
+                >= (CLUSTER_COUNT * MAX_LIGHTS_PER_CLUSTER * 4) as u64
+    }
+
+    pub fn new(device: &wgpu::Device) -> Self {
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("froxel_assign_shader"),
+            source: wgpu::ShaderSource::Wgsl(ASSIGN_SHADER.into()),
+        });
+        let assign_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("froxel_assign_layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 2,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 3,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+            ],
+        });
+        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("froxel_assign_pl"),
+            bind_group_layouts: &[Some(&assign_layout)],
+            ..Default::default()
+        });
+        let assign_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("froxel_assign_pipeline"),
+            layout: Some(&pl),
+            module: &shader,
+            entry_point: Some("cs_main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+        let params_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("froxel_params"),
+            contents: &[0u8; std::mem::size_of::<FroxelParams>()],
+            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
+        });
+        // count vec4 + 256 lights × 2 vec4
+        let lights_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_lights"),
+            size: 16 + 256 * 32,
+            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        let counts_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_counts"),
+            size: (CLUSTER_COUNT * 4) as u64,
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        let indices_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_indices"),
+            size: (CLUSTER_COUNT * MAX_LIGHTS_PER_CLUSTER * 4) as u64,
+            usage: wgpu::BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+        let assign_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("froxel_assign_bg"),
+            layout: &assign_layout,
+            entries: &[
+                wgpu::BindGroupEntry { binding: 0, resource: params_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 1, resource: lights_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 2, resource: counts_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 3, resource: indices_buffer.as_entire_binding() },
+            ],
+        });
+        Self {
+            assign_pipeline,
+            assign_layout,
+            params_buffer,
+            lights_buffer,
+            counts_buffer,
+            indices_buffer,
+            assign_bg,
+        }
+    }
+
+    /// The bind-group entries matching [`extra_lighting_layout_entries`],
+    /// appended to every lighting bind group the renderer builds.
+    pub(super) fn extra_lighting_bind_entries(&self) -> [wgpu::BindGroupEntry<'_>; 3] {
+        [
+            wgpu::BindGroupEntry { binding: 10, resource: self.params_buffer.as_entire_binding() },
+            wgpu::BindGroupEntry { binding: 11, resource: self.counts_buffer.as_entire_binding() },
+            wgpu::BindGroupEntry { binding: 12, resource: self.indices_buffer.as_entire_binding() },
+        ]
+    }
+
+    /// Record the per-frame assignment dispatch. The caller uploads
+    /// params + lights first (see Renderer::record_froxel_assign).
+    pub fn record(&self, encoder: &mut wgpu::CommandEncoder) {
+        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+            label: Some("froxel_assign_pass"),
+            timestamp_writes: None,
+        });
+        pass.set_pipeline(&self.assign_pipeline);
+        pass.set_bind_group(0, &self.assign_bg, &[]);
+        pass.dispatch_workgroups(GRID_X / 4, GRID_Y.div_ceil(4), GRID_Z / 4);
+    }
+}
+
+impl super::Renderer {
+    /// Upload froxel params + the compact light list and dispatch the
+    /// assignment pass. Runs every 3D frame on supported devices —
+    /// even with zero lights, so `cluster_counts` never carries stale
+    /// data from a previous frame's camera.
+    pub(super) fn record_froxel_assign(&mut self, encoder: &mut wgpu::CommandEncoder) {
+        let Some(froxel) = &self.froxel else { return };
+
+        let proj = self.current_proj_matrix;
+        let p22 = proj[2][2];
+        let p32 = proj[3][2];
+        // Same linearization as Hi-Z: view_z(depth) = -p32/(depth + p22),
+        // positive forward. Evaluate at depth 0 and 1; min/max makes
+        // this hold for reversed-Z too, and the clamps keep an
+        // infinite-far projection (division by ~0) finite.
+        let z_at = |d: f32| -p32 / (d + p22);
+        let (z0, z1) = (z_at(0.0), z_at(1.0));
+        let znear = z0.min(z1).max(1e-3);
+        let zfar = z0.max(z1).clamp(znear * 1.001, 1e9);
+
+        // clip_position.xy is in render-target pixels — the HDR scene
+        // pass runs at render_extent (render_scale-aware), not surface
+        // size.
+        let (rw, rh) = self.render_extent();
+        let n = (self.lighting_uniforms.point_light_count[0] as u32)
+            .min(MAX_LIGHTS_PER_CLUSTER);
+        let params = FroxelParams {
+            view: self.current_view_matrix,
+            grid: [GRID_X, GRID_Y, GRID_Z, n],
+            depth_range: [znear, zfar, (zfar / znear).ln(), 0.0],
+            screen: [
+                GRID_X as f32 / rw.max(1) as f32,
+                GRID_Y as f32 / rh.max(1) as f32,
+                p22,
+                p32,
+            ],
+            inv_proj: self.current_inv_proj_matrix,
+        };
+        self.queue.write_buffer(&froxel.params_buffer, 0, bytemuck::bytes_of(&params));
+        let count = [n as f32, 0.0, 0.0, 0.0_f32];
+        self.queue.write_buffer(&froxel.lights_buffer, 0, bytemuck::bytes_of(&count));
+        self.queue.write_buffer(
+            &froxel.lights_buffer,
+            16,
+            bytemuck::cast_slice(&self.lighting_uniforms.point_lights),
+        );
+        froxel.record(encoder);
+    }
+}
diff --git a/native/shared/src/renderer/hiz.rs b/native/shared/src/renderer/hiz.rs
index d47815f..5db7447 100644
--- a/native/shared/src/renderer/hiz.rs
+++ b/native/shared/src/renderer/hiz.rs
@@ -5,7 +5,8 @@
 //! policy); pipelines and the mip chain stay fields on [`Renderer`].
 
 use super::formats::HIZ_MIP_COUNT;
-use super::{HizDownsampleParams, HizLinearizeParams, SsaoBlurParams};
+use super::formats::halton;
+use super::{HizDownsampleParams, HizLinearizeParams, SsaoBlurParams, SsaoParams};
 use super::Renderer;
 
 impl Renderer {
@@ -169,3 +170,96 @@ impl Renderer {
     }
     }
 }
+
+impl Renderer {
+    /// GTAO compute dispatch (half-res, Hi-Z-accelerated, temporal EMA
+    /// ping-pong). Caller guards on `ssao_enabled` and passes the
+    /// projection terms. Split from end_frame_with_scene.
+    #[allow(clippy::too_many_arguments)]
+    pub(super) fn record_gtao(
+        &mut self,
+        encoder: &mut wgpu::CommandEncoder,
+        profiler: &mut crate::profiler::Profiler,
+        half_w: u32,
+        half_h: u32,
+        p00: f32,
+        p11: f32,
+        p20: f32,
+        p21: f32,
+    ) {
+        let p22 = self.current_proj_matrix[2][2];
+        let p32 = self.current_proj_matrix[3][2];
+        // --- SSAO (compute GTAO, samples Hi-Z pyramid) --------------
+        let ld = self.lighting_uniforms.light_dir;
+        let v = &self.current_view_matrix;
+        let light_dir_vs = [
+            v[0][0]*ld[0] + v[1][0]*ld[1] + v[2][0]*ld[2],
+            v[0][1]*ld[0] + v[1][1]*ld[1] + v[2][1]*ld[2],
+            v[0][2]*ld[0] + v[1][2]*ld[1] + v[2][2]*ld[2],
+            0.0,
+        ];
+        // Temporal accumulation: ping-pong history textures.
+        // `write_idx` is the current-frame output; `read_idx` the
+        // previous frame's result. First 4 frames force alpha=1
+        // so the initial clear never contaminates the signal.
+        let write_idx = self.ssao_history_idx;
+        let read_idx = 1 - write_idx;
+        let frame_phase = self.ssao_history_frame % 4;
+        let force_refresh = if self.ssao_history_frame < 4 { 1u32 } else { 0u32 };
+        // 4-frame EMA: alpha = 1/4 = 0.25 gives equal weight to
+        // each of the 4 phases at steady state.
+        let alpha = 0.25_f32;
+        // Halton-5 rotation: uncorrelated with TAA's base-2/3 jitter
+        // so the two noise patterns don't resonate.
+        let halton5 = halton(self.ssao_history_frame + 1, 5);
+        let sp = SsaoParams {
+            params: [
+                1.0 / half_w as f32,
+                1.0 / half_h as f32,
+                self.ssao_radius,
+                self.ssao_strength,
+            ],
+            proj_row01: [p00, p11, p20, p21],
+            proj_z: [p22, p32, 1.0 / p00, 1.0 / p11],
+            light_dir_vs,
+            size: [half_w, half_h, frame_phase, force_refresh],
+            temporal: [alpha, halton5, 0.0, 0.0],
+        };
+        self.queue.write_buffer(&self.ssao_uniform_buffer, 0, bytemuck::bytes_of(&sp));
+
+        if self.ssao_bg_cache[write_idx].is_none() {
+            self.ssao_bg_cache[write_idx] = Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+                label: Some("ssao_bg"),
+                layout: &self.ssao_layout,
+                entries: &[
+                    wgpu::BindGroupEntry { binding: 0, resource: self.ssao_uniform_buffer.as_entire_binding() },
+                    wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.ssao_rt_view) },
+                    wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.hiz_sampler) },
+                    wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.hiz_views[0]) },
+                    wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::TextureView(&self.hiz_views[1]) },
+                    wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.hiz_views[2]) },
+                    wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.hiz_views[3]) },
+                    wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.hiz_views[4]) },
+                    wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::TextureView(&self.velocity_rt_view) },
+                    wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[read_idx]) },
+                    wgpu::BindGroupEntry { binding: 10, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
+                    wgpu::BindGroupEntry { binding: 11, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[write_idx]) },
+                ],
+            }));
+        }
+        let bg = self.ssao_bg_cache[write_idx].as_ref().unwrap();
+
+        let ssao_ts = profiler.compute_pass_timestamp_writes("ssao_pass");
+        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+            label: Some("ssao_pass"),
+            timestamp_writes: ssao_ts,
+        });
+        pass.set_pipeline(&self.ssao_pipeline);
+        pass.set_bind_group(0, bg, &[]);
+        pass.dispatch_workgroups((half_w + 7) / 8, (half_h + 7) / 8, 1);
+
+        // Flip ping-pong indices for the next frame.
+        self.ssao_history_idx = read_idx;
+        self.ssao_history_frame = self.ssao_history_frame.wrapping_add(1);
+    }
+}
diff --git a/native/shared/src/renderer/lighting.rs b/native/shared/src/renderer/lighting.rs
new file mode 100644
index 0000000..453887b
--- /dev/null
+++ b/native/shared/src/renderer/lighting.rs
@@ -0,0 +1,153 @@
+//! Group-1 lighting bind group — layout and construction.
+//!
+//! The scene + immediate-mode 3D pipelines share one bind-group layout
+//! for lighting data: the Lighting UBO, env/IBL textures, the shadow
+//! cascade, and (on clustered devices) the froxel buffers at bindings
+//! 10-12. The bind group is rebuilt whenever the env source changes
+//! (HDR load, panorama, procedural sky); every rebuild goes through
+//! [`Renderer::make_lighting_bind_group`] so the entry list exists in
+//! exactly one place and cannot drift between call sites.
+
+use super::{froxel, Renderer};
+
+/// Create the group-1 layout. `clustered` appends the froxel bindings —
+/// set when [`froxel::FroxelPass::supported`] holds for the device.
+/// Pipelines whose shaders don't reference bindings 10-12 (pipeline_3d)
+/// share the layout unaffected; extra entries are legal as long as the
+/// bind group provides them.
+pub(super) fn create_lighting_layout(
+    device: &wgpu::Device,
+    clustered: bool,
+) -> wgpu::BindGroupLayout {
+    let tex_float = wgpu::BindingType::Texture {
+        sample_type: wgpu::TextureSampleType::Float { filterable: true },
+        view_dimension: wgpu::TextureViewDimension::D2,
+        multisampled: false,
+    };
+    let tex_depth = wgpu::BindingType::Texture {
+        sample_type: wgpu::TextureSampleType::Depth,
+        view_dimension: wgpu::TextureViewDimension::D2,
+        multisampled: false,
+    };
+    let frag = wgpu::ShaderStages::FRAGMENT;
+    let mut entries = vec![
+        // 0: Lighting UBO
+        wgpu::BindGroupLayoutEntry {
+            binding: 0,
+            visibility: frag,
+            ty: wgpu::BindingType::Buffer {
+                ty: wgpu::BufferBindingType::Uniform,
+                has_dynamic_offset: false,
+                min_binding_size: None,
+            },
+            count: None,
+        },
+        // 1/2: env (IBL specular) texture + sampler
+        wgpu::BindGroupLayoutEntry { binding: 1, visibility: frag, ty: tex_float, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 2,
+            visibility: frag,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
+            count: None,
+        },
+        // 3/4: BRDF LUT + sampler
+        wgpu::BindGroupLayoutEntry { binding: 3, visibility: frag, ty: tex_float, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 4,
+            visibility: frag,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
+            count: None,
+        },
+        // 5-7: shadow cascades, 8: comparison sampler
+        wgpu::BindGroupLayoutEntry { binding: 5, visibility: frag, ty: tex_depth, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 6, visibility: frag, ty: tex_depth, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 7, visibility: frag, ty: tex_depth, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 8,
+            visibility: frag,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison),
+            count: None,
+        },
+        // 9: env diffuse (IBL irradiance)
+        wgpu::BindGroupLayoutEntry { binding: 9, visibility: frag, ty: tex_float, count: None },
+    ];
+    if clustered {
+        entries.extend(froxel::extra_lighting_layout_entries());
+    }
+    device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+        label: Some("lighting_layout"),
+        entries: &entries,
+    })
+}
+
+/// Everything a lighting bind group references besides the env views.
+/// `Renderer::new` builds one from constructor locals (before `self`
+/// exists); [`Renderer::make_lighting_bind_group`] from fields.
+pub(super) struct LightingBindSources<'a> {
+    pub lighting_buffer: &'a wgpu::Buffer,
+    pub env_sampler: &'a wgpu::Sampler,
+    pub brdf_lut_view: &'a wgpu::TextureView,
+    pub brdf_lut_sampler: &'a wgpu::Sampler,
+    pub shadow_map: &'a crate::shadows::ShadowMap,
+    pub froxel: Option<&'a froxel::FroxelPass>,
+}
+
+/// The single source of truth for the group-1 entry list — every
+/// lighting bind group the renderer ever creates goes through here.
+pub(super) fn create_lighting_bind_group(
+    device: &wgpu::Device,
+    layout: &wgpu::BindGroupLayout,
+    label: &str,
+    src: &LightingBindSources<'_>,
+    env_view: &wgpu::TextureView,
+    diffuse_view: &wgpu::TextureView,
+) -> wgpu::BindGroup {
+    let mut entries = vec![
+        wgpu::BindGroupEntry { binding: 0, resource: src.lighting_buffer.as_entire_binding() },
+        wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(env_view) },
+        wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(src.env_sampler) },
+        wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(src.brdf_lut_view) },
+        wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(src.brdf_lut_sampler) },
+        wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[0]) },
+        wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[1]) },
+        wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[2]) },
+        wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&src.shadow_map.sampler) },
+        wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(diffuse_view) },
+    ];
+    if let Some(f) = src.froxel {
+        entries.extend(f.extra_lighting_bind_entries());
+    }
+    device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: Some(label),
+        layout,
+        entries: &entries,
+    })
+}
+
+impl Renderer {
+    /// Build a group-1 lighting bind group for the given env-specular /
+    /// env-diffuse views. Everything else (UBO, BRDF LUT, shadow
+    /// cascade, froxel buffers when clustered) comes from `self`.
+    pub(super) fn make_lighting_bind_group(
+        &self,
+        label: &str,
+        env_view: &wgpu::TextureView,
+        diffuse_view: &wgpu::TextureView,
+    ) -> wgpu::BindGroup {
+        create_lighting_bind_group(
+            &self.device,
+            &self.lighting_layout,
+            label,
+            &LightingBindSources {
+                lighting_buffer: &self.lighting_buffer,
+                env_sampler: &self.env_sampler,
+                brdf_lut_view: &self.brdf_lut_view,
+                brdf_lut_sampler: &self.brdf_lut_sampler,
+                shadow_map: &self.shadow_map,
+                froxel: self.froxel.as_ref(),
+            },
+            env_view,
+            diffuse_view,
+        )
+    }
+}
diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs
index 3678aab..7bc3ed1 100644
--- a/native/shared/src/renderer/mod.rs
+++ b/native/shared/src/renderer/mod.rs
@@ -11,6 +11,8 @@ mod ssgi_pass;
 mod shadow_pass;
 mod postfx_chain;
 mod scene_pass;
+mod froxel;
+mod lighting;
 pub use occlusion::OcclusionCuller;
 use shaders::*;
 
@@ -204,7 +206,7 @@ pub(super) struct HizDownsampleParams {
 
 #[repr(C)]
 #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
-struct SsaoParams {
+pub(super) struct SsaoParams {
     /// xy = inv_size (1/half_w, 1/half_h), z = radius (world units),
     /// w = strength
     params: [f32; 4],
@@ -1394,6 +1396,12 @@ pub struct Renderer {
     // don't have to carry tangent vertex data or normal-map bindings.
     pub scene_pipeline: wgpu::RenderPipeline,
     pub scene_material_layout: wgpu::BindGroupLayout,
+    /// Froxel light clustering (task #23). `Some` when the device has
+    /// fragment-stage storage buffers (everything but WebGL2); the
+    /// scene shader is then compiled with the clustered point-light
+    /// loop and `lighting_layout` gains bindings 10–12. `None` keeps
+    /// the plain count-driven loop.
+    pub froxel: Option<froxel::FroxelPass>,
     /// 1×1 gray env fallback and its sampler — bound in the lighting
     /// bind group before any HDR is loaded. `load_env_from_hdr`
     /// rebuilds the lighting bind group to swap in the real env
@@ -1674,99 +1682,13 @@ impl Renderer {
         // higher device limit). pipeline_3d doesn't reference the env
         // / BRDF bindings — WGSL lets bind group layouts expose more
         // than a shader consumes.
-        let lighting_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
-            label: Some("lighting_layout"),
-            entries: &[
-                wgpu::BindGroupLayoutEntry {
-                    binding: 0,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Buffer {
-                        ty: wgpu::BufferBindingType::Uniform,
-                        has_dynamic_offset: false,
-                        min_binding_size: None,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 1,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 2,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 3,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 4,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 5,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Depth,
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 6,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Depth,
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 7,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Depth,
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 8,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison),
-                    count: None,
-                },
-                wgpu::BindGroupLayoutEntry {
-                    binding: 9,
-                    visibility: wgpu::ShaderStages::FRAGMENT,
-                    ty: wgpu::BindingType::Texture {
-                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
-                        view_dimension: wgpu::TextureViewDimension::D2,
-                        multisampled: false,
-                    },
-                    count: None,
-                },
-            ],
-        });
+        // Froxel clustering first — its presence decides whether the
+        // lighting layout grows bindings 10-12 and which point-light
+        // loop the scene shader is compiled with.
+        let froxel = froxel::FroxelPass::supported(&device)
+            .then(|| froxel::FroxelPass::new(&device));
+
+        let lighting_layout = lighting::create_lighting_layout(&device, froxel.is_some());
         let lighting_uniforms = LightingUniforms::defaults();
         let lighting_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
             label: Some("lighting_buffer"),
@@ -2020,22 +1942,21 @@ impl Renderer {
         // group since the bind group binds the shadow depth view.
         let shadow_map = crate::shadows::ShadowMap::new(&device, Vertex3D::desc());
 
-        let lighting_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
-            label: Some("lighting_bg"),
-            layout: &lighting_layout,
-            entries: &[
-                wgpu::BindGroupEntry { binding: 0, resource: lighting_buffer.as_entire_binding() },
-                wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&scene_env_default_view) },
-                wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&env_sampler) },
-                wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&brdf_lut_view) },
-                wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&brdf_lut_sampler) },
-                wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[0]) },
-                wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[1]) },
-                wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[2]) },
-                wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&shadow_map.sampler) },
-                wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&scene_env_default_view) },
-            ],
-        });
+        let lighting_bind_group = lighting::create_lighting_bind_group(
+            &device,
+            &lighting_layout,
+            "lighting_bg",
+            &lighting::LightingBindSources {
+                lighting_buffer: &lighting_buffer,
+                env_sampler: &env_sampler,
+                brdf_lut_view: &brdf_lut_view,
+                brdf_lut_sampler: &brdf_lut_sampler,
+                shadow_map: &shadow_map,
+                froxel: froxel.as_ref(),
+            },
+            &scene_env_default_view,
+            &scene_env_default_view,
+        );
 
         // --- Default 1x1 white texture ---
         let white_data = [255u8, 255, 255, 255];
@@ -2864,9 +2785,17 @@ impl Renderer {
         // ============================================================
         // Scene pipeline (retained scene-graph draws with normal maps)
         // ============================================================
+        // Clustered devices get the froxel point-light loop spliced in
+        // place of the plain reference loop (same shading math — the
+        // many_point_lights golden enforces equivalence).
+        let scene_shader_source: std::borrow::Cow<'static, str> = if froxel.is_some() {
+            froxel::clustered_scene_shader(SCENE_SHADER).into()
+        } else {
+            SCENE_SHADER.into()
+        };
         let scene_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
             label: Some("scene_shader"),
-            source: wgpu::ShaderSource::Wgsl(SCENE_SHADER.into()),
+            source: wgpu::ShaderSource::Wgsl(scene_shader_source),
         });
         // Scene material layout:
         //   0: base_color texture      4: metallic_roughness texture
@@ -6274,6 +6203,7 @@ impl Renderer {
             aerial_perspective_sampler,
             env_diffuse_texture: None,
             scene_pipeline,
+            froxel,
             scene_material_layout,
             _scene_env_default_texture: scene_env_default_texture,
             scene_env_default_view,
@@ -8227,22 +8157,7 @@ impl Renderer {
         // LUT bindings stay put — only env tex/sampler + diffuse view
         // change.
         let diffuse_view_bg = diffuse_texture.create_view(&wgpu::TextureViewDescriptor::default());
-        let new_lighting_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
-            label: Some("lighting_bg"),
-            layout: &self.lighting_layout,
-            entries: &[
-                wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() },
-                wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&view) },
-                wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.env_sampler) },
-                wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) },
-                wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) },
-                wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) },
-                wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) },
-                wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) },
-                wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) },
-                wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&diffuse_view_bg) },
-            ],
-        });
+        let new_lighting_bg = self.make_lighting_bind_group("lighting_bg", &view, &diffuse_view_bg);
 
         self.sky_texture = Some(texture);
         self.sky_bind_group = Some(bg);
@@ -8555,22 +8470,7 @@ impl Renderer {
                 self._scene_env_default_texture
                     .create_view(&wgpu::TextureViewDescriptor::default())
             });
-        let new_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
-            label: Some("lighting_bg_panorama"),
-            layout: &self.lighting_layout,
-            entries: &[
-                wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() },
-                wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&env_view) },
-                wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.env_sampler) },
-                wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) },
-                wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) },
-                wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) },
-                wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) },
-                wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) },
-                wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) },
-                wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&diffuse_view) },
-            ],
-        });
+        let new_bg = self.make_lighting_bind_group("lighting_bg_panorama", &env_view, &diffuse_view);
         self.lighting_bind_group = new_bg;
         self.lighting_bg_is_procedural = false;
     }
@@ -8582,22 +8482,11 @@ impl Renderer {
     /// every sun-move and the bind group's TextureView references
     /// remain valid.
     fn swap_lighting_bg_to_procedural(&mut self) {
-        let new_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
-            label: Some("lighting_bg_procedural"),
-            layout: &self.lighting_layout,
-            entries: &[
-                wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() },
-                wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.procedural_sky_equirect_full_view) },
-                wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.env_sampler) },
-                wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) },
-                wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) },
-                wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) },
-                wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) },
-                wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) },
-                wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) },
-                wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.procedural_env_diffuse_view) },
-            ],
-        });
+        let new_bg = self.make_lighting_bind_group(
+            "lighting_bg_procedural",
+            &self.procedural_sky_equirect_full_view,
+            &self.procedural_env_diffuse_view,
+        );
         self.lighting_bind_group = new_bg;
         self.lighting_bg_is_procedural = true;
     }
@@ -9228,10 +9117,6 @@ impl Renderer {
             profiler.end("card_light");
         }
 
-        // Cascaded shadow maps (with the ticket-004 cache-hit skip) —
-        // see record_shadow_pass in shadow_pass.rs.
-        self.record_shadow_pass(&mut encoder, profiler, scene);
-
         // Upload immediate-mode 2D data
         profiler.begin("upload_geometry");
         let has_2d = !self.vertices_2d.is_empty();
@@ -9254,434 +9139,225 @@ impl Renderer {
         }
         profiler.end("upload_geometry");
 
-        // HDR scene pass (sky-view LUT refresh, sky + 3D batch +
-        // scene-graph render into the HDR MRTs, then the opaque
-        // material pass on the inner graph) — see
-        // record_hdr_scene_pass in scene_pass.rs.
-        self.record_hdr_scene_pass(&mut encoder, profiler, scene);
-        // ============================================================
-        // Phase 4b — translucent / refractive / additive material pass
+        let surf_w = self.surface_config.width;
+        let surf_h = self.surface_config.height;
+        let exposure_src_idx = self.exposure_current_idx;
+        let exposure_dst_idx = 1 - self.exposure_current_idx;
+
         // ============================================================
+        // Frame render graph (RFC 0001 Phase 2b — complete).
         //
-        // Runs after opaque materials, before post-FX. Loads hdr_rt so
-        // opaque output survives; alpha-blends into it. Depth is
-        // bound as read-only so translucent draws participate in the
-        // depth test without writing.
+        // Every render pass between geometry upload and the terminal
+        // composite runs as a PassNode. Reads/writes document the real
+        // data dependencies; in addition, each node carries a with_after
+        // pin to its predecessor so the schedule reproduces the
+        // hand-tuned order exactly. Loosening those pins (to let the
+        // scheduler interleave independent passes) is the documented
+        // next refinement — do it dependency-by-dependency with the
+        // golden tests watching.
         //
-        // If any submitted translucent material declared
-        // `reads_scene = true`, we first snapshot hdr_rt into a
-        // swapchain-sized transient and bind that as group 4
-        // scene_color_tex for the dispatch. Free after the pass so
-        // the transient pool reuses on the next frame.
-        if !self.material_system.translucent_commands.is_empty() {
-            // Back-to-front by view depth — required for correct alpha
-            // compositing; submission order is only kept between
-            // equal-depth draws (stable sort).
-            self.material_system.sort_translucent();
-            profiler.begin("translucent_pass");
-            let swap_w = self.surface_config.width;
-            let swap_h = self.surface_config.height;
-            self.transient_pool.begin_frame(swap_w, swap_h);
-
-            // Phase 7 — run the impulse decay + splat compute BEFORE
-            // we build scene_inputs so the front view reflects this
-            // frame's submissions.
-            self.impulse_field.update(&self.device, &self.queue, &mut encoder);
-
-            // Does any queued translucent material need the scene
-            // colour snapshot?
-            let needs_scene = self.material_system.translucent_commands
-                .iter()
-                .any(|c| self.material_system.pipelines
-                    .get(c.material as usize - 1)
-                    .and_then(|p| p.as_ref())
-                    .map(|p| p.reads_scene)
-                    .unwrap_or(false));
-
-            let scene_color_tid = if needs_scene {
-                let desc = transient::TransientDesc::new(
-                    formats::HDR_FORMAT,
-                    wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
-                    transient::SizePolicy::Swapchain,
-                );
-                Some(self.transient_pool.acquire(&self.device, desc))
-            } else {
-                None
-            };
-
-            // Phase 4c — depth snapshot. wgpu forbids sampling a
-            // texture that is also a depth-stencil attachment of the
-            // same pass, so we copy the opaque depth buffer into a
-            // transient before beginning the translucent pass and
-            // bind the transient at group 4 binding 2. Acquired
-            // whenever any translucent material reads_scene (same
-            // gate as colour) — cheap enough that it's not worth a
-            // separate `reads_depth` flag yet.
-            let scene_depth_tid = if needs_scene {
-                let desc = transient::TransientDesc::new(
-                    formats::DEPTH_FORMAT,
-                    wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
-                    transient::SizePolicy::Swapchain,
-                );
-                Some(self.transient_pool.acquire(&self.device, desc))
-            } else {
-                None
-            };
-
-            // Snapshot hdr_rt + live depth -> transients.
-            if let (Some(ctid), Some(dtid)) = (scene_color_tid, scene_depth_tid) {
-                let color_tex = self.transient_pool.texture(ctid).expect("fresh color transient");
-                encoder.copy_texture_to_texture(
-                    wgpu::TexelCopyTextureInfo {
-                        texture: &self.hdr_rt_texture,
-                        mip_level: 0,
-                        origin: wgpu::Origin3d::ZERO,
-                        aspect: wgpu::TextureAspect::All,
-                    },
-                    wgpu::TexelCopyTextureInfo {
-                        texture: color_tex,
-                        mip_level: 0,
-                        origin: wgpu::Origin3d::ZERO,
-                        aspect: wgpu::TextureAspect::All,
-                    },
-                    wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 },
-                );
-                let depth_tex = self.transient_pool.texture(dtid).expect("fresh depth transient");
-                encoder.copy_texture_to_texture(
-                    wgpu::TexelCopyTextureInfo {
-                        texture: &self.depth_texture,
-                        mip_level: 0,
-                        origin: wgpu::Origin3d::ZERO,
-                        aspect: wgpu::TextureAspect::DepthOnly,
-                    },
-                    wgpu::TexelCopyTextureInfo {
-                        texture: depth_tex,
-                        mip_level: 0,
-                        origin: wgpu::Origin3d::ZERO,
-                        aspect: wgpu::TextureAspect::DepthOnly,
-                    },
-                    wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 },
-                );
-                let color_view = self.transient_pool.view(ctid).unwrap();
-                let depth_view = self.transient_pool.view(dtid).unwrap();
-                let imp_view = self.impulse_field.front_view();
-                let imp_samp = self.impulse_field.sampler();
-                self.material_system.update_scene_inputs(
-                    &self.device, color_view, Some(depth_view),
-                    Some((imp_view, imp_samp)),
-                );
-            } else {
-                // No refractive/depth-reading materials this frame —
-                // still need a valid bind group. None → internal stubs.
-                self.material_system.update_scene_inputs(
-                    &self.device, &self.hdr_rt_view, None, None,
-                );
+        // The context owns &mut Renderer, so node closures borrow
+        // nothing at build time and can call the record_* methods.
+        // Feature toggles (ssao/ssr/ssgi/bloom) are checked inside the
+        // closures (or inside the methods), never by omitting nodes —
+        // with_after on a missing node is a schedule error.
+        // ============================================================
+        {
+            use graph::{Graph, PassInput, PassNode, PassOutput};
+            // Transient ordering tokens for resources the enum doesn't
+            // name. The textures themselves are persistent renderer
+            // fields; these ids only express producer→consumer edges.
+            const HIZ_PYRAMID: u32 = 0;
+            const SSAO_TEX: u32 = 1;
+            const SSR_TEX: u32 = 2;
+            const SSGI_TEX: u32 = 3;
+            const BLOOM_CHAIN: u32 = 4;
+            const COMPOSED: u32 = 5;
+            const LDR_FINAL: u32 = 6;
+            const FROXEL_CLUSTERS: u32 = 7;
+
+            struct FrameCtx2<'a> {
+                r: &'a mut Renderer,
+                encoder: &'a mut wgpu::CommandEncoder,
+                profiler: &'a mut crate::profiler::Profiler,
+                scene: &'a mut crate::scene::SceneGraph,
+                surf: (u32, u32),
+                exposure_idx: (usize, usize),
             }
 
-            {
-                let t_ts = profiler.pass_timestamp_writes("translucent_pass");
-                let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
-                    label: Some("bloom_translucent_pass"),
-                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
-                        view: &self.hdr_rt_view,
-                        resolve_target: None,
-                        depth_slice: None,
-                        ops: wgpu::Operations {
-                            load: wgpu::LoadOp::Load,
-                            store: wgpu::StoreOp::Store,
-                        },
-                    })],
-                    depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
-                        view: &self.depth_view,
-                        depth_ops: Some(wgpu::Operations {
-                            load: wgpu::LoadOp::Load,
-                            // Translucents don't write depth — keep
-                            // the opaque pass's depth pristine so
-                            // downstream post-FX (SSR/SSGI) still
-                            // sees the opaque geometry.
-                            store: wgpu::StoreOp::Store,
-                        }),
-                        stencil_ops: None,
-                    }),
-                    timestamp_writes: t_ts,
-                    occlusion_query_set: None,
-                    multiview_mask: None,
-                });
-                let cache = &self.model_gpu_cache;
-                self.material_system.dispatch_translucent(&mut pass, |handle, idx| {
-                    if let Some(Some(meshes)) = cache.get(&handle) {
-                        if idx < meshes.len() {
-                            let mesh = &meshes[idx];
-                            return Some((&mesh.vb, &mesh.ib, mesh.index_count));
-                        }
+            let mut g: Graph<FrameCtx2> = Graph::new();
+            g.push(
+                PassNode::new("froxel_assign", Box::new(|c: &mut FrameCtx2| {
+                    // No-op when self.froxel is None (the method gates);
+                    // the node stays in the graph so with_after pins
+                    // never dangle.
+                    c.r.record_froxel_assign(c.encoder);
+                }))
+                .with_writes(&[PassOutput::Transient(FROXEL_CLUSTERS)]),
+            );
+            g.push(
+                PassNode::new("shadow", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_shadow_pass(c.encoder, c.profiler, c.scene);
+                }))
+                .with_writes(&[PassOutput::Shadow(0), PassOutput::Shadow(1), PassOutput::Shadow(2)]),
+            );
+            g.push(
+                PassNode::new("hdr_scene", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_hdr_scene_pass(c.encoder, c.profiler, c.scene);
+                }))
+                .with_reads(&[
+                    PassInput::Shadow(0),
+                    PassInput::Shadow(1),
+                    PassInput::Shadow(2),
+                    PassInput::Transient(FROXEL_CLUSTERS),
+                ])
+                .with_writes(&[
+                    PassOutput::HdrColor,
+                    PassOutput::MaterialRt,
+                    PassOutput::VelocityRt,
+                    PassOutput::AlbedoRt,
+                    PassOutput::Depth,
+                ])
+                .with_after(&["shadow", "froxel_assign"]),
+            );
+            g.push(
+                PassNode::new("translucent", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_translucent_pass(c.encoder, c.profiler);
+                }))
+                // Reads the opaque HDR + depth and alpha-blends back into
+                // HdrColor; the pin (not a second HdrColor write) keeps a
+                // single declared writer per resource.
+                .with_after(&["hdr_scene"]),
+            );
+            g.push(
+                PassNode::new("hiz_build", Box::new(|c: &mut FrameCtx2| {
+                    if !c.r.ssao_enabled {
+                        return;
                     }
-                    None
-                });
-            }
-
-            if let Some(tid) = scene_color_tid {
-                self.transient_pool.release(tid);
-            }
-            profiler.end("translucent_pass");
-        }
-
-        // ============================================================
-        // SSAO: half-res GTAO sampling a hierarchical linear-depth
-        // pyramid. Build hiz (linearize + 4 min-downsamples), then
-        // dispatch the GTAO compute pass.
-        // ============================================================
-        profiler.begin("post_fx");
-        let surf_w = self.surface_config.width;
-        let surf_h = self.surface_config.height;
-        if self.ssao_enabled {
-            let p = &self.current_proj_matrix;
-            let p00 = p[0][0];
-            let p11 = p[1][1];
-            let p20 = p[2][0];
-            let p21 = p[2][1];
-            let p22 = p[2][2];
-            let p32 = p[3][2];
-            let half_w = (surf_w / 2).max(1);
-            let half_h = (surf_h / 2).max(1);
-
-            // Hi-Z build + occlusion capture run on the render graph
-            // (Phase 2b, cluster 1). Unlike the older material-pass nodes
-            // that capture individual field refs, these use the
-            // ctx-owns-renderer pattern: the context carries &mut Renderer
-            // and closures borrow nothing at build time — the shape the
-            // rest of end_frame_with_scene migrates onto.
-            {
-                use graph::{Graph, PassInput, PassNode, PassOutput};
-                // Transient id 0 = the linearized Hi-Z pyramid for this
-                // frame (graph-internal ordering token; the textures
-                // themselves are persistent renderer fields).
-                const HIZ_PYRAMID: u32 = 0;
-
-                struct HizCtx<'a> {
-                    r: &'a mut Renderer,
-                    encoder: &'a mut wgpu::CommandEncoder,
-                    profiler: &'a mut crate::profiler::Profiler,
-                    half: (u32, u32),
-                    p22: f32,
-                    p32: f32,
-                }
-
-                let mut g: Graph<HizCtx> = Graph::new();
-                g.push(
-                    PassNode::new(
-                        "hiz_build",
-                        Box::new(|ctx: &mut HizCtx| {
-                            let (hw, hh) = ctx.half;
-                            ctx.r.record_hiz_chain(ctx.encoder, ctx.profiler, hw, hh, ctx.p22, ctx.p32);
-                        }),
-                    )
-                    .with_reads(&[PassInput::SceneDepth])
-                    .with_writes(&[PassOutput::Transient(HIZ_PYRAMID)]),
-                );
-                // Max-reduce the linearized depth into the 64x64 occlusion
-                // grid and queue its readback; scene.prepare consumes it
-                // next frame (one-frame latency, no stall).
-                g.push(
-                    PassNode::new(
-                        "occlusion_capture",
-                        Box::new(|ctx: &mut HizCtx| {
-                            let vp = ctx.r.vp_matrix();
-                            let (hw, hh) = ctx.half;
-                            // Split borrows: occlusion is a sibling field
-                            // of device/queue; record() also needs the
-                            // hiz view.
-                            let occlusion = &mut ctx.r.occlusion as *mut OcclusionCuller;
-                            unsafe {
-                                (*occlusion).record(
-                                    &ctx.r.device,
-                                    &ctx.r.queue,
-                                    ctx.encoder,
-                                    &ctx.r.hiz_views[0],
-                                    (hw, hh),
-                                    vp,
-                                );
-                            }
-                        }),
-                    )
-                    .with_reads(&[PassInput::Transient(HIZ_PYRAMID)]),
-                );
-                let mut ctx = HizCtx {
-                    r: self,
-                    encoder: &mut encoder,
-                    profiler,
-                    half: (half_w, half_h),
-                    p22,
-                    p32,
-                };
-                if let Err(e) = g.execute(&mut ctx) {
-                    eprintln!("[graph] hiz/occlusion cluster failed: {:?}", e);
-                }
-            }
+                    let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1));
+                    let p22 = c.r.current_proj_matrix[2][2];
+                    let p32 = c.r.current_proj_matrix[3][2];
+                    c.r.record_hiz_chain(c.encoder, c.profiler, hw, hh, p22, p32);
+                }))
+                .with_reads(&[PassInput::SceneDepth])
+                .with_writes(&[PassOutput::Transient(HIZ_PYRAMID)])
+                .with_after(&["translucent"]),
+            );
+            g.push(
+                PassNode::new("occlusion_capture", Box::new(|c: &mut FrameCtx2| {
+                    if !c.r.ssao_enabled {
+                        return;
+                    }
+                    let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1));
+                    let vp = c.r.vp_matrix();
+                    let occlusion = &mut c.r.occlusion as *mut OcclusionCuller;
+                    unsafe {
+                        (*occlusion).record(&c.r.device, &c.r.queue, c.encoder, &c.r.hiz_views[0], (hw, hh), vp);
+                    }
+                }))
+                .with_reads(&[PassInput::Transient(HIZ_PYRAMID)])
+                .with_after(&["hiz_build"]),
+            );
+            g.push(
+                PassNode::new("gtao", Box::new(|c: &mut FrameCtx2| {
+                    if !c.r.ssao_enabled {
+                        return;
+                    }
+                    let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1));
+                    let p = &c.r.current_proj_matrix;
+                    let (p00, p11, p20, p21) = (p[0][0], p[1][1], p[2][0], p[2][1]);
+                    c.r.record_gtao(c.encoder, c.profiler, hw, hh, p00, p11, p20, p21);
+                }))
+                .with_reads(&[PassInput::Transient(HIZ_PYRAMID)])
+                .with_after(&["occlusion_capture"]),
+            );
+            g.push(
+                PassNode::new("ssao_blur", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_ssao_blur(c.encoder, c.surf.0, c.surf.1);
+                }))
+                .with_writes(&[PassOutput::Transient(SSAO_TEX)])
+                .with_after(&["gtao"]),
+            );
+            g.push(
+                PassNode::new("ssr_march", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_ssr_march(c.encoder, c.profiler);
+                }))
+                .with_reads(&[PassInput::SceneColor, PassInput::SceneDepth])
+                .with_after(&["ssao_blur"]),
+            );
+            g.push(
+                PassNode::new("ssr_temporal", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_ssr_temporal(c.encoder);
+                }))
+                .with_writes(&[PassOutput::Transient(SSR_TEX)])
+                .with_after(&["ssr_march"]),
+            );
+            g.push(
+                PassNode::new("ssgi", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_ssgi_passes(c.encoder, c.profiler, c.surf.0, c.surf.1);
+                }))
+                .with_reads(&[PassInput::SceneColor, PassInput::SceneDepth])
+                .with_writes(&[PassOutput::Transient(SSGI_TEX)])
+                .with_after(&["ssr_temporal"]),
+            );
+            g.push(
+                PassNode::new("bloom", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_bloom_chain(c.encoder, c.profiler, c.surf.0, c.surf.1);
+                }))
+                .with_reads(&[PassInput::SceneColor])
+                .with_writes(&[PassOutput::Transient(BLOOM_CHAIN)])
+                .with_after(&["ssgi"]),
+            );
+            g.push(
+                PassNode::new("compose", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_scene_compose(c.encoder);
+                }))
+                .with_reads(&[
+                    PassInput::SceneColor,
+                    PassInput::Transient(SSAO_TEX),
+                    PassInput::Transient(SSR_TEX),
+                    PassInput::Transient(SSGI_TEX),
+                    PassInput::Transient(BLOOM_CHAIN),
+                ])
+                .with_writes(&[PassOutput::Transient(COMPOSED)])
+                .with_after(&["bloom"]),
+            );
+            g.push(
+                PassNode::new("postfx_tail", Box::new(|c: &mut FrameCtx2| {
+                    c.r.record_postfx_tail(c.encoder, c.profiler);
+                }))
+                .with_reads(&[PassInput::Transient(COMPOSED), PassInput::MotionVectors])
+                .with_writes(&[PassOutput::Transient(LDR_FINAL)])
+                .with_after(&["compose"]),
+            );
+            g.push(
+                PassNode::new("auto_exposure", Box::new(|c: &mut FrameCtx2| {
+                    let (src, dst) = c.exposure_idx;
+                    c.r.record_auto_exposure(c.encoder, src, dst);
+                }))
+                .with_reads(&[PassInput::Transient(LDR_FINAL)])
+                .with_after(&["postfx_tail"]),
+            );
 
-            // --- SSAO (compute GTAO, samples Hi-Z pyramid) --------------
-            let ld = self.lighting_uniforms.light_dir;
-            let v = &self.current_view_matrix;
-            let light_dir_vs = [
-                v[0][0]*ld[0] + v[1][0]*ld[1] + v[2][0]*ld[2],
-                v[0][1]*ld[0] + v[1][1]*ld[1] + v[2][1]*ld[2],
-                v[0][2]*ld[0] + v[1][2]*ld[1] + v[2][2]*ld[2],
-                0.0,
-            ];
-            // Temporal accumulation: ping-pong history textures.
-            // `write_idx` is the current-frame output; `read_idx` the
-            // previous frame's result. First 4 frames force alpha=1
-            // so the initial clear never contaminates the signal.
-            let write_idx = self.ssao_history_idx;
-            let read_idx = 1 - write_idx;
-            let frame_phase = self.ssao_history_frame % 4;
-            let force_refresh = if self.ssao_history_frame < 4 { 1u32 } else { 0u32 };
-            // 4-frame EMA: alpha = 1/4 = 0.25 gives equal weight to
-            // each of the 4 phases at steady state.
-            let alpha = 0.25_f32;
-            // Halton-5 rotation: uncorrelated with TAA's base-2/3 jitter
-            // so the two noise patterns don't resonate.
-            let halton5 = halton(self.ssao_history_frame + 1, 5);
-            let sp = SsaoParams {
-                params: [
-                    1.0 / half_w as f32,
-                    1.0 / half_h as f32,
-                    self.ssao_radius,
-                    self.ssao_strength,
-                ],
-                proj_row01: [p00, p11, p20, p21],
-                proj_z: [p22, p32, 1.0 / p00, 1.0 / p11],
-                light_dir_vs,
-                size: [half_w, half_h, frame_phase, force_refresh],
-                temporal: [alpha, halton5, 0.0, 0.0],
+            let mut ctx = FrameCtx2 {
+                r: self,
+                encoder: &mut encoder,
+                profiler,
+                scene,
+                surf: (surf_w, surf_h),
+                exposure_idx: (exposure_src_idx, exposure_dst_idx),
             };
-            self.queue.write_buffer(&self.ssao_uniform_buffer, 0, bytemuck::bytes_of(&sp));
-
-            if self.ssao_bg_cache[write_idx].is_none() {
-                self.ssao_bg_cache[write_idx] = Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
-                    label: Some("ssao_bg"),
-                    layout: &self.ssao_layout,
-                    entries: &[
-                        wgpu::BindGroupEntry { binding: 0, resource: self.ssao_uniform_buffer.as_entire_binding() },
-                        wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.ssao_rt_view) },
-                        wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.hiz_sampler) },
-                        wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.hiz_views[0]) },
-                        wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::TextureView(&self.hiz_views[1]) },
-                        wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.hiz_views[2]) },
-                        wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.hiz_views[3]) },
-                        wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.hiz_views[4]) },
-                        wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::TextureView(&self.velocity_rt_view) },
-                        wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[read_idx]) },
-                        wgpu::BindGroupEntry { binding: 10, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
-                        wgpu::BindGroupEntry { binding: 11, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[write_idx]) },
-                    ],
-                }));
+            if let Err(e) = g.execute(&mut ctx) {
+                // A schedule error means a malformed graph (cycle /
+                // unknown pin) — a programming error, not a runtime
+                // condition. Surface loudly; the frame still presents
+                // whatever was encoded before the failure.
+                eprintln!("[graph] frame graph failed: {:?}", e);
             }
-            let bg = self.ssao_bg_cache[write_idx].as_ref().unwrap();
-
-            let ssao_ts = profiler.compute_pass_timestamp_writes("ssao_pass");
-            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
-                label: Some("ssao_pass"),
-                timestamp_writes: ssao_ts,
-            });
-            pass.set_pipeline(&self.ssao_pipeline);
-            pass.set_bind_group(0, bg, &[]);
-            pass.dispatch_workgroups((half_w + 7) / 8, (half_h + 7) / 8, 1);
-
-            // Flip ping-pong indices for the next frame.
-            self.ssao_history_idx = read_idx;
-            self.ssao_history_frame = self.ssao_history_frame.wrapping_add(1);
         }
 
-        // GTAO bilateral blur (or disabled-clear) — see hiz.rs.
-        self.record_ssao_blur(&mut encoder, surf_w, surf_h);
-
-        // SSR ray march — see record_ssr_march in ssr_pass.rs.
-        self.record_ssr_march(&mut encoder, profiler);
-
-        // SSR temporal denoiser — see record_ssr_temporal in ssr_pass.rs.
-        self.record_ssr_temporal(&mut encoder);
-
-        // The compose pass reads denoised SSR from the current history
-        // texture when ssr_enabled; otherwise the raw ssr_rt (which was
-        // cleared to transparent above) so it contributes nothing.
-        // Lumen-style screen-probe SSGI (place/trace/temporal/resolve)
-        // or disabled-clear — see record_ssgi_passes in ssgi_pass.rs.
-        self.record_ssgi_passes(&mut encoder, profiler, surf_w, surf_h);
-
-
-        // The resolve pass writes directly into `ssgi_rt_view`, so
-        // downstream composite + TAA reads are unchanged from the
-        // legacy path.
-        // Bloom chain (Karis-thresholded downsample + additive upsample)
-        // — see record_bloom_chain in postfx_chain.rs.
-        self.record_bloom_chain(&mut encoder, profiler, surf_w, surf_h);
-
-
-        // Scene compose (HDR + SSR + SSGI*albedo + bloom + fog + shafts
-        // -> composed_rt) — see record_scene_compose in postfx_chain.rs.
-        self.record_scene_compose(&mut encoder);
-        // Post-FX tail: upscale/TAA/DoF/motion-blur/SSS/CAS, each
-        // reading the previous enabled stage — see
-        // record_postfx_tail in postfx_chain.rs.
-        self.record_postfx_tail(&mut encoder, profiler);
-
         let composite_src_view = self.composite_source_view();
 
-        // ============================================================
-        // Auto-exposure update pass (runs only when auto_exposure is
-        // on; otherwise the composite reads the old exposure texture
-        // which is fine since manual_exposure bypasses the read).
-        // ============================================================
-        let exposure_src_idx = self.exposure_current_idx;
-        let exposure_dst_idx = 1 - self.exposure_current_idx;
-        if self.auto_exposure {
-            let ep = ExposureParams {
-                params: [
-                    self.auto_exposure_key,
-                    self.auto_exposure_rate,
-                    // Wide clamp — without SSGI, Sponza's shadowed
-                    // corridors have ~7× less average luma than its
-                    // sunlit courtyard, so exposure needs to span
-                    // the same range to keep perceived brightness
-                    // stable across rotations.
-                    0.1,
-                    10.0,
-                ],
-            };
-            self.queue.write_buffer(&self.exposure_uniform_buffer, 0, bytemuck::bytes_of(&ep));
-
-            let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
-                label: Some("exposure_bg"),
-                layout: &self.exposure_layout,
-                entries: &[
-                    wgpu::BindGroupEntry { binding: 0, resource: self.exposure_uniform_buffer.as_entire_binding() },
-                    wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(composite_src_view) },
-                    wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
-                    wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.exposure_views[exposure_src_idx]) },
-                    wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
-                ],
-            });
-            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
-                label: Some("exposure_pass"),
-                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
-                    view: &self.exposure_views[exposure_dst_idx],
-                    resolve_target: None,
-                    depth_slice: None,
-                    ops: wgpu::Operations {
-                        load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
-                        store: wgpu::StoreOp::Store,
-                    },
-                })],
-                depth_stencil_attachment: None,
-                timestamp_writes: None,
-                occlusion_query_set: None,
-                multiview_mask: None,
-            });
-            pass.set_pipeline(&self.exposure_pipeline);
-            pass.set_bind_group(0, &bg, &[]);
-            pass.draw(0..3, 0..1);
-        }
-
         // composite_uniform_buffer carries per-frame composite state.
         // x = tonemap kind (0 ACES / 1 AgX)
         // y = auto-exposure toggle
diff --git a/native/shared/src/renderer/postfx_chain.rs b/native/shared/src/renderer/postfx_chain.rs
index eac7864..4a9d946 100644
--- a/native/shared/src/renderer/postfx_chain.rs
+++ b/native/shared/src/renderer/postfx_chain.rs
@@ -625,3 +625,67 @@ impl Renderer {
         }
     }
 }
+
+impl Renderer {
+    /// Auto-exposure measure + adapt pass into the dst slot of the
+    /// ping-pong exposure texture. No-op when auto_exposure is off (the
+    /// composite keeps reading the stale texture, which manual_exposure
+    /// bypasses). The caller owns the src/dst indices because the
+    /// composite binds the same dst view.
+    pub(super) fn record_auto_exposure(
+        &mut self,
+        encoder: &mut wgpu::CommandEncoder,
+        exposure_src_idx: usize,
+        exposure_dst_idx: usize,
+    ) {
+        // The luminance source is whatever the composite will read.
+        let composite_src_view = self.composite_source_view();
+    if self.auto_exposure {
+        let ep = ExposureParams {
+            params: [
+                self.auto_exposure_key,
+                self.auto_exposure_rate,
+                // Wide clamp — without SSGI, Sponza's shadowed
+                // corridors have ~7× less average luma than its
+                // sunlit courtyard, so exposure needs to span
+                // the same range to keep perceived brightness
+                // stable across rotations.
+                0.1,
+                10.0,
+            ],
+        };
+        self.queue.write_buffer(&self.exposure_uniform_buffer, 0, bytemuck::bytes_of(&ep));
+
+        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("exposure_bg"),
+            layout: &self.exposure_layout,
+            entries: &[
+                wgpu::BindGroupEntry { binding: 0, resource: self.exposure_uniform_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(composite_src_view) },
+                wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
+                wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.exposure_views[exposure_src_idx]) },
+                wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
+            ],
+        });
+        let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: Some("exposure_pass"),
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                view: &self.exposure_views[exposure_dst_idx],
+                resolve_target: None,
+                depth_slice: None,
+                ops: wgpu::Operations {
+                    load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
+                    store: wgpu::StoreOp::Store,
+                },
+            })],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+            multiview_mask: None,
+        });
+        pass.set_pipeline(&self.exposure_pipeline);
+        pass.set_bind_group(0, &bg, &[]);
+        pass.draw(0..3, 0..1);
+    }
+    }
+}
diff --git a/native/shared/src/renderer/scene_pass.rs b/native/shared/src/renderer/scene_pass.rs
index f290cec..a191ae2 100644
--- a/native/shared/src/renderer/scene_pass.rs
+++ b/native/shared/src/renderer/scene_pass.rs
@@ -285,3 +285,181 @@ impl Renderer {
 
     }
 }
+
+impl Renderer {
+    /// Translucent / refractive / additive material pass: after opaque,
+    /// before post-FX; loads hdr_rt, depth read-only, back-to-front
+    /// sorted; snapshots scene color for reads_scene materials. Split
+    /// from end_frame_with_scene.
+    pub(super) fn record_translucent_pass(
+        &mut self,
+        encoder: &mut wgpu::CommandEncoder,
+        profiler: &mut crate::profiler::Profiler,
+    ) {
+    // ============================================================
+    // Phase 4b — translucent / refractive / additive material pass
+    // ============================================================
+    //
+    // Runs after opaque materials, before post-FX. Loads hdr_rt so
+    // opaque output survives; alpha-blends into it. Depth is
+    // bound as read-only so translucent draws participate in the
+    // depth test without writing.
+    //
+    // If any submitted translucent material declared
+    // `reads_scene = true`, we first snapshot hdr_rt into a
+    // swapchain-sized transient and bind that as group 4
+    // scene_color_tex for the dispatch. Free after the pass so
+    // the transient pool reuses on the next frame.
+    if !self.material_system.translucent_commands.is_empty() {
+        // Back-to-front by view depth — required for correct alpha
+        // compositing; submission order is only kept between
+        // equal-depth draws (stable sort).
+        self.material_system.sort_translucent();
+        profiler.begin("translucent_pass");
+        let swap_w = self.surface_config.width;
+        let swap_h = self.surface_config.height;
+        self.transient_pool.begin_frame(swap_w, swap_h);
+
+        // Phase 7 — run the impulse decay + splat compute BEFORE
+        // we build scene_inputs so the front view reflects this
+        // frame's submissions.
+        self.impulse_field.update(&self.device, &self.queue, &mut *encoder);
+
+        // Does any queued translucent material need the scene
+        // colour snapshot?
+        let needs_scene = self.material_system.translucent_commands
+            .iter()
+            .any(|c| self.material_system.pipelines
+                .get(c.material as usize - 1)
+                .and_then(|p| p.as_ref())
+                .map(|p| p.reads_scene)
+                .unwrap_or(false));
+
+        let scene_color_tid = if needs_scene {
+            let desc = transient::TransientDesc::new(
+                formats::HDR_FORMAT,
+                wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
+                transient::SizePolicy::Swapchain,
+            );
+            Some(self.transient_pool.acquire(&self.device, desc))
+        } else {
+            None
+        };
+
+        // Phase 4c — depth snapshot. wgpu forbids sampling a
+        // texture that is also a depth-stencil attachment of the
+        // same pass, so we copy the opaque depth buffer into a
+        // transient before beginning the translucent pass and
+        // bind the transient at group 4 binding 2. Acquired
+        // whenever any translucent material reads_scene (same
+        // gate as colour) — cheap enough that it's not worth a
+        // separate `reads_depth` flag yet.
+        let scene_depth_tid = if needs_scene {
+            let desc = transient::TransientDesc::new(
+                formats::DEPTH_FORMAT,
+                wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
+                transient::SizePolicy::Swapchain,
+            );
+            Some(self.transient_pool.acquire(&self.device, desc))
+        } else {
+            None
+        };
+
+        // Snapshot hdr_rt + live depth -> transients.
+        if let (Some(ctid), Some(dtid)) = (scene_color_tid, scene_depth_tid) {
+            let color_tex = self.transient_pool.texture(ctid).expect("fresh color transient");
+            encoder.copy_texture_to_texture(
+                wgpu::TexelCopyTextureInfo {
+                    texture: &self.hdr_rt_texture,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                wgpu::TexelCopyTextureInfo {
+                    texture: color_tex,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 },
+            );
+            let depth_tex = self.transient_pool.texture(dtid).expect("fresh depth transient");
+            encoder.copy_texture_to_texture(
+                wgpu::TexelCopyTextureInfo {
+                    texture: &self.depth_texture,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::DepthOnly,
+                },
+                wgpu::TexelCopyTextureInfo {
+                    texture: depth_tex,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::DepthOnly,
+                },
+                wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 },
+            );
+            let color_view = self.transient_pool.view(ctid).unwrap();
+            let depth_view = self.transient_pool.view(dtid).unwrap();
+            let imp_view = self.impulse_field.front_view();
+            let imp_samp = self.impulse_field.sampler();
+            self.material_system.update_scene_inputs(
+                &self.device, color_view, Some(depth_view),
+                Some((imp_view, imp_samp)),
+            );
+        } else {
+            // No refractive/depth-reading materials this frame —
+            // still need a valid bind group. None → internal stubs.
+            self.material_system.update_scene_inputs(
+                &self.device, &self.hdr_rt_view, None, None,
+            );
+        }
+
+        {
+            let t_ts = profiler.pass_timestamp_writes("translucent_pass");
+            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: Some("bloom_translucent_pass"),
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &self.hdr_rt_view,
+                    resolve_target: None,
+                    depth_slice: None,
+                    ops: wgpu::Operations {
+                        load: wgpu::LoadOp::Load,
+                        store: wgpu::StoreOp::Store,
+                    },
+                })],
+                depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
+                    view: &self.depth_view,
+                    depth_ops: Some(wgpu::Operations {
+                        load: wgpu::LoadOp::Load,
+                        // Translucents don't write depth — keep
+                        // the opaque pass's depth pristine so
+                        // downstream post-FX (SSR/SSGI) still
+                        // sees the opaque geometry.
+                        store: wgpu::StoreOp::Store,
+                    }),
+                    stencil_ops: None,
+                }),
+                timestamp_writes: t_ts,
+                occlusion_query_set: None,
+                multiview_mask: None,
+            });
+            let cache = &self.model_gpu_cache;
+            self.material_system.dispatch_translucent(&mut pass, |handle, idx| {
+                if let Some(Some(meshes)) = cache.get(&handle) {
+                    if idx < meshes.len() {
+                        let mesh = &meshes[idx];
+                        return Some((&mesh.vb, &mesh.ib, mesh.index_count));
+                    }
+                }
+                None
+            });
+        }
+
+        if let Some(tid) = scene_color_tid {
+            self.transient_pool.release(tid);
+        }
+        profiler.end("translucent_pass");
+    }
+    }
+}
diff --git a/native/shared/src/renderer/shaders/core.rs b/native/shared/src/renderer/shaders/core.rs
index c7ee590..e3b4aaf 100644
--- a/native/shared/src/renderer/shaders/core.rs
+++ b/native/shared/src/renderer/shaders/core.rs
@@ -766,6 +766,10 @@ fn fs_main_scene(in: VertexOutputScene) -> SceneOut {
                          base_color, metallic, roughness);
     }
 
+    // BEGIN-POINT-LIGHT-LOOP (replaced by the froxel-clustered variant
+    // at pipeline build on storage-buffer-capable backends — see
+    // renderer/froxel.rs; this plain loop is the WebGL fallback and the
+    // semantic reference the clustered path must match exactly)
     let pt_count = u32(lighting.point_light_count.x);
     for (var i = 0u; i < pt_count; i++) {
         let pl = lighting.point_lights[i];
@@ -780,6 +784,7 @@ fn fs_main_scene(in: VertexOutputScene) -> SceneOut {
                              base_color, metallic, roughness);
         }
     }
+    // END-POINT-LIGHT-LOOP
 
     // --- Split-sum IBL (Karis 2013) ---
     //   IBL_diffuse  = base_color * (1 - kS_avg) * (1 - metallic)
diff --git a/native/shared/src/renderer/shaders/post.rs b/native/shared/src/renderer/shaders/post.rs
index 0072b9f..a8c5733 100644
--- a/native/shared/src/renderer/shaders/post.rs
+++ b/native/shared/src/renderer/shaders/post.rs
@@ -792,6 +792,21 @@ fn fs_main(in: VsOut) -> @location(0) vec4<f32> {
     var prev_uv: vec2<f32>;
     if (vel_len > 0.00001) {
         prev_uv = vec2<f32>(in.uv.x - vel.x, in.uv.y + vel.y);
+    } else if (depth >= 0.9999) {
+        // Sky / far plane: the positional reconstruction divides by a
+        // near-zero w and reprojects sky pixels onto arbitrary scene
+        // points — the luma-only history clamp then locks that wrong
+        // chroma in forever (uniform green/red sky tint). The sky is at
+        // infinity, so reproject the view DIRECTION instead: exact under
+        // camera rotation, translation-invariant by definition.
+        let dir = world_h.xyz; // w ~ 0 at the far plane: xyz IS the direction
+        let prev_clip = u.prev_vp * vec4<f32>(dir, 0.0);
+        if (prev_clip.w > 0.00001) {
+            let prev_ndc = prev_clip.xyz / prev_clip.w;
+            prev_uv = vec2<f32>(prev_ndc.x * 0.5 + 0.5, 1.0 - (prev_ndc.y * 0.5 + 0.5));
+        } else {
+            prev_uv = in.uv;
+        }
     } else {
         let prev_clip = u.prev_vp * vec4<f32>(world, 1.0);
         let prev_ndc = prev_clip.xyz / prev_clip.w;
diff --git a/native/shared/tests/golden/lit_primitives_taa.png b/native/shared/tests/golden/lit_primitives_taa.png
new file mode 100644
index 0000000..cfa8302
Binary files /dev/null and b/native/shared/tests/golden/lit_primitives_taa.png differ
diff --git a/native/shared/tests/golden/many_point_lights_clustered_scene.png b/native/shared/tests/golden/many_point_lights_clustered_scene.png
new file mode 100644
index 0000000..23e65aa
Binary files /dev/null and b/native/shared/tests/golden/many_point_lights_clustered_scene.png differ
diff --git a/native/shared/tests/golden_render.rs b/native/shared/tests/golden_render.rs
index 0c70d8c..753079f 100644
--- a/native/shared/tests/golden_render.rs
+++ b/native/shared/tests/golden_render.rs
@@ -17,7 +17,7 @@
 //!   regenerated with BLOOM_UPDATE_GOLDEN=1 `cargo test golden`.
 
 use bloom_shared::engine::EngineState;
-use bloom_shared::renderer::Renderer;
+use bloom_shared::renderer::{Renderer, Vertex3D};
 
 const W: u32 = 256;
 const H: u32 = 256;
@@ -219,45 +219,116 @@ fn golden_many_point_lights() {
     compare_or_update("many_point_lights", w, h, &rgba);
 }
 
+/// Froxel-clustering parity gate. The golden for this test is generated
+/// with `BLOOM_DISABLE_FROXEL=1` (the plain reference loop); the test
+/// then runs through the clustered scene shader, so any divergence
+/// between the two point-light paths — wrong cluster lookup, lights
+/// missed by the sphere/AABB assignment, slice math drift — shows up as
+/// a pixel diff. Unlike `golden_many_point_lights` (immediate-mode
+/// `pipeline_3d`, which keeps the plain loop), this drives the retained
+/// scene graph through `scene_pipeline`, the shader the clustered loop
+/// is spliced into.
 #[test]
-fn golden_lod_selection() {
-    use bloom_shared::renderer::Vertex3D;
+fn golden_many_point_lights_clustered_scene() {
     let Some(mut eng) = try_engine() else {
         eprintln!("skip: no GPU adapter");
         return;
     };
+    // The gate is meaningless if the clustered path silently fell back
+    // to the reference loop. Storage buffers are available on every
+    // non-WebGL2 device this test runs on, so demand the froxel path
+    // unless the kill-switch is set (golden regeneration).
+    if std::env::var_os("BLOOM_DISABLE_FROXEL").is_none() {
+        assert!(
+            eng.renderer.froxel.is_some(),
+            "froxel clustering inactive on a storage-buffer-capable adapter — \
+             parity test would silently test the reference loop against itself"
+        );
+    }
 
-    fn cube_verts(half: f32, color: [f32; 4]) -> (Vec<Vertex3D>, Vec<u32>) {
-        // 6 faces, outward winding (matches scene-node conventions:
-        // prepare() recomputes bounds from positions).
-        let h = half;
-        let faces: [([f32; 3], [[f32; 3]; 4]); 6] = [
-            ([0.0, 0.0, -1.0], [[-h,-h,-h],[ h,-h,-h],[ h, h,-h],[-h, h,-h]]),
-            ([0.0, 0.0,  1.0], [[ h,-h, h],[-h,-h, h],[-h, h, h],[ h, h, h]]),
-            ([-1.0, 0.0, 0.0], [[-h,-h, h],[-h,-h,-h],[-h, h,-h],[-h, h, h]]),
-            ([1.0, 0.0, 0.0],  [[ h,-h,-h],[ h,-h, h],[ h, h, h],[ h, h,-h]]),
-            ([0.0, 1.0, 0.0],  [[-h, h,-h],[ h, h,-h],[ h, h, h],[-h, h, h]]),
-            ([0.0, -1.0, 0.0], [[-h,-h, h],[ h,-h, h],[ h,-h,-h],[-h,-h,-h]]),
-        ];
-        let mut verts = Vec::new();
-        let mut idx = Vec::new();
-        for (normal, vs) in faces {
-            let base = verts.len() as u32;
-            for p in vs {
-                verts.push(Vertex3D {
-                    position: p,
-                    normal,
-                    color,
-                    uv: [0.0, 0.0],
-                    joints: [0.0; 4],
-                    weights: [0.0; 4],
-                    tangent: [0.0; 4],
-                });
-            }
-            idx.extend_from_slice(&[base, base + 2, base + 1, base, base + 3, base + 2]);
+    // Floor (squashed cube) + a ring of cubes, lit by 40 colored point
+    // lights — enough that most froxels see only a few lights, so a
+    // broken cluster lookup cannot hide.
+    let scale_translate = |sx: f32, sy: f32, sz: f32, x: f32, y: f32, z: f32| -> [[f32; 4]; 4] {
+        let mut m = [[0.0f32; 4]; 4];
+        m[0][0] = sx; m[1][1] = sy; m[2][2] = sz; m[3][3] = 1.0;
+        m[3][0] = x; m[3][1] = y; m[3][2] = z;
+        m
+    };
+    let (floor_v, floor_i) = cube_verts(0.5, [0.45, 0.45, 0.45, 1.0]);
+    let floor = eng.scene.create_node();
+    eng.scene.update_geometry(floor, floor_v, floor_i);
+    eng.scene.set_transform(floor, scale_translate(14.0, 0.2, 14.0, 0.0, -0.1, 0.0));
+
+    let (cube_v, cube_i) = cube_verts(0.5, [0.8, 0.8, 0.8, 1.0]);
+    for i in 0..6u32 {
+        let t = i as f32 / 6.0 * std::f32::consts::TAU;
+        let node = eng.scene.create_node();
+        eng.scene.update_geometry(node, cube_v.clone(), cube_i.clone());
+        eng.scene.set_transform(node, scale_translate(1.0, 1.0, 1.0, t.cos() * 2.2, 0.5, t.sin() * 2.2));
+    }
+
+    let (w, h, rgba) = render(&mut eng, 6, |eng| {
+        let r = &mut eng.renderer;
+        r.set_clear_color(2.0, 2.0, 4.0, 255.0);
+        r.begin_mode_3d(
+            6.0, 7.0, 6.0,
+            0.0, 0.0, 0.0,
+            0.0, 1.0, 0.0,
+            60.0, 0.0,
+        );
+        for i in 0..40u32 {
+            let t = i as f32 / 40.0 * std::f32::consts::TAU;
+            let (sx, sz) = (t.cos() * 4.0, t.sin() * 4.0);
+            let (lr, lg, lb) = (
+                0.5 + 0.5 * (t).cos(),
+                0.5 + 0.5 * (t + 2.094).cos(),
+                0.5 + 0.5 * (t + 4.189).cos(),
+            );
+            r.add_point_light(sx, 1.2, sz, 3.5, lr, lg, lb, 1.6);
         }
-        (verts, idx)
+    });
+    compare_or_update("many_point_lights_clustered_scene", w, h, &rgba);
+}
+
+/// Unit cube as scene-node geometry — 6 faces, outward winding (matches
+/// scene-node conventions: prepare() recomputes bounds from positions).
+fn cube_verts(half: f32, color: [f32; 4]) -> (Vec<Vertex3D>, Vec<u32>) {
+    let h = half;
+    let faces: [([f32; 3], [[f32; 3]; 4]); 6] = [
+        ([0.0, 0.0, -1.0], [[-h,-h,-h],[ h,-h,-h],[ h, h,-h],[-h, h,-h]]),
+        ([0.0, 0.0,  1.0], [[ h,-h, h],[-h,-h, h],[-h, h, h],[ h, h, h]]),
+        ([-1.0, 0.0, 0.0], [[-h,-h, h],[-h,-h,-h],[-h, h,-h],[-h, h, h]]),
+        ([1.0, 0.0, 0.0],  [[ h,-h,-h],[ h,-h, h],[ h, h, h],[ h, h,-h]]),
+        ([0.0, 1.0, 0.0],  [[-h, h,-h],[ h, h,-h],[ h, h, h],[-h, h, h]]),
+        ([0.0, -1.0, 0.0], [[-h,-h, h],[ h,-h, h],[ h,-h,-h],[-h,-h,-h]]),
+    ];
+    let mut verts = Vec::new();
+    let mut idx = Vec::new();
+    for (normal, vs) in faces {
+        let base = verts.len() as u32;
+        for p in vs {
+            verts.push(Vertex3D {
+                position: p,
+                normal,
+                color,
+                uv: [0.0, 0.0],
+                joints: [0.0; 4],
+                weights: [0.0; 4],
+                tangent: [0.0; 4],
+            });
+        }
+        idx.extend_from_slice(&[base, base + 2, base + 1, base, base + 3, base + 2]);
     }
+    (verts, idx)
+}
+
+#[test]
+fn golden_lod_selection() {
+    let Some(mut eng) = try_engine() else {
+        eprintln!("skip: no GPU adapter");
+        return;
+    };
 
     let (red_v, red_i) = cube_verts(0.5, [0.9, 0.1, 0.1, 1.0]);
     let (green_v, green_i) = cube_verts(0.5, [0.1, 0.9, 0.1, 1.0]);
@@ -345,3 +416,29 @@ fn cooked_bc7_texture_matches_raw() {
         "cooked render diverges from raw render: max channel diff {max_diff}"
     );
 }
+
+#[test]
+fn golden_lit_primitives_taa() {
+    let Some(mut eng) = try_engine() else {
+        eprintln!("skip: no GPU adapter");
+        return;
+    };
+    // Same scene as lit_primitives_3d but with TAA ON: pins the TAA
+    // branch of the post-FX cascade (reprojection, neighborhood clamp,
+    // Catmull-Rom upscale path) that the TAA-off goldens never touch.
+    // The Halton jitter sequence is indexed by frame number, so a fixed
+    // frame count renders deterministically.
+    eng.renderer.set_taa_enabled(true);
+    let (w, h, rgba) = render(&mut eng, 10, |eng| {
+        let r = &mut eng.renderer;
+        r.set_clear_color(13.0, 18.0, 26.0, 255.0);
+        r.begin_mode_3d(4.0, 3.0, 6.0, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 45.0, 0.0);
+        r.add_directional_light(-0.5, -1.0, -0.3, 1.0, 0.95, 0.9, 1.2);
+        r.add_point_light(2.0, 2.0, 2.0, 10.0, 0.2, 0.4, 1.0, 2.0);
+        r.draw_plane(0.0, 0.0, 0.0, 10.0, 10.0, 120.0, 120.0, 125.0, 255.0);
+        r.draw_cube(-1.2, 0.5, 0.0, 1.0, 1.0, 1.0, 230.0, 41.0, 55.0, 255.0);
+        r.draw_sphere(1.2, 0.75, 0.5, 0.75, 0.0, 228.0, 48.0, 255.0);
+        r.draw_cube(0.0, 1.6, -1.0, 0.8, 0.8, 0.8, 253.0, 249.0, 0.0, 255.0);
+    });
+    compare_or_update("lit_primitives_taa", w, h, &rgba);
+}
diff --git a/tools/file-lines-baseline.json b/tools/file-lines-baseline.json
index b914210..e5d34f4 100644
--- a/tools/file-lines-baseline.json
+++ b/tools/file-lines-baseline.json
@@ -1,3 +1,3 @@
 {
-  "native/shared/src/renderer/mod.rs": 12099
-}
\ No newline at end of file
+  "native/shared/src/renderer/mod.rs": 11775
+}