diff --git a/native/shared/src/renderer/froxel.rs b/native/shared/src/renderer/froxel.rs
new file mode 100644
index 0000000..943b726
--- /dev/null
+++ b/native/shared/src/renderer/froxel.rs
@@ -0,0 +1,421 @@
+//! Froxel light clustering — task #23 of the architecture audit.
+//!
+//! The 8+256 light-cap raise removed the capability ceiling but left
+//! the scene shader paying O(live point lights) per fragment. This
+//! module restores O(cluster lights): a compute pass assigns the point
+//! lights (re-uploaded each frame into a compact UBO of their own) to
+//! a 16×9×24 view-frustum froxel grid each frame, and a clustered
+//! variant of the scene shader loops only its froxel's index list.
+//!
+//! Backend split, by capability rather than cfg: storage buffers in
+//! fragment shaders don't exist on WebGL2, so [`FroxelPass::supported`]
+//! gates on the device limits. Unsupported backends keep the plain
+//! count-driven loop (the semantic reference — the clustered path must
+//! match it exactly, which the many_point_lights golden enforces).
+//!
+//! Memory: counts 3456×4 B ≈ 14 KB; index list 3456×256×4 B ≈ 3.5 MB
+//! (256 = worst-case every light in one froxel — exact parity with the
+//! reference loop, no truncation).
+
+use wgpu::util::DeviceExt;
+
+pub(super) const GRID_X: u32 = 16; // froxel columns (screen x)
+pub(super) const GRID_Y: u32 = 9; // froxel rows (screen y, row 0 = top)
+pub(super) const GRID_Z: u32 = 24; // logarithmic depth slices
+pub(super) const CLUSTER_COUNT: u32 = GRID_X * GRID_Y * GRID_Z;
+pub(super) const MAX_LIGHTS_PER_CLUSTER: u32 = 256; // index-list stride; WGSL hard-codes 256u
+
+/// Uniform parameters shared by the assignment compute pass and the
+/// clustered fragment loop. Layout mirrored in WGSL below and in the
+/// fragment include.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub(super) struct FroxelParams {
+    /// View matrix (world → view) for light-position transform.
+    pub view: [[f32; 4]; 4],
+    /// x = grid_x, y = grid_y, z = grid_z, w = live point-light count.
+    pub grid: [u32; 4],
+    /// x = znear, y = zfar, z = log(zfar/znear), w = unused.
+    pub depth_range: [f32; 4],
+    /// x = 1/tile_w_px, y = 1/tile_h_px (fragment tile lookup),
+    /// z = p22, w = p32 (depth linearization, same convention as Hi-Z).
+    pub screen: [f32; 4],
+    /// Inverse projection — froxel corner reconstruction in the
+    /// assignment pass.
+    pub inv_proj: [[f32; 4]; 4],
+}
+
+const ASSIGN_SHADER: &str = "
+struct FroxelParams {
+    view: mat4x4<f32>,
+    grid: vec4<u32>,
+    depth_range: vec4<f32>,
+    screen: vec4<f32>,
+    inv_proj: mat4x4<f32>,
+};
+struct PointLight { position: vec4<f32>, color: vec4<f32> };
+struct Lights {
+    // Mirrors the tail of the Lighting UBO relevant here. The host
+    // binds a dedicated compact UBO (positions+ranges only would do,
+    // but reusing PointLight keeps one struct).
+    count: vec4<f32>,
+    lights: array<PointLight, 256>,
+};
+
+@group(0) @binding(0) var<uniform> p: FroxelParams;
+@group(0) @binding(1) var<uniform> l: Lights;
+@group(0) @binding(2) var<storage, read_write> cluster_counts: array<u32>;
+@group(0) @binding(3) var<storage, read_write> cluster_indices: array<u32>;
+
+// View-space Z of slice boundary k (logarithmic distribution).
+fn slice_z(k: u32) -> f32 {
+    let t = f32(k) / f32(p.grid.z);
+    return p.depth_range.x * exp(t * p.depth_range.z);
+}
+
+@compute @workgroup_size(4, 4, 4)
+fn cs_main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    if (gid.x >= p.grid.x || gid.y >= p.grid.y || gid.z >= p.grid.z) { return; }
+    let cluster = gid.x + gid.y * p.grid.x + gid.z * p.grid.x * p.grid.y;
+
+    // Froxel AABB in view space: reconstruct the tile's corner rays on
+    // the near plane of the projection and scale to the slice depths.
+    // NDC tile extents:
+    let x0 = (f32(gid.x) / f32(p.grid.x)) * 2.0 - 1.0;
+    let x1 = (f32(gid.x + 1u) / f32(p.grid.x)) * 2.0 - 1.0;
+    // NDC y is up; tile row 0 is the TOP of the screen.
+    let y1 = 1.0 - (f32(gid.y) / f32(p.grid.y)) * 2.0;
+    let y0 = 1.0 - (f32(gid.y + 1u) / f32(p.grid.y)) * 2.0;
+
+    // Unproject the four corners at an arbitrary depth and normalize to
+    // rays through the camera (view space, looking down -Z).
+    var mn = vec3<f32>( 1e30,  1e30,  1e30);
+    var mx = vec3<f32>(-1e30, -1e30, -1e30);
+    let z_near_s = slice_z(gid.z);
+    let z_far_s = slice_z(gid.z + 1u);
+    for (var cx = 0u; cx < 2u; cx++) {
+        for (var cy = 0u; cy < 2u; cy++) {
+            let nx = select(x0, x1, cx == 1u);
+            let ny = select(y0, y1, cy == 1u);
+            let h = p.inv_proj * vec4<f32>(nx, ny, 0.5, 1.0);
+            let dir = h.xyz / h.w; // a point on the ray (view space)
+            let ray = dir / max(-dir.z, 1e-6); // scale so z == -1
+            // corner at both slice depths (view z is negative forward)
+            let a = ray * z_near_s;
+            let b = ray * z_far_s;
+            mn = min(mn, min(vec3<f32>(a.xy, -z_near_s), vec3<f32>(b.xy, -z_far_s)));
+            mx = max(mx, max(vec3<f32>(a.xy, -z_near_s), vec3<f32>(b.xy, -z_far_s)));
+        }
+    }
+
+    // Sphere/AABB tests against every live light.
+    var count = 0u;
+    let n = u32(l.count.x);
+    let base = cluster * 256u; // stride == MAX_LIGHTS_PER_CLUSTER on the host
+    for (var i = 0u; i < n; i++) {
+        let pos_w = l.lights[i].position;
+        let pos_v = (p.view * vec4<f32>(pos_w.xyz, 1.0)).xyz;
+        let r = pos_w.w;
+        let closest = clamp(pos_v, mn, mx);
+        let d = pos_v - closest;
+        if (dot(d, d) <= r * r) {
+            cluster_indices[base + count] = i;
+            count++;
+        }
+    }
+    cluster_counts[cluster] = count;
+}
+";
+
+/// The fragment-side replacement for the plain point-light loop, plus
+/// the bindings it needs. Spliced into SCENE_SHADER between the
+/// BEGIN/END-POINT-LIGHT-LOOP markers by [`clustered_scene_shader`].
+const CLUSTERED_BINDINGS: &str = "
+struct FroxelParams {
+    view: mat4x4<f32>,
+    grid: vec4<u32>,
+    depth_range: vec4<f32>,
+    screen: vec4<f32>,
+    inv_proj: mat4x4<f32>,
+};
+@group(1) @binding(10) var<uniform> froxel: FroxelParams;
+@group(1) @binding(11) var<storage, read> cluster_counts: array<u32>;
+@group(1) @binding(12) var<storage, read> cluster_indices: array<u32>;
+";
+
+const CLUSTERED_LOOP: &str = "
+    // Froxel-clustered point lights: identical shading math to the
+    // reference loop, restricted to this fragment's cluster list.
+    let view_z = -froxel.screen.w / (in.clip_position.z + froxel.screen.z);
+    let slice = clamp(
+        u32(log(max(view_z, froxel.depth_range.x) / froxel.depth_range.x)
+            / froxel.depth_range.z * f32(froxel.grid.z)),
+        0u, froxel.grid.z - 1u);
+    let tile_x = min(u32(in.clip_position.x * froxel.screen.x), froxel.grid.x - 1u);
+    let tile_y = min(u32(in.clip_position.y * froxel.screen.y), froxel.grid.y - 1u);
+    let cluster = tile_x + tile_y * froxel.grid.x + slice * froxel.grid.x * froxel.grid.y;
+    let cl_count = cluster_counts[cluster];
+    let cl_base = cluster * 256u; // stride == MAX_LIGHTS_PER_CLUSTER on the host
+    for (var ci = 0u; ci < cl_count; ci++) {
+        let pl = lighting.point_lights[cluster_indices[cl_base + ci]];
+        let to_light = pl.position.xyz - in.world_pos;
+        let dist = length(to_light);
+        let range = pl.position.w;
+        if (dist < range && dist > 0.0) {
+            let l = to_light / dist;
+            let atten = 1.0 - (dist / range);
+            let atten2 = atten * atten;
+            lit += shade_pbr(n, v, l, pl.color.rgb, pl.color.w * atten2,
+                             base_color, metallic, roughness);
+        }
+    }
+";
+
+/// Build the clustered SCENE_SHADER variant from the canonical source.
+pub(super) fn clustered_scene_shader(source: &str) -> String {
+    let begin = source
+        .find("// BEGIN-POINT-LIGHT-LOOP")
+        .expect("scene shader missing BEGIN-POINT-LIGHT-LOOP marker");
+    let (marker, tail) = ("// END-POINT-LIGHT-LOOP", &source[begin..]);
+    let end = begin + tail.find(marker).expect("scene shader missing END marker") + marker.len();
+    format!(
+        "{}{}{}{}",
+        CLUSTERED_BINDINGS,
+        &source[..begin],
+        CLUSTERED_LOOP,
+        &source[end..]
+    )
+}
+
+/// The three entries appended to `lighting_layout` (group 1) when the
+/// device supports the clustered path. Pipelines whose shaders don't
+/// reference them (SHADER_3D's pipeline_3d) are unaffected — extra
+/// layout entries are legal as long as the bind group provides them.
+pub(super) fn extra_lighting_layout_entries() -> [wgpu::BindGroupLayoutEntry; 3] {
+    let storage_ro = wgpu::BindingType::Buffer {
+        ty: wgpu::BufferBindingType::Storage { read_only: true },
+        has_dynamic_offset: false,
+        min_binding_size: None,
+    };
+    [
+        wgpu::BindGroupLayoutEntry {
+            binding: 10,
+            visibility: wgpu::ShaderStages::FRAGMENT,
+            ty: wgpu::BindingType::Buffer {
+                ty: wgpu::BufferBindingType::Uniform,
+                has_dynamic_offset: false,
+                min_binding_size: None,
+            },
+            count: None,
+        },
+        wgpu::BindGroupLayoutEntry { binding: 11, visibility: wgpu::ShaderStages::FRAGMENT, ty: storage_ro, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 12, visibility: wgpu::ShaderStages::FRAGMENT, ty: storage_ro, count: None },
+    ]
+}
+
+pub struct FroxelPass {
+    pub assign_pipeline: wgpu::ComputePipeline,
+    pub assign_layout: wgpu::BindGroupLayout,
+    pub params_buffer: wgpu::Buffer,
+    /// Compact point-light UBO for the compute pass (count + 256 lights).
+    pub lights_buffer: wgpu::Buffer,
+    pub counts_buffer: wgpu::Buffer,
+    pub indices_buffer: wgpu::Buffer,
+    assign_bg: wgpu::BindGroup,
+}
+
+impl FroxelPass {
+    /// Storage buffers must be available in BOTH compute and fragment
+    /// stages (WebGL2 has neither). `BLOOM_DISABLE_FROXEL=1` forces the
+    /// reference loop — used to (re)generate the clustered-parity
+    /// golden and to bisect suspected clustering bugs in the field.
+    pub fn supported(device: &wgpu::Device) -> bool {
+        if std::env::var_os("BLOOM_DISABLE_FROXEL").is_some_and(|v| v == "1") {
+            return false;
+        }
+        let l = device.limits();
+        l.max_storage_buffers_per_shader_stage >= 2
+            && l.max_storage_buffer_binding_size as u64
+                >= CLUSTER_COUNT as u64 * MAX_LIGHTS_PER_CLUSTER as u64 * 4 // widen before multiplying
+    }
+
+    pub fn new(device: &wgpu::Device) -> Self {
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("froxel_assign_shader"),
+            source: wgpu::ShaderSource::Wgsl(ASSIGN_SHADER.into()),
+        });
+        let assign_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("froxel_assign_layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 2,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 3,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+            ],
+        });
+        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("froxel_assign_pl"),
+            bind_group_layouts: &[Some(&assign_layout)],
+            ..Default::default()
+        });
+        let assign_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("froxel_assign_pipeline"),
+            layout: Some(&pl),
+            module: &shader,
+            entry_point: Some("cs_main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+        let params_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("froxel_params"),
+            contents: &[0u8; std::mem::size_of::<FroxelParams>()],
+            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
+        });
+        // count vec4 + 256 lights × 2 vec4
+        let lights_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_lights"),
+            size: 16 + 256 * 32,
+            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        let counts_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_counts"),
+            size: CLUSTER_COUNT as u64 * 4,
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        let indices_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("froxel_indices"),
+            size: CLUSTER_COUNT as u64 * MAX_LIGHTS_PER_CLUSTER as u64 * 4,
+            usage: wgpu::BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+        let assign_bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("froxel_assign_bg"),
+            layout: &assign_layout,
+            entries: &[
+                wgpu::BindGroupEntry { binding: 0, resource: params_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 1, resource: lights_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 2, resource: counts_buffer.as_entire_binding() },
+                wgpu::BindGroupEntry { binding: 3, resource: indices_buffer.as_entire_binding() },
+            ],
+        });
+        Self {
+            assign_pipeline,
+            assign_layout,
+            params_buffer,
+            lights_buffer,
+            counts_buffer,
+            indices_buffer,
+            assign_bg,
+        }
+    }
+
+    /// The bind-group entries matching [`extra_lighting_layout_entries`],
+    /// appended to every lighting bind group the renderer builds.
+    pub(super) fn extra_lighting_bind_entries(&self) -> [wgpu::BindGroupEntry<'_>; 3] {
+        [
+            wgpu::BindGroupEntry { binding: 10, resource: self.params_buffer.as_entire_binding() },
+            wgpu::BindGroupEntry { binding: 11, resource: self.counts_buffer.as_entire_binding() },
+            wgpu::BindGroupEntry { binding: 12, resource: self.indices_buffer.as_entire_binding() },
+        ]
+    }
+
+    /// Record the per-frame assignment dispatch. The caller uploads
+    /// params + lights first (see Renderer::record_froxel_assign).
+    pub fn record(&self, encoder: &mut wgpu::CommandEncoder) {
+        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+            label: Some("froxel_assign_pass"),
+            timestamp_writes: None,
+        });
+        pass.set_pipeline(&self.assign_pipeline);
+        pass.set_bind_group(0, &self.assign_bg, &[]);
+        pass.dispatch_workgroups(GRID_X.div_ceil(4), GRID_Y.div_ceil(4), GRID_Z.div_ceil(4));
+    }
+}
+
+impl super::Renderer {
+    /// Upload froxel params + the compact light list and dispatch the
+    /// assignment pass. Runs every 3D frame on supported devices —
+    /// even with zero lights, so `cluster_counts` never carries stale
+    /// data from a previous frame's camera.
+    pub(super) fn record_froxel_assign(&mut self, encoder: &mut wgpu::CommandEncoder) {
+        let Some(froxel) = &self.froxel else { return };
+
+        let proj = self.current_proj_matrix;
+        let p22 = proj[2][2];
+        let p32 = proj[3][2];
+        // Same linearization as Hi-Z: view_z(depth) = -p32/(depth + p22),
+        // positive forward. Evaluate at depth 0 and 1; min/max makes
+        // this hold for reversed-Z too, and the clamps keep an
+        // infinite-far projection (division by ~0) finite.
+        let z_at = |d: f32| -p32 / (d + p22);
+        let (z0, z1) = (z_at(0.0), z_at(1.0));
+        let znear = z0.min(z1).max(1e-3);
+        let zfar = z0.max(z1).clamp(znear * 1.001, 1e9);
+
+        // clip_position.xy is in render-target pixels — the HDR scene
+        // pass runs at render_extent (render_scale-aware), not surface
+        // size.
+        let (rw, rh) = self.render_extent();
+        let n = (self.lighting_uniforms.point_light_count[0] as u32)
+            .min(MAX_LIGHTS_PER_CLUSTER);
+        let params = FroxelParams {
+            view: self.current_view_matrix,
+            grid: [GRID_X, GRID_Y, GRID_Z, n],
+            depth_range: [znear, zfar, (zfar / znear).ln(), 0.0],
+            screen: [
+                GRID_X as f32 / rw.max(1) as f32,
+                GRID_Y as f32 / rh.max(1) as f32,
+                p22,
+                p32,
+            ],
+            inv_proj: self.current_inv_proj_matrix,
+        };
+        self.queue.write_buffer(&froxel.params_buffer, 0, bytemuck::bytes_of(&params));
+        let count = [n as f32, 0.0, 0.0, 0.0_f32];
+        self.queue.write_buffer(&froxel.lights_buffer, 0, bytemuck::bytes_of(&count));
+        self.queue.write_buffer(
+            &froxel.lights_buffer,
+            16,
+            bytemuck::cast_slice(&self.lighting_uniforms.point_lights),
+        );
+        froxel.record(encoder);
+    }
+}
diff --git a/native/shared/src/renderer/hiz.rs b/native/shared/src/renderer/hiz.rs
index d47815f..5db7447 100644
--- a/native/shared/src/renderer/hiz.rs
+++ b/native/shared/src/renderer/hiz.rs
@@ -5,7 +5,8 @@
 //! policy); pipelines and the mip chain stay fields on [`Renderer`].
 
 use super::formats::HIZ_MIP_COUNT;
-use super::{HizDownsampleParams, HizLinearizeParams, SsaoBlurParams};
+use super::formats::halton;
+use super::{HizDownsampleParams, HizLinearizeParams, SsaoBlurParams, SsaoParams};
 use super::Renderer;
 
 impl Renderer {
@@ -169,3 +170,96 @@ impl Renderer {
         }
     }
 }
+
+impl Renderer {
+    /// GTAO compute dispatch (half-res, Hi-Z-accelerated, temporal EMA
+    /// ping-pong). Caller guards on `ssao_enabled` and passes the
+    /// projection terms. Split from end_frame_with_scene.
+    #[allow(clippy::too_many_arguments)]
+    pub(super) fn record_gtao(
+        &mut self,
+        encoder: &mut wgpu::CommandEncoder,
+        profiler: &mut crate::profiler::Profiler,
+        half_w: u32,
+        half_h: u32,
+        p00: f32,
+        p11: f32,
+        p20: f32,
+        p21: f32,
+    ) {
+        let p22 = self.current_proj_matrix[2][2];
+        let p32 = self.current_proj_matrix[3][2];
+        // --- SSAO (compute GTAO, samples Hi-Z pyramid) --------------
+        let ld = self.lighting_uniforms.light_dir;
+        let v = &self.current_view_matrix;
+        let light_dir_vs = [
+            v[0][0]*ld[0] + v[1][0]*ld[1] + v[2][0]*ld[2],
+            v[0][1]*ld[0] + v[1][1]*ld[1] + v[2][1]*ld[2],
+            v[0][2]*ld[0] + v[1][2]*ld[1] + v[2][2]*ld[2],
+            0.0,
+        ];
+        // Temporal accumulation: ping-pong history textures.
+        // `write_idx` is the current-frame output; `read_idx` the
+        // previous frame's result. First 4 frames force alpha=1
+        // so the initial clear never contaminates the signal.
+        let write_idx = self.ssao_history_idx;
+        let read_idx = 1 - write_idx;
+        let frame_phase = self.ssao_history_frame % 4;
+        let force_refresh = u32::from(self.ssao_history_frame < 4);
+        // 4-frame EMA: alpha = 1/4 = 0.25 gives equal weight to
+        // each of the 4 phases at steady state.
+        let alpha = 0.25_f32;
+        // Halton-5 rotation: uncorrelated with TAA's base-2/3 jitter
+        // so the two noise patterns don't resonate.
+        let halton5 = halton(self.ssao_history_frame.wrapping_add(1), 5); // counter wraps below
+        let sp = SsaoParams {
+            params: [
+                1.0 / half_w as f32,
+                1.0 / half_h as f32,
+                self.ssao_radius,
+                self.ssao_strength,
+            ],
+            proj_row01: [p00, p11, p20, p21],
+            proj_z: [p22, p32, 1.0 / p00, 1.0 / p11],
+            light_dir_vs,
+            size: [half_w, half_h, frame_phase, force_refresh],
+            temporal: [alpha, halton5, 0.0, 0.0],
+        };
+        self.queue.write_buffer(&self.ssao_uniform_buffer, 0, bytemuck::bytes_of(&sp));
+
+        if self.ssao_bg_cache[write_idx].is_none() {
+            self.ssao_bg_cache[write_idx] = Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
+                label: Some("ssao_bg"),
+                layout: &self.ssao_layout,
+                entries: &[
+                    wgpu::BindGroupEntry { binding: 0, resource: self.ssao_uniform_buffer.as_entire_binding() },
+                    wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.ssao_rt_view) },
+                    wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.hiz_sampler) },
+                    wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.hiz_views[0]) },
+                    wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::TextureView(&self.hiz_views[1]) },
+                    wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.hiz_views[2]) },
+                    wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.hiz_views[3]) },
+                    wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.hiz_views[4]) },
+                    wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::TextureView(&self.velocity_rt_view) },
+                    wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[read_idx]) },
+                    wgpu::BindGroupEntry { binding: 10, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) },
+                    wgpu::BindGroupEntry { binding: 11, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[write_idx]) },
+                ],
+            }));
+        }
+        let bg = self.ssao_bg_cache[write_idx].as_ref().expect("ssao bind group cached just above");
+
+        let ssao_ts = profiler.compute_pass_timestamp_writes("ssao_pass");
+        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+            label: Some("ssao_pass"),
+            timestamp_writes: ssao_ts,
+        });
+        pass.set_pipeline(&self.ssao_pipeline);
+        pass.set_bind_group(0, bg, &[]);
+        pass.dispatch_workgroups(half_w.div_ceil(8), half_h.div_ceil(8), 1);
+
+        // Flip ping-pong indices for the next frame.
+        self.ssao_history_idx = read_idx;
+        self.ssao_history_frame = self.ssao_history_frame.wrapping_add(1);
+    }
+}
diff --git a/native/shared/src/renderer/lighting.rs b/native/shared/src/renderer/lighting.rs
new file mode 100644
index 0000000..453887b
--- /dev/null
+++ b/native/shared/src/renderer/lighting.rs
@@ -0,0 +1,153 @@
+//! Group-1 lighting bind group — layout and construction.
+//!
+//! The scene + immediate-mode 3D pipelines share one bind-group layout
+//! for lighting data: the Lighting UBO, env/IBL textures, the shadow
+//! cascade, and (on clustered devices) the froxel buffers at bindings
+//! 10-12. The bind group is rebuilt whenever the env source changes
+//! (HDR load, panorama, procedural sky); every rebuild goes through
+//! [`Renderer::make_lighting_bind_group`] so the entry list exists in
+//! exactly one place and cannot drift between call sites.
+
+use super::{froxel, Renderer};
+
+/// Create the group-1 layout. `clustered` appends the froxel bindings —
+/// set when [`froxel::FroxelPass::supported`] holds for the device.
+/// Pipelines whose shaders don't reference bindings 10-12 (pipeline_3d)
+/// share the layout unaffected; extra entries are legal as long as the
+/// bind group provides them.
+pub(super) fn create_lighting_layout(
+    device: &wgpu::Device,
+    clustered: bool,
+) -> wgpu::BindGroupLayout {
+    let color_tex = wgpu::BindingType::Texture {
+        sample_type: wgpu::TextureSampleType::Float { filterable: true },
+        view_dimension: wgpu::TextureViewDimension::D2,
+        multisampled: false,
+    };
+    let depth_tex = wgpu::BindingType::Texture {
+        sample_type: wgpu::TextureSampleType::Depth,
+        view_dimension: wgpu::TextureViewDimension::D2,
+        multisampled: false,
+    };
+    let fs = wgpu::ShaderStages::FRAGMENT;
+    let mut slots = vec![
+        // binding 0 — the Lighting uniform buffer
+        wgpu::BindGroupLayoutEntry {
+            binding: 0,
+            visibility: fs,
+            ty: wgpu::BindingType::Buffer {
+                ty: wgpu::BufferBindingType::Uniform,
+                has_dynamic_offset: false,
+                min_binding_size: None,
+            },
+            count: None,
+        },
+        // bindings 1 + 2 — env (IBL specular) texture and its sampler
+        wgpu::BindGroupLayoutEntry { binding: 1, visibility: fs, ty: color_tex, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 2,
+            visibility: fs,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
+            count: None,
+        },
+        // bindings 3 + 4 — BRDF LUT texture and its sampler
+        wgpu::BindGroupLayoutEntry { binding: 3, visibility: fs, ty: color_tex, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 4,
+            visibility: fs,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
+            count: None,
+        },
+        // bindings 5..=7 — shadow cascade depth maps; 8 — comparison sampler
+        wgpu::BindGroupLayoutEntry { binding: 5, visibility: fs, ty: depth_tex, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 6, visibility: fs, ty: depth_tex, count: None },
+        wgpu::BindGroupLayoutEntry { binding: 7, visibility: fs, ty: depth_tex, count: None },
+        wgpu::BindGroupLayoutEntry {
+            binding: 8,
+            visibility: fs,
+            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison),
+            count: None,
+        },
+        // binding 9 — env diffuse (IBL irradiance) texture
+        wgpu::BindGroupLayoutEntry { binding: 9, visibility: fs, ty: color_tex, count: None },
+    ];
+    if clustered {
+        slots.extend(froxel::extra_lighting_layout_entries());
+    }
+    device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+        label: Some("lighting_layout"),
+        entries: &slots,
+    })
+}
+
+/// The resources a lighting bind group needs apart from the env views.
+/// `Renderer::new` fills one in from constructor locals (`self` does
+/// not exist yet); [`Renderer::make_lighting_bind_group`] from fields.
+pub(super) struct LightingBindSources<'a> {
+    pub lighting_buffer: &'a wgpu::Buffer,
+    pub env_sampler: &'a wgpu::Sampler,
+    pub brdf_lut_view: &'a wgpu::TextureView,
+    pub brdf_lut_sampler: &'a wgpu::Sampler,
+    pub shadow_map: &'a crate::shadows::ShadowMap,
+    pub froxel: Option<&'a froxel::FroxelPass>,
+}
+
+/// Single definition of the group-1 entry list — all lighting bind
+/// groups the renderer creates are built by this function.
+pub(super) fn create_lighting_bind_group(
+    device: &wgpu::Device,
+    layout: &wgpu::BindGroupLayout,
+    label: &str,
+    src: &LightingBindSources<'_>,
+    env_view: &wgpu::TextureView,
+    diffuse_view: &wgpu::TextureView,
+) -> wgpu::BindGroup {
+    let mut bindings = vec![
+        wgpu::BindGroupEntry { binding: 0, resource: src.lighting_buffer.as_entire_binding() },
+        wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(env_view) },
+        wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(src.env_sampler) },
+        wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(src.brdf_lut_view) },
+        wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(src.brdf_lut_sampler) },
+        wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[0]) },
+        wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[1]) },
+        wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&src.shadow_map.depth_views[2]) },
+        wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&src.shadow_map.sampler) },
+        wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(diffuse_view) },
+    ];
+    if let Some(pass) = src.froxel {
+        bindings.extend(pass.extra_lighting_bind_entries());
+    }
+    device.create_bind_group(&wgpu::BindGroupDescriptor {
+        label: Some(label),
+        layout,
+        entries: &bindings,
+    })
+}
+
+impl Renderer {
+    /// Create a group-1 lighting bind group around the supplied
+    /// env-specular / env-diffuse views; the UBO, BRDF LUT, shadow
+    /// cascade and (when clustered) froxel buffers are read from `self`.
+    pub(super) fn make_lighting_bind_group(
+        &self,
+        label: &str,
+        env_view: &wgpu::TextureView,
+        diffuse_view: &wgpu::TextureView,
+    ) -> wgpu::BindGroup {
+        create_lighting_bind_group(
+            &self.device,
+            &self.lighting_layout,
+            label,
+            &LightingBindSources {
+                lighting_buffer: &self.lighting_buffer,
+                env_sampler: &self.env_sampler,
+                brdf_lut_view: &self.brdf_lut_view,
+                brdf_lut_sampler: &self.brdf_lut_sampler,
+                shadow_map: &self.shadow_map,
+                froxel: self.froxel.as_ref(),
+            },
+            env_view,
+            diffuse_view,
+        )
+    }
+}
diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs
index 3678aab..7bc3ed1 100644
--- a/native/shared/src/renderer/mod.rs
+++ b/native/shared/src/renderer/mod.rs
@@ -11,6 +11,8 @@ mod ssgi_pass;
 mod shadow_pass;
 mod postfx_chain;
 mod scene_pass;
+mod froxel;
+mod lighting;
 
 pub use occlusion::OcclusionCuller;
 use shaders::*;
@@ -204,7 +206,7 @@ pub(super) struct HizDownsampleParams {
 
 #[repr(C)]
 #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
-struct SsaoParams {
+pub(super) struct SsaoParams {
     /// xy = inv_size (1/half_w, 1/half_h), z = radius (world units),
     /// w = strength
     params: [f32; 4],
@@ -1394,6 +1396,12 @@ pub struct Renderer {
     // don't have to carry tangent vertex data or normal-map bindings.
pub scene_pipeline: wgpu::RenderPipeline, pub scene_material_layout: wgpu::BindGroupLayout, + /// Froxel light clustering (task #23). `Some` when the device has + /// fragment-stage storage buffers (everything but WebGL2); the + /// scene shader is then compiled with the clustered point-light + /// loop and `lighting_layout` gains bindings 10–12. `None` keeps + /// the plain count-driven loop. + pub froxel: Option, /// 1×1 gray env fallback and its sampler — bound in the lighting /// bind group before any HDR is loaded. `load_env_from_hdr` /// rebuilds the lighting bind group to swap in the real env @@ -1674,99 +1682,13 @@ impl Renderer { // higher device limit). pipeline_3d doesn't reference the env // / BRDF bindings — WGSL lets bind group layouts expose more // than a shader consumes. - let lighting_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { - label: Some("lighting_layout"), - entries: &[ - wgpu::BindGroupLayoutEntry { - binding: 0, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Buffer { - ty: wgpu::BufferBindingType::Uniform, - has_dynamic_offset: false, - min_binding_size: None, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 1, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 2, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 3, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 4, - visibility: 
wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 5, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Depth, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 6, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Depth, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 7, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Depth, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 8, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Comparison), - count: None, - }, - wgpu::BindGroupLayoutEntry { - binding: 9, - visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }, - ], - }); + // Froxel clustering first — its presence decides whether the + // lighting layout grows bindings 10-12 and which point-light + // loop the scene shader is compiled with. 
+ let froxel = froxel::FroxelPass::supported(&device) + .then(|| froxel::FroxelPass::new(&device)); + + let lighting_layout = lighting::create_lighting_layout(&device, froxel.is_some()); let lighting_uniforms = LightingUniforms::defaults(); let lighting_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("lighting_buffer"), @@ -2020,22 +1942,21 @@ impl Renderer { // group since the bind group binds the shadow depth view. let shadow_map = crate::shadows::ShadowMap::new(&device, Vertex3D::desc()); - let lighting_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("lighting_bg"), - layout: &lighting_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: lighting_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&scene_env_default_view) }, - wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&env_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&brdf_lut_view) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&brdf_lut_sampler) }, - wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[0]) }, - wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[1]) }, - wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&shadow_map.depth_views[2]) }, - wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&shadow_map.sampler) }, - wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&scene_env_default_view) }, - ], - }); + let lighting_bind_group = lighting::create_lighting_bind_group( + &device, + &lighting_layout, + "lighting_bg", + &lighting::LightingBindSources { + lighting_buffer: &lighting_buffer, + env_sampler: &env_sampler, + brdf_lut_view: &brdf_lut_view, + 
brdf_lut_sampler: &brdf_lut_sampler, + shadow_map: &shadow_map, + froxel: froxel.as_ref(), + }, + &scene_env_default_view, + &scene_env_default_view, + ); // --- Default 1x1 white texture --- let white_data = [255u8, 255, 255, 255]; @@ -2864,9 +2785,17 @@ impl Renderer { // ============================================================ // Scene pipeline (retained scene-graph draws with normal maps) // ============================================================ + // Clustered devices get the froxel point-light loop spliced in + // place of the plain reference loop (same shading math — the + // many_point_lights golden enforces equivalence). + let scene_shader_source: std::borrow::Cow<'static, str> = if froxel.is_some() { + froxel::clustered_scene_shader(SCENE_SHADER).into() + } else { + SCENE_SHADER.into() + }; let scene_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("scene_shader"), - source: wgpu::ShaderSource::Wgsl(SCENE_SHADER.into()), + source: wgpu::ShaderSource::Wgsl(scene_shader_source), }); // Scene material layout: // 0: base_color texture 4: metallic_roughness texture @@ -6274,6 +6203,7 @@ impl Renderer { aerial_perspective_sampler, env_diffuse_texture: None, scene_pipeline, + froxel, scene_material_layout, _scene_env_default_texture: scene_env_default_texture, scene_env_default_view, @@ -8227,22 +8157,7 @@ impl Renderer { // LUT bindings stay put — only env tex/sampler + diffuse view // change. 
let diffuse_view_bg = diffuse_texture.create_view(&wgpu::TextureViewDescriptor::default()); - let new_lighting_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("lighting_bg"), - layout: &self.lighting_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&view) }, - wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.env_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) }, - wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) }, - wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) }, - wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) }, - wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) }, - wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&diffuse_view_bg) }, - ], - }); + let new_lighting_bg = self.make_lighting_bind_group("lighting_bg", &view, &diffuse_view_bg); self.sky_texture = Some(texture); self.sky_bind_group = Some(bg); @@ -8555,22 +8470,7 @@ impl Renderer { self._scene_env_default_texture .create_view(&wgpu::TextureViewDescriptor::default()) }); - let new_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("lighting_bg_panorama"), - layout: &self.lighting_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&env_view) }, - wgpu::BindGroupEntry { binding: 2, 
resource: wgpu::BindingResource::Sampler(&self.env_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) }, - wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) }, - wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) }, - wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) }, - wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) }, - wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&diffuse_view) }, - ], - }); + let new_bg = self.make_lighting_bind_group("lighting_bg_panorama", &env_view, &diffuse_view); self.lighting_bind_group = new_bg; self.lighting_bg_is_procedural = false; } @@ -8582,22 +8482,11 @@ impl Renderer { /// every sun-move and the bind group's TextureView references /// remain valid. 
fn swap_lighting_bg_to_procedural(&mut self) { - let new_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("lighting_bg_procedural"), - layout: &self.lighting_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: self.lighting_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.procedural_sky_equirect_full_view) }, - wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.env_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.brdf_lut_view) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.brdf_lut_sampler) }, - wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[0]) }, - wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[1]) }, - wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.shadow_map.depth_views[2]) }, - wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::Sampler(&self.shadow_map.sampler) }, - wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.procedural_env_diffuse_view) }, - ], - }); + let new_bg = self.make_lighting_bind_group( + "lighting_bg_procedural", + &self.procedural_sky_equirect_full_view, + &self.procedural_env_diffuse_view, + ); self.lighting_bind_group = new_bg; self.lighting_bg_is_procedural = true; } @@ -9228,10 +9117,6 @@ impl Renderer { profiler.end("card_light"); } - // Cascaded shadow maps (with the ticket-004 cache-hit skip) — - // see record_shadow_pass in shadow_pass.rs. 
- self.record_shadow_pass(&mut encoder, profiler, scene); - // Upload immediate-mode 2D data profiler.begin("upload_geometry"); let has_2d = !self.vertices_2d.is_empty(); @@ -9254,434 +9139,225 @@ impl Renderer { } profiler.end("upload_geometry"); - // HDR scene pass (sky-view LUT refresh, sky + 3D batch + - // scene-graph render into the HDR MRTs, then the opaque - // material pass on the inner graph) — see - // record_hdr_scene_pass in scene_pass.rs. - self.record_hdr_scene_pass(&mut encoder, profiler, scene); - // ============================================================ - // Phase 4b — translucent / refractive / additive material pass + let surf_w = self.surface_config.width; + let surf_h = self.surface_config.height; + let exposure_src_idx = self.exposure_current_idx; + let exposure_dst_idx = 1 - self.exposure_current_idx; + // ============================================================ + // Frame render graph (RFC 0001 Phase 2b — complete). // - // Runs after opaque materials, before post-FX. Loads hdr_rt so - // opaque output survives; alpha-blends into it. Depth is - // bound as read-only so translucent draws participate in the - // depth test without writing. + // Every render pass between geometry upload and the terminal + // composite runs as a PassNode. Reads/writes document the real + // data dependencies; in addition, each node carries a with_after + // pin to its predecessor so the schedule reproduces the + // hand-tuned order exactly. Loosening those pins (to let the + // scheduler interleave independent passes) is the documented + // next refinement — do it dependency-by-dependency with the + // golden tests watching. // - // If any submitted translucent material declared - // `reads_scene = true`, we first snapshot hdr_rt into a - // swapchain-sized transient and bind that as group 4 - // scene_color_tex for the dispatch. Free after the pass so - // the transient pool reuses on the next frame. 
- if !self.material_system.translucent_commands.is_empty() { - // Back-to-front by view depth — required for correct alpha - // compositing; submission order is only kept between - // equal-depth draws (stable sort). - self.material_system.sort_translucent(); - profiler.begin("translucent_pass"); - let swap_w = self.surface_config.width; - let swap_h = self.surface_config.height; - self.transient_pool.begin_frame(swap_w, swap_h); - - // Phase 7 — run the impulse decay + splat compute BEFORE - // we build scene_inputs so the front view reflects this - // frame's submissions. - self.impulse_field.update(&self.device, &self.queue, &mut encoder); - - // Does any queued translucent material need the scene - // colour snapshot? - let needs_scene = self.material_system.translucent_commands - .iter() - .any(|c| self.material_system.pipelines - .get(c.material as usize - 1) - .and_then(|p| p.as_ref()) - .map(|p| p.reads_scene) - .unwrap_or(false)); - - let scene_color_tid = if needs_scene { - let desc = transient::TransientDesc::new( - formats::HDR_FORMAT, - wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING, - transient::SizePolicy::Swapchain, - ); - Some(self.transient_pool.acquire(&self.device, desc)) - } else { - None - }; - - // Phase 4c — depth snapshot. wgpu forbids sampling a - // texture that is also a depth-stencil attachment of the - // same pass, so we copy the opaque depth buffer into a - // transient before beginning the translucent pass and - // bind the transient at group 4 binding 2. Acquired - // whenever any translucent material reads_scene (same - // gate as colour) — cheap enough that it's not worth a - // separate `reads_depth` flag yet. 
- let scene_depth_tid = if needs_scene { - let desc = transient::TransientDesc::new( - formats::DEPTH_FORMAT, - wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING, - transient::SizePolicy::Swapchain, - ); - Some(self.transient_pool.acquire(&self.device, desc)) - } else { - None - }; - - // Snapshot hdr_rt + live depth -> transients. - if let (Some(ctid), Some(dtid)) = (scene_color_tid, scene_depth_tid) { - let color_tex = self.transient_pool.texture(ctid).expect("fresh color transient"); - encoder.copy_texture_to_texture( - wgpu::TexelCopyTextureInfo { - texture: &self.hdr_rt_texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - wgpu::TexelCopyTextureInfo { - texture: color_tex, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 }, - ); - let depth_tex = self.transient_pool.texture(dtid).expect("fresh depth transient"); - encoder.copy_texture_to_texture( - wgpu::TexelCopyTextureInfo { - texture: &self.depth_texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::DepthOnly, - }, - wgpu::TexelCopyTextureInfo { - texture: depth_tex, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::DepthOnly, - }, - wgpu::Extent3d { width: swap_w, height: swap_h, depth_or_array_layers: 1 }, - ); - let color_view = self.transient_pool.view(ctid).unwrap(); - let depth_view = self.transient_pool.view(dtid).unwrap(); - let imp_view = self.impulse_field.front_view(); - let imp_samp = self.impulse_field.sampler(); - self.material_system.update_scene_inputs( - &self.device, color_view, Some(depth_view), - Some((imp_view, imp_samp)), - ); - } else { - // No refractive/depth-reading materials this frame — - // still need a valid bind group. None → internal stubs. 
- self.material_system.update_scene_inputs( - &self.device, &self.hdr_rt_view, None, None, - ); + // The context owns &mut Renderer, so node closures borrow + // nothing at build time and can call the record_* methods. + // Feature toggles (ssao/ssr/ssgi/bloom) are checked inside the + // closures (or inside the methods), never by omitting nodes — + // with_after on a missing node is a schedule error. + // ============================================================ + { + use graph::{Graph, PassInput, PassNode, PassOutput}; + // Transient ordering tokens for resources the enum doesn't + // name. The textures themselves are persistent renderer + // fields; these ids only express producer→consumer edges. + const HIZ_PYRAMID: u32 = 0; + const SSAO_TEX: u32 = 1; + const SSR_TEX: u32 = 2; + const SSGI_TEX: u32 = 3; + const BLOOM_CHAIN: u32 = 4; + const COMPOSED: u32 = 5; + const LDR_FINAL: u32 = 6; + const FROXEL_CLUSTERS: u32 = 7; + + struct FrameCtx2<'a> { + r: &'a mut Renderer, + encoder: &'a mut wgpu::CommandEncoder, + profiler: &'a mut crate::profiler::Profiler, + scene: &'a mut crate::scene::SceneGraph, + surf: (u32, u32), + exposure_idx: (usize, usize), } - { - let t_ts = profiler.pass_timestamp_writes("translucent_pass"); - let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: Some("bloom_translucent_pass"), - color_attachments: &[Some(wgpu::RenderPassColorAttachment { - view: &self.hdr_rt_view, - resolve_target: None, - depth_slice: None, - ops: wgpu::Operations { - load: wgpu::LoadOp::Load, - store: wgpu::StoreOp::Store, - }, - })], - depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { - view: &self.depth_view, - depth_ops: Some(wgpu::Operations { - load: wgpu::LoadOp::Load, - // Translucents don't write depth — keep - // the opaque pass's depth pristine so - // downstream post-FX (SSR/SSGI) still - // sees the opaque geometry. 
- store: wgpu::StoreOp::Store, - }), - stencil_ops: None, - }), - timestamp_writes: t_ts, - occlusion_query_set: None, - multiview_mask: None, - }); - let cache = &self.model_gpu_cache; - self.material_system.dispatch_translucent(&mut pass, |handle, idx| { - if let Some(Some(meshes)) = cache.get(&handle) { - if idx < meshes.len() { - let mesh = &meshes[idx]; - return Some((&mesh.vb, &mesh.ib, mesh.index_count)); - } + let mut g: Graph = Graph::new(); + g.push( + PassNode::new("froxel_assign", Box::new(|c: &mut FrameCtx2| { + // No-op when self.froxel is None (the method gates); + // the node stays in the graph so with_after pins + // never dangle. + c.r.record_froxel_assign(c.encoder); + })) + .with_writes(&[PassOutput::Transient(FROXEL_CLUSTERS)]), + ); + g.push( + PassNode::new("shadow", Box::new(|c: &mut FrameCtx2| { + c.r.record_shadow_pass(c.encoder, c.profiler, c.scene); + })) + .with_writes(&[PassOutput::Shadow(0), PassOutput::Shadow(1), PassOutput::Shadow(2)]), + ); + g.push( + PassNode::new("hdr_scene", Box::new(|c: &mut FrameCtx2| { + c.r.record_hdr_scene_pass(c.encoder, c.profiler, c.scene); + })) + .with_reads(&[ + PassInput::Shadow(0), + PassInput::Shadow(1), + PassInput::Shadow(2), + PassInput::Transient(FROXEL_CLUSTERS), + ]) + .with_writes(&[ + PassOutput::HdrColor, + PassOutput::MaterialRt, + PassOutput::VelocityRt, + PassOutput::AlbedoRt, + PassOutput::Depth, + ]) + .with_after(&["shadow", "froxel_assign"]), + ); + g.push( + PassNode::new("translucent", Box::new(|c: &mut FrameCtx2| { + c.r.record_translucent_pass(c.encoder, c.profiler); + })) + // Reads the opaque HDR + depth and alpha-blends back into + // HdrColor; the pin (not a second HdrColor write) keeps a + // single declared writer per resource. 
+ .with_after(&["hdr_scene"]), + ); + g.push( + PassNode::new("hiz_build", Box::new(|c: &mut FrameCtx2| { + if !c.r.ssao_enabled { + return; } - None - }); - } - - if let Some(tid) = scene_color_tid { - self.transient_pool.release(tid); - } - profiler.end("translucent_pass"); - } - - // ============================================================ - // SSAO: half-res GTAO sampling a hierarchical linear-depth - // pyramid. Build hiz (linearize + 4 min-downsamples), then - // dispatch the GTAO compute pass. - // ============================================================ - profiler.begin("post_fx"); - let surf_w = self.surface_config.width; - let surf_h = self.surface_config.height; - if self.ssao_enabled { - let p = &self.current_proj_matrix; - let p00 = p[0][0]; - let p11 = p[1][1]; - let p20 = p[2][0]; - let p21 = p[2][1]; - let p22 = p[2][2]; - let p32 = p[3][2]; - let half_w = (surf_w / 2).max(1); - let half_h = (surf_h / 2).max(1); - - // Hi-Z build + occlusion capture run on the render graph - // (Phase 2b, cluster 1). Unlike the older material-pass nodes - // that capture individual field refs, these use the - // ctx-owns-renderer pattern: the context carries &mut Renderer - // and closures borrow nothing at build time — the shape the - // rest of end_frame_with_scene migrates onto. - { - use graph::{Graph, PassInput, PassNode, PassOutput}; - // Transient id 0 = the linearized Hi-Z pyramid for this - // frame (graph-internal ordering token; the textures - // themselves are persistent renderer fields). 
- const HIZ_PYRAMID: u32 = 0; - - struct HizCtx<'a> { - r: &'a mut Renderer, - encoder: &'a mut wgpu::CommandEncoder, - profiler: &'a mut crate::profiler::Profiler, - half: (u32, u32), - p22: f32, - p32: f32, - } - - let mut g: Graph = Graph::new(); - g.push( - PassNode::new( - "hiz_build", - Box::new(|ctx: &mut HizCtx| { - let (hw, hh) = ctx.half; - ctx.r.record_hiz_chain(ctx.encoder, ctx.profiler, hw, hh, ctx.p22, ctx.p32); - }), - ) - .with_reads(&[PassInput::SceneDepth]) - .with_writes(&[PassOutput::Transient(HIZ_PYRAMID)]), - ); - // Max-reduce the linearized depth into the 64x64 occlusion - // grid and queue its readback; scene.prepare consumes it - // next frame (one-frame latency, no stall). - g.push( - PassNode::new( - "occlusion_capture", - Box::new(|ctx: &mut HizCtx| { - let vp = ctx.r.vp_matrix(); - let (hw, hh) = ctx.half; - // Split borrows: occlusion is a sibling field - // of device/queue; record() also needs the - // hiz view. - let occlusion = &mut ctx.r.occlusion as *mut OcclusionCuller; - unsafe { - (*occlusion).record( - &ctx.r.device, - &ctx.r.queue, - ctx.encoder, - &ctx.r.hiz_views[0], - (hw, hh), - vp, - ); - } - }), - ) - .with_reads(&[PassInput::Transient(HIZ_PYRAMID)]), - ); - let mut ctx = HizCtx { - r: self, - encoder: &mut encoder, - profiler, - half: (half_w, half_h), - p22, - p32, - }; - if let Err(e) = g.execute(&mut ctx) { - eprintln!("[graph] hiz/occlusion cluster failed: {:?}", e); - } - } + let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1)); + let p22 = c.r.current_proj_matrix[2][2]; + let p32 = c.r.current_proj_matrix[3][2]; + c.r.record_hiz_chain(c.encoder, c.profiler, hw, hh, p22, p32); + })) + .with_reads(&[PassInput::SceneDepth]) + .with_writes(&[PassOutput::Transient(HIZ_PYRAMID)]) + .with_after(&["translucent"]), + ); + g.push( + PassNode::new("occlusion_capture", Box::new(|c: &mut FrameCtx2| { + if !c.r.ssao_enabled { + return; + } + let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1)); + let vp = 
c.r.vp_matrix(); + let occlusion = &mut c.r.occlusion as *mut OcclusionCuller; + unsafe { + (*occlusion).record(&c.r.device, &c.r.queue, c.encoder, &c.r.hiz_views[0], (hw, hh), vp); + } + })) + .with_reads(&[PassInput::Transient(HIZ_PYRAMID)]) + .with_after(&["hiz_build"]), + ); + g.push( + PassNode::new("gtao", Box::new(|c: &mut FrameCtx2| { + if !c.r.ssao_enabled { + return; + } + let (hw, hh) = ((c.surf.0 / 2).max(1), (c.surf.1 / 2).max(1)); + let p = &c.r.current_proj_matrix; + let (p00, p11, p20, p21) = (p[0][0], p[1][1], p[2][0], p[2][1]); + c.r.record_gtao(c.encoder, c.profiler, hw, hh, p00, p11, p20, p21); + })) + .with_reads(&[PassInput::Transient(HIZ_PYRAMID)]) + .with_after(&["occlusion_capture"]), + ); + g.push( + PassNode::new("ssao_blur", Box::new(|c: &mut FrameCtx2| { + c.r.record_ssao_blur(c.encoder, c.surf.0, c.surf.1); + })) + .with_writes(&[PassOutput::Transient(SSAO_TEX)]) + .with_after(&["gtao"]), + ); + g.push( + PassNode::new("ssr_march", Box::new(|c: &mut FrameCtx2| { + c.r.record_ssr_march(c.encoder, c.profiler); + })) + .with_reads(&[PassInput::SceneColor, PassInput::SceneDepth]) + .with_after(&["ssao_blur"]), + ); + g.push( + PassNode::new("ssr_temporal", Box::new(|c: &mut FrameCtx2| { + c.r.record_ssr_temporal(c.encoder); + })) + .with_writes(&[PassOutput::Transient(SSR_TEX)]) + .with_after(&["ssr_march"]), + ); + g.push( + PassNode::new("ssgi", Box::new(|c: &mut FrameCtx2| { + c.r.record_ssgi_passes(c.encoder, c.profiler, c.surf.0, c.surf.1); + })) + .with_reads(&[PassInput::SceneColor, PassInput::SceneDepth]) + .with_writes(&[PassOutput::Transient(SSGI_TEX)]) + .with_after(&["ssr_temporal"]), + ); + g.push( + PassNode::new("bloom", Box::new(|c: &mut FrameCtx2| { + c.r.record_bloom_chain(c.encoder, c.profiler, c.surf.0, c.surf.1); + })) + .with_reads(&[PassInput::SceneColor]) + .with_writes(&[PassOutput::Transient(BLOOM_CHAIN)]) + .with_after(&["ssgi"]), + ); + g.push( + PassNode::new("compose", Box::new(|c: &mut FrameCtx2| { + 
c.r.record_scene_compose(c.encoder); + })) + .with_reads(&[ + PassInput::SceneColor, + PassInput::Transient(SSAO_TEX), + PassInput::Transient(SSR_TEX), + PassInput::Transient(SSGI_TEX), + PassInput::Transient(BLOOM_CHAIN), + ]) + .with_writes(&[PassOutput::Transient(COMPOSED)]) + .with_after(&["bloom"]), + ); + g.push( + PassNode::new("postfx_tail", Box::new(|c: &mut FrameCtx2| { + c.r.record_postfx_tail(c.encoder, c.profiler); + })) + .with_reads(&[PassInput::Transient(COMPOSED), PassInput::MotionVectors]) + .with_writes(&[PassOutput::Transient(LDR_FINAL)]) + .with_after(&["compose"]), + ); + g.push( + PassNode::new("auto_exposure", Box::new(|c: &mut FrameCtx2| { + let (src, dst) = c.exposure_idx; + c.r.record_auto_exposure(c.encoder, src, dst); + })) + .with_reads(&[PassInput::Transient(LDR_FINAL)]) + .with_after(&["postfx_tail"]), + ); - // --- SSAO (compute GTAO, samples Hi-Z pyramid) -------------- - let ld = self.lighting_uniforms.light_dir; - let v = &self.current_view_matrix; - let light_dir_vs = [ - v[0][0]*ld[0] + v[1][0]*ld[1] + v[2][0]*ld[2], - v[0][1]*ld[0] + v[1][1]*ld[1] + v[2][1]*ld[2], - v[0][2]*ld[0] + v[1][2]*ld[1] + v[2][2]*ld[2], - 0.0, - ]; - // Temporal accumulation: ping-pong history textures. - // `write_idx` is the current-frame output; `read_idx` the - // previous frame's result. First 4 frames force alpha=1 - // so the initial clear never contaminates the signal. - let write_idx = self.ssao_history_idx; - let read_idx = 1 - write_idx; - let frame_phase = self.ssao_history_frame % 4; - let force_refresh = if self.ssao_history_frame < 4 { 1u32 } else { 0u32 }; - // 4-frame EMA: alpha = 1/4 = 0.25 gives equal weight to - // each of the 4 phases at steady state. - let alpha = 0.25_f32; - // Halton-5 rotation: uncorrelated with TAA's base-2/3 jitter - // so the two noise patterns don't resonate. 
- let halton5 = halton(self.ssao_history_frame + 1, 5); - let sp = SsaoParams { - params: [ - 1.0 / half_w as f32, - 1.0 / half_h as f32, - self.ssao_radius, - self.ssao_strength, - ], - proj_row01: [p00, p11, p20, p21], - proj_z: [p22, p32, 1.0 / p00, 1.0 / p11], - light_dir_vs, - size: [half_w, half_h, frame_phase, force_refresh], - temporal: [alpha, halton5, 0.0, 0.0], + let mut ctx = FrameCtx2 { + r: self, + encoder: &mut encoder, + profiler, + scene, + surf: (surf_w, surf_h), + exposure_idx: (exposure_src_idx, exposure_dst_idx), }; - self.queue.write_buffer(&self.ssao_uniform_buffer, 0, bytemuck::bytes_of(&sp)); - - if self.ssao_bg_cache[write_idx].is_none() { - self.ssao_bg_cache[write_idx] = Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("ssao_bg"), - layout: &self.ssao_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: self.ssao_uniform_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(&self.ssao_rt_view) }, - wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.hiz_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.hiz_views[0]) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::TextureView(&self.hiz_views[1]) }, - wgpu::BindGroupEntry { binding: 5, resource: wgpu::BindingResource::TextureView(&self.hiz_views[2]) }, - wgpu::BindGroupEntry { binding: 6, resource: wgpu::BindingResource::TextureView(&self.hiz_views[3]) }, - wgpu::BindGroupEntry { binding: 7, resource: wgpu::BindingResource::TextureView(&self.hiz_views[4]) }, - wgpu::BindGroupEntry { binding: 8, resource: wgpu::BindingResource::TextureView(&self.velocity_rt_view) }, - wgpu::BindGroupEntry { binding: 9, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[read_idx]) }, - wgpu::BindGroupEntry { binding: 10, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) 
}, - wgpu::BindGroupEntry { binding: 11, resource: wgpu::BindingResource::TextureView(&self.ssao_history_views[write_idx]) }, - ], - })); + if let Err(e) = g.execute(&mut ctx) { + // A schedule error means a malformed graph (cycle / + // unknown pin) — a programming error, not a runtime + // condition. Surface loudly; the frame still presents + // whatever was encoded before the failure. + eprintln!("[graph] frame graph failed: {:?}", e); } - let bg = self.ssao_bg_cache[write_idx].as_ref().unwrap(); - - let ssao_ts = profiler.compute_pass_timestamp_writes("ssao_pass"); - let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { - label: Some("ssao_pass"), - timestamp_writes: ssao_ts, - }); - pass.set_pipeline(&self.ssao_pipeline); - pass.set_bind_group(0, bg, &[]); - pass.dispatch_workgroups((half_w + 7) / 8, (half_h + 7) / 8, 1); - - // Flip ping-pong indices for the next frame. - self.ssao_history_idx = read_idx; - self.ssao_history_frame = self.ssao_history_frame.wrapping_add(1); } - // GTAO bilateral blur (or disabled-clear) — see hiz.rs. - self.record_ssao_blur(&mut encoder, surf_w, surf_h); - - // SSR ray march — see record_ssr_march in ssr_pass.rs. - self.record_ssr_march(&mut encoder, profiler); - - // SSR temporal denoiser — see record_ssr_temporal in ssr_pass.rs. - self.record_ssr_temporal(&mut encoder); - - // The compose pass reads denoised SSR from the current history - // texture when ssr_enabled; otherwise the raw ssr_rt (which was - // cleared to transparent above) so it contributes nothing. - // Lumen-style screen-probe SSGI (place/trace/temporal/resolve) - // or disabled-clear — see record_ssgi_passes in ssgi_pass.rs. - self.record_ssgi_passes(&mut encoder, profiler, surf_w, surf_h); - - - // The resolve pass writes directly into `ssgi_rt_view`, so - // downstream composite + TAA reads are unchanged from the - // legacy path. 
- // Bloom chain (Karis-thresholded downsample + additive upsample) - // — see record_bloom_chain in postfx_chain.rs. - self.record_bloom_chain(&mut encoder, profiler, surf_w, surf_h); - - - // Scene compose (HDR + SSR + SSGI*albedo + bloom + fog + shafts - // -> composed_rt) — see record_scene_compose in postfx_chain.rs. - self.record_scene_compose(&mut encoder); - // Post-FX tail: upscale/TAA/DoF/motion-blur/SSS/CAS, each - // reading the previous enabled stage — see - // record_postfx_tail in postfx_chain.rs. - self.record_postfx_tail(&mut encoder, profiler); - let composite_src_view = self.composite_source_view(); - // ============================================================ - // Auto-exposure update pass (runs only when auto_exposure is - // on; otherwise the composite reads the old exposure texture - // which is fine since manual_exposure bypasses the read). - // ============================================================ - let exposure_src_idx = self.exposure_current_idx; - let exposure_dst_idx = 1 - self.exposure_current_idx; - if self.auto_exposure { - let ep = ExposureParams { - params: [ - self.auto_exposure_key, - self.auto_exposure_rate, - // Wide clamp — without SSGI, Sponza's shadowed - // corridors have ~7× less average luma than its - // sunlit courtyard, so exposure needs to span - // the same range to keep perceived brightness - // stable across rotations. 
- 0.1, - 10.0, - ], - }; - self.queue.write_buffer(&self.exposure_uniform_buffer, 0, bytemuck::bytes_of(&ep)); - - let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("exposure_bg"), - layout: &self.exposure_layout, - entries: &[ - wgpu::BindGroupEntry { binding: 0, resource: self.exposure_uniform_buffer.as_entire_binding() }, - wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(composite_src_view) }, - wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) }, - wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.exposure_views[exposure_src_idx]) }, - wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) }, - ], - }); - let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: Some("exposure_pass"), - color_attachments: &[Some(wgpu::RenderPassColorAttachment { - view: &self.exposure_views[exposure_dst_idx], - resolve_target: None, - depth_slice: None, - ops: wgpu::Operations { - load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT), - store: wgpu::StoreOp::Store, - }, - })], - depth_stencil_attachment: None, - timestamp_writes: None, - occlusion_query_set: None, - multiview_mask: None, - }); - pass.set_pipeline(&self.exposure_pipeline); - pass.set_bind_group(0, &bg, &[]); - pass.draw(0..3, 0..1); - } - // composite_uniform_buffer carries per-frame composite state. // x = tonemap kind (0 ACES / 1 AgX) // y = auto-exposure toggle diff --git a/native/shared/src/renderer/postfx_chain.rs b/native/shared/src/renderer/postfx_chain.rs index eac7864..4a9d946 100644 --- a/native/shared/src/renderer/postfx_chain.rs +++ b/native/shared/src/renderer/postfx_chain.rs @@ -625,3 +625,67 @@ impl Renderer { } } } + +impl Renderer { + /// Auto-exposure measure + adapt pass into the dst slot of the + /// ping-pong exposure texture. 
No-op when auto_exposure is off (the + /// composite keeps reading the stale texture, which manual_exposure + /// bypasses). The caller owns the src/dst indices because the + /// composite binds the same dst view. + pub(super) fn record_auto_exposure( + &mut self, + encoder: &mut wgpu::CommandEncoder, + exposure_src_idx: usize, + exposure_dst_idx: usize, + ) { + // The luminance source is whatever the composite will read. + let composite_src_view = self.composite_source_view(); + if self.auto_exposure { + let ep = ExposureParams { + params: [ + self.auto_exposure_key, + self.auto_exposure_rate, + // Wide clamp — without SSGI, Sponza's shadowed + // corridors have ~7× less average luma than its + // sunlit courtyard, so exposure needs to span + // the same range to keep perceived brightness + // stable across rotations. + 0.1, + 10.0, + ], + }; + self.queue.write_buffer(&self.exposure_uniform_buffer, 0, bytemuck::bytes_of(&ep)); + + let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("exposure_bg"), + layout: &self.exposure_layout, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: self.exposure_uniform_buffer.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(composite_src_view) }, + wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) }, + wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&self.exposure_views[exposure_src_idx]) }, + wgpu::BindGroupEntry { binding: 4, resource: wgpu::BindingResource::Sampler(&self.composite_sampler) }, + ], + }); + let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("exposure_pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &self.exposure_views[exposure_dst_idx], + resolve_target: None, + depth_slice: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT), + store: 
wgpu::StoreOp::Store,
+                    },
+                })],
+                depth_stencil_attachment: None,
+                timestamp_writes: None,
+                occlusion_query_set: None,
+                multiview_mask: None,
+            });
+            pass.set_pipeline(&self.exposure_pipeline);
+            pass.set_bind_group(0, &bg, &[]);
+            pass.draw(0..3, 0..1);
+        }
+    }
+}
diff --git a/native/shared/src/renderer/scene_pass.rs b/native/shared/src/renderer/scene_pass.rs
index f290cec..a191ae2 100644
--- a/native/shared/src/renderer/scene_pass.rs
+++ b/native/shared/src/renderer/scene_pass.rs
@@ -285,3 +285,209 @@ impl Renderer {
     }
 }

+
+impl Renderer {
+    /// Translucent / refractive / additive material pass: after opaque,
+    /// before post-FX; loads hdr_rt, depth read-only, back-to-front
+    /// sorted; snapshots scene color for reads_scene materials. Split
+    /// from end_frame_with_scene.
+    ///
+    /// Returns immediately when no translucent commands are queued.
+    /// Otherwise the work is bracketed by the profiler's
+    /// `"translucent_pass"` scope, and the scene colour/depth snapshots
+    /// (acquired only when a queued material has `reads_scene`) are
+    /// released back to the transient pool after the render pass.
+    pub(super) fn record_translucent_pass(
+        &mut self,
+        encoder: &mut wgpu::CommandEncoder,
+        profiler: &mut crate::profiler::Profiler,
+    ) {
+        // ============================================================
+        // Phase 4b — translucent / refractive / additive material pass
+        // ============================================================
+        //
+        // Runs after opaque materials, before post-FX. Loads hdr_rt so
+        // opaque output survives; alpha-blends into it. Depth is
+        // bound as read-only so translucent draws participate in the
+        // depth test without writing.
+        //
+        // If any submitted translucent material declared
+        // `reads_scene = true`, we first snapshot hdr_rt into a
+        // swapchain-sized transient and bind that as group 4
+        // scene_color_tex for the dispatch. Free after the pass so
+        // the transient pool reuses on the next frame.
+        if self.material_system.translucent_commands.is_empty() {
+            return;
+        }
+
+        // Back-to-front by view depth — required for correct alpha
+        // compositing; submission order is only kept between
+        // equal-depth draws (stable sort).
+        self.material_system.sort_translucent();
+        profiler.begin("translucent_pass");
+        let swap_w = self.surface_config.width;
+        let swap_h = self.surface_config.height;
+        self.transient_pool.begin_frame(swap_w, swap_h);
+
+        // Phase 7 — run the impulse decay + splat compute BEFORE
+        // we build scene_inputs so the front view reflects this
+        // frame's submissions.
+        self.impulse_field.update(&self.device, &self.queue, &mut *encoder);
+
+        // Does any queued translucent material need the scene
+        // colour snapshot? Pipeline slots are indexed by handle - 1;
+        // `checked_sub` turns a 0 handle into "no pipeline" instead of
+        // underflowing the subtraction (a panic in debug builds).
+        let needs_scene = self.material_system.translucent_commands
+            .iter()
+            .any(|cmd| {
+                (cmd.material as usize)
+                    .checked_sub(1)
+                    .and_then(|slot| self.material_system.pipelines.get(slot))
+                    .and_then(|p| p.as_ref())
+                    .map(|p| p.reads_scene)
+                    .unwrap_or(false)
+            });
+
+        // Both snapshots are copy destinations that are then sampled.
+        let snapshot_usage =
+            wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING;
+
+        let scene_color_tid = if needs_scene {
+            let desc = transient::TransientDesc::new(
+                formats::HDR_FORMAT,
+                snapshot_usage,
+                transient::SizePolicy::Swapchain,
+            );
+            Some(self.transient_pool.acquire(&self.device, desc))
+        } else {
+            None
+        };
+
+        // Phase 4c — depth snapshot. wgpu forbids sampling a
+        // texture that is also a depth-stencil attachment of the
+        // same pass, so we copy the opaque depth buffer into a
+        // transient before beginning the translucent pass and
+        // bind the transient at group 4 binding 2. Acquired
+        // whenever any translucent material reads_scene (same
+        // gate as colour) — cheap enough that it's not worth a
+        // separate `reads_depth` flag yet.
+        let scene_depth_tid = if needs_scene {
+            let desc = transient::TransientDesc::new(
+                formats::DEPTH_FORMAT,
+                snapshot_usage,
+                transient::SizePolicy::Swapchain,
+            );
+            Some(self.transient_pool.acquire(&self.device, desc))
+        } else {
+            None
+        };
+
+        // Snapshot hdr_rt + live depth -> transients.
+        if let (Some(ctid), Some(dtid)) = (scene_color_tid, scene_depth_tid) {
+            let full_frame = wgpu::Extent3d {
+                width: swap_w,
+                height: swap_h,
+                depth_or_array_layers: 1,
+            };
+            let color_tex = self.transient_pool.texture(ctid).expect("fresh color transient");
+            encoder.copy_texture_to_texture(
+                wgpu::TexelCopyTextureInfo {
+                    texture: &self.hdr_rt_texture,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                wgpu::TexelCopyTextureInfo {
+                    texture: color_tex,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                full_frame,
+            );
+            let depth_tex = self.transient_pool.texture(dtid).expect("fresh depth transient");
+            encoder.copy_texture_to_texture(
+                wgpu::TexelCopyTextureInfo {
+                    texture: &self.depth_texture,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::DepthOnly,
+                },
+                wgpu::TexelCopyTextureInfo {
+                    texture: depth_tex,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::DepthOnly,
+                },
+                full_frame,
+            );
+            let color_view = self.transient_pool.view(ctid).expect("fresh color transient view");
+            let depth_view = self.transient_pool.view(dtid).expect("fresh depth transient view");
+            let imp_view = self.impulse_field.front_view();
+            let imp_samp = self.impulse_field.sampler();
+            self.material_system.update_scene_inputs(
+                &self.device, color_view, Some(depth_view),
+                Some((imp_view, imp_samp)),
+            );
+        } else {
+            // No refractive/depth-reading materials this frame —
+            // still need a valid bind group. None → internal stubs.
+            self.material_system.update_scene_inputs(
+                &self.device, &self.hdr_rt_view, None, None,
+            );
+        }
+
+        {
+            let t_ts = profiler.pass_timestamp_writes("translucent_pass");
+            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: Some("bloom_translucent_pass"),
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &self.hdr_rt_view,
+                    resolve_target: None,
+                    depth_slice: None,
+                    ops: wgpu::Operations {
+                        load: wgpu::LoadOp::Load,
+                        store: wgpu::StoreOp::Store,
+                    },
+                })],
+                depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment {
+                    view: &self.depth_view,
+                    depth_ops: Some(wgpu::Operations {
+                        load: wgpu::LoadOp::Load,
+                        // Translucents don't write depth — keep
+                        // the opaque pass's depth pristine so
+                        // downstream post-FX (SSR/SSGI) still
+                        // sees the opaque geometry.
+                        // NOTE(review): Load/Store leaves the attachment
+                        // writable; "no depth writes" presumably relies on
+                        // the translucent pipelines' depth state — confirm.
+                        store: wgpu::StoreOp::Store,
+                    }),
+                    stencil_ops: None,
+                }),
+                timestamp_writes: t_ts,
+                occlusion_query_set: None,
+                multiview_mask: None,
+            });
+            let cache = &self.model_gpu_cache;
+            self.material_system.dispatch_translucent(&mut pass, |handle, idx| {
+                // A missing cache entry, a `None` entry or an
+                // out-of-range mesh index all yield `None`.
+                let mesh = cache.get(&handle)?.as_ref()?.get(idx)?;
+                Some((&mesh.vb, &mesh.ib, mesh.index_count))
+            });
+        }
+
+        // Hand both snapshots back to the pool. Colour and depth are
+        // acquired under the same `needs_scene` gate, so they are
+        // released together; otherwise the depth one stays checked out.
+        if let Some(tid) = scene_color_tid {
+            self.transient_pool.release(tid);
+        }
+        if let Some(tid) = scene_depth_tid {
+            self.transient_pool.release(tid);
+        }
+        profiler.end("translucent_pass");
+    }
+}
diff --git a/native/shared/src/renderer/shaders/core.rs b/native/shared/src/renderer/shaders/core.rs
index c7ee590..e3b4aaf 100644
--- a/native/shared/src/renderer/shaders/core.rs
+++ b/native/shared/src/renderer/shaders/core.rs
@@ -766,6 +766,10 @@ fn fs_main_scene(in: VertexOutputScene) -> SceneOut {
             base_color, metallic, roughness);
     }

+    // BEGIN-POINT-LIGHT-LOOP (replaced by the froxel-clustered variant
+    // at pipeline build on storage-buffer-capable backends — see
+    // renderer/froxel.rs; this plain loop is the WebGL fallback and the
+    // semantic reference the clustered path must
match exactly)
     let pt_count = u32(lighting.point_light_count.x);
     for (var i = 0u; i < pt_count; i++) {
         let pl = lighting.point_lights[i];
@@ -780,6 +784,7 @@ fn fs_main_scene(in: VertexOutputScene) -> SceneOut {
                 base_color, metallic, roughness);
         }
     }
+    // END-POINT-LIGHT-LOOP

     // --- Split-sum IBL (Karis 2013) ---
     // IBL_diffuse = base_color * (1 - kS_avg) * (1 - metallic)
diff --git a/native/shared/src/renderer/shaders/post.rs b/native/shared/src/renderer/shaders/post.rs
index 0072b9f..a8c5733 100644
--- a/native/shared/src/renderer/shaders/post.rs
+++ b/native/shared/src/renderer/shaders/post.rs
@@ -792,6 +792,21 @@ fn fs_main(in: VsOut) -> @location(0) vec4<f32> {
     var prev_uv: vec2<f32>;
     if (vel_len > 0.00001) {
         prev_uv = vec2<f32>(in.uv.x - vel.x, in.uv.y + vel.y);
+    } else if (depth >= 0.9999) {
+        // Sky / far plane: the positional reconstruction divides by a
+        // near-zero w and reprojects sky pixels onto arbitrary scene
+        // points — the luma-only history clamp then locks that wrong
+        // chroma in forever (uniform green/red sky tint). The sky is at
+        // infinity, so reproject the view DIRECTION instead: exact under
+        // camera rotation, translation-invariant by definition.
+        let dir = world_h.xyz; // w ~ 0 at the far plane: xyz IS the direction
+        let prev_clip = u.prev_vp * vec4<f32>(dir, 0.0);
+        if (prev_clip.w > 0.00001) {
+            let prev_ndc = prev_clip.xyz / prev_clip.w;
+            prev_uv = vec2<f32>(prev_ndc.x * 0.5 + 0.5, 1.0 - (prev_ndc.y * 0.5 + 0.5));
+        } else {
+            prev_uv = in.uv;
+        }
     } else {
         let prev_clip = u.prev_vp * vec4<f32>(world, 1.0);
         let prev_ndc = prev_clip.xyz / prev_clip.w;
diff --git a/native/shared/tests/golden/lit_primitives_taa.png b/native/shared/tests/golden/lit_primitives_taa.png
new file mode 100644
index 0000000..cfa8302
Binary files /dev/null and b/native/shared/tests/golden/lit_primitives_taa.png differ
diff --git a/native/shared/tests/golden/many_point_lights_clustered_scene.png b/native/shared/tests/golden/many_point_lights_clustered_scene.png
new file mode 100644
index 0000000..23e65aa
Binary files /dev/null and b/native/shared/tests/golden/many_point_lights_clustered_scene.png differ
diff --git a/native/shared/tests/golden_render.rs b/native/shared/tests/golden_render.rs
index 0c70d8c..753079f 100644
--- a/native/shared/tests/golden_render.rs
+++ b/native/shared/tests/golden_render.rs
@@ -17,7 +17,7 @@
 //! regenerated with BLOOM_UPDATE_GOLDEN=1 `cargo test golden`.

 use bloom_shared::engine::EngineState;
-use bloom_shared::renderer::Renderer;
+use bloom_shared::renderer::{Renderer, Vertex3D};

 const W: u32 = 256;
 const H: u32 = 256;
@@ -219,45 +219,116 @@ fn golden_many_point_lights() {
     compare_or_update("many_point_lights", w, h, &rgba);
 }

+/// Froxel-clustering parity gate. The golden for this test is generated
+/// with `BLOOM_DISABLE_FROXEL=1` (the plain reference loop); the test
+/// then runs through the clustered scene shader, so any divergence
+/// between the two point-light paths — wrong cluster lookup, lights
+/// missed by the sphere/AABB assignment, slice math drift — shows up as
+/// a pixel diff.
Unlike `golden_many_point_lights` (immediate-mode +/// `pipeline_3d`, which keeps the plain loop), this drives the retained +/// scene graph through `scene_pipeline`, the shader the clustered loop +/// is spliced into. #[test] -fn golden_lod_selection() { - use bloom_shared::renderer::Vertex3D; +fn golden_many_point_lights_clustered_scene() { let Some(mut eng) = try_engine() else { eprintln!("skip: no GPU adapter"); return; }; + // The gate is meaningless if the clustered path silently fell back + // to the reference loop. Storage buffers are available on every + // non-WebGL2 device this test runs on, so demand the froxel path + // unless the kill-switch is set (golden regeneration). + if std::env::var_os("BLOOM_DISABLE_FROXEL").is_none() { + assert!( + eng.renderer.froxel.is_some(), + "froxel clustering inactive on a storage-buffer-capable adapter — \ + parity test would silently test the reference loop against itself" + ); + } - fn cube_verts(half: f32, color: [f32; 4]) -> (Vec, Vec) { - // 6 faces, outward winding (matches scene-node conventions: - // prepare() recomputes bounds from positions). 
- let h = half; - let faces: [([f32; 3], [[f32; 3]; 4]); 6] = [ - ([0.0, 0.0, -1.0], [[-h,-h,-h],[ h,-h,-h],[ h, h,-h],[-h, h,-h]]), - ([0.0, 0.0, 1.0], [[ h,-h, h],[-h,-h, h],[-h, h, h],[ h, h, h]]), - ([-1.0, 0.0, 0.0], [[-h,-h, h],[-h,-h,-h],[-h, h,-h],[-h, h, h]]), - ([1.0, 0.0, 0.0], [[ h,-h,-h],[ h,-h, h],[ h, h, h],[ h, h,-h]]), - ([0.0, 1.0, 0.0], [[-h, h,-h],[ h, h,-h],[ h, h, h],[-h, h, h]]), - ([0.0, -1.0, 0.0], [[-h,-h, h],[ h,-h, h],[ h,-h,-h],[-h,-h,-h]]), - ]; - let mut verts = Vec::new(); - let mut idx = Vec::new(); - for (normal, vs) in faces { - let base = verts.len() as u32; - for p in vs { - verts.push(Vertex3D { - position: p, - normal, - color, - uv: [0.0, 0.0], - joints: [0.0; 4], - weights: [0.0; 4], - tangent: [0.0; 4], - }); - } - idx.extend_from_slice(&[base, base + 2, base + 1, base, base + 3, base + 2]); + // Floor (squashed cube) + a ring of cubes, lit by 40 colored point + // lights — enough that most froxels see only a few lights, so a + // broken cluster lookup cannot hide. 
+ let scale_translate = |sx: f32, sy: f32, sz: f32, x: f32, y: f32, z: f32| -> [[f32; 4]; 4] { + let mut m = [[0.0f32; 4]; 4]; + m[0][0] = sx; m[1][1] = sy; m[2][2] = sz; m[3][3] = 1.0; + m[3][0] = x; m[3][1] = y; m[3][2] = z; + m + }; + let (floor_v, floor_i) = cube_verts(0.5, [0.45, 0.45, 0.45, 1.0]); + let floor = eng.scene.create_node(); + eng.scene.update_geometry(floor, floor_v, floor_i); + eng.scene.set_transform(floor, scale_translate(14.0, 0.2, 14.0, 0.0, -0.1, 0.0)); + + let (cube_v, cube_i) = cube_verts(0.5, [0.8, 0.8, 0.8, 1.0]); + for i in 0..6u32 { + let t = i as f32 / 6.0 * std::f32::consts::TAU; + let node = eng.scene.create_node(); + eng.scene.update_geometry(node, cube_v.clone(), cube_i.clone()); + eng.scene.set_transform(node, scale_translate(1.0, 1.0, 1.0, t.cos() * 2.2, 0.5, t.sin() * 2.2)); + } + + let (w, h, rgba) = render(&mut eng, 6, |eng| { + let r = &mut eng.renderer; + r.set_clear_color(2.0, 2.0, 4.0, 255.0); + r.begin_mode_3d( + 6.0, 7.0, 6.0, + 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 60.0, 0.0, + ); + for i in 0..40u32 { + let t = i as f32 / 40.0 * std::f32::consts::TAU; + let (sx, sz) = (t.cos() * 4.0, t.sin() * 4.0); + let (lr, lg, lb) = ( + 0.5 + 0.5 * (t).cos(), + 0.5 + 0.5 * (t + 2.094).cos(), + 0.5 + 0.5 * (t + 4.189).cos(), + ); + r.add_point_light(sx, 1.2, sz, 3.5, lr, lg, lb, 1.6); } - (verts, idx) + }); + compare_or_update("many_point_lights_clustered_scene", w, h, &rgba); +} + +/// Unit cube as scene-node geometry — 6 faces, outward winding (matches +/// scene-node conventions: prepare() recomputes bounds from positions). 
+fn cube_verts(half: f32, color: [f32; 4]) -> (Vec, Vec) { + let h = half; + let faces: [([f32; 3], [[f32; 3]; 4]); 6] = [ + ([0.0, 0.0, -1.0], [[-h,-h,-h],[ h,-h,-h],[ h, h,-h],[-h, h,-h]]), + ([0.0, 0.0, 1.0], [[ h,-h, h],[-h,-h, h],[-h, h, h],[ h, h, h]]), + ([-1.0, 0.0, 0.0], [[-h,-h, h],[-h,-h,-h],[-h, h,-h],[-h, h, h]]), + ([1.0, 0.0, 0.0], [[ h,-h,-h],[ h,-h, h],[ h, h, h],[ h, h,-h]]), + ([0.0, 1.0, 0.0], [[-h, h,-h],[ h, h,-h],[ h, h, h],[-h, h, h]]), + ([0.0, -1.0, 0.0], [[-h,-h, h],[ h,-h, h],[ h,-h,-h],[-h,-h,-h]]), + ]; + let mut verts = Vec::new(); + let mut idx = Vec::new(); + for (normal, vs) in faces { + let base = verts.len() as u32; + for p in vs { + verts.push(Vertex3D { + position: p, + normal, + color, + uv: [0.0, 0.0], + joints: [0.0; 4], + weights: [0.0; 4], + tangent: [0.0; 4], + }); + } + idx.extend_from_slice(&[base, base + 2, base + 1, base, base + 3, base + 2]); } + (verts, idx) +} + +#[test] +fn golden_lod_selection() { + let Some(mut eng) = try_engine() else { + eprintln!("skip: no GPU adapter"); + return; + }; let (red_v, red_i) = cube_verts(0.5, [0.9, 0.1, 0.1, 1.0]); let (green_v, green_i) = cube_verts(0.5, [0.1, 0.9, 0.1, 1.0]); @@ -345,3 +416,29 @@ fn cooked_bc7_texture_matches_raw() { "cooked render diverges from raw render: max channel diff {max_diff}" ); } + +#[test] +fn golden_lit_primitives_taa() { + let Some(mut eng) = try_engine() else { + eprintln!("skip: no GPU adapter"); + return; + }; + // Same scene as lit_primitives_3d but with TAA ON: pins the TAA + // branch of the post-FX cascade (reprojection, neighborhood clamp, + // Catmull-Rom upscale path) that the TAA-off goldens never touch. + // The Halton jitter sequence is indexed by frame number, so a fixed + // frame count renders deterministically. 
+ eng.renderer.set_taa_enabled(true); + let (w, h, rgba) = render(&mut eng, 10, |eng| { + let r = &mut eng.renderer; + r.set_clear_color(13.0, 18.0, 26.0, 255.0); + r.begin_mode_3d(4.0, 3.0, 6.0, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 45.0, 0.0); + r.add_directional_light(-0.5, -1.0, -0.3, 1.0, 0.95, 0.9, 1.2); + r.add_point_light(2.0, 2.0, 2.0, 10.0, 0.2, 0.4, 1.0, 2.0); + r.draw_plane(0.0, 0.0, 0.0, 10.0, 10.0, 120.0, 120.0, 125.0, 255.0); + r.draw_cube(-1.2, 0.5, 0.0, 1.0, 1.0, 1.0, 230.0, 41.0, 55.0, 255.0); + r.draw_sphere(1.2, 0.75, 0.5, 0.75, 0.0, 228.0, 48.0, 255.0); + r.draw_cube(0.0, 1.6, -1.0, 0.8, 0.8, 0.8, 253.0, 249.0, 0.0, 255.0); + }); + compare_or_update("lit_primitives_taa", w, h, &rgba); +} diff --git a/tools/file-lines-baseline.json b/tools/file-lines-baseline.json index b914210..e5d34f4 100644 --- a/tools/file-lines-baseline.json +++ b/tools/file-lines-baseline.json @@ -1,3 +1,3 @@ { - "native/shared/src/renderer/mod.rs": 12099 -} \ No newline at end of file + "native/shared/src/renderer/mod.rs": 11775 +}