diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b82fca9b..284d96d9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,9 @@ set(pgen set(output ${default_output} CACHE BOOL "Enable output") +set(ascent + ${default_ascent} + CACHE BOOL "Enable Ascent in situ visualization") set(mpi ${default_mpi} CACHE BOOL "Use MPI") @@ -319,6 +322,22 @@ if(${output}) endif() endif() +# Ascent (in situ visualization) +if(${ascent}) + if(NOT ${output}) + message( + FATAL_ERROR + "${Red}Ascent requires `output=ON` to be enabled.${ColorReset}") + endif() + find_package(Ascent REQUIRED) + add_compile_options("-D ASCENT_ENABLED") + if(${mpi}) + set(DEPENDENCIES ${DEPENDENCIES} ascent::ascent_mpi) + else() + set(DEPENDENCIES ${DEPENDENCIES} ascent::ascent) + endif() +endif() + link_libraries(${DEPENDENCIES}) set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/) diff --git a/ascent/field_lines/README.md b/ascent/field_lines/README.md new file mode 100644 index 000000000..dda17e291 --- /dev/null +++ b/ascent/field_lines/README.md @@ -0,0 +1,66 @@ +# `turbulence_lines` + +Driven 3D turbulence (re-using the standard +[`turbulence`](../../turbulence) pgen) with in-situ rendering of magnetic +**field lines** via [Ascent](https://ascent.readthedocs.io/). + +The same setup as +[`turbulence_ascent`](../turbulence_ascent), but instead of volume-rendering +`|B|`, this example traces the magnetic field with Ascent's `streamline` +filter from a uniform grid of seed points inside the box and rasterizes +the resulting polylines as 3D tubes. One PNG is produced per render +cycle: + +``` +turbulence_lines_0000.png +turbulence_lines_0001.png +... +``` + +## Files + +- `turbulence_lines.toml` — input file (3D, periodic, driven antenna). +- `ascent_actions.yaml` — Ascent pipeline (composite_vector → streamline) + + ray-traced pseudocolor scene. +- `README.md` — this file. + +The C++ problem generator is reused from `pgens/turbulence/pgen.hpp`. + +## Build + +```sh +cmake -S . -B build \ + -D pgen=turbulence \ + -D output=ON \ + -D ascent=ON \ + -D Ascent_DIR=/path/to/ascent/install/lib/cmake/ascent +cmake --build build -j +``` + +## Run + +```sh +./build/src/entity.xc -input pgens/examples/turbulence_lines/turbulence_lines.toml +``` + +Ascent writes PNGs into the `turbulence_lines/` simulation directory. + +## Tweaking the field-line render + +The interesting knobs all live in `ascent_actions.yaml`: + +- `num_steps` × `step_size` controls how far each field line is integrated + in code units. The default `200 × 0.5 = 100` covers ~1.5× the box width. +- `rendering/tube_size` sets the rendered tube radius. Reduce it + (e.g. `0.2`) for a finer-looking line, or increase it for a thicker, + easier-to-see line. +- `seeds.num_seeds_x` / `num_seeds_y` / `num_seeds_z` set how many seeds + are placed along each axis of the seeding box (for `sampling_type: + "uniform"`). The example uses a 4×4×4 grid (64 seeds total). Switch to + `sampling_type: "random"` to draw `num_seeds` random points instead. +- `seeds.extents_x` / `extents_y` / `extents_z` define the seeding box. +- Other supported `seeds.type` values are `point`, `point_list`, `line` + — see the Ascent `streamline` filter docs. +- To color the tubes by some derived scalar, add another filter to the + pipeline (e.g. `vector_magnitude` on `B`) before the streamline filter + and reference the scalar in the plot's `field` key. diff --git a/ascent/field_lines/ascent_actions.yaml b/ascent/field_lines/ascent_actions.yaml new file mode 100644 index 000000000..7aeeb197a --- /dev/null +++ b/ascent/field_lines/ascent_actions.yaml @@ -0,0 +1,73 @@ +# Ascent actions for the turbulence_lines example. +# +# Pipeline: +# streamline: integrate field lines through the published vector field +# `B` starting from a uniform grid of seeds across the interior of the +# box. Output is rendered as 3D tubes (`enable_tubes = "true"`) so they +# can be raytraced. +# Scene: +# pseudocolor plot of the streamline output, ray-traced to PNG. +# +# Entity publishes B1/B2/B3 both as scalar fields AND as the (u,v,w) +# components of a Conduit-Blueprint MCArray vector named `B`, so the +# streamline filter can read it directly without an extra +# `composite_vector` step (that step produces a vector layout VTK-h's +# streamline backend in Ascent v0.9.x can't unpack as Vec). +# +# This file is loaded automatically by Ascent when the option +# output.ascent.actions_file = "ascent_actions.yaml" +# is set in the toml input file. + +- + action: "add_pipelines" + pipelines: + pl_lines: + f1: + type: "streamline" + params: + field: "B" + # Each line is integrated for num_steps * step_size code-units. + num_steps: 200 + step_size: 0.5 + seeds: + type: "box" + sampling_type: "uniform" + sampling_space: "interior" + # The Ascent vtkh_streamline filter expects `extents_` + # (not `_extents`) and per-axis seed counts. + extents_x: [-28.0, 28.0] + extents_y: [-28.0, 28.0] + extents_z: [-28.0, 28.0] + num_seeds_x: 4 + num_seeds_y: 4 + num_seeds_z: 4 + rendering: + # Tube-rasterization options live under `rendering/` in this + # version of Ascent. `enable_tubes` is a string ("true"/"false"). + enable_tubes: "true" + tube_size: 0.4 + tube_sides: 6 + output_field: "B_lines" + +- + action: "add_scenes" + scenes: + s_lines: + plots: + p1: + type: "pseudocolor" + pipeline: "pl_lines" + field: "B_lines" + renders: + r1: + type: "raytracer" + image_width: 1024 + image_height: 1024 + image_prefix: "turbulence_lines_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [80.0, 80.0, 80.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 diff --git a/ascent/field_lines/turbulence_lines.toml b/ascent/field_lines/turbulence_lines.toml new file mode 100644 index 000000000..f862f3b3d --- /dev/null +++ b/ascent/field_lines/turbulence_lines.toml @@ -0,0 +1,74 @@ +[simulation] + name = "turbulence_lines" + engine = "srpic" + runtime = 200.0 + +[grid] + resolution = [64, 64, 64] + extent = [[-32.0, 32.0], [-32.0, 32.0], [-32.0, 32.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 4.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 2e6 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 2e6 + +[setup] + temperature = 1e0 + dB = 1.0 + omega_0 = 0.0156 + gamma_0 = 0.0078 + +[output] + format = "BPFile" + interval_time = 20.0 + + [output.fields] + quantities = ["B"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + + [output.stats] + enable = false + + # Field-line rendering of the magnetic field via Ascent. We publish the + # three component scalars and let the actions file (a) composite them + # into a vector, (b) trace streamlines, (c) render as 3D tubes. + [output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B1", "B2", "B3"] + interval_time = 4.0 + +[diagnostics] + colored_stdout = true diff --git a/ascent/simple_cube/README.md b/ascent/simple_cube/README.md new file mode 100644 index 000000000..ba3f36fcb --- /dev/null +++ b/ascent/simple_cube/README.md @@ -0,0 +1,71 @@ +# `ascent_cube` + +Minimal demonstration of the in-situ visualization interface to +[Ascent](https://ascent.readthedocs.io/) shipped with Entity. + +The setup is a 3D periodic cube on a Cartesian Minkowski metric. The +problem generator initializes the magnetic field with a non-trivial third +component: + +``` +B1 = 0 +B2 = 0 +B3 = B0 * sin(2 pi x / Lx) * sin(2 pi y / Ly) +``` + +While the simulation runs, Entity publishes the rectilinear mesh and the +selected field components to Ascent every `output.fields.interval_time`. +Ascent reads `ascent_actions.yaml` and renders one PNG per cycle: + +``` +ascent_cube_B3_0000.png +ascent_cube_B3_0001.png +... +``` + +## Build + +Configure Entity with Ascent enabled. Make sure `Ascent_DIR` points at +your Ascent install (which provides `AscentConfig.cmake`): + +```sh +cmake -S . -B build \ + -D pgen=examples/ascent_cube \ + -D output=ON \ + -D ascent=ON \ + -D Ascent_DIR=/path/to/ascent/install/lib/cmake/ascent +cmake --build build -j +``` + +## Run + +```sh +./build/src/entity.xc -input pgens/examples/ascent_cube/ascent_cube.toml +``` + +The PNGs are written next to the `ascent_cube/` simulation output +directory (controlled by `default_dir` in the writer). + +## Toml options + +In addition to the usual `[output]` keys, this example uses: + +```toml +[output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B3"] + interval_time = 0.02 # Render cadence (independent of [output.fields]) +``` + +`fields` lists the variable names that Entity should publish to Ascent. +Vector field components are postfixed with the index, so `B1`, `B2`, +`B3` give the three components of the magnetic field; `E1`/`E2`/`E3` +the electric field; `J1`/`J2`/`J3` the current. The names appear +verbatim as Conduit field names, so the actions file should reference +them under `field: "B3"`. + +The `interval` / `interval_time` keys control how often Ascent renders; +they are completely independent of the ADIOS field-write cadence in +`[output.fields]`. If both keys are omitted, the global `[output]` +cadence is used. diff --git a/ascent/simple_cube/ascent_actions.yaml b/ascent/simple_cube/ascent_actions.yaml new file mode 100644 index 000000000..56cd98dc3 --- /dev/null +++ b/ascent/simple_cube/ascent_actions.yaml @@ -0,0 +1,27 @@ +# Ascent actions for the ascent_cube example. +# +# This file is loaded automatically by Ascent when the option +# output.ascent.actions_file = "ascent_actions.yaml" +# is set in the toml input file. +# +# Each cycle Entity publishes a 3D rectilinear mesh containing the field +# components requested in `output.ascent.fields`. The pseudocolor plot below +# renders the third component of the magnetic field on a 3D cube. +- + action: "add_scenes" + scenes: + s_b3: + plots: + p1: + type: "pseudocolor" + field: "B3" + renders: + r1: + image_width: 800 + image_height: 800 + image_prefix: "cube_B3_%08d" + camera: + position: [3.0, 3.0, 3.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 diff --git a/ascent/simple_cube/ascent_cube.toml b/ascent/simple_cube/ascent_cube.toml new file mode 100644 index 000000000..4eda5e428 --- /dev/null +++ b/ascent/simple_cube/ascent_cube.toml @@ -0,0 +1,58 @@ +[simulation] + name = "ascent_cube" + engine = "srpic" + runtime = 1.0 + +[grid] + resolution = [64, 64, 64] + extent = [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 0.1 + skindepth0 = 0.1 + +[algorithms.fieldsolver] + enable = true + +[algorithms.deposit] + enable = false + +[particles] + ppc0 = 0.0 + +[setup] + # Amplitude of the initial B3 component. + B0 = 1.0 + +[output] + format = "hdf5" + interval_time = 0.1 + + [output.fields] + quantities = ["B"] + downsampling = [1, 1, 1] + + [output.particles] + enable = false + + [output.spectra] + enable = false + + # In-situ visualization with Ascent. The cadence is independent of the + # ADIOS field-output cadence (set above). Here Ascent renders 5x more + # frequently than fields are dumped to disk. + [output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B3"] + interval_time = 0.02 + +[diagnostics] + interval = 1 diff --git a/ascent/simple_cube/pgen.hpp b/ascent/simple_cube/pgen.hpp new file mode 100644 index 000000000..e97edb90b --- /dev/null +++ b/ascent/simple_cube/pgen.hpp @@ -0,0 +1,84 @@ +#ifndef PROBLEM_GENERATOR_H +#define PROBLEM_GENERATOR_H + +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "traits/pgen.h" +#include "utils/numeric.h" + +#include "framework/domain/metadomain.h" +#include "framework/parameters/parameters.h" + +namespace user { + using namespace ntt; + + /* + * Initial magnetic field on a cubic domain. + * + * B1 = B2 = 0 + * B3 = B0 * sin(2 pi (x - x_lo) / Lx) * sin(2 pi (y - y_lo) / Ly) + * + * The pattern is purely spatial so the cube can be visualised with + * Ascent's pseudocolor plot from the very first output cycle. + */ + template + struct CubeB3Field { + CubeB3Field(real_t b0, real_t lx, real_t ly, real_t x0, real_t y0) + : B0 { b0 } + , Lx { lx } + , Ly { ly } + , x0 { x0 } + , y0 { y0 } {} + + Inline auto bx1(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto bx2(const coord_t&) const -> real_t { + return ZERO; + } + + Inline auto bx3(const coord_t& x_Ph) const -> real_t { + const auto kx = static_cast(constant::TWO_PI) / Lx; + const auto ky = static_cast(constant::TWO_PI) / Ly; + return B0 * math::sin(kx * (x_Ph[0] - x0)) * + math::sin(ky * (x_Ph[1] - y0)); + } + + const real_t B0, Lx, Ly, x0, y0; + }; + + template + struct PGen { + static constexpr auto D { M::Dim }; + static constexpr auto engines { + ::traits::pgen::compatible_with {} + }; + static constexpr auto metrics { + ::traits::pgen::compatible_with {} + }; + static constexpr auto dimensions { + ::traits::pgen::compatible_with {} + }; + + const SimulationParams& params; + CubeB3Field init_flds; + + PGen(const SimulationParams& p, const Metadomain& global_domain) + : params { p } + , init_flds { + params.template get("setup.B0", ONE), + global_domain.mesh().extent(in::x1).second - + global_domain.mesh().extent(in::x1).first, + global_domain.mesh().extent(in::x2).second - + global_domain.mesh().extent(in::x2).first, + global_domain.mesh().extent(in::x1).first, + global_domain.mesh().extent(in::x2).first + } {} + }; + +} // namespace user + +#endif diff --git a/ascent/triptych_NBJ/README.md b/ascent/triptych_NBJ/README.md new file mode 100644 index 000000000..ac1db7c06 --- /dev/null +++ b/ascent/triptych_NBJ/README.md @@ -0,0 +1,104 @@ +# `turbulence_triptych` + +Driven 3D turbulence (re-using the standard +[`turbulence`](../../pgens/turbulence) pgen) with a three-panel in-situ +render via [Ascent](https://ascent.readthedocs.io/): + +1. Particle number density `N`. +2. Magnetic-field magnitude `|B|`. +3. Current-density magnitude `|J|`. + +Each panel is a separate ray-traced volume render with a shared camera +and cadence. Ascent v0.9.x has no built-in render viewport for tiling +multiple scenes inside one PNG, so each quantity is written to its own +PNG stream with a synchronized cycle index in the filename: + +``` +N_00000000.png Bmag_00000000.png Jmag_00000000.png +N_00000001.png Bmag_00000001.png Jmag_00000001.png +... +``` + +To get a single side-by-side image per cycle, tile them with +ImageMagick (or any other compositor): + +```sh +cd turbulence_triptych/plots +for i in $(ls N_*.png | sed 's/N_\([0-9]*\)\.png/\1/'); do + montage -tile 3x1 -geometry +4+0 \ + N_${i}.png Bmag_${i}.png Jmag_${i}.png \ + triptych_${i}.png +done +``` + +## Files + +- `turbulence_triptych.toml` — input file (3D, periodic, driven antenna). +- `ascent_actions.yaml` — pipelines for `|B|` and `|J|` plus three + volume-render scenes. +- `README.md` — this file. + +The C++ problem generator is reused from `pgens/turbulence/pgen.hpp`, +so no separate `pgen.hpp` is required in this directory. + +## Build + +Configure Entity with the `turbulence` pgen and Ascent enabled. +`Ascent_DIR` should point to the directory containing +`AscentConfig.cmake`: + +```sh +cmake -S . -B build \ + -D pgen=turbulence \ + -D output=ON \ + -D ascent=ON \ + -D Ascent_DIR=/path/to/ascent/install/lib/cmake/ascent +cmake --build build -j +``` + +## Run + +```sh +./build/src/entity.xc -input ascent/triptych_NBJ/turbulence_triptych.toml +``` + +Ascent writes PNGs into `turbulence_triptych/plots/`. + +## Toml options of interest + +```toml +[output.fields] + quantities = ["N", "B", "J"] # density + B and J vectors + mom_smooth = 1 # one smoothing pass for the moment + +[output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["N", "B1", "B2", "B3", "J1", "J2", "J3"] + interval_time = 4.0 +``` + +`fields` lists the component scalars Entity publishes to Conduit. The +B1/B2/B3 and J1/J2/J3 entries are also auto-aliased into MCArray vectors +named `B` and `J` (see `src/output/ascent_writer.cpp`), but the +`composite_vector` filter in the actions file is what `vector_magnitude` +consumes here — the proven path for magnitude rendering in the +installed Ascent v0.9.x. + +The Ascent cadence (`interval_time = 4.0`) is independent of the ADIOS +field-output cadence (`output.interval_time = 20.0`), so you'll get +many more PNGs than `.bp` dumps. + +## Tweaking the render + +In `ascent_actions.yaml`: + +- `min_value` / `max_value` per scene control the volume-render + transfer function. `|J|` in particular has a much tighter dynamic + range than `|B|`; adjust if current sheets wash out or vanish. +- Swap `color_table.name` for any VTK-m colormap (`Cool to Warm`, + `Spectral`, `Black-Body Radiation`, ...). +- Replace `volume` with `pseudocolor` to surface-render instead of + ray-cast through the box. +- Camera/`image_width`/`image_height` are duplicated across scenes on + purpose — keep them consistent so the panels tile cleanly. diff --git a/ascent/triptych_NBJ/ascent_actions.yaml b/ascent/triptych_NBJ/ascent_actions.yaml new file mode 100644 index 000000000..97f01e00b --- /dev/null +++ b/ascent/triptych_NBJ/ascent_actions.yaml @@ -0,0 +1,131 @@ +# Ascent actions for the turbulence_triptych example. +# +# Pipelines: +# pl_b: (B1,B2,B3) -> composite_vector -> vector_magnitude -> "Bmag" +# pl_j: (J1,J2,J3) -> composite_vector -> vector_magnitude -> "Jmag" +# +# Scenes (each ray-traced to its own PNG; share camera + cadence): +# s_n: volume render of the published density `N` (no pipeline). +# s_bmag: volume render of |B| from pl_b. +# s_jmag: volume render of |J| from pl_j. +# +# Ascent v0.9.x does not expose render viewports for in-image +# side-by-side composition, so each quantity is written to its own +# stream of PNGs with a synchronized cycle index in the file name +# (`*_%08d`). Tile them externally, e.g.: +# montage -tile 3x1 -geometry +4+0 \ +# N_00000010.png Bmag_00000010.png Jmag_00000010.png triptych_0010.png +# +# This file is loaded automatically by Ascent when the option +# output.ascent.actions_file = "ascent_actions.yaml" +# is set in the toml input file. + +- + action: "add_pipelines" + pipelines: + pl_b: + f1: + type: "composite_vector" + params: + field1: "B1" + field2: "B2" + field3: "B3" + output_name: "B_vec" + f2: + type: "vector_magnitude" + params: + field: "B_vec" + output_name: "Bmag" + pl_j: + f1: + type: "composite_vector" + params: + field1: "J1" + field2: "J2" + field3: "J3" + output_name: "J_vec" + f2: + type: "vector_magnitude" + params: + field: "J_vec" + output_name: "Jmag" + +- + action: "add_scenes" + scenes: + # Particle number density. + s_n: + plots: + p1: + type: "volume" + field: "N" + # Make low densities transparent so structure stands out. + min_value: 1.5 + max_value: 5.0 + color_table: + name: "Viridis" + renders: + r1: + type: "raytracer" + image_width: 1024 + image_height: 1024 + image_prefix: "N_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [80.0, 80.0, 80.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 + + # Magnetic-field magnitude. + s_bmag: + plots: + p1: + type: "volume" + field: "Bmag" + pipeline: "pl_b" + min_value: 0.5 + max_value: 4.0 + color_table: + name: "Plasma" + renders: + r1: + type: "raytracer" + image_width: 1024 + image_height: 1024 + image_prefix: "Bmag_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [80.0, 80.0, 80.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 + + # Current-density magnitude. + s_jmag: + plots: + p1: + type: "volume" + field: "Jmag" + pipeline: "pl_j" + # |J| dynamic range is much smaller than |B|; tighten transfer + # function around the bulk so current sheets stand out. + min_value: 0.05 + max_value: 0.6 + color_table: + name: "Inferno" + renders: + r1: + type: "raytracer" + image_width: 1024 + image_height: 1024 + image_prefix: "Jmag_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [80.0, 80.0, 80.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 diff --git a/ascent/triptych_NBJ/turbulence_triptych.toml b/ascent/triptych_NBJ/turbulence_triptych.toml new file mode 100644 index 000000000..15565a45c --- /dev/null +++ b/ascent/triptych_NBJ/turbulence_triptych.toml @@ -0,0 +1,78 @@ +[simulation] + name = "turbulence_triptych" + engine = "srpic" + runtime = 200.0 + +[grid] + resolution = [64, 64, 64] + extent = [[-32.0, 32.0], [-32.0, 32.0], [-32.0, 32.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 4.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 2e6 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 2e6 + +[setup] + temperature = 1e0 + dB = 1.0 + omega_0 = 0.0156 + gamma_0 = 0.0078 + +[output] + format = "BPFile" + interval_time = 20.0 + + [output.fields] + # `N` is the total particle number density; `B` and `J` are the + # magnetic-field and electric-current vectors. Each vector is + # published to Ascent as three component scalars (B1/B2/B3, J1/J2/J3). + quantities = ["N", "B", "J"] + mom_smooth = 1 + + [output.particles] + enable = false + + [output.spectra] + enable = false + + [output.stats] + enable = false + + # In-situ triptych: three independent ray-traced scenes (N, |B|, |J|), + # each rendered to its own PNG per cycle. Frames share cycle numbers, + # so they can be tiled side-by-side externally (see README). + [output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["N", "B1", "B2", "B3", "J1", "J2", "J3"] + interval_time = 4.0 + +[diagnostics] + colored_stdout = true diff --git a/ascent/volume/README.md b/ascent/volume/README.md new file mode 100644 index 000000000..9f9141e43 --- /dev/null +++ b/ascent/volume/README.md @@ -0,0 +1,84 @@ +# `turbulence_ascent` + +Driven 3D MHD-like turbulence (re-using the standard +[`turbulence`](../../turbulence) pgen) with in-situ volume rendering of +the magnetic-field magnitude `|B|` via [Ascent](https://ascent.readthedocs.io/). + +The setup is a 64³ periodic Cartesian box driven by the same antenna as +the original 2D `turbulence` example, but now in 3D. Each output cycle +the three magnetic-field components are published to Ascent; an Ascent +*pipeline* combines them into a vector field and derives the scalar +`Bmag = |B|`. A *volume* plot then ray-casts `Bmag` to produce one PNG +per render cycle. + +``` +turbulence_ascent_Bmag_0000.png +turbulence_ascent_Bmag_0001.png +... +``` + +## Files + +- `turbulence_ascent.toml` — input file (3D, periodic, driven antenna). +- `ascent_actions.yaml` — Ascent pipeline + volume-render scene. +- `README.md` — this file. + +The C++ problem generator itself is reused from `pgens/turbulence/pgen.hpp`, +so no separate `pgen.hpp` is required in this directory. + +## Build + +Configure Entity with the existing `turbulence` pgen and Ascent enabled. +`Ascent_DIR` should point to the directory containing `AscentConfig.cmake`: + +```sh +cmake -S . -B build \ + -D pgen=turbulence \ + -D output=ON \ + -D ascent=ON \ + -D Ascent_DIR=/path/to/ascent/install/lib/cmake/ascent +cmake --build build -j +``` + +For a quicker run, drop `ppc0` further or shrink the grid in the toml. + +## Run + +```sh +./build/src/entity.xc -input pgens/examples/turbulence_ascent/turbulence_ascent.toml +``` + +Ascent writes PNGs into the `turbulence_ascent/` simulation directory. + +## Toml options of interest + +```toml +[output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B1", "B2", "B3"] + interval_time = 4.0 +``` + +`fields` lists the component scalars Entity publishes to Conduit. The +actions file is responsible for any derived quantities (here `Bmag`). +The Ascent cadence (`interval_time = 4.0`) is independent of the ADIOS +field-output cadence (`output.fields.interval_time` falls back to +`output.interval_time = 20.0`), so you'll get many more PNGs than `.bp` +dumps. + +## Tweaking the render + +To go from a single component to a different derived quantity, edit +`ascent_actions.yaml`. Useful filters in Ascent pipelines include: + +- `composite_vector` — combine scalars into a vector field. +- `vector_magnitude` — `|v|` from a vector field. +- `vector_component` — extract one component of a vector field. +- `gradient` — spatial gradient of a scalar. +- `clip` / `slice` — geometric subset before rendering. + +To switch the rendering style, change the `plots/p1/type` to e.g. +`pseudocolor` (surface) or keep `volume` and adjust `min_value` / +`max_value` to control transparency, or set `samples` to control the +ray-cast resolution. diff --git a/ascent/volume/ascent_actions.yaml b/ascent/volume/ascent_actions.yaml new file mode 100644 index 000000000..a8a7a7935 --- /dev/null +++ b/ascent/volume/ascent_actions.yaml @@ -0,0 +1,56 @@ +# Ascent actions for the turbulence_ascent example. +# +# Pipeline: +# 1. composite_vector: stitch the three published scalars (B1, B2, B3) +# into a single vector field `B`. +# 2. vector_magnitude: derive |B| from `B`. +# Scene: +# 3. volume plot of |B| (ray-cast / volume rendering) from a 3D camera. +# +# This file is loaded automatically by Ascent when the option +# output.ascent.actions_file = "ascent_actions.yaml" +# is set in the toml input file. + +- + action: "add_pipelines" + pipelines: + pl_b: + f1: + type: "composite_vector" + params: + field1: "B1" + field2: "B2" + field3: "B3" + output_name: "B" + f2: + type: "vector_magnitude" + params: + field: "B" + output_name: "Bmag" + +- + action: "add_scenes" + scenes: + s_bmag: + plots: + p1: + type: "volume" + field: "Bmag" + pipeline: "pl_b" + # Tweak the transfer function so weak |B| is transparent and + # strong |B| stands out. + min_value: 0.5 + max_value: 4.0 + renders: + r1: + type: "raytracer" + image_width: 1024 + image_height: 1024 + image_prefix: "turbulence_ascent_Bmag_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [80.0, 80.0, 80.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 diff --git a/ascent/volume/turbulence_ascent.toml b/ascent/volume/turbulence_ascent.toml new file mode 100644 index 000000000..ef918e2a3 --- /dev/null +++ b/ascent/volume/turbulence_ascent.toml @@ -0,0 +1,74 @@ +[simulation] + name = "turbulence_ascent" + engine = "srpic" + runtime = 200.0 + +[grid] + resolution = [64, 64, 64] + extent = [[-32.0, 32.0], [-32.0, 32.0], [-32.0, 32.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 4.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 2e6 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 2e6 + +[setup] + temperature = 1e0 + dB = 1.0 + omega_0 = 0.0156 + gamma_0 = 0.0078 + +[output] + format = "BPFile" + interval_time = 20.0 + + [output.fields] + quantities = ["B"] + + [output.particles] + enable = false + + [output.spectra] + enable = false + + [output.stats] + enable = false + + # Ray-traced volume rendering of |B| via Ascent. Three component scalars + # (B1, B2, B3) are published; the actions file composites them into a + # vector and computes |B| in a pipeline before the volume render. + [output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B1", "B2", "B3"] + interval_time = 4.0 + +[diagnostics] + colored_stdout = true diff --git a/ascent/volume_lines/README.md b/ascent/volume_lines/README.md new file mode 100644 index 000000000..571066a7b --- /dev/null +++ b/ascent/volume_lines/README.md @@ -0,0 +1,81 @@ +# `turbulence_density_lines` + +Driven 3D turbulence (re-using the standard +[`turbulence`](../../turbulence) pgen) with a combined in-situ render via +[Ascent](https://ascent.readthedocs.io/) that **overlays** two +visualizations into one image per cycle: + +1. A ray-traced **volume** render of the particle number density `N`. +2. **Magnetic field lines** traced from `(B1, B2, B3)` and rasterized + as 3D tubes on top of the density volume. + +Both plots live in the same Ascent scene, so they share a camera and +get composited into a single PNG: + +``` +turbulence_density_lines_0000.png +turbulence_density_lines_0001.png +... +``` + +## Files + +- `turbulence_density_lines.toml` — input file. +- `ascent_actions.yaml` — pipeline (composite_vector → streamline) + + two-plot scene (volume + pseudocolor). +- `README.md` — this file. + +The C++ problem generator is reused from `pgens/turbulence/pgen.hpp`. + +## Build + +```sh +cmake -S . -B build \ + -D pgen=turbulence \ + -D output=ON \ + -D ascent=ON \ + -D Ascent_DIR=/path/to/ascent/install/lib/cmake/ascent +cmake --build build -j +``` + +## Run + +```sh +./build/src/entity.xc -input pgens/examples/turbulence_density_lines/turbulence_density_lines.toml +``` + +PNGs are written into the `turbulence_density_lines/` simulation +directory. + +## Toml options + +```toml +[output.fields] + quantities = ["B", "N"] # B is the vector field; N is the moment + mom_smooth = 1 # one smoothing pass for the density moment + +[output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B1", "B2", "B3", "N"] + interval_time = 4.0 +``` + +The four entries in `output.ascent.fields` are pushed to Ascent under +the matching Conduit field names. `B1`/`B2`/`B3` are then composited +into a vector inside the actions file; `N` is rendered directly. + +## Tweaking the look + +In `ascent_actions.yaml`: + +- `p_density.min_value` / `max_value` — control the transparency + transfer function for the volume render. Raising `min_value` makes + weak density invisible (good for letting field lines show through); + lowering `max_value` saturates the bright cores. +- `pl_lines.f2.params.num_seeds` — number of seeded streamlines. +- `pl_lines.f2.params.tube_size` — radius of the rendered tubes. +- `pl_lines.f2.params.num_steps` × `step_size` — total integration length. +- Use a different scalar to color the tubes: chain another filter + (e.g. `vector_magnitude` on `B`) before `streamline` and reference + the resulting scalar in `p_lines.field`. diff --git a/ascent/volume_lines/ascent_actions.yaml b/ascent/volume_lines/ascent_actions.yaml new file mode 100644 index 000000000..63c591adf --- /dev/null +++ b/ascent/volume_lines/ascent_actions.yaml @@ -0,0 +1,81 @@ +# Ascent actions for the turbulence_density_lines example. +# +# Pipeline: +# pl_lines: streamline tracing of the published vector field `B`. Entity +# publishes B1/B2/B3 both as scalars AND as the (u,v,w) +# components of a Conduit-Blueprint MCArray vector named `B`, +# so the streamline filter can consume it directly without an +# extra `composite_vector` step. +# +# Scene with two co-rendered plots: +# p_density: ray-traced volume render of the density `N` directly off the +# published mesh (no pipeline). +# p_lines: pseudocolor of the streamline tubes from `pl_lines`. +# +# Both plots share the same camera/render so they composite into a single +# image per cycle. + +- + action: "add_pipelines" + pipelines: + pl_lines: + f1: + type: "streamline" + params: + field: "B" + num_steps: 200 + step_size: 0.5 + seeds: + type: "box" + sampling_type: "uniform" + sampling_space: "interior" + # `extents_` (not `_extents`) and per-axis seed + # counts are required by this version of Ascent's + # vtkh_streamline filter. + extents_x: [-28.0, 28.0] + extents_y: [-28.0, 28.0] + extents_z: [-28.0, 28.0] + num_seeds_x: 4 + num_seeds_y: 4 + num_seeds_z: 3 + rendering: + # Tube-rasterization options live under `rendering/`. + enable_tubes: "true" + tube_size: 0.35 + tube_sides: 6 + output_field: "B_lines" + +- + action: "add_scenes" + scenes: + s_combo: + plots: + # Background: ray-traced volume render of particle density. + p_density: + type: "volume" + field: "N" + # Make low densities transparent so the field lines show through. + min_value: 1.5 + max_value: 5.0 + color_table: + name: "Viridis" + # Foreground: magnetic field lines as 3D tubes. + p_lines: + type: "pseudocolor" + pipeline: "pl_lines" + field: "B_lines" + color_table: + name: "Plasma" + renders: + r1: + type: "raytracer" + image_width: 1280 + image_height: 1024 + image_prefix: "density_lines_%08d" + bg_color: [0.0, 0.0, 0.0] + fg_color: [1.0, 1.0, 1.0] + camera: + position: [120.0, 120.0, 120.0] + look_at: [0.0, 0.0, 0.0] + up: [0.0, 0.0, 1.0] + fov: 35.0 diff --git a/ascent/volume_lines/turbulence_density_lines.toml b/ascent/volume_lines/turbulence_density_lines.toml new file mode 100644 index 000000000..1faebc9ea --- /dev/null +++ b/ascent/volume_lines/turbulence_density_lines.toml @@ -0,0 +1,77 @@ +[simulation] + name = "turbulence_density_lines" + engine = "srpic" + runtime = 200.0 + +[grid] + resolution = [64, 64, 64] + extent = [[-32.0, 32.0], [-32.0, 32.0], [-32.0, 32.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 4.0 + + [[particles.species]] + label = "e-_p" + mass = 1.0 + charge = -1.0 + maxnpart = 2e6 + + [[particles.species]] + label = "e+_p" + mass = 1.0 + charge = 1.0 + maxnpart = 2e6 + +[setup] + temperature = 1e0 + dB = 1.0 + omega_0 = 0.0156 + gamma_0 = 0.0078 + +[output] + format = "BPFile" + interval_time = 20.0 + + [output.fields] + # `B` provides the three magnetic-field components; `N` is the total + # particle number density, summed over all massive species. + quantities = ["B", "N"] + mom_smooth = 1 + + [output.particles] + enable = false + + [output.spectra] + enable = false + + [output.stats] + enable = false + + # Combined rendering: volume render of the density `N` plus magnetic + # field lines computed from B. Both plots live in the same Ascent + # scene, so they composite into a single PNG per cycle. + [output.ascent] + enable = true + actions_file = "ascent_actions.yaml" + fields = ["B1", "B2", "B3", "N"] + interval_time = 4.0 + +[diagnostics] + colored_stdout = true diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index c03ebaf81..7a860a122 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -59,6 +59,18 @@ endif() set_property(CACHE default_gui PROPERTY TYPE BOOL) +if(DEFINED ENV{Entity_ENABLE_ASCENT}) + set(default_ascent + $ENV{Entity_ENABLE_ASCENT} + CACHE INTERNAL "Default flag for Ascent in situ visualization") +else() + set(default_ascent + OFF + CACHE INTERNAL "Default flag for Ascent in situ visualization") +endif() + +set_property(CACHE default_ascent PROPERTY TYPE BOOL) + if(DEFINED ENV{Entity_ENABLE_MPI}) set(default_mpi $ENV{Entity_ENABLE_MPI} diff --git a/cmake/report.cmake b/cmake/report.cmake index d972fafc4..dcfa561b0 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -102,6 +102,15 @@ printchoices( "${Green}" OUTPUT_REPORT 46) +printchoices( + "Ascent" + "ascent" + "${ON_OFF_VALUES}" + ${ascent} + ${default_ascent} + "${Green}" + ASCENT_REPORT + 46) printchoices( "MPI" "mpi" @@ -213,6 +222,9 @@ string( "\n" " " ${OUTPUT_REPORT} + "\n" + " " + ${ASCENT_REPORT} "\n") string(REPLACE ";" "+" Kokkos_ARCH "${Kokkos_ARCH}") @@ -320,6 +332,11 @@ if(${output}) endif() endif() +if(${ascent}) + string(APPEND REPORT_TEXT " - Ascent: v" ${Ascent_VERSION} "\n") + string(APPEND REPORT_TEXT " " "${Dim}${Ascent_DIR}${ColorReset}" "\n") +endif() + string( APPEND REPORT_TEXT diff --git a/dependencies.py b/dependencies.py index 114cfd145..33c293eae 100755 --- a/dependencies.py +++ b/dependencies.py @@ -37,6 +37,13 @@ KOKKOS_BACKENDS = ["cpu", "cuda", "hip", "sycl"] ADIOS2_MPI_MODES = ["non-mpi", "mpi"] +ASCENT_BACKENDS = ["cpu", "cuda", "hip", "sycl"] +ASCENT_MODES = ["build", "system-module"] +ASCENT_MPI_MODES = ["non-mpi", "mpi"] +# default points at the ALCF/Aurora pre-installed Ascent (used in system-module mode) +ASCENT_DEFAULT_SYSTEM_DIR = ( + "/soft/visualization/ascent/release/v0.9.5/ascent-checkout/lib/cmake/ascent" +) MESSAGE: str = "" @@ -49,12 +56,18 @@ class Settings: install_prefix: str = os.path.join(os.path.expanduser("~"), ".entity") apps: dict = field( - default_factory=lambda: {"Kokkos": False, "adios2": False, "nt2py": False} + default_factory=lambda: { + "Kokkos": False, + "adios2": False, + "Ascent": False, + "nt2py": False, + } ) # versions kokkos_version: str = "5.0.1" adios2_version: str = "2.11.0" + ascent_version: str = "v0.9.5" # options kokkos_backend: str = "cpu" @@ -62,6 +75,12 @@ class Settings: extra_kokkos_flags: List[str] = field(default_factory=list) adios2_mpi: str = "non-mpi" extra_adios2_flags: List[str] = field(default_factory=list) + # Ascent: either build the full third-party stack ("build") or point at a + # pre-installed system module ("system-module", e.g. Aurora). + ascent_backend: str = "cpu" + ascent_mode: str = "build" + ascent_mpi: str = "mpi" + ascent_system_dir: str = ASCENT_DEFAULT_SYSTEM_DIR module_loads: List[str] = field(default_factory=list) @@ -75,10 +94,17 @@ def from_json(self, json_str: str) -> None: versions = data.get("versions", {}) self.kokkos_version = versions.get("Kokkos", self.kokkos_version) self.adios2_version = versions.get("adios2", self.adios2_version) + self.ascent_version = versions.get("Ascent", self.ascent_version) options = data.get("options", {}) self.kokkos_backend = options.get("kokkos_backend", self.kokkos_backend) self.kokkos_arch = options.get("kokkos_arch", self.kokkos_arch) self.adios2_mpi = options.get("adios2_mpi", self.adios2_mpi) + self.ascent_backend = options.get("ascent_backend", self.ascent_backend) + self.ascent_mode = options.get("ascent_mode", self.ascent_mode) + self.ascent_mpi = options.get("ascent_mpi", self.ascent_mpi) + self.ascent_system_dir = options.get( + "ascent_system_dir", self.ascent_system_dir + ) self.module_loads = data.get("module_loads", self.module_loads) def apps_summary(self) -> str: @@ -96,11 +122,16 @@ def to_json(self) -> str: "versions": { "Kokkos": self.kokkos_version, "adios2": self.adios2_version, + "Ascent": self.ascent_version, }, "options": { "kokkos_backend": self.kokkos_backend, "kokkos_arch": self.kokkos_arch, "adios2_mpi": self.adios2_mpi, + "ascent_backend": self.ascent_backend, + "ascent_mode": self.ascent_mode, + "ascent_mpi": self.ascent_mpi, + "ascent_system_dir": self.ascent_system_dir, }, "module_loads": self.module_loads, }, @@ -269,6 +300,179 @@ def InstallAdios2Script(settings: Settings) -> tuple[str, str]: return ("""# skipping Adios2 install""", "") +def InstallAscentScript(settings: Settings) -> tuple[str, str]: + if not settings.apps.get("Ascent", False): + return ("""# skipping Ascent install""", "") + + prefix = settings.install_prefix + version = settings.ascent_version + backend = settings.ascent_backend + mode = settings.ascent_mode + mpi_mode = settings.ascent_mpi + modules_in_module = "\n".join( + [f"module load {module}" for module in settings.module_loads] + ) + + # ----- system-module mode: don't build, point at a pre-installed Ascent ----- + if mode == "system-module": + ascent_dir = settings.ascent_system_dir.strip() + script = f""" + # Ascent: using a pre-installed system module -- nothing to build. + # entity will be pointed at: + # Ascent_DIR={ascent_dir} + # Make sure the matching runtime libraries are on LD_LIBRARY_PATH before + # configuring/running entity, e.g. on Aurora: + # module use /soft/modulefiles && module load ascent + echo "Ascent: using system install at {ascent_dir}" + """ + + modfile = f""" + #%Module1.0###################################################################### + ## + ## Ascent @ system-module modulefile + ## + ################################################################################# + proc ModulesHelp {{ }} {{ + puts stderr \"\\tAscent @ system install\\n\" + }} + + module-whatis \"Sets up Ascent (system install)\" + + conflict ascent + {modules_in_module} + + setenv Ascent_DIR {ascent_dir} + prepend-path CMAKE_PREFIX_PATH {ascent_dir}""" + + return (unindent(script), unindent(modfile)) + + # ----- build mode: build the full TPL stack via Ascent's build_ascent.sh ----- + modules = "\n".join( + [f"module load {module} && \\" for module in settings.module_loads] + ) + src_path = os.path.join(prefix, "src", "ascent") + install_path = os.path.join(prefix, "ascent", version, backend) + if os.path.exists(install_path) and not settings.overwrite: + raise FileExistsError( + f"Ascent install path {install_path} already exists and overwrite is disabled" + ) + + # build_ascent.sh installs everything under `/install/`, with the + # Ascent install dir named `ascent-checkout` when the script is run from + # inside an Ascent source checkout (which is what we do below by cloning the + # repo at the requested tag and invoking its *bundled* script). TPL install + # dirs are version-stamped (e.g. conduit-v0.9.5), so the modulefile globs + # them onto LD_LIBRARY_PATH rather than hard-coding the names. + install_dir = os.path.join(install_path, "install") + ascent_dir = os.path.join( + install_dir, "ascent-checkout", "lib", "cmake", "ascent" + ) + + enable_mpi = "ON" if mpi_mode == "mpi" else "OFF" + backend_flag = { + "cpu": "enable_openmp=ON", + "cuda": "enable_cuda=ON", + "hip": "enable_hip=ON", + "sycl": "enable_sycl=ON", + }[backend] + # compiler hints for backends where the default gcc/g++ is wrong; edit as needed + compiler_hint = { + "cpu": "", + "cuda": "", + "hip": "CC=hipcc CXX=hipcc \\\n ", + "sycl": "CC=icx CXX=icpx \\\n ", + }[backend] + + # Frontier (HIP): use the verified ORNL recipe instead of CC=hipcc CXX=hipcc. + # The Cray wrappers (cc/CC/ftn) are the *host* compilers; build_ascent.sh sets + # the HIP *device* compiler itself. Forcing hipcc as host makes every TPL .cpp + # compile as HIP (-x hip), which breaks non-GPU TPLs such as zfp. The raw ROCm + # clang++ then handles HIP device compiles and bypasses the wrapper, so MPI and + # ROCm must be supplied explicitly: CPATH gives HIP TUs mpi.h (vtk-m flow + # filters), and ROCM_PATH lets BLT (camp/raja/umpire) find the ROCm root. + # enable_find_mpi=OFF since the Cray wrappers already inject MPI. Mirrors + # scripts/build_ascent/build_ascent_hip_frontier.sh. + cray_env = "" + extra_env_flags = "" + if settings.cluster == "frontier" and backend == "hip": + cray_env = ( + "export MPICH_GPU_SUPPORT_ENABLED=1 && \\\n" + "export ROCM_ARCH=gfx90a && \\\n" + 'export ROCM_PATH="${ROCM_PATH:-/opt/rocm-6.2.4}" && \\\n' + "export CC=$(which cc) CXX=$(which CC) FTN=$(which ftn) && \\\n" + 'export CFLAGS="${CRAY_ROCM_INCLUDE_OPTS}" && \\\n' + 'export CXXFLAGS="${CRAY_ROCM_INCLUDE_OPTS} -Wno-pass-failed" && \\\n' + 'export LDFLAGS="${CRAY_ROCM_POST_LINK_OPTS}" && \\\n' + 'export CPATH="${MPICH_DIR}/include${CPATH:+:$CPATH}" && \\\n' + ) + compiler_hint = "" + extra_env_flags = "enable_find_mpi=OFF \\\n " + + # We clone the repo at `version` and run its *own* build_ascent.sh so that + # the exact tagged Ascent source is built (the standalone script otherwise + # defaults to the `develop` branch regardless of where it was fetched from). + script = f""" +# Ascent installation (full third-party stack via build_ascent.sh) +# build_ascent.sh downloads & builds zlib, conduit, vtk-m, camp, raja, umpire +# and Ascent itself. Review/adjust the env vars below before running: +# - for CUDA set CUDA_ARCH (e.g. CUDA_ARCH=80) +# - for HIP set ROCM_ARCH (e.g. ROCM_ARCH=gfx90a) and ROCM_PATH +# - set CC/CXX to your MPI/GPU compiler wrappers as appropriate +# NOTE: the SYCL/Intel-GPU path is the least mature; on Aurora prefer the +# 'system-module' mode instead of building from scratch. +{modules} +rm -rf {src_path} {install_path} && \\ +git clone --recursive https://github.com/Alpine-DAV/ascent.git {src_path} && \\ +cd {src_path} && \\ +git checkout {version} && \\ +git submodule update --init --recursive && \\ +# Ascent only needs the zfp *library*, not its tests (compiled as -x hip under a +# HIP host compiler, which breaks) or its OpenMP (leaves libzfp.so with unresolved +# OpenMP symbols -> ld.lld --no-allow-shlib-undefined). enable_tests=OFF above +# does not reach zfp, so force both off here. +sed -i 's|zfp_extra_cmake_opts="-DBUILD_ZFPY=${{enable_python}}"|zfp_extra_cmake_opts="-DBUILD_ZFPY=${{enable_python}} -DBUILD_TESTING=OFF -DZFP_WITH_OPENMP=OFF"|' {src_path}/scripts/build_ascent/build_ascent.sh && \\ +# Ascent auto-enables its Sphinx docs if it finds a system sphinx-build; that build +# is fragile (needs sphinxcontrib-jquery etc.) and Ascent itself doesn't need docs. +sed -i 's|-C ${{root_dir}}/ascent-config.cmake|& -DENABLE_DOCS=OFF|' {src_path}/scripts/build_ascent/build_ascent.sh && \\ +mkdir -p {install_path} && \\ +{cray_env}env \\ + prefix={install_path} \\ + build_jobs=$(nproc) \\ + enable_mpi={enable_mpi} \\ + {extra_env_flags}enable_fortran=OFF \\ + enable_python=OFF \\ + enable_tests=OFF \\ + {backend_flag} \\ + {compiler_hint}bash {src_path}/scripts/build_ascent/build_ascent.sh +# After a successful build, Ascent_DIR will be: +# {ascent_dir}""" + + modfile = f""" +#%Module1.0###################################################################### +## +## Ascent @ {backend} modulefile +## +################################################################################# +proc ModulesHelp {{ }} {{ + puts stderr \"\\tAscent @ {backend}\\n\" +}} + +module-whatis \"Sets up Ascent @ {backend}\" + +conflict ascent +{modules_in_module} + +setenv Ascent_DIR {ascent_dir} +prepend-path CMAKE_PREFIX_PATH {ascent_dir} + +# third-party runtime libraries (TPL dir names are version-stamped, so glob them) +foreach d [glob -nocomplain {install_dir}/*/lib {install_dir}/*/lib64] {{ + prepend-path LD_LIBRARY_PATH $d +}}""" + + return (unindent(script), unindent(modfile)) + + def InstallNt2pyScript(settings: Settings) -> str: if settings.apps.get("nt2py", False): prefix = settings.install_prefix @@ -325,8 +529,38 @@ def InstallNt2pyScript(settings: Settings) -> str: "CMAKE_C_COMPILER=cc" ] }, - "frontier": {"module_loads": []}, - "aurora": {"module_loads": []}, + "frontier": { + # explicit modules matching the verified `entity` collection (CPE 24.11 + + # rocm 6.2.4); craype-accel-amd-gfx90a/rocm/cray-mpich provide the + # CRAY_ROCM_*/MPICH_DIR/ROCM_PATH that the hip Ascent build below relies on. + "module_loads": [ + "PrgEnv-cray/8.6.0", + "cce/18.0.1", + "craype-accel-amd-gfx90a", + "rocm/6.2.4", + "cray-mpich/8.1.31", + "cmake/3.30.5", + ], + "kokkos_backend": "hip", + "kokkos_arch": "AMD_GFX90A", + "extra_kokkos_flags": [ + "CMAKE_CXX_COMPILER=hipcc", + "AMDGPU_TARGETS=gfx90a", + ], + "adios2_mpi": "mpi", + "ascent": True, + "ascent_mode": "build", + "ascent_backend": "hip", + "ascent_mpi": "mpi", + }, + "aurora": { + "module_loads": [], + "kokkos_backend": "sycl", + "ascent": True, + "ascent_mode": "system-module", + "ascent_backend": "sycl", + "ascent_system_dir": ASCENT_DEFAULT_SYSTEM_DIR, + }, } @@ -336,6 +570,7 @@ def apply_preset(s: Settings, name: str) -> None: cluster_preset = PRESETS.get(name, {}) s.apps["Kokkos"] = True s.apps["adios2"] = True + s.apps["Ascent"] = cluster_preset.get("ascent", False) s.apps["nt2py"] = False s.write_modulefiles = True s.overwrite = True @@ -345,6 +580,14 @@ def apply_preset(s: Settings, name: str) -> None: s.adios2_mpi = cluster_preset.get("adios2_mpi", "mpi") s.extra_kokkos_flags = cluster_preset.get("extra_kokkos_flags", []) s.extra_adios2_flags = cluster_preset.get("extra_adios2_flags", []) + s.ascent_mode = cluster_preset.get("ascent_mode", "build") + s.ascent_backend = cluster_preset.get( + "ascent_backend", s.kokkos_backend + ) + s.ascent_mpi = cluster_preset.get("ascent_mpi", "mpi") + s.ascent_system_dir = cluster_preset.get( + "ascent_system_dir", ASCENT_DEFAULT_SYSTEM_DIR + ) def on_install_confirmed(settings: Settings) -> None: @@ -352,11 +595,14 @@ def on_install_confirmed(settings: Settings) -> None: os.makedirs(settings.install_prefix, exist_ok=True) kokkos_script, kokkos_modfile = InstallKokkosScriptModfile(settings) adios2_script, adios2_modfile = InstallAdios2Script(settings) + ascent_script, ascent_modfile = InstallAscentScript(settings) with open(os.path.join(settings.install_prefix, "install.sh"), "w") as f: f.write("#!/usr/bin/env bash\n\n") f.write(kokkos_script) f.write("\n\n") f.write(adios2_script) + f.write("\n\n") + f.write(ascent_script) f.write("\n") if settings.write_modulefiles: os.makedirs(os.path.join(settings.install_prefix, "modules"), exist_ok=True) @@ -391,6 +637,26 @@ def on_install_confirmed(settings: Settings) -> None: ) with open(adios2_modfile_file, "w") as f: f.write(adios2_modfile) + if ascent_modfile != "": + ascent_leaf = ( + "system" + if settings.ascent_mode == "system-module" + else settings.ascent_backend + ) + ascent_modfile_file = os.path.join( + settings.install_prefix, + "modules", + "ascent", + settings.ascent_version, + ascent_leaf, + ) + os.makedirs(os.path.dirname(ascent_modfile_file), exist_ok=True) + if os.path.exists(ascent_modfile_file) and not settings.overwrite: + raise FileExistsError( + f"modulefile {ascent_modfile_file} already exists and overwrite is disabled" + ) + with open(ascent_modfile_file, "w") as f: + f.write(ascent_modfile) os.chmod(os.path.join(settings.install_prefix, "install.sh"), 0o755) MESSAGE = f"- installation script written to {os.path.join(settings.install_prefix, 'install.sh')}!\n" @@ -399,6 +665,22 @@ def on_install_confirmed(settings: Settings) -> None: MESSAGE += f"- module files have been written to {os.path.join(settings.install_prefix, 'modules')} directory.\n" MESSAGE += f" add them to your .rc script as `module use --append {os.path.join(settings.install_prefix, 'modules')}`\n\n" + if settings.apps.get("Ascent", False): + if settings.ascent_mode == "system-module": + MESSAGE += ( + "- Ascent set to use the pre-installed system module at " + f"{settings.ascent_system_dir}.\n" + " load the matching system module before building/running entity " + "(e.g. `module use /soft/modulefiles && module load ascent`).\n\n" + ) + else: + MESSAGE += ( + "- Ascent will be built from scratch via build_ascent.sh " + "(zlib, conduit, vtk-m, camp, raja, umpire + Ascent).\n" + " this is a long build (vtk-m alone is heavy); review the env vars " + "in install.sh (CC/CXX, CUDA_ARCH/ROCM_ARCH) before running.\n\n" + ) + if settings.apps.get("nt2py", False): MESSAGE += ( "- nt2py installed in a new virtual environment at " @@ -484,6 +766,11 @@ def kokkos_right(self) -> str: def adios2_right(self) -> str: return f"{self.s.adios2_version} · {self.s.adios2_mpi}" + def ascent_right(self) -> str: + if self.s.ascent_mode == "system-module": + return f"{self.s.ascent_version} · system-module" + return f"{self.s.ascent_version} · {self.s.ascent_backend} · build" + # ----- nav stack ----- def push(self, st: str) -> None: @@ -684,12 +971,13 @@ def confirm_install(self) -> bool: f"dependencies: {self.s.apps_summary()}", f"kokkos: {self.s.kokkos_version} · {self.s.kokkos_backend} · {arch}", f"adios2: {self.s.adios2_version} · {self.s.adios2_mpi}", - "", - "confirm install?", ] + if self.s.apps.get("Ascent", False): + lines.append(f"ascent: {self.ascent_right()}") + lines += ["", "confirm install?"] h, w = self.stdscr.getmaxyx() - win_h, win_w = min(16, max(10, h - 6)), min(94, max(52, w - 6)) + win_h, win_w = min(18, max(10, h - 6)), min(94, max(52, w - 6)) top, left = max(0, (h - win_h) // 2), max(0, (w - win_w) // 2) win = curses.newwin(win_h, win_w, top, left) @@ -858,6 +1146,13 @@ def edit_adios2(): if val: self.s.adios2_version = val.strip() + def edit_ascent(): + val = self.input_box( + "ascent version", "enter version/tag:", self.s.ascent_version + ) + if val: + self.s.ascent_version = val.strip() + return ( "versions", "set versions:", @@ -874,6 +1169,12 @@ def edit_adios2(): right=lambda: self.s.adios2_version, on_enter=edit_adios2, ), + MenuItem( + "ascent version", + "enter to edit", + right=lambda: self.s.ascent_version, + on_enter=edit_ascent, + ), MenuItem("back", "return", on_enter=self.pop), ], ) @@ -892,6 +1193,26 @@ def edit_kokkos_arch(): def cycle_adios2(): self.s.adios2_mpi = self.cycle(self.s.adios2_mpi, ADIOS2_MPI_MODES) + def cycle_ascent_backend(): + self.s.ascent_backend = self.cycle( + self.s.ascent_backend, ASCENT_BACKENDS + ) + + def cycle_ascent_mode(): + self.s.ascent_mode = self.cycle(self.s.ascent_mode, ASCENT_MODES) + + def cycle_ascent_mpi(): + self.s.ascent_mpi = self.cycle(self.s.ascent_mpi, ASCENT_MPI_MODES) + + def edit_ascent_dir(): + val = self.input_box( + "ascent system dir", + "path to a pre-installed Ascent cmake dir:", + self.s.ascent_system_dir, + ) + if val is not None: + self.s.ascent_system_dir = val.strip() + return ( "options", "set build options:", @@ -919,6 +1240,40 @@ def cycle_adios2(): on_space=cycle_adios2, disabled=lambda: not self.s.apps.get("adios2", False), ), + MenuItem( + "ascent mode", + "space cycles: build/system-module", + right=lambda: self.s.ascent_mode, + on_enter=cycle_ascent_mode, + on_space=cycle_ascent_mode, + disabled=lambda: not self.s.apps.get("Ascent", False), + ), + MenuItem( + "ascent backend", + "space cycles: cpu/cuda/hip/sycl (build mode)", + right=lambda: self.s.ascent_backend, + on_enter=cycle_ascent_backend, + on_space=cycle_ascent_backend, + disabled=lambda: not self.s.apps.get("Ascent", False) + or self.s.ascent_mode == "system-module", + ), + MenuItem( + "ascent mpi", + "space cycles: non-mpi/mpi (build mode)", + right=lambda: self.s.ascent_mpi, + on_enter=cycle_ascent_mpi, + on_space=cycle_ascent_mpi, + disabled=lambda: not self.s.apps.get("Ascent", False) + or self.s.ascent_mode == "system-module", + ), + MenuItem( + "ascent system dir", + "enter to edit (system-module mode)", + right=lambda: self.s.ascent_system_dir, + on_enter=edit_ascent_dir, + disabled=lambda: not self.s.apps.get("Ascent", False) + or self.s.ascent_mode != "system-module", + ), MenuItem("back", "return", on_enter=self.pop), ], ) @@ -1018,10 +1373,11 @@ def do_install(): [ f"kokkos {self.s.kokkos_version}", f"adios2 {self.s.adios2_version}", + f"ascent {self.s.ascent_version}", ], [ self.s.apps.get(app, False) - for app in ["Kokkos", "adios2"] + for app in ["Kokkos", "adios2", "Ascent"] ], ) if ae @@ -1039,10 +1395,11 @@ def do_install(): [ f"kokkos {self.s.kokkos_backend}/{self.s.kokkos_arch.strip() or '-'}", f"adios2 {self.s.adios2_mpi}", + f"ascent {self.s.ascent_mode if self.s.ascent_mode == 'system-module' else self.s.ascent_backend}", ], [ self.s.apps.get(app, False) - for app in ["Kokkos", "adios2"] + for app in ["Kokkos", "adios2", "Ascent"] ], ) if ae @@ -1077,6 +1434,13 @@ def toggle(k: str): on_space=lambda: toggle("adios2"), right=self.adios2_right, ), + MenuItem( + f"{self.checkbox(self.s.apps.get('Ascent', False))} ascent", + "", + on_enter=lambda: toggle("Ascent"), + on_space=lambda: toggle("Ascent"), + right=self.ascent_right, + ), MenuItem( f"{self.checkbox(self.s.apps.get('nt2py', False))} nt2py", "", diff --git a/input.example.toml b/input.example.toml index 87ca48f3c..a15b4c6d3 100644 --- a/input.example.toml +++ b/input.example.toml @@ -28,6 +28,35 @@ # @example: [2, 2, 2] (total of 8 domains) decomposition = "" + # Diffusion-style dynamic load balancing (Cartesian metrics only). + # Domain boundaries between MPI neighbors are nudged to equalize the + # active-particle count per rank. All inter-rank traffic uses only the + # existing nearest-neighbor field/particle communication paths. + [simulation.domain.load_balance] + # Enable dynamic load balancing + # @type: bool + # @default: false + enable = "" + # Run the rebalancer every `interval` timesteps (0 disables) + # @type: int + # @default: 0 + interval = "" + # Dimensions along which load is redistributed (1 = x1, 2 = x2, 3 = x3) + # @type: array of int, subset of [1, 2, 3] + # @default: [1] + dimensions = "" + # Skip rebalancing along a dim when (max - min) / mean of the per-slice + # particle count is below this fraction + # @type: float + # @default: 0.1 + tolerance = "" + # Maximum cell-shift per interior boundary per event; clamped at compile + # time to N_GHOSTS so the migrating field strip is already cached in the + # rank's ghost zone. + # @type: int + # @default: N_GHOSTS + max_shift = "" + [grid] # Spatial resolution of the grid # @required @@ -625,6 +654,70 @@ # @default: false ghosts = "" + [output.ascent] + # Enable in-situ visualization via Ascent (https://ascent.readthedocs.io/) + # @type: bool + # @default: false + # @note: Entity must be configured with `-D ascent=ON`; otherwise this is ignored + enable = "" + # Path to a YAML/JSON file describing the Ascent actions (scenes, plots, ...) + # @type: string + # @default: "" + # @note: When empty, Ascent will look for `ascent_actions.yaml` in the cwd + actions_file = "" + # Names of field quantities to publish to Ascent every render cycle + # @type: array + # @default: [] + # @note: Vector components are suffixed with the index, e.g. "B3" is the third component of B + fields = "" + # When true, components named "1"/"2"/"3" are ALSO published as the + # u/v/w components of an MCArray vector field "" (so streamline / gradient + # filters can reference the prefix directly). Disable to halve the published + # mesh size when the actions file only uses the scalar form (or only the + # vector form). + # @type: bool + # @default: true + vector_aliases = "" + # Per-axis stride applied when extracting fields for Ascent. A factor of + # `k` cuts the published mesh and per-render rendering cost by `k` along + # that axis (`k^Dim` overall in 3D). Aligned to the global grid so cells + # line up across MPI ranks. Independent of `output.fields.downsampling`. + # @type: array [length = `grid.dim`] OR uint (broadcast to all axes) + # @default: 1 (no downsampling) + # @note: Each factor must be > 0 and small enough to leave at least one cell per rank. + downsample = "" + # Number of timesteps between Ascent renders + # @type: uint + # @default: 0 + # @note: When `!= 0`, overrides `output.interval` + # @note: When `== 0` and `interval_time == -1`, falls back to the global `output.interval` + # @note: This cadence is independent of `output.fields.interval` + interval = "" + # Physical (code) time interval between Ascent renders + # @type: float + # @default: -1.0 + # @note: When `>= 0`, overrides `interval` + # @note: This cadence is independent of `output.fields.interval_time` + interval_time = "" + # Drift velocity along the x-axis (code units). At render time + # `v_drift * t` is added to the x-component of every `camera/position` + # and `camera/look_at` in the actions file, so the camera tracks a + # feature drifting through the simulation domain (e.g. a shock front + # under a moving window). + # @type: float + # @default: 0.0 + # @note: Zero leaves the actions file untouched. + v_drift = "" + # Rotational velocity (radians per code time unit) of the camera + # about its `look_at` point, using the camera's `up` vector as the + # rotation axis. Combined freely with `v_drift`: the camera orbits + # the (drifted) focus point at angular rate `v_rot`, so theta = v_rot * t. + # @type: float + # @default: 0.0 + # @note: Zero leaves camera positions unrotated. + # @note: A full orbit takes `2*pi / v_rot` code time units. + v_rot = "" + [output.stats] # Toggle for the stats output # @type: bool diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 057bfcb5a..2a91e515d 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -253,8 +253,9 @@ namespace ntt { "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", "Injector", "Custom", + "LoadBalance", "ParticleSort", "Output", - "Checkpoint" }, + "Ascent", "Checkpoint" }, []() { Kokkos::fence(); }, @@ -267,6 +268,17 @@ namespace ntt { const auto clear_interval = m_params.template get( "particles.clear_interval"); + const auto lb_enable = m_params.template get( + "simulation.domain.load_balance.enable"); + const auto lb_interval = m_params.template get( + "simulation.domain.load_balance.interval"); + const auto lb_dim_mask = m_params.template get( + "simulation.domain.load_balance.dim_mask"); + const auto lb_tolerance = m_params.template get( + "simulation.domain.load_balance.tolerance"); + const auto lb_max_shift = m_params.template get( + "simulation.domain.load_balance.max_shift"); + // main algorithm loop while (step < max_steps) { // run the engine-dependent algorithm step @@ -282,6 +294,27 @@ namespace ntt { }); timers.stop("Custom"); } + if constexpr (MetricClass) { + if (lb_enable and lb_interval > 0 and (step + 1) % lb_interval == 0 and + lb_dim_mask != 0u) { + timers.start("LoadBalance"); + m_metadomain.Rebalance(lb_dim_mask, + lb_tolerance, + static_cast(lb_max_shift)); + timers.stop("LoadBalance"); + } + } + // Sort particles last — after step_forward, CustomPostStep, and + // LoadBalance — so the tile layout (and dead-particle compaction) the + // next step's deposit relies on reflects every particle change made this + // step: moving-window shift/injection and in-place dead-tagging done in + // CustomPostStep, plus any rebalance migration. + timers.start("ParticleSort"); + m_metadomain.runOnLocalDomains([this](auto& dom) { + m_metadomain.SortParticles(time, step, m_params, dom); + }); + timers.stop("ParticleSort"); + auto print_prtl_clear = (clear_interval > 0 and step % clear_interval == 0 and step > 0); @@ -290,6 +323,7 @@ namespace ntt { ++step; auto print_output = false; + auto print_ascent = false; auto print_checkpoint = false; #if defined(OUTPUT_ENABLED) timers.start("Output"); @@ -335,6 +369,12 @@ namespace ntt { } timers.stop("Output"); +#if defined(ASCENT_ENABLED) + timers.start("Ascent"); + print_ascent = m_metadomain.RenderAscent(step, time); + timers.stop("Ascent"); +#endif + timers.start("Checkpoint"); print_checkpoint = m_metadomain.WriteCheckpoint(m_params, step, @@ -361,6 +401,7 @@ namespace ntt { m_metadomain.l_maxnpart_perspec(), print_prtl_clear, print_output, + print_ascent, print_checkpoint, m_params.get("diagnostics.colored_stdout")); } diff --git a/src/engines/grpic/grpic.hpp b/src/engines/grpic/grpic.hpp index 621592955..3487d2867 100644 --- a/src/engines/grpic/grpic.hpp +++ b/src/engines/grpic/grpic.hpp @@ -613,9 +613,9 @@ namespace ntt { timers.stop("FieldBoundaries"); } - timers.start("ParticleSort"); - m_metadomain.SortParticles(time, step, m_params, dom); - timers.stop("ParticleSort"); + // NOTE: particle sorting is intentionally NOT done here. It runs once per + // step in the engine loop (Engine::run) after CustomPostStep and + // LoadBalance — see the SRPIC engine for the rationale. /** * Finally: em0::B at n-1/2 diff --git a/src/engines/srpic/srpic.hpp b/src/engines/srpic/srpic.hpp index 94a8949c1..d0352318e 100644 --- a/src/engines/srpic/srpic.hpp +++ b/src/engines/srpic/srpic.hpp @@ -182,9 +182,10 @@ namespace ntt { timers.stop("Injector"); } - timers.start("ParticleSort"); - m_metadomain.SortParticles(time, step, m_params, dom); - timers.stop("ParticleSort"); + // NOTE: particle sorting is intentionally NOT done here. It runs once per + // step in the engine loop (Engine::run) after CustomPostStep and + // LoadBalance, so the layout the next deposit uses reflects window + // shifts/injection and dead-tagging performed in CustomPostStep. } }; diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 5343f3d8a..377c1f98a 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -19,6 +19,7 @@ # * domain/metadomain_stats.cpp # * domain/metadomain_io.cpp # * domain/metadomain_reshape.cpp +# * domain/metadomain_loadbal.cpp # * containers/particles.cpp # * containers/particles_comm.cpp # * containers/particles_io.cpp @@ -59,6 +60,7 @@ set(SOURCES ${SRC_DIR}/domain/metadomain_sort.cpp ${SRC_DIR}/domain/metadomain_stats.cpp ${SRC_DIR}/domain/metadomain_reshape.cpp + ${SRC_DIR}/domain/metadomain_loadbal.cpp ${SRC_DIR}/containers/particles.cpp ${SRC_DIR}/containers/particles_sort.cpp ${SRC_DIR}/containers/fields.cpp) diff --git a/src/framework/containers/fields.h b/src/framework/containers/fields.h index 4acabf7b4..2dcfd0a7e 100644 --- a/src/framework/containers/fields.h +++ b/src/framework/containers/fields.h @@ -175,7 +175,10 @@ namespace ntt { void CheckpointRead(adios2::IO&, adios2::Engine&, const adios2::Box&); - void CheckpointWrite(adios2::IO&, adios2::Engine&) const; + void CheckpointWrite(adios2::IO&, + adios2::Engine&, + const std::vector&, + const std::vector&) const; #endif }; diff --git a/src/framework/containers/fields_io.cpp b/src/framework/containers/fields_io.cpp index 683519493..a17ab1430 100644 --- a/src/framework/containers/fields_io.cpp +++ b/src/framework/containers/fields_io.cpp @@ -62,13 +62,27 @@ namespace ntt { } template - void Fields::CheckpointWrite(adios2::IO& io, adios2::Engine& writer) const { + void Fields::CheckpointWrite( + adios2::IO& io, + adios2::Engine& writer, + const std::vector& local_shape, + const std::vector& local_offset) const { logger::Checkpoint("Writing fields checkpoint", HERE); - out::WriteNDField(io, writer, "em", em); + // Per-rank slab: re-set the variable selection to track the (possibly + // rebalanced) local layout. The component axis is always full. + auto build_range = [&](unsigned short ncomp) { + auto start = adios2::Dims(local_offset.begin(), local_offset.end()); + auto count = adios2::Dims(local_shape.begin(), local_shape.end()); + start.push_back(0); + count.push_back(ncomp); + return adios2::Box(start, count); + }; + + out::WriteNDField(io, writer, "em", em, build_range(6)); if (S == ntt::SimEngine::GRPIC) { - out::WriteNDField(io, writer, "em0", em0); - out::WriteNDField(io, writer, "cur", cur); + out::WriteNDField(io, writer, "em0", em0, build_range(6)); + out::WriteNDField(io, writer, "cur", cur, build_range(3)); } } @@ -81,7 +95,10 @@ namespace ntt { template void Fields::CheckpointRead(adios2::IO&, \ adios2::Engine&, \ const adios2::Box&); \ - template void Fields::CheckpointWrite(adios2::IO&, adios2::Engine&) const; + template void Fields::CheckpointWrite(adios2::IO&, \ + adios2::Engine&, \ + const std::vector&, \ + const std::vector&) const; FIELDS_CHECKPOINTS(Dim::_1D, SimEngine::SRPIC) FIELDS_CHECKPOINTS(Dim::_2D, SimEngine::SRPIC) diff --git a/src/framework/domain/domain.h b/src/framework/domain/domain.h index 747ab4576..0b708e555 100644 --- a/src/framework/domain/domain.h +++ b/src/framework/domain/domain.h @@ -165,6 +165,10 @@ namespace ntt { m_neighbor_idx[dir] = idx; } + void set_offset_ncells(const std::vector& off) { + m_offset_ncells = off; + } + /* printer overload ----------------------------------------------------- */ auto Report() const -> std::string { std::string report; diff --git a/src/framework/domain/mesh.h b/src/framework/domain/mesh.h index aa2eea726..50449ae49 100644 --- a/src/framework/domain/mesh.h +++ b/src/framework/domain/mesh.h @@ -65,6 +65,15 @@ namespace ntt { new (&metric) M { this->m_resolution, new_extent, m_metric_params_raw }; } + void set_resolution_and_extent(const std::vector& new_res, + const boundaries_t& new_extent) { + raise::ErrorIf(new_res.size() != D, "invalid resolution dim", HERE); + this->m_resolution = new_res; + m_extent = new_extent; + metric.~M(); + new (&metric) M { this->m_resolution, m_extent, m_metric_params_raw }; + } + /** * @brief Get the intersection of the mesh with a box * @param box physical extent diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 8af9e6e0b..e0231d9c8 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -45,6 +45,10 @@ #include #endif // OUTPUT_ENABLED +#if defined(ASCENT_ENABLED) + #include "output/ascent_writer.h" +#endif // ASCENT_ENABLED + #include #include #include @@ -132,6 +136,23 @@ namespace ntt { /* domain update-related ------------------------------------------------ */ void ShiftByCells(int, in = in::x1); + /** + * @brief Rebalance the load (active particles) across MPI domains by + * shifting interior domain boundaries between neighbors. + * @param dim_mask bitmask: bit d (0,1,2) set => balance along dim x1/x2/x3 + * @param tolerance skip if (max-min)/mean of the per-slice load is below + * this fraction + * @param max_shift_cells per-event cap for any single boundary movement, + * additionally clamped to N_GHOSTS so the field strip we need is already + * present in the local ghost zone + * @note Only neighbor communication is used (CommunicateFields ghosts + + * CommunicateParticles). + */ + void Rebalance(unsigned int dim_mask, + real_t tolerance, + ncells_t max_shift_cells) + requires(MetricClass); + /* output-related ------------------------------------------------------- */ #if defined(OUTPUT_ENABLED) void InitWriter(adios2::ADIOS*, const SimulationParams&); @@ -146,6 +167,13 @@ namespace ntt { timestep_t, simtime_t, const Domain&)>& = nullptr) -> bool; +#if defined(ASCENT_ENABLED) + /** + * @brief Trigger the Ascent pipeline if there is data pending from + * the most recent Write() call. Returns true if a frame was rendered. + */ + auto RenderAscent(timestep_t, simtime_t) -> bool; +#endif void InitCheckpointWriter(adios2::ADIOS*, const SimulationParams&); auto WriteCheckpoint(const SimulationParams&, timestep_t, @@ -290,6 +318,10 @@ namespace ntt { checkpoint::Writer g_checkpoint_writer; #endif +#if defined(ASCENT_ENABLED) + out::AscentWriter g_ascent_writer; +#endif + #if defined(MPI_ENABLED) int g_mpi_rank { -1 }, g_mpi_size { -1 }; #endif diff --git a/src/framework/domain/metadomain_chckpt.cpp b/src/framework/domain/metadomain_chckpt.cpp index 1116df85e..fbf0f11cd 100644 --- a/src/framework/domain/metadomain_chckpt.cpp +++ b/src/framework/domain/metadomain_chckpt.cpp @@ -125,8 +125,19 @@ namespace ntt { } params.saveTOML(g_checkpoint_writer.written().back().second, current_time); + // Recompute the local with-ghosts shape/offset every step so the + // ADIOS variable selection tracks any rebalance that has happened + // since InitCheckpointWriter. + std::vector loc_off_with_ghosts; + for (auto d { 0u }; d < M::Dim; ++d) { + loc_off_with_ghosts.push_back( + local_domain->offset_ncells()[d] + + 2 * N_GHOSTS * local_domain->offset_ndomains()[d]); + } local_domain->fields.CheckpointWrite(g_checkpoint_writer.io(), - g_checkpoint_writer.writer()); + g_checkpoint_writer.writer(), + local_domain->mesh.n_all(), + loc_off_with_ghosts); #if !defined(MPI_ENABLED) const std::size_t dom_tot = 1, dom_offset = 0; #else diff --git a/src/framework/domain/metadomain_io.cpp b/src/framework/domain/metadomain_io.cpp index 3fc0f5f57..45440a9f7 100644 --- a/src/framework/domain/metadomain_io.cpp +++ b/src/framework/domain/metadomain_io.cpp @@ -117,6 +117,65 @@ namespace ntt { "output." + std::string(type) + ".interval_time")); } g_writer.writeAttrs(params); + +#if defined(ASCENT_ENABLED) + if (params.template get("output.ascent.enable")) { + const auto ascent_fields = params.template get>( + "output.ascent.fields"); + raise::ErrorIf(M::CoordType != Coord::Cartesian, + "Ascent output is currently restricted to cartesian grids", + HERE); + raise::ErrorIf(ascent_fields.empty(), + "output.ascent.fields must list at least one field", + HERE); + g_ascent_writer.init(params.template get("simulation.name"), + params.template get( + "output.ascent.actions_file"), + ascent_fields, + params.template get( + "output.ascent.interval"), + params.template get( + "output.ascent.interval_time"), + params.template get( + "output.ascent.vector_aliases"), + params.template get( + "output.ascent.v_drift"), + params.template get( + "output.ascent.v_rot")); + const auto loc_corner = local_domain->offset_ncells(); + const auto loc_shape = local_domain->mesh.n_active(); + std::vector corner(loc_corner.begin(), loc_corner.end()); + std::vector full_shape(loc_shape.begin(), loc_shape.end()); + + // Apply Ascent-specific downsampling. Stride is aligned to the + // global grid (using each rank's `offset_ncells`) so downsampled + // cells from neighboring ranks line up at domain boundaries. + const auto a_dwn = params.template get>( + "output.ascent.downsample"); + std::vector downsample(static_cast(M::Dim), 1u); + std::vector first_cell(static_cast(M::Dim), 0u); + std::vector shape = full_shape; + for (std::size_t d = 0; d < static_cast(M::Dim); ++d) { + const std::size_t s = (d < a_dwn.size()) + ? static_cast(a_dwn[d]) + : 1u; + downsample[d] = s; + if (s <= 1u) { + continue; + } + const std::size_t l_offset = corner[d]; + const std::size_t n = full_shape[d]; + const std::size_t first = (s - (l_offset % s)) % s; + raise::ErrorIf(first >= n, + "output.ascent.downsample factor leaves a rank with " + "no cells; reduce the factor or the decomposition", + HERE); + first_cell[d] = first; + shape[d] = (n - first + s - 1) / s; + } + g_ascent_writer.defineMesh(M::Dim, corner, shape, first_cell, downsample); + } +#endif } template @@ -382,8 +441,15 @@ namespace ntt { g_writer.shouldWrite("spectra", finished_step, finished_time); +#if defined(ASCENT_ENABLED) + const auto render_ascent = g_ascent_writer.shouldRender(finished_step, + finished_time); +#else + const auto render_ascent = false; +#endif + const auto write_fields_any = write_fields or render_ascent; const auto extension = params.template get("output.format"); - if (not(write_fields or write_particles or write_spectra) and + if (not(write_fields_any or write_particles or write_spectra) and extension != "disabled") { return false; } @@ -392,8 +458,10 @@ namespace ntt { "local_domain is a placeholder", HERE); logger::Checkpoint("Writing output", HERE); - if (write_fields) { - g_writer.beginWriting(WriteMode::Fields, current_step, current_time); + if (write_fields_any) { + if (write_fields) { + g_writer.beginWriting(WriteMode::Fields, current_step, current_time); + } const auto incl_ghosts = params.template get("output.debug.ghosts"); const auto dwn = params.template get>( "output.fields.downsampling"); @@ -409,6 +477,9 @@ namespace ntt { } } } + // Refresh the writer's cached per-rank slab so that field/mesh writes + // pick up the (possibly rebalanced) current local layout. + g_writer.setLocalLayout(off_ncells_with_ghosts, loc_shape_with_ghosts); for (auto dim { 0u }; dim < M::Dim; ++dim) { const auto l_size = local_domain->mesh.n_active()[dim]; const auto l_offset = local_domain->offset_ncells()[dim]; @@ -457,11 +528,52 @@ namespace ntt { xe(offset + i_dwn + 1) = x_Ph[dim]; } }); - g_writer.writeMesh( - dim, - xc, - xe, - { off_ncells_with_ghosts[dim], loc_shape_with_ghosts[dim] }); + if (write_fields) { + g_writer.writeMesh( + dim, + xc, + xe, + { off_ncells_with_ghosts[dim], loc_shape_with_ghosts[dim] }); + } + +#if defined(ASCENT_ENABLED) + // Ascent edge coordinates: must match the per-axis downsampling + // applied in publishField (defineMesh stored the same factors). + if (render_ascent && g_ascent_writer.initialized()) { + const auto a_dwn = params.template get>( + "output.ascent.downsample"); + const std::size_t s = (dim < a_dwn.size()) + ? static_cast(a_dwn[dim]) + : 1u; + const std::size_t first = (s <= 1u) ? 0u + : ((s - (l_offset % s)) % s); + const std::size_t n_dwn = (l_size > first) + ? (l_size - first + s - 1) / s + : 0u; + // Edge / node coordinates for the vertex-centered publish: one more + // than the (downsampled) cell count. Neighbouring ranks share the + // boundary node, so the shared edge coordinate must agree; the final + // edge is clamped to the rank's right boundary (`l_size`) so it lands + // on the same physical position as the neighbour's first node even + // when (l_size - first) % s != 0. + const std::size_t nedges = n_dwn + 1; + const array_t xe_full { "Xe_ascent", nedges }; + const auto& metric_a = local_domain->mesh.metric; + Kokkos::parallel_for( + "GenerateMeshAscent", + nedges, + Lambda(cellidx_t e) { + const std::size_t idx = (e == n_dwn) ? l_size : (first + e * s); + const auto i_ = static_cast(idx); + coord_t x_Cd { ZERO }, x_Ph { ZERO }; + x_Cd[dim] = i_; + metric_a.template convert(x_Cd, x_Ph); + xe_full(e) = x_Ph[dim]; + }); + g_ascent_writer.setMeshCoords(static_cast(dim), + xe_full); + } +#endif } const auto output_asis = params.template get("output.debug.as_is"); // !TODO: this can probably be optimized to dump things at once @@ -775,10 +887,40 @@ namespace ntt { } else { raise::Error("Wrong # of components requested for output", HERE); } - g_writer.writeField(names, local_domain->fields.bckp, addresses); + if (write_fields) { + g_writer.writeField(names, + local_domain->fields.bckp, + addresses); + } + +#if defined(ASCENT_ENABLED) + if (render_ascent && g_ascent_writer.initialized()) { + const auto& asked = g_ascent_writer.fields(); + for (auto k = 0u; k < names.size(); ++k) { + // names[k] carries the leading "f" prefix from out::OutputField; + // the user-facing name in toml/Ascent is the same name without it. + const auto short_name = (names[k].size() > 1u and + names[k].front() == 'f') + ? names[k].substr(1) + : names[k]; + for (const auto& want : asked) { + if (short_name == want) { + g_ascent_writer.publishField(names[k], + local_domain->fields.bckp, + addresses[k]); + } + } + } + } +#endif } - g_writer.endWriting(WriteMode::Fields); - } // end shouldWrite("fields", step, time) + if (write_fields) { + g_writer.endWriting(WriteMode::Fields); + } + // The actual `g_ascent_writer.render()` (publish + execute) is fired + // separately from `Metadomain::RenderAscent` so the engine loop can + // surround it with its own dedicated timer. + } // end write_fields_any if (write_particles) { g_writer.beginWriting(WriteMode::Particles, current_step, current_time); @@ -845,6 +987,16 @@ namespace ntt { return true; } +#if defined(ASCENT_ENABLED) + template + auto Metadomain::RenderAscent(timestep_t step, simtime_t time) -> bool { + if (not g_ascent_writer.initialized() or not g_ascent_writer.hasPending()) { + return false; + } + return g_ascent_writer.render(step, time); + } +#endif + // NOLINTBEGIN(bugprone-macro-parentheses) #define METADOMAIN_OUTPUT(S, M, D) \ template void Metadomain>::InitWriter(adios2::ADIOS*, \ @@ -866,6 +1018,14 @@ namespace ntt { #undef METADOMAIN_OUTPUT +#if defined(ASCENT_ENABLED) + #define METADOMAIN_RENDER_ASCENT(S, M, D) \ + template auto Metadomain>::RenderAscent(timestep_t, simtime_t) \ + -> bool; + NTT_FOREACH_SPECIALIZATION(METADOMAIN_RENDER_ASCENT) + #undef METADOMAIN_RENDER_ASCENT +#endif + #if defined(MPI_ENABLED) #define COMMVECTORPOTENTIAL(S, M, D) \ template void Metadomain>::CommunicateVectorPotential(unsigned short); diff --git a/src/framework/domain/metadomain_loadbal.cpp b/src/framework/domain/metadomain_loadbal.cpp new file mode 100644 index 000000000..0768acad6 --- /dev/null +++ b/src/framework/domain/metadomain_loadbal.cpp @@ -0,0 +1,428 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "traits/metric.h" +#include "utils/error.h" +#include "utils/log.h" +#include "utils/numeric.h" + +#include "framework/containers/fields.h" +#include "framework/domain/metadomain.h" +#include "framework/specialization_registry.h" + +#if defined(MPI_ENABLED) + #include "arch/mpi_aliases.h" + #include "arch/mpi_tags.h" + + #include +#endif + +#include + +#include +#include +#include +#include +#include +#include + +namespace ntt { + +#if defined(MPI_ENABLED) + // Copy old field into new (already-zero) field, applying a shift in the + // active-cell offset along each dimension. + // For new (with-ghost) index i, old (with-ghost) index = i + delta[d]. + // Cells whose old index is out of bounds are left at 0 and will be filled + // by CommunicateFields() afterwards. + template + void CopyShifted(const ndfield_mirror_t& src_h, + ndfield_t& dst_dev, + const std::vector& delta) { + auto dst_h = Kokkos::create_mirror_view(dst_dev); + Kokkos::deep_copy(dst_h, ZERO); + + if constexpr (D == Dim::_1D) { + const int new_n1 = static_cast(dst_h.extent(0)); + const int old_n1 = static_cast(src_h.extent(0)); + for (int i1 = 0; i1 < new_n1; ++i1) { + const int oi1 = i1 + delta[0]; + if (oi1 < 0 or oi1 >= old_n1) { + continue; + } + for (auto c { 0u }; c < NC; ++c) { + dst_h(i1, c) = src_h(oi1, c); + } + } + } else if constexpr (D == Dim::_2D) { + const int new_n1 = static_cast(dst_h.extent(0)); + const int new_n2 = static_cast(dst_h.extent(1)); + const int old_n1 = static_cast(src_h.extent(0)); + const int old_n2 = static_cast(src_h.extent(1)); + for (int i1 = 0; i1 < new_n1; ++i1) { + const int oi1 = i1 + delta[0]; + if (oi1 < 0 or oi1 >= old_n1) { + continue; + } + for (int i2 = 0; i2 < new_n2; ++i2) { + const int oi2 = i2 + delta[1]; + if (oi2 < 0 or oi2 >= old_n2) { + continue; + } + for (auto c { 0u }; c < NC; ++c) { + dst_h(i1, i2, c) = src_h(oi1, oi2, c); + } + } + } + } else if constexpr (D == Dim::_3D) { + const int new_n1 = static_cast(dst_h.extent(0)); + const int new_n2 = static_cast(dst_h.extent(1)); + const int new_n3 = static_cast(dst_h.extent(2)); + const int old_n1 = static_cast(src_h.extent(0)); + const int old_n2 = static_cast(src_h.extent(1)); + const int old_n3 = static_cast(src_h.extent(2)); + for (int i1 = 0; i1 < new_n1; ++i1) { + const int oi1 = i1 + delta[0]; + if (oi1 < 0 or oi1 >= old_n1) { + continue; + } + for (int i2 = 0; i2 < new_n2; ++i2) { + const int oi2 = i2 + delta[1]; + if (oi2 < 0 or oi2 >= old_n2) { + continue; + } + for (int i3 = 0; i3 < new_n3; ++i3) { + const int oi3 = i3 + delta[2]; + if (oi3 < 0 or oi3 >= old_n3) { + continue; + } + for (auto c { 0u }; c < NC; ++c) { + dst_h(i1, i2, i3, c) = src_h(oi1, oi2, oi3, c); + } + } + } + } + } + Kokkos::deep_copy(dst_dev, dst_h); + } +#endif // MPI_ENABLED + + template + void Metadomain::Rebalance(unsigned int dim_mask, + real_t tolerance, + ncells_t max_shift_cells) + requires(MetricClass) + { +#if !defined(MPI_ENABLED) + (void)dim_mask; + (void)tolerance; + (void)max_shift_cells; + return; +#else + raise::ErrorIf(l_subdomain_indices().size() != 1, + "Rebalance assumes one local subdomain per rank", + HERE); + // The theta dimension (idx 1) is bounded by the polar axis for any + // non-Cartesian metric: moving an interior boundary in theta is fine + // in principle, but the safety net here forbids it pending validation. + if constexpr (M::CoordType != ntt::Coord::Cartesian) { + raise::ErrorIf((dim_mask & (1u << 1)) != 0u, + "Rebalance along the polar axis is not supported", + HERE); + } + // strip-width is constrained by N_GHOSTS so that any new active cells + // are already present in the rank's old ghost zone (after the most + // recent ghost-cell exchange). + if (max_shift_cells > N_GHOSTS) { + max_shift_cells = static_cast(N_GHOSTS); + } + if (max_shift_cells == 0 or dim_mask == 0) { + return; + } + + const auto local_idx = l_subdomain_indices()[0]; + + /* --- 1. Allgather active particle counts per domain ------------------ */ + npart_t local_npart { 0 }; + for (const auto& sp : g_subdomains[local_idx].species) { + local_npart += sp.npart(); + } + std::vector npart_per_dom(g_ndomains, 0); + MPI_Allgather(&local_npart, + 1, + mpi::get_type(), + npart_per_dom.data(), + 1, + mpi::get_type(), + MPI_COMM_WORLD); + + /* --- 2. Project ncells / load onto each balanced dim ----------------- */ + std::vector> ncells_per_pos(M::Dim); + std::vector> load_per_pos(M::Dim); + for (auto d { 0u }; d < M::Dim; ++d) { + ncells_per_pos[d].assign(g_ndomains_per_dim[d], 0); + load_per_pos[d].assign(g_ndomains_per_dim[d], 0.0); + } + for (unsigned int idx { 0 }; idx < g_ndomains; ++idx) { + const auto& off = g_domain_offsets[idx]; + const auto ncells = g_subdomains[idx].mesh.n_active(); + for (auto d { 0u }; d < M::Dim; ++d) { + ncells_per_pos[d][off[d]] = ncells[d]; + load_per_pos[d][off[d]] += static_cast(npart_per_dom[idx]); + } + } + + /* --- 3. Diffusion-style boundary shifts per balanced dim ------------- */ + std::vector> new_ncells_per_pos = ncells_per_pos; + const auto MIN_NCELLS = static_cast(2 * N_GHOSTS + 4); + bool any_shift = false; + + for (auto d { 0u }; d < M::Dim; ++d) { + if ((dim_mask & (1u << d)) == 0u) { + continue; + } + const auto N = g_ndomains_per_dim[d]; + if (N < 2) { + continue; + } + + double total_load = 0.0; + double max_load = 0.0; + double min_load = std::numeric_limits::infinity(); + for (auto p { 0u }; p < N; ++p) { + total_load += load_per_pos[d][p]; + max_load = std::max(max_load, load_per_pos[d][p]); + min_load = std::min(min_load, load_per_pos[d][p]); + } + if (total_load <= 0.0) { + continue; + } + const auto mean = total_load / static_cast(N); + if (((max_load - min_load) / mean) < + static_cast(tolerance)) { + continue; + } + + // bnd_shift[k] is the number of cells transferred from position k-1 to + // position k by moving the (interior) boundary k. bnd_shift[0] and + // bnd_shift[N] are exterior boundaries that are pinned at 0. + std::vector bnd_shift(N + 1, 0); + const int cap = static_cast(max_shift_cells); + for (auto k { 1u }; k < N; ++k) { + const auto l_load = load_per_pos[d][k - 1]; + const auto r_load = load_per_pos[d][k]; + const auto l_density = (ncells_per_pos[d][k - 1] > 0) + ? l_load / static_cast( + ncells_per_pos[d][k - 1]) + : 0.0; + const auto r_density = (ncells_per_pos[d][k] > 0) + ? r_load / static_cast( + ncells_per_pos[d][k]) + : 0.0; + const auto avg_density = std::max(0.5 * (l_density + r_density), 1.0); + // Move boundary towards the lighter side. Halve the gradient so that + // a single sweep does roughly one diffusion step. + int shift = static_cast( + std::round(0.5 * (l_load - r_load) / avg_density)); + shift = std::clamp(shift, -cap, cap); + // Don't shrink either side below MIN_NCELLS. + const int max_pos = static_cast(ncells_per_pos[d][k - 1]) - + static_cast(MIN_NCELLS); + const int max_neg = static_cast(ncells_per_pos[d][k]) - + static_cast(MIN_NCELLS); + shift = std::clamp(shift, + -std::max(max_neg, 0), + std::max(max_pos, 0)); + bnd_shift[k] = shift; + if (shift != 0) { + any_shift = true; + } + } + // new_ncells[p] = ncells[p] + bnd_shift[p] - bnd_shift[p+1] + for (auto p { 0u }; p < N; ++p) { + new_ncells_per_pos[d][p] = static_cast( + static_cast(ncells_per_pos[d][p]) + bnd_shift[p] - + bnd_shift[p + 1]); + } + } + + if (not any_shift) { + return; + } + + /* --- 4. Per-position prefix sums (offset and physical extent) -------- */ + // Face positions are queried from g_mesh.metric in the global code-coordinate + // system, so curvilinear stretches (log-r, eta-stretching) are honored. + std::vector> new_offset_per_pos(M::Dim); + std::vector>> extent_per_pos(M::Dim); + auto face_phys = [this](unsigned int d, real_t x_code) -> real_t { + if (d == 0u) { + return g_mesh.metric.template convert<1, Crd::Cd, Crd::Ph>(x_code); + } + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + if (d == 1u) { + return g_mesh.metric.template convert<2, Crd::Cd, Crd::Ph>(x_code); + } + } + if constexpr (M::Dim == Dim::_3D) { + if (d == 2u) { + return g_mesh.metric.template convert<3, Crd::Cd, Crd::Ph>(x_code); + } + } + raise::Error("Invalid dimension index in Rebalance face_phys", HERE); + return ZERO; + }; + for (auto d { 0u }; d < M::Dim; ++d) { + const auto N = g_ndomains_per_dim[d]; + new_offset_per_pos[d].assign(N, 0); + extent_per_pos[d].resize(N); + ncells_t running { 0 }; + for (auto p { 0u }; p < N; ++p) { + new_offset_per_pos[d][p] = running; + const auto x_lo = face_phys(d, static_cast(running)); + running += new_ncells_per_pos[d][p]; + const auto x_hi = face_phys(d, static_cast(running)); + extent_per_pos[d][p] = { x_lo, x_hi }; + } + } + + /* --- 5. Save local em (and em0/cur0 for GRPIC) to host --------------- */ + auto& local_dom = g_subdomains[local_idx]; + const auto old_offset_ncells = local_dom.offset_ncells(); + + auto em_old_h = Kokkos::create_mirror_view(local_dom.fields.em); + Kokkos::deep_copy(em_old_h, local_dom.fields.em); + auto em0_old_h = decltype(Kokkos::create_mirror_view(local_dom.fields.em0)) {}; + auto cur0_old_h = decltype(Kokkos::create_mirror_view(local_dom.fields.cur0)) {}; + if constexpr (S == SimEngine::GRPIC) { + em0_old_h = Kokkos::create_mirror_view(local_dom.fields.em0); + cur0_old_h = Kokkos::create_mirror_view(local_dom.fields.cur0); + Kokkos::deep_copy(em0_old_h, local_dom.fields.em0); + Kokkos::deep_copy(cur0_old_h, local_dom.fields.cur0); + } + + /* --- 6. Update bookkeeping for every g_subdomain --------------------- */ + std::vector new_local_ncells(M::Dim); + std::vector new_local_offset(M::Dim); + for (unsigned int idx { 0 }; idx < g_ndomains; ++idx) { + auto& sub = g_subdomains[idx]; + const auto& off_ndoms = g_domain_offsets[idx]; + std::vector ncells_d(M::Dim); + std::vector offset_d(M::Dim); + boundaries_t ext_d; + for (auto d { 0u }; d < M::Dim; ++d) { + ncells_d[d] = new_ncells_per_pos[d][off_ndoms[d]]; + offset_d[d] = new_offset_per_pos[d][off_ndoms[d]]; + ext_d.push_back(extent_per_pos[d][off_ndoms[d]]); + } + sub.mesh.set_resolution_and_extent(ncells_d, ext_d); + sub.set_offset_ncells(offset_d); + if (idx == local_idx) { + new_local_ncells = ncells_d; + new_local_offset = offset_d; + } + } + // Boundary conditions and neighbor topology are unchanged. + + /* --- 7. Reallocate local fields, copy from saved buffer with shift --- */ + // delta[d] = new_offset[d] - old_offset[d] (in the with-ghost field + // coordinate system the same delta applies). + std::vector delta(M::Dim); + for (auto d { 0u }; d < M::Dim; ++d) { + delta[d] = static_cast(new_local_offset[d]) - + static_cast(old_offset_ncells[d]); + } + + local_dom.fields = Fields { new_local_ncells }; + CopyShifted(em_old_h, local_dom.fields.em, delta); + if constexpr (S == SimEngine::GRPIC) { + CopyShifted(em0_old_h, local_dom.fields.em0, delta); + CopyShifted(cur0_old_h, local_dom.fields.cur0, delta); + } + + /* --- 8. Refill ghost zones from neighbors ---------------------------- */ + CommunicateFields(local_dom, Comm::E | Comm::B); + + /* --- 9. Shift particle indices, retag, and migrate ------------------- */ + // Particle indices i_d are in active-cell coordinates: i_d in [0, n_active) + // for an in-domain particle. After the offset moves by delta, particles + // get i_d_new = i_d_old - delta[d]. Particles whose new index falls + // outside [0, n_active) are tagged for the appropriate neighbor and + // sent by CommunicateParticles(). + for (auto& sp : local_dom.species) { + if (sp.npart() == 0) { + continue; + } + auto i1 = sp.i1, i2 = sp.i2, i3 = sp.i3; + auto i1p = sp.i1_prev, i2p = sp.i2_prev, i3p = sp.i3_prev; + auto tag = sp.tag; + const int dx1 = -delta[0]; + int dx2 = 0; + int dx3 = 0; + int new_n1 = static_cast(new_local_ncells[0]); + int new_n2 = 1; + int new_n3 = 1; + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + dx2 = -delta[1]; + new_n2 = static_cast(new_local_ncells[1]); + } + if constexpr (M::Dim == Dim::_3D) { + dx3 = -delta[2]; + new_n3 = static_cast(new_local_ncells[2]); + } + Kokkos::parallel_for( + "RebalanceShiftPrtls", + sp.rangeActiveParticles(), + Lambda(prtlidx_t p) { + if (tag(p) != ParticleTag::alive) { + return; + } + if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or + M::Dim == Dim::_3D) { + i1(p) += dx1; + i1p(p) += dx1; + } + if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { + i2(p) += dx2; + i2p(p) += dx2; + } + if constexpr (M::Dim == Dim::_3D) { + i3(p) += dx3; + i3p(p) += dx3; + } + if constexpr (M::Dim == Dim::_1D) { + tag(p) = mpi::SendTag(tag(p), i1(p) < 0, i1(p) >= new_n1); + } else if constexpr (M::Dim == Dim::_2D) { + tag(p) = mpi::SendTag(tag(p), + i1(p) < 0, + i1(p) >= new_n1, + i2(p) < 0, + i2(p) >= new_n2); + } else if constexpr (M::Dim == Dim::_3D) { + tag(p) = mpi::SendTag(tag(p), + i1(p) < 0, + i1(p) >= new_n1, + i2(p) < 0, + i2(p) >= new_n2, + i3(p) < 0, + i3(p) >= new_n3); + } + }); + sp.set_unsorted(); + } + + CommunicateParticles(local_dom); + logger::Checkpoint("Rebalance: domains shifted, fields and particles redistributed", + HERE); +#endif // MPI_ENABLED + } + + // NOLINTBEGIN(bugprone-macro-parentheses) +#define METADOMAIN_REBAL(S, M, D) \ + template void Metadomain>::Rebalance(unsigned int, real_t, ncells_t); + + NTT_FOREACH_SPECIALIZATION(METADOMAIN_REBAL) +#undef METADOMAIN_REBAL + // NOLINTEND(bugprone-macro-parentheses) + +} // namespace ntt diff --git a/src/framework/parameters/parameters.cpp b/src/framework/parameters/parameters.cpp index 7372da510..c6b423f95 100644 --- a/src/framework/parameters/parameters.cpp +++ b/src/framework/parameters/parameters.cpp @@ -165,6 +165,108 @@ namespace ntt { output_params.read(dim, get("particles.nspec"), toml_data); output_params.setParams(this); + /* [output.ascent] (in situ visualization) ------------------------------ */ + set("output.ascent.enable", + toml::find_or(toml_data, "output", "ascent", "enable", false)); + set("output.ascent.actions_file", + toml::find_or(toml_data, + "output", + "ascent", + "actions_file", + "")); + set("output.ascent.fields", + toml::find_or>(toml_data, + "output", + "ascent", + "fields", + std::vector {})); + set("output.ascent.vector_aliases", + toml::find_or(toml_data, + "output", + "ascent", + "vector_aliases", + true)); + // Shift applied at render time to the x-component of every + // `camera/position` and `camera/look_at` in the actions file: + // dx = v_drift * t. Lets the camera ride along with a moving + // window so the rendered region tracks a feature drifting through + // the simulation domain (e.g. a shock front). + set("output.ascent.v_drift", + toml::find_or(toml_data, + "output", + "ascent", + "v_drift", + ZERO)); + // Rotational velocity (radians per code time unit) around the + // camera's `look_at` point, with the camera's `up` vector as the + // rotation axis. The rotation angle theta = v_rot * t is applied + // before any v_drift translation, which keeps the camera orbiting + // the (drifted) focus point regardless of v_drift. + set("output.ascent.v_rot", + toml::find_or(toml_data, + "output", + "ascent", + "v_rot", + ZERO)); + { + // Per-axis stride applied when extracting fields for Ascent. A factor + // > 1 cuts the published mesh and rendered work by that factor in + // the corresponding direction. Accepts either a scalar (broadcast to + // all axes) or an array of length `dim`. + std::vector a_dwn; + try { + a_dwn = toml::find>(toml_data, + "output", + "ascent", + "downsample"); + } catch (...) { + try { + const auto a_dwn_scalar = toml::find(toml_data, + "output", + "ascent", + "downsample"); + a_dwn.assign(static_cast(dim), a_dwn_scalar); + } catch (...) { + a_dwn.assign(static_cast(dim), 1u); + } + } + if (a_dwn.size() > static_cast(dim)) { + a_dwn.erase(a_dwn.begin() + dim, a_dwn.end()); + } + raise::ErrorIf(a_dwn.size() != static_cast(dim), + "output.ascent.downsample must have length `grid.dim`", + HERE); + for (const auto& d : a_dwn) { + raise::ErrorIf(d == 0u, + "output.ascent.downsample factor must be nonzero", + HERE); + } + set("output.ascent.downsample", a_dwn); + } + { + // Cadence is independent of [output.fields]: when neither is set, fall + // back to the global [output] interval keys. + const auto a_int = toml::find_or(toml_data, + "output", + "ascent", + "interval", + 0); + const auto a_int_time = toml::find_or(toml_data, + "output", + "ascent", + "interval_time", + -1.0); + if ((a_int == 0) and (a_int_time == -1.0)) { + set("output.ascent.interval", + get("output.interval")); + set("output.ascent.interval_time", + get("output.interval_time")); + } else { + set("output.ascent.interval", a_int); + set("output.ascent.interval_time", a_int_time); + } + } + /* [checkpoint] --------------------------------------------------------- */ set("checkpoint.interval", toml::find_or(toml_data, @@ -239,6 +341,52 @@ namespace ntt { alg_params.read(get("scales.dx0"), alg_extra_flags, toml_data); alg_params.setParams(alg_extra_flags, this); + /* [simulation.domain.load_balance] ------------------------------------- */ + set("simulation.domain.load_balance.enable", + toml::find_or(toml_data, "simulation", "domain", "load_balance", "enable", false)); + set("simulation.domain.load_balance.interval", + toml::find_or(toml_data, + "simulation", + "domain", + "load_balance", + "interval", + 0u)); + set("simulation.domain.load_balance.tolerance", + toml::find_or(toml_data, + "simulation", + "domain", + "load_balance", + "tolerance", + static_cast(0.1))); + set("simulation.domain.load_balance.max_shift", + toml::find_or(toml_data, + "simulation", + "domain", + "load_balance", + "max_shift", + static_cast(N_GHOSTS))); + { + // dimensions: list of 1/2/3 mapped to a bitmask + const auto dim_ints = toml::find_or>( + toml_data, + "simulation", + "domain", + "load_balance", + "dimensions", + std::vector { 1 }); + unsigned int mask = 0u; + for (const auto& d : dim_ints) { + if (d == 1 or d == 2 or d == 3) { + mask |= 1u << (d - 1); + } else { + raise::Error( + "simulation.domain.load_balance.dimensions: unknown dim, expected 1/2/3", + HERE); + } + } + set("simulation.domain.load_balance.dim_mask", mask); + } + /* extra physics ------------------------------------------------------ */ params::Extra extra_params {}; const std::map extra_extra_flags = { diff --git a/src/global/global.h b/src/global/global.h index c3fbc5061..d707a2712 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -254,6 +254,7 @@ namespace Timer { PrintParticleSort = 1 << 4, PrintCheckpoint = 1 << 5, PrintNormed = 1 << 6, + PrintAscent = 1 << 7, Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, }; } // namespace Timer diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index 604a872ae..8f491ef23 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -90,6 +90,7 @@ namespace diag { const std::vector& species_maxnpart, bool print_prtl_clear, bool print_output, + bool print_ascent, bool print_checkpoint, bool print_colors) { DiagFlags diag_flags = Diag::Default; @@ -106,6 +107,9 @@ namespace diag { if (print_output) { timer_flags |= Timer::PrintOutput; } + if (print_ascent) { + timer_flags |= Timer::PrintAscent; + } if (print_checkpoint) { timer_flags |= Timer::PrintCheckpoint; } diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 18669c1f1..410c35acf 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -36,6 +36,7 @@ namespace diag { * @param maxnpart (per each species) * @param particlesort (if true, dead particles were removed) * @param output (if true, output was written) + * @param ascent (if true, an Ascent frame was rendered) * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) */ @@ -52,6 +53,7 @@ namespace diag { bool, bool, bool, + bool, bool); } // namespace diag diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index 6f2045be6..d550c12fd 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -136,7 +136,10 @@ namespace timer { auto Timers::printAll(TimerFlags flags, npart_t npart, ncells_t ncells) const -> std::string { - const std::vector extras { "ParticleSort", "Output", "Checkpoint" }; + const std::vector extras { "ParticleSort", + "Output", + "Ascent", + "Checkpoint" }; const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; @@ -259,9 +262,10 @@ namespace timer { } } - // print extra timers for output/checkpoint/particleSort + // print extra timers for output/ascent/checkpoint/particleSort const std::vector extras_f { Timer::PrintParticleSort, Timer::PrintOutput, + Timer::PrintAscent, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { const auto& name = extras[i]; diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index 4cf1bf410..fbeaedf7a 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -35,6 +35,9 @@ if(${output}) list(APPEND SOURCES ${SRC_DIR}/utils/readers.cpp) list(APPEND SOURCES ${SRC_DIR}/utils/tuning.cpp) endif() +if(${ascent}) + list(APPEND SOURCES ${SRC_DIR}/ascent_writer.cpp) +endif() add_library(ntt_output ${SOURCES}) set(libs ntt_global) diff --git a/src/output/ascent_writer.cpp b/src/output/ascent_writer.cpp new file mode 100644 index 000000000..30ba06a79 --- /dev/null +++ b/src/output/ascent_writer.cpp @@ -0,0 +1,842 @@ +#include "output/ascent_writer.h" + +#if defined(ASCENT_ENABLED) + + #include "enums.h" + #include "global.h" + + #include "arch/kokkos_aliases.h" + #include "utils/error.h" + #include "utils/log.h" + + #include + #include + #include + #include + #include + + #if defined(MPI_ENABLED) + #include + #endif + + #include + #include + #include + #include + #include + #include + #include + #include + +namespace out { + + namespace { + /* + * Clamp a (possibly out-of-range) source index into [lo, hi]. Used on the + * device by the cell -> node recentering in publishField: clamping to the + * local *active* cell range makes boundary nodes one-sided (they only + * average this domain's own cells) instead of reaching into the ghost + * ring, so no halo exchange is needed. + */ + KOKKOS_INLINE_FUNCTION + auto clamp_idx(long i, long lo, long hi) -> long { + if (i < lo) { + return lo; + } + if (i > hi) { + return hi; + } + return i; + } + + /* + * Read a 3-vector from a Conduit float64/float32 array of length 3. + * Returns false (leaving `out` untouched) for any other layout — the + * caller then skips the transform for that camera entry instead of + * silently corrupting it. + */ + auto readVec3(const conduit::Node& arr, + std::array& out) -> bool { + if (!arr.dtype().is_floating_point()) { + return false; + } + if (arr.dtype().number_of_elements() != 3) { + return false; + } + if (arr.dtype().is_float64()) { + const auto a = arr.as_float64_array(); + out = { a[0], a[1], a[2] }; + return true; + } + if (arr.dtype().is_float32()) { + const auto a = arr.as_float32_array(); + out = { static_cast(a[0]), + static_cast(a[1]), + static_cast(a[2]) }; + return true; + } + return false; + } + + /* + * Write a 3-vector back into the same Conduit array, preserving its + * original element type (float32/float64). No-op for unsupported + * layouts so the original values stay intact. + */ + void writeVec3(conduit::Node& arr, const std::array& v) { + if (arr.dtype().number_of_elements() != 3) { + return; + } + if (arr.dtype().is_float64()) { + auto a = arr.as_float64_array(); + a[0] = v[0]; + a[1] = v[1]; + a[2] = v[2]; + } else if (arr.dtype().is_float32()) { + auto a = arr.as_float32_array(); + a[0] = static_cast(v[0]); + a[1] = static_cast(v[1]); + a[2] = static_cast(v[2]); + } + } + + /* + * Rodrigues' rotation: rotate `v` by `theta` (radians) about the + * unit axis `k`. Caller is responsible for normalizing `k`. + */ + auto rotateAboutAxis(const std::array& v, + const std::array& k, + double theta) + -> std::array { + const double c = std::cos(theta); + const double s = std::sin(theta); + const double kdv = k[0] * v[0] + k[1] * v[1] + k[2] * v[2]; + const std::array kxv { + k[1] * v[2] - k[2] * v[1], + k[2] * v[0] - k[0] * v[2], + k[0] * v[1] - k[1] * v[0], + }; + return { + v[0] * c + kxv[0] * s + k[0] * kdv * (1.0 - c), + v[1] * c + kxv[1] * s + k[1] * kdv * (1.0 - c), + v[2] * c + kxv[2] * s + k[2] * kdv * (1.0 - c), + }; + } + + /* + * Walk the actions tree and apply the v_drift / v_rot rewrite to + * every `camera` block: + * 1. rotate `position` about `look_at` by `theta` using `up` as + * axis (skipped if `up` is missing or zero-length); + * 2. translate both `position` and `look_at` by `dx` along x. + * Step 1 commutes with step 2 — translating both end-points by the + * same vector preserves the rotation pivot — so this single pass + * also covers "rotate around the original (un-drifted) focus point + * and then translate". + * + * Applied freshly each render against the pristine pre-loaded base + * tree, never in place. + */ + void applyCameraTransforms(conduit::Node& node, double dx, double theta) { + for (conduit::index_t i = 0; i < node.number_of_children(); ++i) { + auto& child = node.child(i); + const std::string name = child.name(); + if (name == "camera") { + std::array position {}; + std::array look_at {}; + std::array up {}; + const bool has_pos = child.has_child("position") && + readVec3(child["position"], position); + const bool has_look = child.has_child("look_at") && + readVec3(child["look_at"], look_at); + if (theta != 0.0 && has_pos && has_look) { + std::array axis { 0.0, 0.0, 1.0 }; + if (child.has_child("up")) { + readVec3(child["up"], up); + const double n = std::sqrt(up[0] * up[0] + up[1] * up[1] + + up[2] * up[2]); + if (n > 0.0) { + axis = { up[0] / n, up[1] / n, up[2] / n }; + } + } + const std::array rel { position[0] - look_at[0], + position[1] - look_at[1], + position[2] - look_at[2] }; + const auto rotated = rotateAboutAxis(rel, axis, theta); + position = { rotated[0] + look_at[0], + rotated[1] + look_at[1], + rotated[2] + look_at[2] }; + writeVec3(child["position"], position); + } + if (dx != 0.0) { + if (has_pos) { + position[0] += dx; + writeVec3(child["position"], position); + } + if (has_look) { + look_at[0] += dx; + writeVec3(child["look_at"], look_at); + } + // 2D camera window-bounds [x0, x1, y0, y1]: translate the + // x-pair only, leaving y bounds untouched. + if (child.has_child("2d") && + child["2d"].dtype().number_of_elements() == 4) { + auto& w = child["2d"]; + if (w.dtype().is_float64()) { + auto a = w.as_float64_array(); + a[0] += dx; + a[1] += dx; + } else if (w.dtype().is_float32()) { + auto a = w.as_float32_array(); + a[0] = static_cast(a[0] + dx); + a[1] = static_cast(a[1] + dx); + } + } + } + } + applyCameraTransforms(child, dx, theta); + } + } + + /* + * Conduit's YAML parser stores integer scalars as int64 by default, but + * many Ascent / VTK-h filters call `.as_int()` (i.e. int32) on their + * parameters and that check is strict. Walk the parsed actions tree + * once and demote int64 leaves to int32 when the value fits — this is + * safe for the small counters used in actions files (num_steps, + * num_seeds_*, image_width, ...). + * + * We also rewrite multi-element int64 arrays into int32 arrays so that + * filters like `point_list` whose params are int-vectors stay valid. + * + * Returns the number of leaves that were demoted. + */ + auto demoteInt64ToInt32(conduit::Node& node) -> std::size_t { + std::size_t n_demoted = 0; + // Catch any integer leaf that isn't already int32. We use + // `.to_int64()` so we don't have to know the source type — it does + // a value-preserving cast for int8/16/32/64 and uint8/16/32/64. + if (node.dtype().is_integer() and not node.dtype().is_int32()) { + const auto nelem = node.dtype().number_of_elements(); + if (nelem == 1) { + const conduit::int64 v = node.to_int64(); + if (v >= static_cast(INT32_MIN) and + v <= static_cast(INT32_MAX)) { + node.set_int32(static_cast(v)); + ++n_demoted; + } + } else if (nelem > 1) { + // multi-element int array → int32 array (if all values fit) + conduit::int64_array src = node.as_int64_array(); + std::vector tmp; + tmp.reserve(static_cast(nelem)); + bool fits = true; + for (conduit::index_t i = 0; i < nelem; ++i) { + const conduit::int64 v = src[i]; + if (v < static_cast(INT32_MIN) or + v > static_cast(INT32_MAX)) { + fits = false; + break; + } + tmp.push_back(static_cast(v)); + } + if (fits) { + node.set(tmp.data(), tmp.size()); + ++n_demoted; + } + } + } + for (conduit::index_t i = 0; i < node.number_of_children(); ++i) { + n_demoted += demoteInt64ToInt32(node.child(i)); + } + return n_demoted; + } + } // namespace + + AscentWriter::~AscentWriter() { + if (m_initialized) { + try { + m_ascent.close(); + } catch (...) { + // swallow shutdown errors so we never throw from a destructor + } + m_initialized = false; + } + } + + void AscentWriter::init(const std::string& title, + const std::string& actions_file, + const std::vector& fields, + timestep_t interval, + simtime_t interval_time, + bool vector_aliases, + real_t v_drift, + real_t v_rot) { + raise::ErrorIf(m_initialized, "AscentWriter already initialized", HERE); + + m_root = title; + m_actions_file = actions_file; + m_fields = fields; + m_vector_aliases = vector_aliases; + m_v_drift = v_drift; + m_v_rot = v_rot; + + m_tracker.init("ascent", interval, interval_time); + + // Mirror the layout used by the ADIOS field/particle writers: + // //. Renders go under /plots/. + const auto plots_dir = std::filesystem::path(m_root) / "plots"; + std::filesystem::create_directories(plots_dir); + + m_options.reset(); + #if defined(MPI_ENABLED) + m_options["mpi_comm"] = MPI_Comm_c2f(MPI_COMM_WORLD); + #endif + m_options["default_dir"] = plots_dir.string(); + m_options["exceptions"] = "forward"; + m_options["messages"] = "quiet"; + + // Pre-parse the user's actions file so we can demote int64 leaves to + // int32 (Conduit YAML defaults to int64 but many Ascent filters call + // `.as_int()` strictly). Only when this fails do we fall back to + // letting Ascent read the file itself. + m_actions.reset(); + m_have_actions = false; + if (!m_actions_file.empty()) { + try { + if (!std::filesystem::exists(m_actions_file)) { + throw std::runtime_error("actions file not found at '" + + m_actions_file + "' (cwd: " + + std::filesystem::current_path().string() + + ")"); + } + // Use the same loader Ascent uses internally so the parse is + // guaranteed compatible with the YAML schema Ascent expects. + conduit::relay::io::load(m_actions_file, "yaml", m_actions); + const auto n_demoted = demoteInt64ToInt32(m_actions); + m_have_actions = true; + logger::Checkpoint( + "Pre-loaded Ascent actions file '" + m_actions_file + "' (demoted " + + std::to_string(n_demoted) + " int64 -> int32 leaves)", + HERE); + // CRITICAL: Ascent's `execute()` always calls CheckForSettingsFile + // with merge=false, which REPLACES our in-memory actions with a + // freshly-parsed copy from disk if `actions_file` resolves to a + // file that exists. By default `actions_file` is unset, in which + // case Ascent falls back to "ascent_actions.yaml" in the cwd. + // Set actions_file to "" so `is_file("")` is false and the + // disk-load path (which would discard our int32 demotion) is + // skipped. + m_options["actions_file"] = ""; + // Drop a copy of the patched actions inside the plots directory + // so users can inspect exactly what was sent to Ascent. + try { + const auto settings_path = plots_dir / "ascent_settings.yaml"; + conduit::relay::io::save(m_actions, settings_path.string(), "yaml"); + } catch (...) { + // best-effort debug dump; don't crash if it fails + } + } catch (const std::exception& e) { + raise::Warning( + "Could not pre-load Ascent actions file '" + m_actions_file + + "' (" + e.what() + + "); falling back to Ascent's own loader (int32 demotion skipped).", + HERE); + m_options["actions_file"] = m_actions_file; + } + } + + m_ascent.open(m_options); + m_initialized = true; + + // Surface the active Ascent runtime + backend so users can confirm + // they're getting the GPU-accelerated path. If the build accidentally + // fell back to the serial host runtime, rendering 10^7+ cells will be + // dominated by VTK-m on the CPU. + // + // `ascent::about(node)` is a free function (not a method on the + // ascent::Ascent instance) and reports build-time configuration: + // version, the default runtime, and per-runtime availability + // including the active VTK-m backend. The exact key layout varies + // between Ascent versions, so we probe a handful of well-known paths + // and fall back to dumping a YAML summary if none match. + try { + conduit::Node about_node; + ::ascent::about(about_node); + + // Dump the full about node so users can read the exact key layout + // for the installed Ascent version (paths drift between releases). + try { + const auto about_path = plots_dir / "ascent_about.yaml"; + conduit::relay::io::save(about_node, about_path.string(), "yaml"); + } catch (...) { + // best-effort debug dump + } + + std::string summary = "Ascent"; + if (about_node.has_path("version")) { + summary += " v" + about_node["version"].as_string(); + } + std::string default_runtime; + if (about_node.has_path("default_runtime")) { + default_runtime = about_node["default_runtime"].as_string(); + summary += " runtime=" + default_runtime; + } + + // Enumerate VTK-m backends marked "enabled" in the about node. + // Layout in Ascent 0.9.x: + // runtimes//vtkm/backends/ = "enabled"|"disabled" + // This reports what was BUILT into VTK-m. The active dispatcher + // when the Kokkos backend is selected is determined by Kokkos's + // default execution space (printed below). + if (!default_runtime.empty()) { + const std::string backends_path = "runtimes/" + default_runtime + + "/vtkm/backends"; + if (about_node.has_path(backends_path)) { + const auto& backends = about_node[backends_path]; + std::string enabled_list; + for (conduit::index_t i = 0; i < backends.number_of_children(); + ++i) { + const auto& c = backends.child(i); + if (c.dtype().is_string() && c.as_string() == "enabled") { + if (!enabled_list.empty()) { + enabled_list += ","; + } + enabled_list += c.name(); + } + } + if (!enabled_list.empty()) { + summary += " vtkm-backends=" + enabled_list; + } + } + } + + // Kokkos's default execution space is the actual dispatcher when + // Ascent runs through the Kokkos VTK-m backend. This name (SYCL / + // CUDA / HIP / OpenMP / Serial) is the source of truth for whether + // in situ rendering runs on the GPU. + summary += " kokkos="; + summary += Kokkos::DefaultExecutionSpace::name(); + + logger::Checkpoint(summary, HERE); + } catch (...) { + // never let an introspection failure abort init + } + logger::Checkpoint("Initialized Ascent in situ writer", HERE); + } + + auto AscentWriter::shouldRender(timestep_t step, simtime_t time) -> bool { + if (!m_initialized) { + return false; + } + return m_tracker.shouldWrite(step, time); + } + + void AscentWriter::defineMesh(Dimension dim, + const std::vector& l_corner, + const std::vector& l_shape, + const std::vector& l_first_cell, + const std::vector& downsample) { + raise::ErrorIf(!m_initialized, "AscentWriter not initialized", HERE); + const auto nd = static_cast(dim); + raise::ErrorIf(l_corner.size() != nd || l_shape.size() != nd, + "AscentWriter::defineMesh size mismatch", + HERE); + raise::ErrorIf(!l_first_cell.empty() && l_first_cell.size() != nd, + "AscentWriter::defineMesh l_first_cell size mismatch", + HERE); + raise::ErrorIf(!downsample.empty() && downsample.size() != nd, + "AscentWriter::defineMesh downsample size mismatch", + HERE); + m_dim = dim; + m_l_corner = l_corner; + m_l_shape.assign(l_shape.begin(), l_shape.end()); + + m_l_first_cell.assign(nd, 0u); + if (!l_first_cell.empty()) { + m_l_first_cell = l_first_cell; + } + m_downsample.assign(nd, 1u); + if (!downsample.empty()) { + m_downsample = downsample; + for (auto s : m_downsample) { + raise::ErrorIf(s == 0u, "downsample factor must be nonzero", HERE); + } + } + + // Published fields are vertex-centered, so the node grid has one extra + // point per axis (= cells + 1). Neighbouring domains' edge-coordinate + // arrays share the boundary node, so publishing matching values there + // ties the per-rank blocks into a seamless field. + m_l_nodes.assign(nd, 1u); + for (std::size_t d = 0; d < nd; ++d) { + m_l_nodes[d] = m_l_shape[d] + 1u; + } + + // Mesh structure is being (re)defined; force a fresh blueprint + // verification on the next render. + m_verified = false; + m_summary_logged = false; + + m_mesh.reset(); + // Use a `uniform` coordset (origin + spacing + dims), not `rectilinear`. + // The grid is uniform Cartesian for every Ascent-supported case, VTK-m + // renders it identically, and crucially Devil Ray (dray) only ingests + // uniform/explicit coordsets — it rejects `rectilinear` ("Bad coordinates + // type rectilinear"), which is what wedged earlier dray attempts. The + // origin/spacing/dims are filled per axis later via setMeshCoords(). + m_mesh["coordsets/coords/type"] = "uniform"; + m_mesh["topologies/mesh/type"] = "uniform"; + m_mesh["topologies/mesh/coordset"] = "coords"; + m_mesh_defined = true; + + // Allocate the per-field staging buffers once. Sized to the node grid + // (vertex association). Reused across every publishField() call (and across + // renders) to avoid per-call device + host + std::vector allocations. + std::size_t nelem = 1; + for (auto n : m_l_nodes) { + nelem *= n; + } + m_buf_nelem = nelem; + m_field_buf_d = array_t("ascent_field_buf", nelem); + m_field_buf_h = Kokkos::create_mirror_view(m_field_buf_d); + } + + void AscentWriter::setMeshCoords(unsigned short dim, + const array_t& xe) { + raise::ErrorIf(!m_mesh_defined, "AscentWriter mesh not defined", HERE); + raise::ErrorIf(dim >= static_cast(m_dim), + "AscentWriter::setMeshCoords invalid dim", + HERE); + // The axis has (cells + 1) edge coordinates = the vertex (node) positions + // the vertex-centered fields are sampled on. For a `uniform` coordset we + // collapse them to origin + spacing + dims. + raise::ErrorIf(xe.extent(0) != m_l_nodes[dim], + "AscentWriter::setMeshCoords edge count must equal " + "published node count (cells + 1)", + HERE); + auto xe_h = Kokkos::create_mirror_view(xe); + Kokkos::deep_copy(xe_h, xe); + + const std::size_t n_nodes = xe_h.extent(0); + const double origin = static_cast(xe_h(0)); + const double spacing = (n_nodes > 1u) + ? static_cast(xe_h(1)) - origin + : static_cast(1.0); + // Sanity-check that the published grid really is uniform; entity's Ascent + // path is Cartesian-only (constant spacing), but warn loudly rather than + // silently mis-place a stretched grid. + if (n_nodes > 2u) { + const double last = static_cast(xe_h(n_nodes - 1)); + const double expect = + origin + spacing * static_cast(n_nodes - 1); + const double aspc = std::abs(spacing); + const double scale = (aspc > 1e-30) ? aspc : 1e-30; + if (std::abs(last - expect) > 1e-4 * scale * + static_cast(n_nodes)) { + raise::Warning("AscentWriter::setMeshCoords: non-uniform spacing on a " + "uniform coordset; the render will be geometrically " + "off. Ascent output assumes a uniform Cartesian grid.", + HERE); + } + } + + static const char* const idim[3] = { "i", "j", "k" }; + static const char* const iorg[3] = { "x", "y", "z" }; + static const char* const ispc[3] = { "dx", "dy", "dz" }; + m_mesh["coordsets/coords/dims/" + std::string(idim[dim])] = + static_cast(n_nodes); + m_mesh["coordsets/coords/origin/" + std::string(iorg[dim])] = origin; + m_mesh["coordsets/coords/spacing/" + std::string(ispc[dim])] = spacing; + } + + template + void AscentWriter::publishField(const std::string& name, + const ndfield_t& fld, + std::size_t comp) { + raise::ErrorIf(!m_mesh_defined, "AscentWriter mesh not defined", HERE); + raise::ErrorIf(m_buf_nelem == 0, + "AscentWriter staging buffer not allocated", + HERE); + + const std::size_t gh = ntt::N_GHOSTS; + auto buf = m_field_buf_d; + + // Recenter the (possibly downsampled) cell field onto the grid NODES and + // write the node values into the persistent flat buffer in i-fastest order + // (Conduit/Blueprint layout). Conduit's `set(...)` below copies out, so the + // buffer can be reused across fields and across renders. Publishing on + // nodes (vs cells) puts every field component at the same location, which + // removes the Yee half-cell offset between B1/B2/B3 (and J) magnitudes and + // is the natural representation for a future curvilinear coordset. + // + // Node `i` along axis d sits between sampled cells (i-1) and i; its value + // averages the (up to 2^D) cells touching it. The straddling cells map to + // source indices N_GHOSTS + m_l_first_cell[d] + {(i-1), i} * stride, then + // are clamped to the LOCAL ACTIVE cell range [lo_d, hi_d]. The clamp makes + // boundary nodes one-sided (only this domain's cells), so no neighbour data + // / halo exchange is needed. Note this leaves the field discontinuous + // across MPI domains, but VTK-m's multi-domain volume compositor seams + // regardless of publish-side continuity, so the simpler one-sided form is + // used. + if constexpr (D == Dim::_3D) { + const std::size_t nn1 = m_l_nodes[0]; + const std::size_t nn2 = m_l_nodes[1]; + const std::size_t nn3 = m_l_nodes[2]; + const long s1 = static_cast(m_downsample[0]); + const long s2 = static_cast(m_downsample[1]); + const long s3 = static_cast(m_downsample[2]); + const long lo1 = static_cast(gh + m_l_first_cell[0]); + const long lo2 = static_cast(gh + m_l_first_cell[1]); + const long lo3 = static_cast(gh + m_l_first_cell[2]); + const long hi1 = lo1 + static_cast(nn1 - 2) * s1; + const long hi2 = lo2 + static_cast(nn2 - 2) * s2; + const long hi3 = lo3 + static_cast(nn3 - 2) * s3; + Kokkos::parallel_for( + "AscentRecenter3D", + CreateRangePolicy({ 0, 0, 0 }, + { static_cast(nn1), + static_cast(nn2), + static_cast(nn3) }), + Lambda(cellidx_t i1, cellidx_t i2, cellidx_t i3) { + const long b1 = lo1 + (static_cast(i1) - 1) * s1; + const long b2 = lo2 + (static_cast(i2) - 1) * s2; + const long b3 = lo3 + (static_cast(i3) - 1) * s3; + const long a1[2] = { clamp_idx(b1, lo1, hi1), + clamp_idx(b1 + s1, lo1, hi1) }; + const long a2[2] = { clamp_idx(b2, lo2, hi2), + clamp_idx(b2 + s2, lo2, hi2) }; + const long a3[2] = { clamp_idx(b3, lo3, hi3), + clamp_idx(b3 + s3, lo3, hi3) }; + double acc = 0.0; + for (int d3 = 0; d3 < 2; ++d3) { + for (int d2 = 0; d2 < 2; ++d2) { + for (int d1 = 0; d1 < 2; ++d1) { + acc += static_cast(fld(a1[d1], a2[d2], a3[d3], comp)); + } + } + } + buf(i1 + nn1 * (i2 + nn2 * i3)) = acc * 0.125; + }); + } else if constexpr (D == Dim::_2D) { + const std::size_t nn1 = m_l_nodes[0]; + const std::size_t nn2 = m_l_nodes[1]; + const long s1 = static_cast(m_downsample[0]); + const long s2 = static_cast(m_downsample[1]); + const long lo1 = static_cast(gh + m_l_first_cell[0]); + const long lo2 = static_cast(gh + m_l_first_cell[1]); + const long hi1 = lo1 + static_cast(nn1 - 2) * s1; + const long hi2 = lo2 + static_cast(nn2 - 2) * s2; + Kokkos::parallel_for( + "AscentRecenter2D", + CreateRangePolicy({ 0, 0 }, + { static_cast(nn1), + static_cast(nn2) }), + Lambda(cellidx_t i1, cellidx_t i2) { + const long b1 = lo1 + (static_cast(i1) - 1) * s1; + const long b2 = lo2 + (static_cast(i2) - 1) * s2; + const long a1[2] = { clamp_idx(b1, lo1, hi1), + clamp_idx(b1 + s1, lo1, hi1) }; + const long a2[2] = { clamp_idx(b2, lo2, hi2), + clamp_idx(b2 + s2, lo2, hi2) }; + double acc = 0.0; + for (int d2 = 0; d2 < 2; ++d2) { + for (int d1 = 0; d1 < 2; ++d1) { + acc += static_cast(fld(a1[d1], a2[d2], comp)); + } + } + buf(i1 + nn1 * i2) = acc * 0.25; + }); + } else { // Dim::_1D + const std::size_t nn1 = m_l_nodes[0]; + const long s1 = static_cast(m_downsample[0]); + const long lo1 = static_cast(gh + m_l_first_cell[0]); + const long hi1 = lo1 + static_cast(nn1 - 2) * s1; + Kokkos::parallel_for( + "AscentRecenter1D", + nn1, + Lambda(cellidx_t i1) { + const long b1 = lo1 + (static_cast(i1) - 1) * s1; + const long a0 = clamp_idx(b1, lo1, hi1); + const long a1 = clamp_idx(b1 + s1, lo1, hi1); + buf(i1) = 0.5 * (static_cast(fld(a0, comp)) + + static_cast(fld(a1, comp))); + }); + } + Kokkos::deep_copy(m_field_buf_h, m_field_buf_d); + + // Internal field names carry a leading "f" prefix (see out::OutputField). + // The user-facing name in Ascent / Conduit is the same name without it. + const std::string short_name = (name.size() > 1u && name.front() == 'f') + ? name.substr(1) + : name; + const std::string base = "fields/" + short_name; + m_mesh[base + "/topology"] = "mesh"; + m_mesh[base + "/association"] = "vertex"; + m_mesh[base + "/values"].set(m_field_buf_h.data(), m_buf_nelem); + + // Vector-field bonus path (gated by m_vector_aliases): when the + // user-facing name ends in "1", "2", or "3" (e.g. B1/B2/B3) ALSO + // publish the same values as the matching component of an MCArray + // vector field whose name is the prefix (e.g. "B"). VTK-h filters + // that need a 3-vector (streamline, gradient, ...) can then + // reference the prefix name directly without an extra + // `composite_vector` pipeline step — that workflow currently + // produces a vector layout VTK-h's streamline backend in Ascent + // v0.9.x can't unpack as Vec/Vec. + // Disable via `output.ascent.vector_aliases = false` in the toml + // input to halve published mesh size when the actions file only + // references the scalar form (or only the vector form). + if (m_vector_aliases && short_name.size() >= 2u) { + const char idx = short_name.back(); + if (idx >= '1' && idx <= '3') { + const std::string prefix = short_name.substr(0, short_name.size() - 1u); + if (!prefix.empty()) { + static const char* const sub[] = { "u", "v", "w" }; + const std::string vec_base = "fields/" + prefix; + m_mesh[vec_base + "/topology"] = "mesh"; + m_mesh[vec_base + "/association"] = "vertex"; + m_mesh[vec_base + "/values/" + sub[idx - '1']].set( + m_field_buf_h.data(), + m_buf_nelem); + } + } + } + m_pending_render = true; + } + + auto AscentWriter::render(timestep_t step, simtime_t time) -> bool { + if (!m_initialized || !m_mesh_defined || !m_pending_render) { + return false; + } + m_mesh["state/cycle"] = static_cast(step); + m_mesh["state/time"] = static_cast(time); + m_mesh["state/domain_id"] = 0; + #if defined(MPI_ENABLED) + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + m_mesh["state/domain_id"] = rank; + #endif + + // Mesh structure (coordsets, topologies, field names/associations) is + // identical from one render to the next — only state and field values + // change. Run the full Blueprint verify once; on subsequent renders it + // is pure overhead. `defineMesh` resets `m_verified` if the layout is + // re-declared. + if (!m_verified) { + conduit::Node verify_info; + if (!conduit::blueprint::mesh::verify(m_mesh, verify_info)) { + raise::Warning("Ascent blueprint verification failed: " + + verify_info.to_yaml(), + HERE); + m_pending_render = false; + return false; + } + m_verified = true; + } + + // One-shot diagnostic: log the published-mesh layout (node grid, field + // names + associations) and drop a values-free skeleton next to the + // renders so the published structure can be verified from a run without + // re-instrumenting. Rank 0 writes the skeleton; every rank logs its line. + if (!m_summary_logged) { + std::string nodes; + for (std::size_t d = 0; d < m_l_nodes.size(); ++d) { + nodes += (d ? "x" : "") + std::to_string(m_l_nodes[d]); + } + std::string flds; + if (m_mesh.has_child("fields")) { + const auto& fields = m_mesh["fields"]; + for (conduit::index_t i = 0; i < fields.number_of_children(); ++i) { + const auto& f = fields.child(i); + const std::string assoc = f.has_child("association") + ? f["association"].as_string() + : "?"; + flds += (flds.empty() ? "" : ", ") + f.name() + "[" + assoc + "]"; + } + } + logger::Checkpoint("Ascent published mesh: nodes=" + nodes + + " fields={" + flds + "}", + HERE); + int rank0 = 0; + #if defined(MPI_ENABLED) + MPI_Comm_rank(MPI_COMM_WORLD, &rank0); + #endif + if (rank0 == 0) { + try { + // Copy structure but blank the (huge) value arrays so the dump is + // small and human-readable. + conduit::Node skel; + skel.set(m_mesh); + if (skel.has_path("coordsets/coords/values")) { + auto& cv = skel["coordsets/coords/values"]; + for (conduit::index_t i = 0; i < cv.number_of_children(); ++i) { + cv.child(i).set(""); + } + } + if (skel.has_child("fields")) { + auto& fs = skel["fields"]; + for (conduit::index_t i = 0; i < fs.number_of_children(); ++i) { + if (fs.child(i).has_child("values")) { + fs.child(i)["values"].set(""); + } + } + } + const auto skel_path = std::filesystem::path(m_root) / "plots" / + "ascent_published_skeleton.yaml"; + conduit::relay::io::save(skel, skel_path.string(), "yaml"); + } catch (...) { + // best-effort; never let the diagnostic abort a render + } + } + m_summary_logged = true; + } + + m_ascent.publish(m_mesh); + + // When we successfully pre-parsed the actions file in init(), pass it + // to execute() directly (with int64→int32 demotion already applied). + // Otherwise fall back to an empty node, so Ascent reads + // ascent_actions.yaml itself via the `actions_file` option. + // + // With a non-zero `v_drift` or `v_rot` the camera entries in the + // actions tree need a fresh time-dependent rewrite each render, so + // we copy the pristine `m_actions` base into a working node and + // transform that. The base tree must remain untouched — applying + // the rewrite in place would compound across renders. + if (m_have_actions) { + const auto v_drift_zero = m_v_drift == static_cast(0.0); + const auto v_rot_zero = m_v_rot == static_cast(0.0); + if (!v_drift_zero || !v_rot_zero) { + m_actions_work.set(m_actions); + const double dx = static_cast(m_v_drift) * + static_cast(time); + const double theta = static_cast(m_v_rot) * + static_cast(time); + applyCameraTransforms(m_actions_work, dx, theta); + m_ascent.execute(m_actions_work); + } else { + m_ascent.execute(m_actions); + } + } else { + conduit::Node actions; + m_ascent.execute(actions); + } + m_pending_render = false; + return true; + } + + // Explicit instantiations matching the storage in the simulation. + template void AscentWriter::publishField(const std::string&, + const ndfield_t&, + std::size_t); + template void AscentWriter::publishField(const std::string&, + const ndfield_t&, + std::size_t); + template void AscentWriter::publishField(const std::string&, + const ndfield_t&, + std::size_t); + +} // namespace out + +#endif // ASCENT_ENABLED diff --git a/src/output/ascent_writer.h b/src/output/ascent_writer.h new file mode 100644 index 000000000..77b3e5bfc --- /dev/null +++ b/src/output/ascent_writer.h @@ -0,0 +1,219 @@ +/** + * @file output/ascent_writer.h + * @brief Wrapper around the Ascent in situ visualization library + * @implements + * - out::AscentWriter + * @cpp: + * - ascent_writer.cpp + * @namespaces: + * - out:: + * @macros: + * - MPI_ENABLED + * - OUTPUT_ENABLED + * - ASCENT_ENABLED + * @note + * AscentWriter mirrors the layout of out::Writer but instead of dumping data + * to ADIOS2 it publishes a Conduit-Blueprint mesh to Ascent and triggers a + * user-supplied actions file to render images on the fly. + */ + +#ifndef OUTPUT_ASCENT_WRITER_H +#define OUTPUT_ASCENT_WRITER_H + +#if defined(ASCENT_ENABLED) + + #include "enums.h" + #include "global.h" + + #include "arch/kokkos_aliases.h" + #include "utils/tools.h" + + #include + #include + + #if defined(MPI_ENABLED) + #include + #endif + + #include + #include + +namespace out { + + class AscentWriter { + ascent::Ascent m_ascent; + conduit::Node m_mesh; + conduit::Node m_options; + // Pristine pre-loaded actions tree (int64 leaves already demoted to + // int32 once at init time). Used as the source for `m_actions_work` + // when a non-zero `v_drift` requires a per-render rewrite. + conduit::Node m_actions; + // Working copy with `v_drift * t` applied to camera positions / + // look-at points. Allocated lazily; only populated when v_drift != 0. + conduit::Node m_actions_work; + bool m_initialized { false }; + bool m_mesh_defined { false }; + bool m_pending_render { false }; + bool m_have_actions { false }; + bool m_vector_aliases { true }; + // Drift velocity along the x-axis (code units). At render time the + // first component of each `camera/position` and `camera/look_at` + // entry in the actions tree is shifted by `m_v_drift * time`. Zero + // means the actions file is executed unmodified. + real_t m_v_drift { 0.0 }; + // Rotational velocity (radians per code time unit) around each + // camera's `look_at` point, using `up` as the rotation axis. The + // rotation `m_v_rot * time` is applied to the camera position + // before the v_drift translation, so the camera orbits the drifted + // focus point. Zero leaves the camera unrotated. + real_t m_v_rot { 0.0 }; + // Blueprint mesh structure does not change between renders (only state + // values + field values do). Verify once, skip on subsequent renders. + bool m_verified { false }; + + Dimension m_dim { Dim::_3D }; + // m_l_shape stores the *downsampled* local cell shape (what gets + // published). m_l_first_cell + m_downsample define how to map each + // downsampled cell back to an original (full-resolution) cell index. + // m_l_shape holds the (downsampled) local *cell* counts. The published + // mesh is vertex-centered: fields are recentered to the n+1 grid nodes per + // axis. This puts every component at the same location (removing the Yee + // half-cell offset between B1/B2/B3 and J magnitudes) and is the natural + // representation for a future curvilinear coordset. Boundary nodes are + // one-sided (average only this domain's active cells), so no halo exchange + // is needed. NB: this does NOT remove the inter-domain volume-render seam, + // which is a VTK-m multi-domain compositing limitation (see the + // ascent feedback / debug notes), independent of publish-side continuity. + std::vector m_l_shape; + std::vector m_l_corner; + std::vector m_l_first_cell; + std::vector m_downsample; + // Number of published vertices per axis (= m_l_shape + 1). Cached so the + // staging buffer and publish kernels agree on the node-grid extent. + std::vector m_l_nodes; + // One-shot guard for the published-mesh diagnostic dump/log. + bool m_summary_logged { false }; + std::string m_root; + std::string m_actions_file; + std::vector m_fields; + + // Persistent staging buffers reused across all publishField() calls. + // Sized once in defineMesh(); avoids per-render device + host allocations + // and a redundant LayoutLeft → C-order host reorder. + array_t m_field_buf_d; + array_mirror_t m_field_buf_h; + std::size_t m_buf_nelem { 0 }; + + tools::Tracker m_tracker; + + public: + AscentWriter() = default; + ~AscentWriter(); + + AscentWriter(const AscentWriter&) = delete; + AscentWriter& operator=(const AscentWriter&) = delete; + + /** + * @brief Initialize the underlying ascent::Ascent instance. + * @param title Simulation name (used for the output directory). + * @param actions_file Path to a yaml/json file with Ascent actions. + * @param fields List of field names (e.g. "B3") that will be published. + * @param interval Step interval between renders (used when interval_time<=0). + * @param interval_time Sim-time interval between renders. + * @param vector_aliases If true, scalar components named B1/B2/B3 (etc.) + * are *also* published as the matching component of an MCArray + * vector field "B". Set to false to halve the data volume when + * the actions file only references the scalar form. + * @param v_drift Drift velocity along the x-axis (code units). At + * render time `v_drift * time` is added to the x-component of + * every `camera/position` and `camera/look_at` in the actions + * tree, letting the camera follow a moving window. Zero + * disables the rewrite and keeps the actions file untouched. + * @param v_rot Rotational velocity (radians per code time unit) + * applied to each camera position about its `look_at` point, + * using `up` as the rotation axis. Combined freely with + * `v_drift`. Zero leaves the camera position unrotated. + */ + void init(const std::string& title, + const std::string& actions_file, + const std::vector& fields, + timestep_t interval, + simtime_t interval_time, + bool vector_aliases, + real_t v_drift, + real_t v_rot); + + /** + * @brief Whether the writer should fire on the current cycle. + */ + auto shouldRender(timestep_t step, simtime_t time) -> bool; + + /** + * @brief Define the local rectilinear-mesh layout. + * @param dim Dimensionality of the mesh (1/2/3). + * @param l_corner Local lower-left corner in global cell index space. + * @param l_shape Local number of *downsampled* active cells in each direction. + * @param l_first_cell Per-axis offset (in original-grid cells) of the first + * published cell within the local domain. Empty defaults to zeros. + * @param downsample Per-axis stride applied when sampling the original + * full-resolution field. Empty defaults to ones (no downsampling). + */ + void defineMesh(Dimension dim, + const std::vector& l_corner, + const std::vector& l_shape, + const std::vector& l_first_cell = {}, + const std::vector& downsample = {}); + + /** + * @brief Set the cell-edge coordinate arrays for one dimension. + */ + void setMeshCoords(unsigned short dim, const array_t& xe); + + /** + * @brief Push a single field component into the mesh blueprint. + * @param name Field name (used by the actions file). + * @param fld Backing storage for the simulation fields. + * @param comp Component index to extract. + */ + template + void publishField(const std::string& name, + const ndfield_t& fld, + std::size_t comp); + + /** + * @brief Trigger the Ascent pipeline for the current step. + * @return true if the pipeline actually executed (data was pending), + * false if there was nothing to render. + */ + auto render(timestep_t step, simtime_t time) -> bool; + + /** + * @brief Whether there is data published since the last render. + */ + [[nodiscard]] + auto hasPending() const -> bool { + return m_pending_render; + } + + /** + * @brief Whether the writer was successfully initialized. + */ + [[nodiscard]] + auto initialized() const -> bool { + return m_initialized; + } + + /** + * @brief Names of fields registered for in situ rendering. + */ + [[nodiscard]] + auto fields() const -> const std::vector& { + return m_fields; + } + }; + +} // namespace out + +#endif // ASCENT_ENABLED + +#endif // OUTPUT_ASCENT_WRITER_H diff --git a/src/output/utils/writers.cpp b/src/output/utils/writers.cpp index a02f298e4..d89cd8cab 100644 --- a/src/output/utils/writers.cpp +++ b/src/output/utils/writers.cpp @@ -76,13 +76,18 @@ namespace out { } template - void WriteNDField(adios2::IO& io, - adios2::Engine& writer, - const std::string& name, - const ndfield_t& data) { + void WriteNDField(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const ndfield_t& data, + const adios2::Box& range) { + auto var = io.InquireVariable(name); + if (not range.first.empty()) { + var.SetSelection(range); + } auto data_h = Kokkos::create_mirror_view(data); Kokkos::deep_copy(data_h, data); - writer.Put(io.InquireVariable(name), data_h.data(), adios2::Mode::Sync); + writer.Put(var, data_h.data(), adios2::Mode::Sync); } // NOLINTBEGIN(bugprone-macro-parentheses) @@ -122,7 +127,8 @@ namespace out { template void WriteNDField(adios2::IO&, \ adios2::Engine&, \ const std::string&, \ - const ndfield_t&); + const ndfield_t&, \ + const adios2::Box&); NDFIELD_WRITERS(Dim::_1D, 3) NDFIELD_WRITERS(Dim::_1D, 6) NDFIELD_WRITERS(Dim::_2D, 3) diff --git a/src/output/utils/writers.h b/src/output/utils/writers.h index cbd089452..4b8909a19 100644 --- a/src/output/utils/writers.h +++ b/src/output/utils/writers.h @@ -78,7 +78,8 @@ namespace out { void WriteNDField(adios2::IO&, adios2::Engine&, const std::string&, - const ndfield_t&); + const ndfield_t&, + const adios2::Box& = {}); } // namespace out diff --git a/src/output/writer.cpp b/src/output/writer.cpp index a84745c5b..5b9ae89e2 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -73,6 +73,33 @@ namespace out { m_mode = mode; } + void Writer::setLocalLayout(const std::vector& loc_corner, + const std::vector& loc_shape) { + raise::ErrorIf(loc_corner.size() != m_flds_l_corner.size() or + loc_shape.size() != m_flds_l_shape.size(), + "setLocalLayout dim mismatch with the original layout", + HERE); + m_flds_l_corner = loc_corner; + m_flds_l_shape = loc_shape; + m_flds_l_corner_dwn.clear(); + m_flds_l_shape_dwn.clear(); + m_flds_l_first.clear(); + for (auto i { 0u }; i < m_flds_g_shape.size(); ++i) { + const double d = static_cast(m_dwn[i]); + const double l = static_cast(loc_corner[i]); + const double n = static_cast(loc_shape[i]); + const double f = math::ceil(l / d) * d - l; + m_flds_l_corner_dwn.push_back(static_cast(math::ceil(l / d))); + m_flds_l_first.push_back(static_cast(f)); + m_flds_l_shape_dwn.push_back(static_cast(math::ceil((n - f) / d))); + } + if constexpr (not std::is_same::array_layout, + Kokkos::LayoutRight>::value) { + std::reverse(m_flds_l_corner_dwn.begin(), m_flds_l_corner_dwn.end()); + std::reverse(m_flds_l_shape_dwn.begin(), m_flds_l_shape_dwn.end()); + } + } + void Writer::defineMeshLayout( const std::vector& glob_shape, const std::vector& loc_corner, @@ -111,25 +138,24 @@ namespace out { for (auto i { 0u }; i < m_flds_g_shape.size(); ++i) { // cell-centers + // ConstantDims is intentionally NOT set: per-rank slab shape can change + // when dynamic load balancing shifts domain boundaries between writes. m_io.DefineVariable("X" + std::to_string(i + 1), { m_flds_g_shape_dwn[i] }, { m_flds_l_corner_dwn[i] }, - { m_flds_l_shape_dwn[i] }, - adios2::ConstantDims); + { m_flds_l_shape_dwn[i] }); // cell-edges const auto is_last = (m_flds_l_corner[i] + m_flds_l_shape[i] == m_flds_g_shape[i]); m_io.DefineVariable("X" + std::to_string(i + 1) + "e", { m_flds_g_shape_dwn[i] + 1 }, { m_flds_l_corner_dwn[i] }, - { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }, - adios2::ConstantDims); + { m_flds_l_shape_dwn[i] + (is_last ? 1 : 0) }); m_io.DefineVariable( "N" + std::to_string(i + 1) + "l", { static_cast(2 * domain_idx.second) }, { static_cast(2 * domain_idx.first) }, - { static_cast(2) }, - adios2::ConstantDims); + { static_cast(2) }); } if constexpr (std::is_same::array_layout, @@ -154,21 +180,21 @@ namespace out { m_flds_writers.emplace_back(S, fld); } for (const auto& fld : m_flds_writers) { + // ConstantDims is intentionally NOT set: per-rank slab shape can change + // when dynamic load balancing shifts domain boundaries between writes. if (fld.comp.empty()) { // scalar m_io.DefineVariable(fld.name(), m_flds_g_shape_dwn, m_flds_l_corner_dwn, - m_flds_l_shape_dwn, - adios2::ConstantDims); + m_flds_l_shape_dwn); } else { // vector or tensor for (auto i { 0u }; i < fld.comp.size(); ++i) { m_io.DefineVariable(fld.name(i), m_flds_g_shape_dwn, m_flds_l_corner_dwn, - m_flds_l_shape_dwn, - adios2::ConstantDims); + m_flds_l_shape_dwn); } } } @@ -198,9 +224,14 @@ namespace out { std::size_t comp, std::vector dwn, std::vector first_cell, - bool ghosts) { + bool ghosts, + const adios2::Dims& loc_corner_dwn, + const adios2::Dims& loc_shape_dwn) { // when dwn != 1 in any direction, it is assumed that ghosts == false - auto var = io.InquireVariable(varname); + auto var = io.InquireVariable(varname); + // Refresh the per-step (start, count) so the slab tracks any rebalance + // that happened since the variable was declared. + var.SetSelection(adios2::Box(loc_corner_dwn, loc_shape_dwn)); const auto gh_zones = ghosts ? 0 : N_GHOSTS; ndarray_t output_field {}; @@ -332,7 +363,9 @@ namespace out { addresses[i], m_dwn, m_flds_l_first, - m_flds_ghosts); + m_flds_ghosts, + m_flds_l_corner_dwn, + m_flds_l_shape_dwn); } } @@ -410,8 +443,28 @@ namespace out { const array_t& xc, const array_t& xe, const std::vector& loc_off_sz) { + // Per-step (start, count) for the per-rank slab; tracks the (possibly + // rebalanced) layout cached by setLocalLayout(). auto varc = m_io.InquireVariable("X" + std::to_string(dim + 1)); auto vare = m_io.InquireVariable("X" + std::to_string(dim + 1) + "e"); + // m_flds_l_corner_dwn / m_flds_l_shape_dwn are reversed for non-LayoutRight + // (see defineMeshLayout / setLocalLayout); m_flds_l_corner / m_flds_l_shape + // / m_flds_g_shape are not. Map the dim-order index to the dwn-array index. + constexpr bool layout_right = std::is_same< + typename ndfield_t::array_layout, + Kokkos::LayoutRight>::value; + const auto i_dwn = layout_right + ? static_cast(dim) + : (m_flds_g_shape.size() - 1u - + static_cast(dim)); + const auto is_last = (m_flds_l_corner[dim] + m_flds_l_shape[dim] == + m_flds_g_shape[dim]); + varc.SetSelection(adios2::Box( + { m_flds_l_corner_dwn[i_dwn] }, + { m_flds_l_shape_dwn[i_dwn] })); + vare.SetSelection(adios2::Box( + { m_flds_l_corner_dwn[i_dwn] }, + { m_flds_l_shape_dwn[i_dwn] + (is_last ? 1ul : 0ul) })); auto xc_h = Kokkos::create_mirror_view(xc); auto xe_h = Kokkos::create_mirror_view(xe); Kokkos::deep_copy(xc_h, xc); @@ -506,7 +559,9 @@ namespace out { std::size_t, \ std::vector, \ std::vector, \ - bool); + bool, \ + const adios2::Dims&, \ + const adios2::Dims&); WRITE_FIELD(Dim::_1D, 3) WRITE_FIELD(Dim::_1D, 6) WRITE_FIELD(Dim::_2D, 3) diff --git a/src/output/writer.h b/src/output/writer.h index 055cb215e..7231c738b 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -109,6 +109,17 @@ namespace out { bool, Coord); + /** + * @brief Refresh the cached per-rank slab corner and shape. + * + * Must be called before any per-step write whenever the local domain has + * been resized (e.g. by `Metadomain::Rebalance`); the cached values are + * passed to `var.SetSelection()` inside `writeMesh` and `WriteField`. + * The global shape is preserved (load balancing conserves total cells). + */ + void setLocalLayout(const std::vector& loc_corner, + const std::vector& loc_shape); + void defineFieldOutputs(const SimEngine&, const std::vector&); void defineSpectraOutputs(const std::vector&);