Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions input.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,35 @@
# @example: [2, 2, 2] (total of 8 domains)
decomposition = ""

# Diffusion-style dynamic load balancing (Cartesian metrics only).
# Domain boundaries between MPI neighbors are nudged to equalize the
# active-particle count per rank. All inter-rank traffic uses only the
# existing nearest-neighbor field/particle communication paths.
[simulation.domain.load_balance]
# Enable dynamic load balancing
# @type: bool
# @default: false
enable = ""
# Run the rebalancer every `interval` timesteps (0 disables)
# @type: int
# @default: 0
interval = ""
# Dimensions along which load is redistributed (1 = x1, 2 = x2, 3 = x3)
# @type: array of int, subset of [1, 2, 3]
# @default: [1]
dimensions = ""
# Skip rebalancing along a dimension when the relative spread,
# (max - min) / mean, of the per-slice particle count is below this fraction
# @type: float
# @default: 0.1
tolerance = ""
# Maximum cell-shift per interior boundary per event; clamped to N_GHOSTS
# (a compile-time constant) so the migrating field strip is already cached
# in the rank's ghost zone.
# @type: int
# @default: N_GHOSTS
max_shift = ""

[grid]
# Spatial resolution of the grid
# @required
Expand Down
33 changes: 33 additions & 0 deletions src/engines/engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ namespace ntt {
"ParticlePusher", "FieldBoundaries",
"ParticleBoundaries", "Communications",
"Injector", "Custom",
"LoadBalance",
"ParticleSort", "Output",
"Checkpoint" },
[]() {
Expand All @@ -267,6 +268,17 @@ namespace ntt {
const auto clear_interval = m_params.template get<timestep_t>(
"particles.clear_interval");

const auto lb_enable = m_params.template get<bool>(
"simulation.domain.load_balance.enable");
const auto lb_interval = m_params.template get<timestep_t>(
"simulation.domain.load_balance.interval");
const auto lb_dim_mask = m_params.template get<unsigned int>(
"simulation.domain.load_balance.dim_mask");
const auto lb_tolerance = m_params.template get<real_t>(
"simulation.domain.load_balance.tolerance");
const auto lb_max_shift = m_params.template get<unsigned int>(
"simulation.domain.load_balance.max_shift");

// main algorithm loop
while (step < max_steps) {
// run the engine-dependent algorithm step
Expand All @@ -282,6 +294,27 @@ namespace ntt {
});
timers.stop("Custom");
}
if constexpr (MetricClass<M>) {
if (lb_enable and lb_interval > 0 and (step + 1) % lb_interval == 0 and
lb_dim_mask != 0u) {
timers.start("LoadBalance");
m_metadomain.Rebalance(lb_dim_mask,
lb_tolerance,
static_cast<ncells_t>(lb_max_shift));
timers.stop("LoadBalance");
}
}
// Sort particles last — after step_forward, CustomPostStep, and
// LoadBalance — so the tile layout (and dead-particle compaction) the
// next step's deposit relies on reflects every particle change made this
// step: moving-window shift/injection and in-place dead-tagging done in
// CustomPostStep, plus any rebalance migration.
timers.start("ParticleSort");
m_metadomain.runOnLocalDomains([this](auto& dom) {
m_metadomain.SortParticles(time, step, m_params, dom);
});
timers.stop("ParticleSort");

auto print_prtl_clear = (clear_interval > 0 and
step % clear_interval == 0 and step > 0);

Expand Down
6 changes: 3 additions & 3 deletions src/engines/grpic/grpic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,9 +613,9 @@ namespace ntt {
timers.stop("FieldBoundaries");
}

timers.start("ParticleSort");
m_metadomain.SortParticles(time, step, m_params, dom);
timers.stop("ParticleSort");
// NOTE: particle sorting is intentionally NOT done here. It runs once per
// step in the engine loop (Engine::run) after CustomPostStep and
// LoadBalance — see the SRPIC engine for the rationale.

/**
* Finally: em0::B at n-1/2
Expand Down
7 changes: 4 additions & 3 deletions src/engines/srpic/srpic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,10 @@ namespace ntt {
timers.stop("Injector");
}

timers.start("ParticleSort");
m_metadomain.SortParticles(time, step, m_params, dom);
timers.stop("ParticleSort");
// NOTE: particle sorting is intentionally NOT done here. It runs once per
// step in the engine loop (Engine::run) after CustomPostStep and
// LoadBalance, so the layout the next deposit uses reflects window
// shifts/injection and dead-tagging performed in CustomPostStep.
}
};

Expand Down
2 changes: 2 additions & 0 deletions src/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# * domain/metadomain_stats.cpp
# * domain/metadomain_io.cpp
# * domain/metadomain_reshape.cpp
# * domain/metadomain_loadbal.cpp
# * containers/particles.cpp
# * containers/particles_comm.cpp
# * containers/particles_io.cpp
Expand Down Expand Up @@ -59,6 +60,7 @@ set(SOURCES
${SRC_DIR}/domain/metadomain_sort.cpp
${SRC_DIR}/domain/metadomain_stats.cpp
${SRC_DIR}/domain/metadomain_reshape.cpp
${SRC_DIR}/domain/metadomain_loadbal.cpp
${SRC_DIR}/containers/particles.cpp
${SRC_DIR}/containers/particles_sort.cpp
${SRC_DIR}/containers/fields.cpp)
Expand Down
5 changes: 4 additions & 1 deletion src/framework/containers/fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,10 @@ namespace ntt {
void CheckpointRead(adios2::IO&,
adios2::Engine&,
const adios2::Box<adios2::Dims>&);
void CheckpointWrite(adios2::IO&, adios2::Engine&) const;
void CheckpointWrite(adios2::IO&,
adios2::Engine&,
const std::vector<ncells_t>&,
const std::vector<ncells_t>&) const;
#endif
};

Expand Down
27 changes: 22 additions & 5 deletions src/framework/containers/fields_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,27 @@ namespace ntt {
}

template <Dimension D, SimEngine::type S>
void Fields<D, S>::CheckpointWrite(adios2::IO& io, adios2::Engine& writer) const {
void Fields<D, S>::CheckpointWrite(
adios2::IO& io,
adios2::Engine& writer,
const std::vector<ncells_t>& local_shape,
const std::vector<ncells_t>& local_offset) const {
logger::Checkpoint("Writing fields checkpoint", HERE);

out::WriteNDField<D, 6>(io, writer, "em", em);
// Per-rank slab: re-set the variable selection to track the (possibly
// rebalanced) local layout. The component axis is always full.
auto build_range = [&](unsigned short ncomp) {
auto start = adios2::Dims(local_offset.begin(), local_offset.end());
auto count = adios2::Dims(local_shape.begin(), local_shape.end());
start.push_back(0);
count.push_back(ncomp);
return adios2::Box<adios2::Dims>(start, count);
};

out::WriteNDField<D, 6>(io, writer, "em", em, build_range(6));
if (S == ntt::SimEngine::GRPIC) {
out::WriteNDField<D, 6>(io, writer, "em0", em0);
out::WriteNDField<D, 3>(io, writer, "cur", cur);
out::WriteNDField<D, 6>(io, writer, "em0", em0, build_range(6));
out::WriteNDField<D, 3>(io, writer, "cur", cur, build_range(3));
}
}

Expand All @@ -81,7 +95,10 @@ namespace ntt {
template void Fields<D, S>::CheckpointRead(adios2::IO&, \
adios2::Engine&, \
const adios2::Box<adios2::Dims>&); \
template void Fields<D, S>::CheckpointWrite(adios2::IO&, adios2::Engine&) const;
template void Fields<D, S>::CheckpointWrite(adios2::IO&, \
adios2::Engine&, \
const std::vector<ncells_t>&, \
const std::vector<ncells_t>&) const;

FIELDS_CHECKPOINTS(Dim::_1D, SimEngine::SRPIC)
FIELDS_CHECKPOINTS(Dim::_2D, SimEngine::SRPIC)
Expand Down
4 changes: 4 additions & 0 deletions src/framework/domain/domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,10 @@ namespace ntt {
m_neighbor_idx[dir] = idx;
}

/**
 * @brief Overwrite the stored per-dimension cell offset of this domain.
 * @param off new offset values; copied into m_offset_ncells as-is
 * @note No size or consistency check is performed here; the caller is
 * responsible for passing one entry per dimension.
 */
void set_offset_ncells(const std::vector<ncells_t>& off) {
m_offset_ncells = off;
}

/* printer overload ----------------------------------------------------- */
auto Report() const -> std::string {
std::string report;
Expand Down
9 changes: 9 additions & 0 deletions src/framework/domain/mesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ namespace ntt {
new (&metric) M { this->m_resolution, new_extent, m_metric_params_raw };
}

/**
 * @brief Replace both the resolution and the physical extent of the mesh
 * and rebuild the metric from the new values.
 * @param new_res new number of cells per dimension (must have D entries)
 * @param new_extent new physical extent per dimension (must have D entries)
 * @note Both arguments are validated before any member is modified, so a
 * size mismatch is reported while the mesh and its metric are still intact.
 */
void set_resolution_and_extent(const std::vector<ncells_t>& new_res,
                               const boundaries_t<real_t>& new_extent) {
  raise::ErrorIf(new_res.size() != D, "invalid resolution dim", HERE);
  raise::ErrorIf(new_extent.size() != D, "invalid extent dim", HERE);
  this->m_resolution = new_res;
  m_extent           = new_extent;
  // the metric is constructed from (resolution, extent, raw params), so it
  // is destroyed and re-constructed in place from the updated members
  metric.~M();
  new (&metric) M { this->m_resolution, m_extent, m_metric_params_raw };
}

/**
* @brief Get the intersection of the mesh with a box
* @param box physical extent
Expand Down
17 changes: 17 additions & 0 deletions src/framework/domain/metadomain.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,23 @@ namespace ntt {
/* domain update-related ------------------------------------------------ */
void ShiftByCells(int, in = in::x1);

/**
* @brief Rebalance the load (active particles) across MPI domains by
* shifting interior domain boundaries between neighbors.
* @param dim_mask bitmask selecting the dimensions to balance:
* bit 0 => x1, bit 1 => x2, bit 2 => x3
* @param tolerance skip if (max-min)/mean of the per-slice load is below
* this fraction
* @param max_shift_cells per-event cap for any single boundary movement,
* additionally clamped to N_GHOSTS so the field strip we need is already
* present in the local ghost zone
* @note Only neighbor communication is used (CommunicateFields ghosts +
* CommunicateParticles).
* @note Constrained by the requires-clause below: available only when
* MetricClass<M> is satisfied.
*/
void Rebalance(unsigned int dim_mask,
real_t tolerance,
ncells_t max_shift_cells)
requires(MetricClass<M>);

/* output-related ------------------------------------------------------- */
#if defined(OUTPUT_ENABLED)
void InitWriter(adios2::ADIOS*, const SimulationParams&);
Expand Down
13 changes: 12 additions & 1 deletion src/framework/domain/metadomain_chckpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,19 @@ namespace ntt {
}
params.saveTOML(g_checkpoint_writer.written().back().second, current_time);

// Recompute the local with-ghosts shape/offset on every checkpoint write so
// the ADIOS variable selection tracks any rebalance that has happened
// since InitCheckpointWriter.
std::vector<ncells_t> loc_off_with_ghosts;
for (auto d { 0u }; d < M::Dim; ++d) {
loc_off_with_ghosts.push_back(
local_domain->offset_ncells()[d] +
2 * N_GHOSTS * local_domain->offset_ndomains()[d]);
}
local_domain->fields.CheckpointWrite(g_checkpoint_writer.io(),
g_checkpoint_writer.writer());
g_checkpoint_writer.writer(),
local_domain->mesh.n_all(),
loc_off_with_ghosts);
#if !defined(MPI_ENABLED)
const std::size_t dom_tot = 1, dom_offset = 0;
#else
Expand Down
3 changes: 3 additions & 0 deletions src/framework/domain/metadomain_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,9 @@ namespace ntt {
}
}
}
// Refresh the writer's cached per-rank slab so that field/mesh writes
// pick up the (possibly rebalanced) current local layout.
g_writer.setLocalLayout(off_ncells_with_ghosts, loc_shape_with_ghosts);
for (auto dim { 0u }; dim < M::Dim; ++dim) {
const auto l_size = local_domain->mesh.n_active()[dim];
const auto l_offset = local_domain->offset_ncells()[dim];
Expand Down
Loading
Loading