Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,10 @@ ArgOptions SDContextParams::get_options() {
};

options.bool_options = {
{"",
"--stream-layers",
"enable residency+prefetch streaming on top of --max-vram (no effect without --max-vram; defaults to false)",
true, &stream_layers},
{"",
"--force-sdxl-vae-conv-scale",
"force use of conv scale on sdxl vae",
Expand Down Expand Up @@ -720,6 +724,7 @@ std::string SDContextParams::to_string() const {
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " max_vram: " << max_vram << ",\n"
<< " stream_layers: " << (stream_layers ? "true" : "false") << ",\n"
<< " backend: \"" << backend << "\",\n"
<< " params_backend: \"" << params_backend << "\",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
Expand Down Expand Up @@ -800,6 +805,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
qwen_image_zero_cond_t,
str_to_vae_format(vae_format),
max_vram,
stream_layers,
backend.c_str(),
params_backend.c_str(),
};
Expand Down
1 change: 1 addition & 0 deletions examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ struct SDContextParams {
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false;
float max_vram = 0.f;
bool stream_layers = false;
std::string backend;
std::string params_backend;
bool enable_mmap = false;
Expand Down
1 change: 1 addition & 0 deletions include/stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ typedef struct {
bool qwen_image_zero_cond_t;
enum sd_vae_format_t vae_format;
float max_vram; // GiB budget for graph-cut segmented param offload (0 = disabled, -1 = auto free VRAM minus 1 GiB)
bool stream_layers; // Enable residency+prefetch streaming on top of --max-vram (no effect without --max-vram)
const char* backend;
const char* params_backend;
} sd_ctx_params_t;
Expand Down
43 changes: 43 additions & 0 deletions src/conditioner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct Conditioner {
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0;
virtual size_t get_params_buffer_size() = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) {}
virtual void set_stream_layers_enabled(bool enabled) {}
virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
virtual std::tuple<SDCondition, std::vector<bool>> get_learned_condition_with_trigger(int n_threads,
Expand Down Expand Up @@ -210,6 +211,13 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
}
}

// Forwards the stream-layers toggle to the primary text model and, for SDXL
// versions only, to the second text model as well.
void set_stream_layers_enabled(bool enabled) override {
    text_model->set_stream_layers_enabled(enabled);
    if (!sd_version_is_sdxl(version)) {
        // Non-SDXL versions only touch the primary text model.
        return;
    }
    text_model2->set_stream_layers_enabled(enabled);
}

void set_flash_attention_enabled(bool enabled) override {
text_model->set_flash_attention_enabled(enabled);
if (sd_version_is_sdxl(version)) {
Expand Down Expand Up @@ -843,6 +851,18 @@ struct SD3CLIPEmbedder : public Conditioner {
}
}

// Forwards the stream-layers toggle to each text encoder that is present
// (clip_l, clip_g, t5, in that order); absent encoders are skipped.
void set_stream_layers_enabled(bool enabled) override {
    const auto forward_flag = [enabled](const auto& encoder) {
        if (encoder) {
            encoder->set_stream_layers_enabled(enabled);
        }
    };
    forward_flag(clip_l);
    forward_flag(clip_g);
    forward_flag(t5);
}

void set_flash_attention_enabled(bool enabled) override {
if (clip_l) {
clip_l->set_flash_attention_enabled(enabled);
Expand Down Expand Up @@ -1200,6 +1220,15 @@ struct FluxCLIPEmbedder : public Conditioner {
}
}

// Forwards the stream-layers toggle to clip_l and then t5, skipping whichever
// of the two is not set.
void set_stream_layers_enabled(bool enabled) override {
    const auto forward_flag = [enabled](const auto& encoder) {
        if (encoder) {
            encoder->set_stream_layers_enabled(enabled);
        }
    };
    forward_flag(clip_l);
    forward_flag(t5);
}

void set_flash_attention_enabled(bool enabled) override {
if (clip_l) {
clip_l->set_flash_attention_enabled(enabled);
Expand Down Expand Up @@ -1434,6 +1463,12 @@ struct T5CLIPEmbedder : public Conditioner {
}
}

// Forwards the stream-layers toggle to the t5 encoder; does nothing when t5 is
// not set.
void set_stream_layers_enabled(bool enabled) override {
    if (!t5) {
        return;
    }
    t5->set_stream_layers_enabled(enabled);
}

void set_flash_attention_enabled(bool enabled) override {
if (t5) {
t5->set_flash_attention_enabled(enabled);
Expand Down Expand Up @@ -1617,6 +1652,10 @@ struct AnimaConditioner : public Conditioner {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}

// Forwards the stream-layers toggle to the wrapped `llm` object.
// `llm` is dereferenced unconditionally (no null check), as in the sibling
// set_flash_attention_enabled override below.
void set_stream_layers_enabled(bool enabled) override {
llm->set_stream_layers_enabled(enabled);
}

void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled);
}
Expand Down Expand Up @@ -1765,6 +1804,10 @@ struct LLMEmbedder : public Conditioner {
llm->set_max_graph_vram_bytes(max_vram_bytes);
}

// Forwards the stream-layers toggle to the wrapped `llm` object.
// `llm` is dereferenced unconditionally (no null check), as in the sibling
// set_flash_attention_enabled override below.
void set_stream_layers_enabled(bool enabled) override {
llm->set_stream_layers_enabled(enabled);
}

void set_flash_attention_enabled(bool enabled) override {
llm->set_flash_attention_enabled(enabled);
}
Expand Down
Loading
Loading