Skip to content

Commit 2ddbfe5

Browse files
committed
add Flux2FlowDenoiser
1 parent 7a2a7d0 commit 2ddbfe5

File tree

5 files changed

+63
-12
lines changed

5 files changed

+63
-12
lines changed

denoiser.hpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ struct Denoiser {
356356
virtual ggml_tensor* noise_scaling(float sigma, ggml_tensor* noise, ggml_tensor* latent) = 0;
357357
virtual ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) = 0;
358358

359-
virtual std::vector<float> get_sigmas(uint32_t n, scheduler_t scheduler_type, SDVersion version) {
359+
virtual std::vector<float> get_sigmas(uint32_t n, int /*image_seq_len*/, scheduler_t scheduler_type, SDVersion version) {
360360
auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
361361
std::shared_ptr<SigmaScheduler> scheduler;
362362
switch (scheduler_type) {
@@ -582,10 +582,14 @@ struct FluxFlowDenoiser : public Denoiser {
582582
set_parameters(shift);
583583
}
584584

585-
void set_parameters(float shift = 1.15f) {
585+
void set_shift(float shift) {
586586
this->shift = shift;
587-
for (int i = 1; i < TIMESTEPS + 1; i++) {
588-
sigmas[i - 1] = t_to_sigma(i / TIMESTEPS * TIMESTEPS);
587+
}
588+
589+
void set_parameters(float shift) {
590+
set_shift(shift);
591+
for (int i = 0; i < TIMESTEPS; i++) {
592+
sigmas[i] = t_to_sigma(i);
589593
}
590594
}
591595

@@ -627,6 +631,38 @@ struct FluxFlowDenoiser : public Denoiser {
627631
}
628632
};
629633

634+
struct Flux2FlowDenoiser : public FluxFlowDenoiser {
635+
Flux2FlowDenoiser() = default;
636+
637+
float compute_empirical_mu(uint32_t n, int image_seq_len) {
638+
const float a1 = 8.73809524e-05f;
639+
const float b1 = 1.89833333f;
640+
const float a2 = 0.00016927f;
641+
const float b2 = 0.45666666f;
642+
643+
if (image_seq_len > 4300) {
644+
float mu = a2 * image_seq_len + b2;
645+
return mu;
646+
}
647+
648+
float m_200 = a2 * image_seq_len + b2;
649+
float m_10 = a1 * image_seq_len + b1;
650+
651+
float a = (m_200 - m_10) / 190.0f;
652+
float b = m_200 - 200.0f * a;
653+
float mu = a * n + b;
654+
655+
return mu;
656+
}
657+
658+
std::vector<float> get_sigmas(uint32_t n, int image_seq_len, scheduler_t scheduler_type, SDVersion version) override {
659+
float mu = compute_empirical_mu(n, image_seq_len);
660+
LOG_DEBUG("Flux2FlowDenoiser: set shift to %.3f", mu);
661+
set_shift(mu);
662+
return Denoiser::get_sigmas(n, image_seq_len, scheduler_type, version);
663+
}
664+
};
665+
630666
typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
631667

632668
// k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t

examples/cli/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ Options:
101101
-s, --seed RNG seed (default: 42, use random seed for < 0)
102102
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
103103
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
104-
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]
104+
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
105105
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
106106
contain any quantized parameters, the at_runtime mode will be used; otherwise,
107107
immediately will be used. The immediately mode may have precision and

examples/cli/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
11931193
on_sample_method_arg},
11941194
{"",
11951195
"--prediction",
1196-
"prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]",
1196+
"prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]",
11971197
on_prediction_arg},
11981198
{"",
11991199
"--lora-apply-mode",

stable-diffusion.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,11 @@ class StableDiffusionGGML {
802802
denoiser = std::make_shared<FluxFlowDenoiser>(shift);
803803
break;
804804
}
805+
case FLUX2_FLOW_PRED: {
806+
LOG_INFO("running in Flux2 FLOW mode");
807+
denoiser = std::make_shared<Flux2FlowDenoiser>();
808+
break;
809+
}
805810
default: {
806811
LOG_ERROR("Unknown parametrization %i", sd_ctx_params->prediction);
807812
return false;
@@ -834,7 +839,7 @@ class StableDiffusionGGML {
834839
shift = 3.0;
835840
}
836841
denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
837-
} else if (sd_version_is_flux(version) || sd_version_is_flux2(version)) {
842+
} else if (sd_version_is_flux(version)) {
838843
LOG_INFO("running in Flux FLOW mode");
839844
float shift = sd_ctx_params->flow_shift;
840845
if (shift == INFINITY) {
@@ -844,11 +849,11 @@ class StableDiffusionGGML {
844849
shift = 1.15f;
845850
}
846851
}
847-
if (sd_version_is_flux2(version)) {
848-
shift = 2.05f;
849-
}
850852
}
851853
denoiser = std::make_shared<FluxFlowDenoiser>(shift);
854+
} else if (sd_version_is_flux2(version)) {
855+
LOG_INFO("running in Flux2 FLOW mode");
856+
denoiser = std::make_shared<Flux2FlowDenoiser>();
852857
} else if (sd_version_is_wan(version)) {
853858
LOG_INFO("running in FLOW mode");
854859
float shift = sd_ctx_params->flow_shift;
@@ -1869,6 +1874,11 @@ class StableDiffusionGGML {
18691874
return latent_channel;
18701875
}
18711876

1877+
int get_image_seq_len(int h, int w) {
1878+
int vae_scale_factor = get_vae_scale_factor();
1879+
return (h / vae_scale_factor) * (w / vae_scale_factor);
1880+
}
1881+
18721882
ggml_tensor* generate_init_latent(ggml_context* work_ctx,
18731883
int width,
18741884
int height,
@@ -2361,6 +2371,7 @@ const char* prediction_to_str[] = {
23612371
"edm_v",
23622372
"sd3_flow",
23632373
"flux_flow",
2374+
"flux2_flow",
23642375
};
23652376

23662377
const char* sd_prediction_name(enum prediction_t prediction) {
@@ -3131,7 +3142,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
31313142
LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);
31323143

31333144
int sample_steps = sd_img_gen_params->sample_params.sample_steps;
3134-
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps, sd_img_gen_params->sample_params.scheduler, sd_ctx->sd->version);
3145+
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps,
3146+
sd_ctx->sd->get_image_seq_len(height, width),
3147+
sd_img_gen_params->sample_params.scheduler,
3148+
sd_ctx->sd->version);
31353149

31363150
ggml_tensor* init_latent = nullptr;
31373151
ggml_tensor* concat_latent = nullptr;
@@ -3384,7 +3398,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
33843398
if (high_noise_sample_steps > 0) {
33853399
total_steps += high_noise_sample_steps;
33863400
}
3387-
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, sd_vid_gen_params->sample_params.scheduler, sd_ctx->sd->version);
3401+
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(total_steps, 0, sd_vid_gen_params->sample_params.scheduler, sd_ctx->sd->version);
33883402

33893403
if (high_noise_sample_steps < 0) {
33903404
// timesteps ∝ sigmas for Flow models (like wan2.2 a14b)

stable-diffusion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ enum prediction_t {
7171
EDM_V_PRED,
7272
SD3_FLOW_PRED,
7373
FLUX_FLOW_PRED,
74+
FLUX2_FLOW_PRED,
7475
PREDICTION_COUNT
7576
};
7677

0 commit comments

Comments
 (0)