@@ -2532,8 +2532,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
25322532 sd_image_to_ggml_tensor (sd_img_gen_params->mask_image , mask_img);
25332533 sd_image_to_ggml_tensor (sd_img_gen_params->init_image , init_img);
25342534
2535- init_latent = sd_ctx->sd ->encode_first_stage (work_ctx, init_img);
2536-
25372535 if (sd_version_is_inpaint (sd_ctx->sd ->version )) {
25382536 int64_t mask_channels = 1 ;
25392537 if (sd_ctx->sd ->version == VERSION_FLUX_FILL) {
@@ -2548,8 +2546,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
25482546 ggml_tensor* masked_img = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, width, height, 3 , 1 );
25492547 ggml_ext_tensor_apply_mask (init_img, mask_img, masked_img);
25502548 masked_latent = sd_ctx->sd ->encode_first_stage (work_ctx, masked_img);
2549+ init_latent = sd_ctx->sd ->encode_first_stage (work_ctx, init_img);
25512550 } else {
25522551 // mask after vae
2552+ init_latent = sd_ctx->sd ->encode_first_stage (work_ctx, init_img);
25532553 masked_latent = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, init_latent->ne [0 ], init_latent->ne [1 ], init_latent->ne [2 ], 1 );
25542554 ggml_ext_tensor_apply_mask (init_latent, mask_img, masked_latent, 0 .);
25552555 }
@@ -2593,6 +2593,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
25932593 }
25942594 }
25952595 }
2596+ } else {
2597+ init_latent = sd_ctx->sd ->encode_first_stage (work_ctx, init_img);
25962598 }
25972599
25982600 {
0 commit comments