@@ -120,14 +120,19 @@ struct SDParams {
120120 bool canny_preprocess = false ;
121121 bool color = false ;
122122 int upscale_repeats = 1 ;
123+
124+ std::vector<int > skip_layers = {7 , 8 , 9 };
125+ float slg_scale = 2.5 ;
126+ float skip_layer_start = 0.01 ;
127+ float skip_layer_end = 0.2 ;
123128};
124129
125130void print_params (SDParams params) {
126131 printf (" Option: \n " );
127132 printf (" n_threads: %d\n " , params.n_threads );
128133 printf (" mode: %s\n " , modes_str[params.mode ]);
129134 printf (" model_path: %s\n " , params.model_path .c_str ());
130- printf (" wtype: %s\n " , params.wtype < SD_TYPE_COUNT ? sd_type_name (params.wtype ) : " unspecified" );
135+ printf (" wtype: %s\n " , params.wtype < SD_TYPE_COUNT ? sd_type_name (params.wtype ) : " unspecified" );
131136 printf (" fallback_type: %s\n " , params.ftype < SD_TYPE_COUNT ? sd_type_name (params.ftype ) : " unspecified" );
132137 printf (" clip_l_path: %s\n " , params.clip_l_path .c_str ());
133138 printf (" clip_g_path: %s\n " , params.clip_g_path .c_str ());
@@ -201,6 +206,11 @@ void print_usage(int argc, const char* argv[]) {
201206 printf (" -p, --prompt [PROMPT] the prompt to render\n " );
202207 printf (" -n, --negative-prompt PROMPT the negative prompt (default: \"\" )\n " );
203208 printf (" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n " );
// Skip layer guidance (SLG) options. The flag spellings printed here must match the string literals compared in parse_args, which uses hyphens.
209+ printf (" --slg enable skip layer guidance (CFG variant)\n " );
210+ printf (" --skip-layers LAYERS Layers to skip for skip layer CFG (requires --slg): (default: [7,8,9])\n " );
211+ printf (" --slg-scale SCALE skip layer guidance scale (requires --slg): (default: 2.5)\n " );
212+ printf (" --skip-layer-start START skip layer enabling point (* steps) (requires --slg): (default: 0.01)\n " );
213+ printf (" --skip-layer-end END skip layer disabling point (* steps) (requires --slg): (default: 0.2)\n " );
204214 printf (" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n " );
205215 printf (" --style-ratio STYLE-RATIO strength for keeping input identity (default: 20%%)\n " );
206216 printf (" --control-strength STRENGTH strength to apply Control Net (default: 0.9)\n " );
@@ -227,6 +237,7 @@ void print_usage(int argc, const char* argv[]) {
227237
228238void parse_args (int argc, const char ** argv, SDParams& params) {
229239 bool invalid_arg = false ;
240+ bool cfg_skip = false ;
230241 std::string arg;
231242 for (int i = 1 ; i < argc; i++) {
232243 arg = argv[i];
@@ -563,6 +574,63 @@ void parse_args(int argc, const char** argv, SDParams& params) {
563574 params.verbose = true ;
564575 } else if (arg == " --color" ) {
565576 params.color = true ;
} else if (arg == "--slg") {
    // Only record the request here. After the argument loop, parse_args
    // clears params.skip_layers when this flag was never given.
    cfg_skip = true;
} else if (arg == "--skip-layers") {
    // Expects a bracketed integer list such as [7,8,9]. An unquoted list
    // that contains spaces arrives split over several argv entries, so
    // entries are re-joined until one of them ends with ']'.
    if (++i >= argc) {
        invalid_arg = true;
        break;
    }
    if (argv[i][0] != '[') {
        invalid_arg = true;
        break;
    }
    std::string layers_str = argv[i];
    while (layers_str.back() != ']') {
        if (++i >= argc) {
            invalid_arg = true;
            break;
        }
        layers_str += " " + std::string(argv[i]);
    }
    if (invalid_arg) {
        // Arguments ran out before the closing ']'. Stop before parsing
        // or storing a truncated list.
        break;
    }
    // Drop the surrounding brackets. The string starts with '[' and ends
    // with ']', so it is at least two characters long here.
    layers_str = layers_str.substr(1, layers_str.size() - 2);

    // Split on runs of commas and/or spaces.
    std::regex regex("[, ]+");
    std::sregex_token_iterator iter(layers_str.begin(), layers_str.end(), regex, -1);
    std::sregex_token_iterator end;
    std::vector<int> layers;
    for (; iter != end; ++iter) {
        const std::string token = iter->str();
        if (token.empty()) {
            // Produced by "[]" or by a separator directly after '['.
            continue;
        }
        try {
            std::size_t used = 0;
            const int layer  = std::stoi(token, &used);
            if (used != token.size()) {
                // Trailing garbage such as "7abc" is an error, not 7.
                invalid_arg = true;
                break;
            }
            layers.push_back(layer);
        } catch (const std::exception&) {
            // std::stoi throws std::invalid_argument for non-numeric text
            // and std::out_of_range for values that do not fit in an int.
            invalid_arg = true;
            break;
        }
    }
    if (invalid_arg) {
        break;
    }
    // Store the list only once every token has parsed successfully.
    params.skip_layers = layers;
} else if (arg == "--slg-scale") {
    if (++i >= argc) {
        invalid_arg = true;
        break;
    }
    try {
        params.slg_scale = std::stof(argv[i]);
    } catch (const std::exception&) {
        // Non-numeric or out-of-range value: report it as a bad argument
        // instead of letting the exception terminate the program.
        invalid_arg = true;
        break;
    }
} else if (arg == "--skip-layer-start") {
    if (++i >= argc) {
        invalid_arg = true;
        break;
    }
    try {
        params.skip_layer_start = std::stof(argv[i]);
    } catch (const std::exception&) {
        invalid_arg = true;
        break;
    }
} else if (arg == "--skip-layer-end") {
    if (++i >= argc) {
        invalid_arg = true;
        break;
    }
    try {
        params.skip_layer_end = std::stof(argv[i]);
    } catch (const std::exception&) {
        invalid_arg = true;
        break;
    }
566634 } else {
567635 fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
568636 print_usage (argc, argv);
@@ -578,6 +646,11 @@ void parse_args(int argc, const char** argv, SDParams& params) {
578646 params.n_threads = get_num_physical_cores ();
579647 }
580648
649+ if (!cfg_skip) {
650+ // set skip_layers to empty
651+ params.skip_layers .clear ();
652+ }
653+
581654 if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt .length () == 0 ) {
582655 fprintf (stderr, " error: the following arguments are required: prompt\n " );
583656 print_usage (argc, argv);
@@ -771,7 +844,6 @@ void step_callback(int step, struct ggml_tensor* latents, enum SDVersion version
771844
772845 const float (*latent_rgb_proj)[channel];
773846
774-
775847 if (dim == 16 ) {
776848 // 16 channels VAE -> Flux or SD3
777849
@@ -990,6 +1062,10 @@ int main(int argc, const char* argv[]) {
9901062 params.style_ratio ,
9911063 params.normalize_input ,
9921064 params.input_id_images_path .c_str (),
1065+ params.skip_layers ,
1066+ params.slg_scale ,
1067+ params.skip_layer_start ,
1068+ params.skip_layer_end ,
9931069 (step_callback_t )step_callback);
9941070 } else {
9951071 sd_image_t input_image = {(uint32_t )params.width ,
0 commit comments