@@ -43,12 +43,14 @@ const char* schedule_str[] = {
// Command-line names of the run modes, listed in the same order as the
// SDMode enumerators below (presumably so a mode name can be mapped to its
// SDMode value by position -- confirm against the lookup in parse_args).
const char* modes_str[] = {
    "txt2img",
    "img2img",
    "img2vid",
    "convert",
};

// Run modes selectable on the command line. MODE_COUNT is not a mode itself;
// it equals the number of real modes and must stay the last enumerator.
enum SDMode {
    TXT2IMG,
    IMG2IMG,
    IMG2VID,
    CONVERT,
    MODE_COUNT
};

// Adding a mode requires touching both lists above; fail the build if they
// ever drift apart instead of reading past the end of modes_str at runtime.
static_assert(sizeof(modes_str) / sizeof(modes_str[0]) == MODE_COUNT,
              "modes_str must have exactly one entry per SDMode value");
@@ -71,12 +73,18 @@ struct SDParams {
7173
7274 std::string prompt;
7375 std::string negative_prompt;
76+ float min_cfg = 1 .0f ;
7477 float cfg_scale = 7 .0f ;
7578 int clip_skip = -1 ; // <= 0 represents unspecified
7679 int width = 512 ;
7780 int height = 512 ;
7881 int batch_count = 1 ;
7982
83+ int video_frames = 6 ;
84+ int motion_bucket_id = 127 ;
85+ int fps = 6 ;
86+ float augmentation_level = 0 .f;
87+
8088 sample_method_t sample_method = EULER_A;
8189 schedule_t schedule = DEFAULT;
8290 int sample_steps = 20 ;
@@ -108,6 +116,7 @@ void print_params(SDParams params) {
108116 printf (" strength(control): %.2f\n " , params.control_strength );
109117 printf (" prompt: %s\n " , params.prompt .c_str ());
110118 printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
119+ printf (" min_cfg: %.2f\n " , params.min_cfg );
111120 printf (" cfg_scale: %.2f\n " , params.cfg_scale );
112121 printf (" clip_skip: %d\n " , params.clip_skip );
113122 printf (" width: %d\n " , params.width );
@@ -190,7 +199,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
190199 }
191200 }
192201 if (mode_found == -1 ) {
193- fprintf (stderr, " error: invalid mode %s, must be one of [txt2img, img2img]\n " ,
202+ fprintf (stderr,
203+ " error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n " ,
194204 mode_selected);
195205 exit (1 );
196206 }
@@ -420,7 +430,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
420430 params.n_threads = get_num_physical_cores ();
421431 }
422432
423- if (params.mode != CONVERT && params.prompt .length () == 0 ) {
433+ if (params.mode != CONVERT && params.mode != IMG2VID && params. prompt .length () == 0 ) {
424434 fprintf (stderr, " error: the following arguments are required: prompt\n " );
425435 print_usage (argc, argv);
426436 exit (1 );
@@ -432,7 +442,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
432442 exit (1 );
433443 }
434444
435- if (params.mode == IMG2IMG && params.input_path .length () == 0 ) {
445+ if (( params.mode == IMG2IMG || params. mode == IMG2VID) && params.input_path .length () == 0 ) {
436446 fprintf (stderr, " error: when using the img2img mode, the following arguments are required: init-img\n " );
437447 print_usage (argc, argv);
438448 exit (1 );
@@ -539,9 +549,14 @@ int main(int argc, const char* argv[]) {
539549 }
540550 }
541551
552+ if (params.mode == IMG2VID) {
553+ fprintf (stderr, " SVD support is broken, do not use it!!!\n " );
554+ return 1 ;
555+ }
556+
542557 bool vae_decode_only = true ;
543558 uint8_t * input_image_buffer = NULL ;
544- if (params.mode == IMG2IMG) {
559+ if (params.mode == IMG2IMG || params. mode == IMG2VID ) {
545560 vae_decode_only = false ;
546561
547562 int c = 0 ;
@@ -625,19 +640,57 @@ int main(int argc, const char* argv[]) {
625640 3 ,
626641 input_image_buffer};
627642
628- results = img2img (sd_ctx,
629- input_image,
630- params.prompt .c_str (),
631- params.negative_prompt .c_str (),
632- params.clip_skip ,
633- params.cfg_scale ,
634- params.width ,
635- params.height ,
636- params.sample_method ,
637- params.sample_steps ,
638- params.strength ,
639- params.seed ,
640- params.batch_count );
643+ if (params.mode == IMG2VID) {
644+ results = img2vid (sd_ctx,
645+ input_image,
646+ params.width ,
647+ params.height ,
648+ params.video_frames ,
649+ params.motion_bucket_id ,
650+ params.fps ,
651+ params.augmentation_level ,
652+ params.min_cfg ,
653+ params.cfg_scale ,
654+ params.sample_method ,
655+ params.sample_steps ,
656+ params.strength ,
657+ params.seed );
658+ if (results == NULL ) {
659+ printf (" generate failed\n " );
660+ free_sd_ctx (sd_ctx);
661+ return 1 ;
662+ }
663+ size_t last = params.output_path .find_last_of (" ." );
664+ std::string dummy_name = last != std::string::npos ? params.output_path .substr (0 , last) : params.output_path ;
665+ for (int i = 0 ; i < params.video_frames ; i++) {
666+ if (results[i].data == NULL ) {
667+ continue ;
668+ }
669+ std::string final_image_path = i > 0 ? dummy_name + " _" + std::to_string (i + 1 ) + " .png" : dummy_name + " .png" ;
670+ stbi_write_png (final_image_path.c_str (), results[i].width , results[i].height , results[i].channel ,
671+ results[i].data , 0 , get_image_params (params, params.seed + i).c_str ());
672+ printf (" save result image to '%s'\n " , final_image_path.c_str ());
673+ free (results[i].data );
674+ results[i].data = NULL ;
675+ }
676+ free (results);
677+ free_sd_ctx (sd_ctx);
678+ return 0 ;
679+ } else {
680+ results = img2img (sd_ctx,
681+ input_image,
682+ params.prompt .c_str (),
683+ params.negative_prompt .c_str (),
684+ params.clip_skip ,
685+ params.cfg_scale ,
686+ params.width ,
687+ params.height ,
688+ params.sample_method ,
689+ params.sample_steps ,
690+ params.strength ,
691+ params.seed ,
692+ params.batch_count );
693+ }
641694 }
642695
643696 if (results == NULL ) {
0 commit comments