@@ -607,6 +607,33 @@ __STATIC_INLINE__ void ggml_tensor_scale_output(struct ggml_tensor* src) {
607607
608608typedef std::function<void (ggml_tensor*, ggml_tensor*, bool )> on_tile_process;
609609
// Computes, for one dimension, how many tiles are needed to cover it and the
// overlap factor that makes that many tiles span the dimension exactly.
//
// Outputs (written through the references):
//   num_tiles_dim           - number of tiles along this dimension (always >= 1)
//   tile_overlap_factor_dim - overlap between neighbouring tiles, as a fraction
//                             of tile_size, that makes the tiles fit small_dim
// Inputs:
//   small_dim           - extent of the dimension being tiled
//   tile_size           - edge length of a single tile
//   tile_overlap_factor - requested overlap, as a fraction of tile_size
//
// Degenerate inputs are handled instead of dividing by zero:
//   * small_dim <= tile_size (including small_dim <= 0) or tile_size <= 0
//     yields a single tile with no overlap;
//   * the requested overlap is clamped to [0, tile_size - 1] so that at least
//     one element of every tile is not shared with its predecessor.
__STATIC_INLINE__ void
sd_tiling_calc_tiles(int& num_tiles_dim, float& tile_overlap_factor_dim, int small_dim, int tile_size, const float tile_overlap_factor) {
    // One tile is enough; returning early also keeps small_dim == 0 away from
    // the modulo below.
    if (tile_size <= 0 || small_dim <= tile_size) {
        num_tiles_dim           = 1;
        tile_overlap_factor_dim = 0;
        return;
    }

    int tile_overlap = static_cast<int>(tile_size * tile_overlap_factor);
    if (tile_overlap < 0) {
        tile_overlap = 0;
    }
    if (tile_overlap > tile_size - 1) {
        // an overlap of a full tile would make the stride zero (division by zero below)
        tile_overlap = tile_size - 1;
    }
    const int non_tile_overlap = tile_size - tile_overlap;  // stride between tile origins, >= 1

    num_tiles_dim           = (small_dim - tile_overlap) / non_tile_overlap;
    const int overshoot_dim = ((num_tiles_dim + 1) * non_tile_overlap + tile_overlap) % small_dim;

    if ((overshoot_dim != non_tile_overlap) && (overshoot_dim <= num_tiles_dim * (tile_size / 2 - tile_overlap))) {
        // if tiles don't fit perfectly using the desired overlap
        // and there is enough room to squeeze an extra tile without overlap becoming >0.5
        num_tiles_dim++;
    }

    if (num_tiles_dim <= 2) {
        // small_dim > tile_size here, so exactly two tiles are needed; the
        // general formula below would divide by zero for a single tile.
        num_tiles_dim           = 2;
        tile_overlap_factor_dim = static_cast<float>(2 * tile_size - small_dim) / static_cast<float>(tile_size);
        return;
    }

    // Overlap that makes num_tiles_dim tiles cover small_dim exactly:
    //   tile_size * n - overlap * (n - 1) == small_dim
    tile_overlap_factor_dim = static_cast<float>(tile_size * num_tiles_dim - small_dim) /
                              static_cast<float>(tile_size * (num_tiles_dim - 1));
}
636+
610637// Tiling
611638__STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
612639 int input_width = (int )input->ne [0 ];
@@ -627,48 +654,13 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
627654 small_height = input_height;
628655 }
629656
630- int tile_overlap = (tile_size * tile_overlap_factor);
631- int non_tile_overlap = tile_size - tile_overlap;
632-
633- int num_tiles_x = (small_width - tile_overlap) / non_tile_overlap;
634- int overshoot_x = ((num_tiles_x + 1 ) * non_tile_overlap + tile_overlap) % small_width;
657+ int num_tiles_x;
658+ float tile_overlap_factor_x;
659+ sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, tile_size, tile_overlap_factor);
635660
636- if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (tile_size / 2 - tile_overlap))) {
637- // if tiles don't fit perfectly using the desired overlap
638- // and there is enough room to squeeze an extra tile without overlap becoming >0.5
639- num_tiles_x++;
640- }
641-
642- float tile_overlap_factor_x = (float )(tile_size * num_tiles_x - small_width) / (float )(tile_size * (num_tiles_x - 1 ));
643- if (num_tiles_x <= 2 ) {
644- if (small_width <= tile_size) {
645- num_tiles_x = 1 ;
646- tile_overlap_factor_x = 0 ;
647- } else {
648- num_tiles_x = 2 ;
649- tile_overlap_factor_x = (2 * tile_size - small_width) / (float )tile_size;
650- }
651- }
652-
653- int num_tiles_y = (small_height - tile_overlap) / non_tile_overlap;
654- int overshoot_y = ((num_tiles_y + 1 ) * non_tile_overlap + tile_overlap) % small_height;
655-
656- if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (tile_size / 2 - tile_overlap))) {
657- // if tiles don't fit perfectly using the desired overlap
658- // and there is enough room to squeeze an extra tile without overlap becoming >0.5
659- num_tiles_y++;
660- }
661-
662- float tile_overlap_factor_y = (float )(tile_size * num_tiles_y - small_height) / (float )(tile_size * (num_tiles_y - 1 ));
663- if (num_tiles_y <= 2 ) {
664- if (small_height <= tile_size) {
665- num_tiles_y = 1 ;
666- tile_overlap_factor_y = 0 ;
667- } else {
668- num_tiles_y = 2 ;
669- tile_overlap_factor_y = (2 * tile_size - small_height) / (float )tile_size;
670- }
671- }
661+ int num_tiles_y;
662+ float tile_overlap_factor_y;
663+ sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, tile_size, tile_overlap_factor);
672664
673665 LOG_DEBUG (" num tiles : %d, %d " , num_tiles_x, num_tiles_y);
674666 LOG_DEBUG (" optimal overlap : %f, %f (targeting %f)" , tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
0 commit comments