Skip to content

Commit 61b3ba6

Browse files
committed
refactor tile number calculation
1 parent 9036ee5 commit 61b3ba6

File tree

1 file changed

+33
-41
lines changed

1 file changed

+33
-41
lines changed

ggml_extend.hpp

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,33 @@ __STATIC_INLINE__ void ggml_tensor_scale_output(struct ggml_tensor* src) {
607607

608608
typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
609609

610+
// Computes, for a single axis, how many tiles of `tile_size` are used to cover
// `small_dim` and the overlap factor at which that many tiles add up to exactly
// `small_dim` (num * tile_size - (num - 1) * tile_size * factor == small_dim).
//
//   num_tiles_dim            [out] number of tiles along this axis (always >= 1)
//   tile_overlap_factor_dim  [out] overlap between neighbouring tiles, as a
//                                  fraction of tile_size; 0 when one tile is used
//   small_dim                extent of the axis being tiled
//   tile_size                edge length of one tile
//   tile_overlap_factor      requested overlap, as a fraction of tile_size
__STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim, float& tile_overlap_factor_dim, int small_dim, int tile_size, const float tile_overlap_factor) {
    // One tile already covers the whole axis. Deciding this up front also keeps
    // the `% small_dim` below away from small_dim == 0 and rejects a
    // non-positive tile_size before it is used as a divisor.
    if (tile_size <= 0 || small_dim <= tile_size) {
        num_tiles_dim           = 1;
        tile_overlap_factor_dim = 0;
        return;
    }

    int tile_overlap     = static_cast<int>(tile_size * tile_overlap_factor);
    int non_tile_overlap = tile_size - tile_overlap;
    if (non_tile_overlap <= 0) {
        // A requested overlap of 1.0 or more leaves no stride between tiles and
        // would divide by zero (or by a negative stride) below. Fall back to the
        // largest overlap that still advances by one element per tile.
        non_tile_overlap = 1;
        tile_overlap     = tile_size - 1;
    }

    // Number of tiles that fit when using exactly the requested overlap.
    num_tiles_dim = (small_dim - tile_overlap) / non_tile_overlap;
    const int overshoot_dim = ((num_tiles_dim + 1) * non_tile_overlap + tile_overlap) % small_dim;

    if ((overshoot_dim != non_tile_overlap) && (overshoot_dim <= num_tiles_dim * (tile_size / 2 - tile_overlap))) {
        // if tiles don't fit perfectly using the desired overlap
        // and there is enough room to squeeze an extra tile without overlap becoming >0.5
        num_tiles_dim++;
    }

    if (num_tiles_dim <= 2) {
        // small_dim > tile_size is guaranteed here, so two tiles are needed.
        // Handled before the general formula, whose divisor (num_tiles_dim - 1)
        // would be zero for a single tile.
        num_tiles_dim           = 2;
        tile_overlap_factor_dim = (2 * tile_size - small_dim) / static_cast<float>(tile_size);
        return;
    }

    // Overlap at which num_tiles_dim tiles span small_dim exactly.
    tile_overlap_factor_dim = static_cast<float>(tile_size * num_tiles_dim - small_dim) /
                              static_cast<float>(tile_size * (num_tiles_dim - 1));
}
636+
610637
// Tiling
611638
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
612639
int input_width = (int)input->ne[0];
@@ -627,48 +654,13 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
627654
small_height = input_height;
628655
}
629656

630-
int tile_overlap = (tile_size * tile_overlap_factor);
631-
int non_tile_overlap = tile_size - tile_overlap;
632-
633-
int num_tiles_x = (small_width - tile_overlap) / non_tile_overlap;
634-
int overshoot_x = ((num_tiles_x + 1) * non_tile_overlap + tile_overlap) % small_width;
657+
int num_tiles_x;
658+
float tile_overlap_factor_x;
659+
sd_tiling_calc_tiles(num_tiles_x, tile_overlap_factor_x, small_width, tile_size, tile_overlap_factor);
635660

636-
if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (tile_size / 2 - tile_overlap))) {
637-
// if tiles don't fit perfectly using the desired overlap
638-
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
639-
num_tiles_x++;
640-
}
641-
642-
float tile_overlap_factor_x = (float)(tile_size * num_tiles_x - small_width) / (float)(tile_size * (num_tiles_x - 1));
643-
if (num_tiles_x <= 2) {
644-
if (small_width <= tile_size) {
645-
num_tiles_x = 1;
646-
tile_overlap_factor_x = 0;
647-
} else {
648-
num_tiles_x = 2;
649-
tile_overlap_factor_x = (2 * tile_size - small_width) / (float)tile_size;
650-
}
651-
}
652-
653-
int num_tiles_y = (small_height - tile_overlap) / non_tile_overlap;
654-
int overshoot_y = ((num_tiles_y + 1) * non_tile_overlap + tile_overlap) % small_height;
655-
656-
if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (tile_size / 2 - tile_overlap))) {
657-
// if tiles don't fit perfectly using the desired overlap
658-
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
659-
num_tiles_y++;
660-
}
661-
662-
float tile_overlap_factor_y = (float)(tile_size * num_tiles_y - small_height) / (float)(tile_size * (num_tiles_y - 1));
663-
if (num_tiles_y <= 2) {
664-
if (small_height <= tile_size) {
665-
num_tiles_y = 1;
666-
tile_overlap_factor_y = 0;
667-
} else {
668-
num_tiles_y = 2;
669-
tile_overlap_factor_y = (2 * tile_size - small_height) / (float)tile_size;
670-
}
671-
}
661+
int num_tiles_y;
662+
float tile_overlap_factor_y;
663+
sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, tile_size, tile_overlap_factor);
672664

673665
LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
674666
LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);

0 commit comments

Comments
 (0)