Support custom ESRGAN tile size

pedroCabrera · pedroCabrera · commit dbc1f18fdbcf · 2025-10-16T21:27:06.000+02:00
diff --git a/esrgan.hpp b/esrgan.hpp
@@ -156,9 +156,10 @@ struct ESRGAN : public GGMLRunner {
 
     ESRGAN(ggml_backend_t backend,
            bool offload_params_to_cpu,
+           int tile_size = 128,
            const String2GGMLType& tensor_types = {})
         : GGMLRunner(backend, offload_params_to_cpu) {
-        // rrdb_net will be created in load_from_file
+        this->tile_size = tile_size;
     }
 
     void enable_conv2d_direct() {
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -116,6 +116,7 @@ struct SDParams {
     bool canny_preprocess      = false;
     bool color                 = false;
     int upscale_repeats        = 1;
+    int upscale_tile           = 128;
 
     // Photo Maker
     std::string photo_maker_path;
@@ -201,6 +202,7 @@ void print_params(SDParams params) {
     printf("    vae_tiling:                        %s\n", params.vae_tiling_params.enabled ? "true" : "false");
     printf("    force_sdxl_vae_conv_scale:         %s\n", params.force_sdxl_vae_conv_scale ? "true" : "false");
     printf("    upscale_repeats:                   %d\n", params.upscale_repeats);
+    printf("    upscale_tile:                      %d\n", params.upscale_tile);
     printf("    chroma_use_dit_mask:               %s\n", params.chroma_use_dit_mask ? "true" : "false");
     printf("    chroma_use_t5_mask:                %s\n", params.chroma_use_t5_mask ? "true" : "false");
     printf("    chroma_t5_mask_pad:                %d\n", params.chroma_t5_mask_pad);
@@ -235,6 +237,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --embd-dir [EMBEDDING_PATH]        path to embeddings\n");
     printf("  --upscale-model [ESRGAN_PATH]      path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
     printf("  --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)\n");
+    printf("  --upscale-tile                     Tile size for the ESRGAN upscaler (default 128)\n");
     printf("  --type [TYPE]                      weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n");
     printf("                                     If not specified, the default is the type of the weight file\n");
     printf("  --tensor-type-rules [EXPRESSION]   weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")\n");
@@ -527,6 +530,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
     options.int_options = {
         {"-t", "--threads", "", &params.n_threads},
         {"", "--upscale-repeats", "", &params.upscale_repeats},
+        {"","--upscale-tile", "", &params.upscale_tile},
         {"-H", "--height", "", &params.height},
         {"-W", "--width", "", &params.width},
         {"", "--steps", "", &params.sample_params.sample_steps},
@@ -917,6 +921,11 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         exit(1);
     }
 
+    if (params.upscale_tile < 1) {
+        fprintf(stderr, "error: upscale tile size must be at least 1\n");
+        exit(1);
+    }
+
     if (params.mode == UPSCALE) {
         if (params.esrgan_path.length() == 0) {
             fprintf(stderr, "error: upscale mode needs an upscaler model (--upscale-model)\n");
@@ -1486,7 +1495,8 @@ int main(int argc, const char* argv[]) {
         upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
                                                         params.offload_params_to_cpu,
                                                         params.diffusion_conv_direct,
-                                                        params.n_threads);
+                                                        params.n_threads,
+                                                        params.upscale_tile);
 
         if (upscaler_ctx == NULL) {
             printf("new_upscaler_ctx failed\n");
diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -292,7 +292,8 @@ typedef struct upscaler_ctx_t upscaler_ctx_t;
 SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
                                         bool offload_params_to_cpu,
                                         bool direct,
-                                        int n_threads);
+                                        int n_threads,
+                                        int tile_size);
 SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
 
 SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
diff --git a/upscaler.cpp b/upscaler.cpp
@@ -10,11 +10,14 @@ struct UpscalerGGML {
     std::string esrgan_path;
     int n_threads;
     bool direct = false;
+    int tile_size = 128;
 
     UpscalerGGML(int n_threads,
-                 bool direct = false)
+                 bool direct = false,
+                 int tile_size = 128)
         : n_threads(n_threads),
-          direct(direct) {
+          direct(direct),
+          tile_size(tile_size) {
     }
 
     bool load_from_file(const std::string& esrgan_path,
@@ -51,7 +54,7 @@ struct UpscalerGGML {
             backend = ggml_backend_cpu_init();
         }
         LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
-        esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, model_loader.tensor_storages_types);
+        esrgan_upscaler = std::make_shared<ESRGAN>(backend, offload_params_to_cpu, tile_size, model_loader.tensor_storages_types);
         if (direct) {
             esrgan_upscaler->enable_conv2d_direct();
         }
@@ -113,14 +116,15 @@ struct upscaler_ctx_t {
 upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
                                  bool offload_params_to_cpu,
                                  bool direct,
-                                 int n_threads) {
+                                 int n_threads,
+                                 int tile_size) {
     upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
     if (upscaler_ctx == NULL) {
         return NULL;
     }
     std::string esrgan_path(esrgan_path_c_str);
 
-    upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct);
+    upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct, tile_size);
     if (upscaler_ctx->upscaler == NULL) {
         return NULL;
     }