From 193fb620b1aa10447fcbbd8826a35e0c41b667b0 Mon Sep 17 00:00:00 2001
From: Sean Bailey <34511443+sean-bailey@users.noreply.github.com>
Date: Sat, 24 Feb 2024 08:31:01 -0500
Subject: [PATCH] feat: add capability to repeatedly run the upscaler in a row
 (#174)

* Add in upscale repeater logic

---------

Co-authored-by: leejet <leejet714@gmail.com>
---
 README.md             |  4 +++-
 examples/cli/main.cpp | 31 ++++++++++++++++++++++++-------
 stable-diffusion.cpp  |  9 +++++----
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index d673f147..2e07fded 100644
--- a/README.md
+++ b/README.md
@@ -148,7 +148,7 @@ cmake --build . --config Release
 ### Run
 
 ```
-usage: ./bin/sd [arguments]
+usage: ./build/bin/sd [arguments]
 
 arguments:
   -h, --help                         show this help message and exit
@@ -161,6 +161,7 @@ arguments:
   --control-net [CONTROL_PATH]       path to control net model
   --embd-dir [EMBEDDING_PATH]        path to embeddings.
   --upscale-model [ESRGAN_PATH]      path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.
+  --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)
   --type [TYPE]                      weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
                                      If not specified, the default is the type of the weight file.
   --lora-model-dir [DIR]             lora model directory
@@ -186,6 +187,7 @@ arguments:
                                      <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
   --control-net-cpu                  keep controlnet in cpu (for low vram)
+  --canny                            apply canny preprocessor (edge detection)
   -v, --verbose                      print extra info
 ```
 
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index c1bc9e36..5d324845 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -96,6 +96,7 @@ struct SDParams {
     bool vae_tiling               = false;
     bool control_net_cpu          = false;
     bool canny_preprocess         = false;
+    int upscale_repeats           = 1;
 };
 
 void print_params(SDParams params) {
@@ -129,6 +130,7 @@ void print_params(SDParams params) {
     printf("    seed:              %ld\n", params.seed);
     printf("    batch_count:       %d\n", params.batch_count);
     printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
+    printf("    upscale_repeats:   %d\n", params.upscale_repeats);
 }
 
 void print_usage(int argc, const char* argv[]) {
@@ -145,6 +147,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --control-net [CONTROL_PATH]       path to control net model\n");
     printf("  --embd-dir [EMBEDDING_PATH]        path to embeddings.\n");
     printf("  --upscale-model [ESRGAN_PATH]      path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n");
+    printf("  --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)\n");
     printf("  --type [TYPE]                      weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n");
     printf("                                     If not specified, the default is the type of the weight file.\n");
     printf("  --lora-model-dir [DIR]             lora model directory\n");
@@ -296,6 +299,16 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.prompt = argv[i];
+        } else if (arg == "--upscale-repeats") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            params.upscale_repeats = std::stoi(argv[i]);
+            if (params.upscale_repeats < 1) {
+                fprintf(stderr, "error: upscale multiplier must be at least 1\n");
+                exit(1);
+            }
         } else if (arg == "-n" || arg == "--negative-prompt") {
             if (++i >= argc) {
                 invalid_arg = true;
@@ -700,7 +713,7 @@ int main(int argc, const char* argv[]) {
     }
 
     int upscale_factor = 4;  // unused for RealESRGAN_x4plus_anime_6B.pth
-    if (params.esrgan_path.size() > 0) {
+    if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) {
         upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
                                                         params.n_threads,
                                                         params.wtype);
@@ -712,13 +725,17 @@ int main(int argc, const char* argv[]) {
                 if (results[i].data == NULL) {
                     continue;
                 }
-                sd_image_t upscaled_image = upscale(upscaler_ctx, results[i], upscale_factor);
-                if (upscaled_image.data == NULL) {
-                    printf("upscale failed\n");
-                    continue;
+                sd_image_t current_image = results[i];
+                for (int u = 0; u < params.upscale_repeats; ++u) {
+                    sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor);
+                    if (upscaled_image.data == NULL) {
+                        printf("upscale failed\n");
+                        break;
+                    }
+                    free(current_image.data);
+                    current_image = upscaled_image;
                 }
-                free(results[i].data);
-                results[i] = upscaled_image;
+                results[i] = current_image;  // Set the final upscaled image as the result
             }
         }
     }
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index be32f7f6..8f123fc1 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -173,10 +173,11 @@ class StableDiffusionGGML {
         if (version == VERSION_XL) {
             scale_factor = 0.13025f;
             if (vae_path.size() == 0 && taesd_path.size() == 0) {
-                LOG_WARN("!!!It looks like you are using SDXL model. "
-                         "If you find that the generated images are completely black, "
-                         "try specifying SDXL VAE FP16 Fix with the --vae parameter. "
-                         "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors");
+                LOG_WARN(
+                    "!!!It looks like you are using SDXL model. "
+                    "If you find that the generated images are completely black, "
+                    "try specifying SDXL VAE FP16 Fix with the --vae parameter. "
+                    "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors");
             }
         }