From 193fb620b1aa10447fcbbd8826a35e0c41b667b0 Mon Sep 17 00:00:00 2001 From: Sean Bailey <34511443+sean-bailey@users.noreply.github.com> Date: Sat, 24 Feb 2024 08:31:01 -0500 Subject: [PATCH] feat: add capability to repeatedly run the upscaler in a row (#174) * Add in upscale repeater logic --------- Co-authored-by: leejet --- README.md | 4 +++- examples/cli/main.cpp | 31 ++++++++++++++++++++++++------- stable-diffusion.cpp | 9 +++++---- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index d673f147..2e07fded 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ cmake --build . --config Release ### Run ``` -usage: ./bin/sd [arguments] +usage: ./build/bin/sd [arguments] arguments: -h, --help show this help message and exit @@ -161,6 +161,7 @@ arguments: --control-net [CONTROL_PATH] path to control net model --embd-dir [EMBEDDING_PATH] path to embeddings. --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now. + --upscale-repeats Run the ESRGAN upscaler this many times (default 1) --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0) If not specified, the default is the type of the weight file. --lora-model-dir [DIR] lora model directory @@ -186,6 +187,7 @@ arguments: <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x --vae-tiling process vae in tiles to reduce memory usage --control-net-cpu keep controlnet in cpu (for low vram) + --canny apply canny preprocessor (edge detection) -v, --verbose print extra info ``` diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index c1bc9e36..5d324845 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -96,6 +96,7 @@ struct SDParams { bool vae_tiling = false; bool control_net_cpu = false; bool canny_preprocess = false; + int upscale_repeats = 1; }; void print_params(SDParams params) { @@ -129,6 +130,7 @@ void print_params(SDParams params) { printf(" seed: %ld\n", params.seed); printf(" batch_count: %d\n", params.batch_count); printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false"); + printf(" upscale_repeats: %d\n", params.upscale_repeats); } void print_usage(int argc, const char* argv[]) { @@ -145,6 +147,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --control-net [CONTROL_PATH] path to control net model\n"); printf(" --embd-dir [EMBEDDING_PATH] path to embeddings.\n"); printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n"); + printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n"); printf(" If not specified, the default is the type of the weight file.\n"); printf(" --lora-model-dir [DIR] lora model directory\n"); @@ -296,6 +299,16 @@ void parse_args(int argc, const char** argv, SDParams& params) { break; } params.prompt = argv[i]; + } else if (arg == "--upscale-repeats") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.upscale_repeats = std::stoi(argv[i]); + if (params.upscale_repeats < 1) { + fprintf(stderr, "error: upscale multiplier must be at least 1\n"); + exit(1); + } } else if (arg == "-n" || arg == "--negative-prompt") { if (++i >= argc) { invalid_arg = true; @@ -700,7 +713,7 @@ int main(int argc, const char* argv[]) { } int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth - if (params.esrgan_path.size() > 0) { + if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), params.n_threads, params.wtype); @@ -712,13 +725,17 @@ int main(int argc, const char* argv[]) { if (results[i].data == NULL) { continue; } - sd_image_t upscaled_image = upscale(upscaler_ctx, results[i], upscale_factor); - if (upscaled_image.data == NULL) { - printf("upscale failed\n"); - continue; + sd_image_t current_image = results[i]; + for (int u = 0; u < params.upscale_repeats; ++u) { + sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); + if (upscaled_image.data == NULL) { + printf("upscale failed\n"); + break; + } + free(current_image.data); + current_image = upscaled_image; } - free(results[i].data); - results[i] = upscaled_image; + results[i] = current_image; // Set the final upscaled image as the result } } } diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index be32f7f6..8f123fc1 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -173,10 +173,11 @@ class StableDiffusionGGML { if (version == VERSION_XL) { scale_factor = 0.13025f; if (vae_path.size() == 0 && taesd_path.size() == 0) { - LOG_WARN("!!!It looks like you are using SDXL model. " - "If you find that the generated images are completely black, " - "try specifying SDXL VAE FP16 Fix with the --vae parameter. " - "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors"); + LOG_WARN( + "!!!It looks like you are using SDXL model. " + "If you find that the generated images are completely black, " + "try specifying SDXL VAE FP16 Fix with the --vae parameter. " + "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors"); } }