From 2b717bb195a3034853ed45a52c5752f010e1302b Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 02:35:25 +0900 Subject: [PATCH 001/201] fix initial corrupt model loop if for some reason the model loaded during webui's initial loading phase is corrupted, then after entering this state the user will not be able to load another model even if a good model is selected, because the unload_model_weights > send_model_to_cpu > m.lowvram attribute check will fail since m is None; webui will be stuck in the loop, unable to recover without manual intervention --- modules/sd_models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..1747ca62103 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -659,10 +659,11 @@ def get_empty_cond(sd_model): def send_model_to_cpu(m): - if m.lowvram: - lowvram.send_everything_to_cpu() - else: - m.to(devices.cpu) + if m is not None: + if m.lowvram: + lowvram.send_everything_to_cpu() + else: + m.to(devices.cpu) devices.torch_gc() From 4bc39d234d6535e3d8f8531d0c0f4e049261c922 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 02:39:45 +0900 Subject: [PATCH 002/201] Show LoRA if model is None --- .../Lora/ui_extra_networks_lora.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index b627f7dc29d..e35d90c6ea3 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -60,18 +60,19 @@ def create_item(self, name, index=None, enable_filter=True): else: sd_version = lora_on_disk.sd_version - if shared.opts.lora_show_all or not enable_filter: - pass - elif sd_version == network.SdVersion.Unknown: - model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 - if model_version.name in shared.opts.lora_hide_unknown_for_versions: + if shared.sd_model is not None: # still show LoRA in case an error occurs during initial model loading + if shared.opts.lora_show_all or not enable_filter: + pass + elif sd_version == network.SdVersion.Unknown: + model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 + if model_version.name in shared.opts.lora_hide_unknown_for_versions: + return None + elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: + return None + elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: + return None + elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + return None - elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: - return None - elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: - return None - elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: - return None return item From 246c269af87757998f57bb27ddda59fdc7cff976 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:08:09 +0900 Subject: [PATCH 003/201] add option to check file hash after download if the sha256 hash does not match, it will be automatically deleted --- modules/modelloader.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/modelloader.py
b/modules/modelloader.py index 115415c8e65..5421e59b013 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -23,6 +23,7 @@ def load_file_from_url( model_dir: str, progress: bool = True, file_name: str | None = None, + hash_prefix: str | None = None, ) -> str: """Download a file from `url` into `model_dir`, using the file present if possible. @@ -36,11 +37,11 @@ def load_file_from_url( if not os.path.exists(cached_file): print(f'Downloading: "{url}" to {cached_file}\n') from torch.hub import download_url_to_file - download_url_to_file(url, cached_file, progress=progress) + download_url_to_file(url, cached_file, progress=progress, hash_prefix=hash_prefix) return cached_file -def load_models(model_path: str, model_url: str = None, command_path: str = None, ext_filter=None, download_name=None, ext_blacklist=None) -> list: +def load_models(model_path: str, model_url: str = None, command_path: str = None, ext_filter=None, download_name=None, ext_blacklist=None, hash_prefix=None) -> list: """ A one-and done loader to try finding the desired models in specified directories. @@ -49,6 +50,7 @@ def load_models(model_path: str, model_url: str = None, command_path: str = None @param model_path: The location to store/find models in. @param command_path: A command-line argument to search for models in first. @param ext_filter: An optional list of filename extensions to filter by + @param hash_prefix: the expected sha256 of the model_url @return: A list of paths containing the desired model(s) """ output = [] @@ -78,7 +80,7 @@ def load_models(model_path: str, model_url: str = None, command_path: str = None if model_url is not None and len(output) == 0: if download_name is not None: - output.append(load_file_from_url(model_url, model_dir=places[0], file_name=download_name)) + output.append(load_file_from_url(model_url, model_dir=places[0], file_name=download_name, hash_prefix=hash_prefix)) else: output.append(model_url) From c69773d7e8f23f8b6c46a8e177b50386e1f1b8e8 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:08:57 +0900 Subject: [PATCH 004/201] ensure integrity for initial sd model download --- modules/sd_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..35d5952af07 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -149,10 +149,12 @@ def list_models(): cmd_ckpt = shared.cmd_opts.ckpt if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt): model_url = None + expected_sha256 = None else: model_url = f"{shared.hf_endpoint}/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" + expected_sha256 = '6ce0161689b3853acaa03779ec93eafe75a02f4ced659bee03f50797806fa2fa' - model_list = modelloader.load_models(model_path=model_path, model_url=model_url, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], download_name="v1-5-pruned-emaonly.safetensors", ext_blacklist=[".vae.ckpt", ".vae.safetensors"]) + model_list = modelloader.load_models(model_path=model_path, model_url=model_url, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], download_name="v1-5-pruned-emaonly.safetensors", ext_blacklist=[".vae.ckpt", ".vae.safetensors"], hash_prefix=expected_sha256) if os.path.exists(cmd_ckpt): checkpoint_info = CheckpointInfo(cmd_ckpt) From a1aa0af8a45f4c30f1d3fce5635c090d64d4e55b Mon Sep 17 00:00:00 2001 From: drhead 
<1313496+drhead@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:38:44 -0400 Subject: [PATCH 005/201] add code for skipping CFG on early steps --- modules/sd_samplers_cfg_denoiser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 93581c9acc6..8ccc837aa13 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -212,6 +212,11 @@ def apply_blend(current_latent): uncond = denoiser_params.text_uncond skip_uncond = False + if self.step < shared.opts.skip_cond_steps: + skip_uncond = True + x_in = x_in[:-batch_size] + sigma_in = sigma_in[:-batch_size] + # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: skip_uncond = True From 8016d78a4b9c8bdd02b0031694ad56553f89161e Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:42:24 -0400 Subject: [PATCH 006/201] add option for early cfg skip --- modules/shared_options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 326a317e030..2f70ef65a0d 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -380,7 +380,8 @@ 'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"]}, infotext='UniPC skip type'), 'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"), 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), - 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models") + 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), + 'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From 83266205d0b55ddbff34ea36b47f69c5ea11cc28 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 00:09:43 -0400 Subject: [PATCH 007/201] Add KL Optimal scheduler --- modules/sd_schedulers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 75eb3ac032f..10ae4e081f9 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -31,6 +31,15 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): return torch.FloatTensor(sigs).to(device) +def kl_optimal(n, sigma_min, sigma_max, device): + alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) + alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) + sigmas = torch.empty((n+1,), device=device) + for i in range(n+1): + sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max) + return sigmas + + schedulers = [ Scheduler('automatic', 'Automatic', None), Scheduler('uniform', 'Uniform', uniform, 
need_inner_model=True), @@ -38,6 +47,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), + Scheduler('kl_optimal', 'KL Optimal', kl_optimal), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From 83182d2799f12ee2b5e5425d750db062ad67eb90 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:07:25 -0400 Subject: [PATCH 008/201] change skip early cond option name and to float --- modules/shared_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 2f70ef65a0d..91ba72b5ed6 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -381,7 +381,7 @@ 'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"), 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), - 'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"), + 'skip_early_cond': OptionInfo(0, "Skip CFG during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From 6e9b69a33853e1bcee81cea6f01cf13de612fef7 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:08:28 -0400 Subject: [PATCH 009/201] change skip_early_cond code to use float --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 8ccc837aa13..fba5c48c03d 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -212,7 +212,7 @@ def apply_blend(current_latent): uncond = denoiser_params.text_uncond skip_uncond = False - if self.step < shared.opts.skip_cond_steps: + if self.step / self.total_steps <= shared.opts.skip_early_cond: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 33cbbf9f8b46666a2325c98b723b6cb2ec192ef7 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:15:00 -0400 Subject: [PATCH 010/201] add s_min_uncond_all option --- modules/shared_options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/shared_options.py b/modules/shared_options.py index 91ba72b5ed6..c711fa5f609 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -210,6 +210,7 @@ options_templates.update(options_section(('optimizations', "Optimizations", "sd"), { "cross_attention_optimization": OptionInfo("Automatic", "Cross attention 
optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}), "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"), + "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"), "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"), "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"), "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"), From 029adbe5318b57c04dbc0d92273cce38e1ecf457 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:15:56 -0400 Subject: [PATCH 011/201] implement option to skip uncond on all steps below ngms --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index fba5c48c03d..082a4f63c63 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -218,7 +218,7 @@ def apply_blend(current_latent): sigma_in = sigma_in[:-batch_size] # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it - if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: + if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 50bb6e1179745799038b26a228b8acd8cacfffc5 Mon Sep 17 00:00:00 2001 From: pinanew <851673+pinanew@users.noreply.github.com> Date: Tue, 23 Apr 2024 18:45:42 +0300 Subject: [PATCH 012/201] AVIF has quality setting too --- modules/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/images.py b/modules/images.py index c0ff8a6306a..f4eb6f71a2a 100644 --- a/modules/images.py +++ b/modules/images.py @@ -608,7 +608,7 @@ def save_image_with_geninfo(image, geninfo, filename, extension=None, existing_p }) - image.save(filename,format=image_format, exif=exif_bytes) + image.save(filename,format=image_format, quality=opts.jpeg_quality, exif=exif_bytes) elif extension.lower() == ".gif": image.save(filename, format=image_format, comment=geninfo) else: From 8fa3fa76c39200e2af63ab86926c0c20cf02eb25 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 24 Apr 2024 02:41:31 +0900 Subject: [PATCH 013/201] fix exif_bytes referenced before assignment --- modules/images.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/images.py b/modules/images.py index f4eb6f71a2a..36b610322a1 100644 --- a/modules/images.py +++ b/modules/images.py @@ -606,7 +606,8 @@ def 
save_image_with_geninfo(image, geninfo, filename, extension=None, existing_p piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(geninfo or "", encoding="unicode") }, }) - + else: + exif_bytes = None image.save(filename,format=image_format, quality=opts.jpeg_quality, exif=exif_bytes) elif extension.lower() == ".gif": From 1091e3a37eb363d6ac5f4d3eb596526a85dea551 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 24 Apr 2024 02:54:26 +0900 Subject: [PATCH 014/201] update jpeg_quality description --- modules/shared_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 326a317e030..98d477f5255 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -54,7 +54,7 @@ "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"), "save_mask": OptionInfo(False, "For inpainting, save a copy of the greyscale mask"), "save_mask_composite": OptionInfo(False, "For inpainting, save a masked composite"), - "jpeg_quality": OptionInfo(80, "Quality for saved jpeg images", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), + "jpeg_quality": OptionInfo(80, "Quality for saved jpeg and avif images", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), "webp_lossless": OptionInfo(False, "Use lossless compression for webp images"), "export_for_4chan": OptionInfo(True, "Save copy of large images as JPG").info("if the file size is above the limit, or either width or height are above the limit"), "img_downscale_threshold": OptionInfo(4.0, "File size limit for the above option, MB", gr.Number), From e85e327ae0409a6c7e6f98011465f07290b78567 Mon Sep 17 00:00:00 2001 From: Andray Date: Thu, 25 Apr 2024 13:26:26 +0400 Subject: [PATCH 015/201] more extension tag filtering options --- modules/ui_extensions.py | 68 +++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py index d822c0b8920..9bfd5f3b350 100644 --- a/modules/ui_extensions.py +++ b/modules/ui_extensions.py @@ -396,15 +396,15 @@ def install_extension_from_url(dirname, url, branch_name=None): shutil.rmtree(tmpdir, True) -def install_extension_from_index(url, hide_tags, sort_column, filter_text): +def install_extension_from_index(url, selected_tags, showing_type, filtering_type, sort_column, filter_text): ext_table, message = install_extension_from_url(None, url) - code, _ = refresh_available_extensions_from_data(hide_tags, sort_column, filter_text) + code, _ = refresh_available_extensions_from_data(selected_tags, showing_type, filtering_type, sort_column, filter_text) return code, ext_table, message, '' -def refresh_available_extensions(url, hide_tags, sort_column): +def refresh_available_extensions(url, selected_tags, showing_type, filtering_type, sort_column): global available_extensions import urllib.request @@ -413,19 +413,19 @@ def refresh_available_extensions(url, hide_tags, sort_column): available_extensions = json.loads(text) - code, tags = refresh_available_extensions_from_data(hide_tags, sort_column) + code, tags = refresh_available_extensions_from_data(selected_tags, showing_type, filtering_type, sort_column) return url, code, gr.CheckboxGroup.update(choices=tags), '', '' -def refresh_available_extensions_for_tags(hide_tags, sort_column, filter_text): - code, _ = refresh_available_extensions_from_data(hide_tags, sort_column, 
filter_text) +def refresh_available_extensions_for_tags(selected_tags, showing_type, filtering_type, sort_column, filter_text): + code, _ = refresh_available_extensions_from_data(selected_tags, showing_type, filtering_type, sort_column, filter_text) return code, '' -def search_extensions(filter_text, hide_tags, sort_column): - code, _ = refresh_available_extensions_from_data(hide_tags, sort_column, filter_text) +def search_extensions(filter_text, selected_tags, showing_type, filtering_type, sort_column): + code, _ = refresh_available_extensions_from_data(selected_tags, showing_type, filtering_type, sort_column, filter_text) return code, '' @@ -450,13 +450,13 @@ def get_date(info: dict, key): return '' -def refresh_available_extensions_from_data(hide_tags, sort_column, filter_text=""): +def refresh_available_extensions_from_data(selected_tags, showing_type, filtering_type, sort_column, filter_text=""): extlist = available_extensions["extensions"] installed_extensions = {extension.name for extension in extensions.extensions} installed_extension_urls = {normalize_git_url(extension.remote) for extension in extensions.extensions if extension.remote is not None} tags = available_extensions.get("tags", {}) - tags_to_hide = set(hide_tags) + selected_tags = set(selected_tags) hidden = 0 code = f""" @@ -489,9 +489,19 @@ def refresh_available_extensions_from_data(hide_tags, sort_column, filter_text=" existing = get_extension_dirname_from_url(url) in installed_extensions or normalize_git_url(url) in installed_extension_urls extension_tags = extension_tags + ["installed"] if existing else extension_tags - if any(x for x in extension_tags if x in tags_to_hide): - hidden += 1 - continue + if len(selected_tags) > 0: + matched_tags = [x for x in extension_tags if x in selected_tags] + if filtering_type == 'or': + need_hide = len(matched_tags) > 0 + else: + need_hide = len(matched_tags) == len(selected_tags) + + if showing_type == 'show': + need_hide = not need_hide + + if need_hide: + hidden += 1 + continue if filter_text and filter_text.strip(): if filter_text.lower() not in html.escape(name).lower() and filter_text.lower() not in html.escape(description).lower(): @@ -594,9 +604,13 @@ def create_ui(): install_extension_button = gr.Button(elem_id="install_extension_button", visible=False) with gr.Row(): - hide_tags = gr.CheckboxGroup(value=["ads", "localization", "installed"], label="Hide extensions with tags", choices=["script", "ads", "localization", "installed"]) + selected_tags = gr.CheckboxGroup(value=["ads", "localization", "installed"], label="Extension tags", choices=["script", "ads", "localization", "installed"]) sort_column = gr.Radio(value="newest first", label="Order", choices=["newest first", "oldest first", "a-z", "z-a", "internal order",'update time', 'create time', "stars"], type="index") + with gr.Row(): + showing_type = gr.Radio(value="hide", label="Showing type", choices=["hide", "show"]) + filtering_type = gr.Radio(value="or", label="Filtering type", choices=["or", "and"]) + with gr.Row(): search_extensions_text = gr.Text(label="Search", container=False) @@ -605,31 +619,43 @@ def create_ui(): refresh_available_extensions_button.click( fn=modules.ui.wrap_gradio_call(refresh_available_extensions, extra_outputs=[gr.update(), gr.update(), gr.update(), gr.update()]), - inputs=[available_extensions_index, hide_tags, sort_column], - outputs=[available_extensions_index, available_extensions_table, hide_tags, search_extensions_text, install_result], + inputs=[available_extensions_index, 
selected_tags, showing_type, filtering_type, sort_column], + outputs=[available_extensions_index, available_extensions_table, selected_tags, search_extensions_text, install_result], ) install_extension_button.click( fn=modules.ui.wrap_gradio_call(install_extension_from_index, extra_outputs=[gr.update(), gr.update()]), - inputs=[extension_to_install, hide_tags, sort_column, search_extensions_text], + inputs=[extension_to_install, selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, extensions_table, install_result], ) search_extensions_text.change( fn=modules.ui.wrap_gradio_call(search_extensions, extra_outputs=[gr.update()]), - inputs=[search_extensions_text, hide_tags, sort_column], + inputs=[search_extensions_text, selected_tags, showing_type, filtering_type, sort_column], outputs=[available_extensions_table, install_result], ) - hide_tags.change( + selected_tags.change( + fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], + outputs=[available_extensions_table, install_result] + ) + + showing_type.change( + fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], + outputs=[available_extensions_table, install_result] + ) + + filtering_type.change( fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), - inputs=[hide_tags, sort_column, search_extensions_text], + inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) sort_column.change( fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), - inputs=[hide_tags, sort_column, search_extensions_text], + inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) From d5f6fdb3c44204495067d4166a6a980a9f1165ed Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Fri, 26 Apr 2024 18:47:04 +0900 Subject: [PATCH 016/201] compact-checkbox-group --- modules/ui_extensions.py | 8 ++++---- style.css | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py index 9bfd5f3b350..6b6403f23d6 100644 --- a/modules/ui_extensions.py +++ b/modules/ui_extensions.py @@ -604,12 +604,12 @@ def create_ui(): install_extension_button = gr.Button(elem_id="install_extension_button", visible=False) with gr.Row(): - selected_tags = gr.CheckboxGroup(value=["ads", "localization", "installed"], label="Extension tags", choices=["script", "ads", "localization", "installed"]) - sort_column = gr.Radio(value="newest first", label="Order", choices=["newest first", "oldest first", "a-z", "z-a", "internal order",'update time', 'create time', "stars"], type="index") + selected_tags = gr.CheckboxGroup(value=["ads", "localization", "installed"], label="Extension tags", choices=["script", "ads", "localization", "installed"], elem_classes=['compact-checkbox-group']) + sort_column = gr.Radio(value="newest first", label="Order", choices=["newest first", "oldest first", "a-z", "z-a", "internal order",'update time', 'create time', "stars"], type="index", elem_classes=['compact-checkbox-group']) 
with gr.Row(): - showing_type = gr.Radio(value="hide", label="Showing type", choices=["hide", "show"]) - filtering_type = gr.Radio(value="or", label="Filtering type", choices=["or", "and"]) + showing_type = gr.Radio(value="hide", label="Showing type", choices=["hide", "show"], elem_classes=['compact-checkbox-group']) + filtering_type = gr.Radio(value="or", label="Filtering type", choices=["or", "and"], elem_classes=['compact-checkbox-group']) with gr.Row(): search_extensions_text = gr.Text(label="Search", container=False) diff --git a/style.css b/style.css index f6a89b8f90d..cca5456ccea 100644 --- a/style.css +++ b/style.css @@ -854,6 +854,10 @@ table.popup-table .link{ display: inline-block; } +.compact-checkbox-group div label { + padding: 0.1em 0.3em !important; +} + /* extensions tab table row hover highlight */ #extensions tr:hover td, From 3902aa222b00a24f2d7b7158b79efaac9f318923 Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 06:44:41 -0400 Subject: [PATCH 017/201] Better error handling to skip non-standard ss_tag_frequency content --- extensions-builtin/Lora/ui_edit_user_metadata.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/extensions-builtin/Lora/ui_edit_user_metadata.py b/extensions-builtin/Lora/ui_edit_user_metadata.py index 7a07a544e28..b6c4d1c6acb 100644 --- a/extensions-builtin/Lora/ui_edit_user_metadata.py +++ b/extensions-builtin/Lora/ui_edit_user_metadata.py @@ -21,10 +21,12 @@ def is_non_comma_tagset(tags): def build_tags(metadata): tags = {} - for _, tags_dict in metadata.get("ss_tag_frequency", {}).items(): - for tag, tag_count in tags_dict.items(): - tag = tag.strip() - tags[tag] = tags.get(tag, 0) + int(tag_count) + ss_tag_frequency = metadata.get("ss_tag_frequency", {}) + if ss_tag_frequency is not None and hasattr(ss_tag_frequency, 'items'): + for _, tags_dict in ss_tag_frequency.items(): + for tag, tag_count in tags_dict.items(): + tag = tag.strip() + tags[tag] = tags.get(tag, 0) + int(tag_count) if tags and is_non_comma_tagset(tags): new_tags = {} From 8dc920228e7c5181cc990845f0febd2ac4b42d87 Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 06:52:21 -0400 Subject: [PATCH 018/201] Better error handling when unable to read metadata from safetensors file --- modules/sd_models.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..59742d31118 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -280,18 +280,22 @@ def read_metadata_from_safetensors(filename): json_start = file.read(2) assert metadata_len > 2 and json_start in (b'{"', b"{'"), f"{filename} is not a safetensors file" - json_data = json_start + file.read(metadata_len-2) - json_obj = json.loads(json_data) res = {} - for k, v in json_obj.get("__metadata__", {}).items(): - res[k] = v - if isinstance(v, str) and v[0:1] == '{': - try: - res[k] = json.loads(v) - except Exception: - pass + try: + json_data = json_start + file.read(metadata_len-2) + json_obj = json.loads(json_data) + for k, v in json_obj.get("__metadata__", {}).items(): + res[k] = v + if isinstance(v, str) and v[0:1] == '{': + try: + res[k] = json.loads(v) + except Exception: + pass + except: + errors.report(f"Error reading metadata from file: {filename}", exc_info=True) + return res From c5b7559856c5f64792c2425d11890a121497e6bc Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 06:57:32 -0400 Subject: [PATCH 019/201] Better error handling when unable 
to extract contents of embedding/TI file --- modules/textual_inversion/textual_inversion.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 253f219c4ec..dc7833e9394 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -181,12 +181,16 @@ def load_from_file(self, path, filename): else: return - embedding = create_embedding_from_data(data, name, filename=filename, filepath=path) + if data is not None: + embedding = create_embedding_from_data(data, name, filename=filename, filepath=path) - if self.expected_shape == -1 or self.expected_shape == embedding.shape: - self.register_embedding(embedding, shared.sd_model) + if self.expected_shape == -1 or self.expected_shape == embedding.shape: + self.register_embedding(embedding, shared.sd_model) + else: + self.skipped_embeddings[name] = embedding else: - self.skipped_embeddings[name] = embedding + print(f"Unable to load Textual inversion embedding due to data issue: '{name}'.") + def load_from_dir(self, embdir): if not os.path.isdir(embdir.path): From c5ae2254182b803618a4b01c12fa88c42642e806 Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 07:55:39 -0400 Subject: [PATCH 020/201] Better handling of embeddings with two rare, but not unusual, files in them I have encountered pickled embeddings with a short byteorder file at the top-level, as well as a .data/serialization_id file. Both load fine after allowing these files in the dataset. I do not think it is likely adding them to the safe unpickle regular expression would be a security risk, but that's for the maintainers to decide. --- modules/safe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/safe.py b/modules/safe.py index b1d08a7928e..ee8789268d9 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -65,7 +65,7 @@ def find_class(self, module, name): # Regular expression that accepts 'dirname/version', 'dirname/data.pkl', and 'dirname/data/' -allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|(data\.pkl))$") +allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|byteorder|(\.data\/serialization_id)|version|(data\.pkl))$") data_pkl_re = re.compile(r"^([^/]+)/data\.pkl$") def check_zip_filenames(filename, names): From 44afb48447c2ef40f8546fe704bd817881da5a14 Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 08:17:37 -0400 Subject: [PATCH 021/201] Linter fix - extraneous whitespace --- modules/sd_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 59742d31118..06e881207ab 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -295,7 +295,7 @@ def read_metadata_from_safetensors(filename): pass except: errors.report(f"Error reading metadata from file: {filename}", exc_info=True) - + return res From 60c079995824ebe861029839ee12ca0df6a26e8d Mon Sep 17 00:00:00 2001 From: Brendan Hoar Date: Fri, 26 Apr 2024 08:21:12 -0400 Subject: [PATCH 022/201] Linter - except must not be bare. 
--- modules/sd_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 06e881207ab..06a7cf3f0a5 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -293,7 +293,7 @@ def read_metadata_from_safetensors(filename): res[k] = json.loads(v) except Exception: pass - except: + except Exception: errors.report(f"Error reading metadata from file: {filename}", exc_info=True) return res From 9d964d3fc3285b3df877479081968ebf6dbccce4 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 27 Apr 2024 19:21:34 +0900 Subject: [PATCH 023/201] no-referrer --- modules/ui_gradio_extensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_gradio_extensions.py b/modules/ui_gradio_extensions.py index f5278d22f02..18fbd6777e2 100644 --- a/modules/ui_gradio_extensions.py +++ b/modules/ui_gradio_extensions.py @@ -50,7 +50,7 @@ def reload_javascript(): def template_response(*args, **kwargs): res = shared.GradioTemplateResponseOriginal(*args, **kwargs) - res.body = res.body.replace(b'</head>', f'{js}</head>'.encode("utf8")) + res.body = res.body.replace(b'</head>', f'<meta name="referrer" content="no-referrer"/>{js}</head>'.encode("utf8")) res.body = res.body.replace(b'</body>', f'{css}</body>'.encode("utf8")) res.init_headers() return res From 3a215deff23d28c06c8de98423c12628b8ce6326 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sun, 28 Apr 2024 00:15:58 -0400 Subject: [PATCH 024/201] vectorize kl-optimal sigma calculation Co-authored-by: mamei16 --- modules/sd_schedulers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 10ae4e081f9..99a6f7be26a 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -34,9 +34,8 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): def kl_optimal(n, sigma_min, sigma_max, device): alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) - sigmas = torch.empty((n+1,), device=device) - for i in range(n+1): - sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max) + step_indices = torch.arange(n + 1, device=device) + sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) return sigmas From 3d3fc81f4858cae75fa33e55e7b88ede853d28ae Mon Sep 17 00:00:00 2001 From: huchenlei Date: Sun, 28 Apr 2024 16:14:12 -0400 Subject: [PATCH 025/201] Add correct mimetype for .mjs files --- modules/ui.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui.py b/modules/ui.py index 403425f2985..c6c058fea7a 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -38,6 +38,7 @@ # this is a fix for Windows users.
Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() mimetypes.add_type('application/javascript', '.js') +mimetypes.add_type('application/javascript', '.mjs') # Likewise, add explicit content-type header for certain missing image types mimetypes.add_type('image/webp', '.webp') From 579f1ef278080ff7545be3a42c5fe36fc2890887 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sun, 28 Apr 2024 22:36:43 -0600 Subject: [PATCH 026/201] Allow old sampler names in API --- modules/api/api.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/modules/api/api.py b/modules/api/api.py index f468c385275..b1201fe778e 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -48,6 +48,15 @@ def validate_sampler_name(name): return name +def parse_old_sampler_name(name): + for scheduler in sd_schedulers.schedulers: + for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]: + if name.endswith(" " + scheduler_name): + return name[0:-(len(scheduler_name) + 1)], scheduler_name + + return name, "Automatic" + + def setUpscalers(req: dict): reqDict = vars(req) reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None) @@ -438,15 +447,19 @@ def text2imgapi(self, txt2imgreq: models.StableDiffusionTxt2ImgProcessingAPI): self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner) + sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index) populate = txt2imgreq.copy(update={ # Override __init__ params - "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), + "sampler_name": validate_sampler_name(sampler), "do_not_save_samples": not txt2imgreq.save_images, "do_not_save_grid": not txt2imgreq.save_images, }) if populate.sampler_name: populate.sampler_index = None # prevent a warning later on + if not populate.scheduler: + populate.scheduler = scheduler + args = vars(populate) args.pop('script_name', None) args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them @@ -502,9 +515,10 @@ def img2imgapi(self, img2imgreq: models.StableDiffusionImg2ImgProcessingAPI): self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner) + sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index) populate = img2imgreq.copy(update={ # Override __init__ params - "sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index), + "sampler_name": validate_sampler_name(sampler), "do_not_save_samples": not img2imgreq.save_images, "do_not_save_grid": not img2imgreq.save_images, "mask": mask, @@ -512,6 +526,9 @@ def img2imgapi(self, img2imgreq: models.StableDiffusionImg2ImgProcessingAPI): if populate.sampler_name: populate.sampler_index = None # prevent a warning later on + if not populate.scheduler: + populate.scheduler = scheduler + args = vars(populate) args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine. 
args.pop('script_name', None) From 4c7b22d37d14c8469b4510a11710f162940cdaa6 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sun, 28 Apr 2024 22:46:11 -0600 Subject: [PATCH 027/201] Fix dragging text within prompt input --- javascript/dragdrop.js | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/javascript/dragdrop.js b/javascript/dragdrop.js index 0c018356419..882562d7367 100644 --- a/javascript/dragdrop.js +++ b/javascript/dragdrop.js @@ -56,6 +56,15 @@ function eventHasFiles(e) { return false; } +function isURL(url) { + try { + const _ = new URL(url); + return true; + } catch { + return false; + } +} + function dragDropTargetIsPrompt(target) { if (target?.placeholder && target?.placeholder.indexOf("Prompt") >= 0) return true; if (target?.parentNode?.parentNode?.className?.indexOf("prompt") > 0) return true; @@ -77,7 +86,7 @@ window.document.addEventListener('dragover', e => { window.document.addEventListener('drop', async e => { const target = e.composedPath()[0]; const url = e.dataTransfer.getData('text/uri-list') || e.dataTransfer.getData('text/plain'); - if (!eventHasFiles(e) && !url) return; + if (!eventHasFiles(e) && !isURL(url)) return; if (dragDropTargetIsPrompt(target)) { e.stopPropagation(); From c8336c45b98c2226923503e17b1d7f9170af0f8a Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Tue, 30 Apr 2024 01:53:41 -0600 Subject: [PATCH 028/201] Use existing function for old sampler names --- modules/api/api.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/modules/api/api.py b/modules/api/api.py index b1201fe778e..d8e54529b40 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -48,15 +48,6 @@ def validate_sampler_name(name): return name -def parse_old_sampler_name(name): - for scheduler in sd_schedulers.schedulers: - for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]: - if name.endswith(" " + scheduler_name): - return name[0:-(len(scheduler_name) + 1)], scheduler_name - - return name, "Automatic" - - def setUpscalers(req: dict): reqDict = vars(req) reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None) @@ -447,7 +438,7 @@ def text2imgapi(self, txt2imgreq: models.StableDiffusionTxt2ImgProcessingAPI): self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner) - sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index) + sampler, scheduler = sd_samplers.get_sampler_and_scheduler(txt2imgreq.sampler_name or txt2imgreq.sampler_index, txt2imgreq.scheduler) populate = txt2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(sampler), @@ -457,7 +448,7 @@ def text2imgapi(self, txt2imgreq: models.StableDiffusionTxt2ImgProcessingAPI): if populate.sampler_name: populate.sampler_index = None # prevent a warning later on - if not populate.scheduler: + if not populate.scheduler and scheduler != "Automatic": populate.scheduler = scheduler args = vars(populate) @@ -515,7 +506,7 @@ def img2imgapi(self, img2imgreq: models.StableDiffusionImg2ImgProcessingAPI): self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner) - sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or 
img2imgreq.sampler_index) + sampler, scheduler = sd_samplers.get_sampler_and_scheduler(img2imgreq.sampler_name or img2imgreq.sampler_index, img2imgreq.scheduler) populate = img2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(sampler), @@ -526,7 +517,7 @@ def img2imgapi(self, img2imgreq: models.StableDiffusionImg2ImgProcessingAPI): if populate.sampler_name: populate.sampler_index = None # prevent a warning later on - if not populate.scheduler: + if not populate.scheduler and scheduler != "Automatic": populate.scheduler = scheduler args = vars(populate) From 9d393807056199deade14154d885fcd07dee24b7 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 30 Apr 2024 19:17:53 +0900 Subject: [PATCH 029/201] fix extra batch mode P Transparency red, green, blue = transparency TypeError: cannot unpack non-iterable int object --- modules/postprocessing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/postprocessing.py b/modules/postprocessing.py index 812cbccae9a..8ec122b7c9d 100644 --- a/modules/postprocessing.py +++ b/modules/postprocessing.py @@ -62,11 +62,13 @@ def get_images(extras_mode, image, image_folder, input_dir): else: image_data = image_placeholder + image_data = image_data if image_data.mode in ("RGBA", "RGB") else image_data.convert("RGB") + parameters, existing_pnginfo = images.read_info_from_image(image_data) if parameters: existing_pnginfo["parameters"] = parameters - initial_pp = scripts_postprocessing.PostprocessedImage(image_data if image_data.mode in ("RGBA", "RGB") else image_data.convert("RGB")) + initial_pp = scripts_postprocessing.PostprocessedImage(image_data) scripts.scripts_postproc.run(initial_pp, args) From 89103b47475ba7bb8b9c4b36f8078c6416132ab0 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 1 May 2024 19:41:02 +0900 Subject: [PATCH 030/201] lora bundled TI infotext Co-Authored-By: Morgon Kanter <9632805+mx@users.noreply.github.com> --- extensions-builtin/Lora/networks.py | 9 +++++++++ extensions-builtin/Lora/scripts/lora_script.py | 1 + 2 files changed, 10 insertions(+) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 42b14dc239d..aa55fe242b1 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -143,6 +143,14 @@ def assign_network_names_to_compvis_modules(sd_model): sd_model.network_layer_mapping = network_layer_mapping +class BundledTIHash(str): + def __init__(self, hash_str): + self.hash = hash_str + + def __str__(self): + return self.hash if shared.opts.lora_bundled_ti_to_infotext else '' + + def load_network(name, network_on_disk): net = network.Network(name, network_on_disk) net.mtime = os.path.getmtime(network_on_disk.filename) @@ -229,6 +237,7 @@ def load_network(name, network_on_disk): for emb_name, data in bundle_embeddings.items(): embedding = textual_inversion.create_embedding_from_data(data, emb_name, filename=network_on_disk.filename + "/" + emb_name) embedding.loaded = None + embedding.shorthash = BundledTIHash(name) embeddings[emb_name] = embedding net.bundle_embeddings = embeddings diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py index 1518f7e5c89..d3ea369ae26 100644 --- a/extensions-builtin/Lora/scripts/lora_script.py +++ b/extensions-builtin/Lora/scripts/lora_script.py @@ -36,6 +36,7 @@ def before_ui(): "sd_lora": shared.OptionInfo("None", "Add network to 
prompt", gr.Dropdown, lambda: {"choices": ["None", *networks.available_networks]}, refresh=networks.list_available_networks), "lora_preferred_name": shared.OptionInfo("Alias from file", "When adding to prompt, refer to Lora by", gr.Radio, {"choices": ["Alias from file", "Filename"]}), "lora_add_hashes_to_infotext": shared.OptionInfo(True, "Add Lora hashes to infotext"), + "lora_bundled_ti_to_infotext": shared.OptionInfo(True, "Add Lora name as TI hashes for bundled Textual Inversion").info('"Add Textual Inversion hashes to infotext" needs to be enabled'), "lora_show_all": shared.OptionInfo(False, "Always show all networks on the Lora page").info("otherwise, those detected as for incompatible version of Stable Diffusion will be hidden"), "lora_hide_unknown_for_versions": shared.OptionInfo([], "Hide networks of unknown versions for model versions", gr.CheckboxGroup, {"choices": ["SD1", "SD2", "SDXL"]}), "lora_in_memory_limit": shared.OptionInfo(0, "Number of Lora networks to keep cached in memory", gr.Number, {"precision": 0}), From 0e0e41eabc5753034091e7c673100df66b3640ab Mon Sep 17 00:00:00 2001 From: Andray Date: Wed, 1 May 2024 16:54:47 +0400 Subject: [PATCH 031/201] use gradio theme colors in css --- style.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/style.css b/style.css index f6a89b8f90d..df4aca020bc 100644 --- a/style.css +++ b/style.css @@ -780,9 +780,9 @@ table.popup-table .link{ position:absolute; display:block; padding:0px 0; - border:2px solid #a55000; + border:2px solid var(--primary-800); border-radius:8px; - box-shadow:1px 1px 2px #CE6400; + box-shadow:1px 1px 2px var(--primary-500); width: 200px; } @@ -799,7 +799,7 @@ table.popup-table .link{ } .context-menu-items a:hover{ - background: #a55000; + background: var(--primary-700); } From 5d5224b322e8dbd817469a32d6c5578faff2df2f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 2 May 2024 02:25:16 +0900 Subject: [PATCH 032/201] fix_p_invalid_sampler_and_scheduler --- modules/processing.py | 3 +++ modules/sd_samplers.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/processing.py b/modules/processing.py index 76557dd7f5e..cb646e2bfb3 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -842,6 +842,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed: sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio()) + # backwards compatibility, fix sampler and scheduler if invalid + sd_samplers.fix_p_invalid_sampler_and_scheduler(p) + res = process_images_inner(p) finally: diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 6b7b84b6d76..b8abac4a998 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -1,7 +1,7 @@ from __future__ import annotations import functools - +import logging from modules import sd_samplers_kdiffusion, sd_samplers_timesteps, sd_samplers_lcm, shared, sd_samplers_common, sd_schedulers # imports for functions that previously were here and are used by other modules @@ -122,4 +122,11 @@ def get_sampler_and_scheduler(sampler_name, scheduler_name): return sampler.name, found_scheduler.label +def fix_p_invalid_sampler_and_scheduler(p): + i_sampler_name, i_scheduler = p.sampler_name, p.scheduler + p.sampler_name, p.scheduler = get_sampler_and_scheduler(p.sampler_name, p.scheduler) + if p.sampler_name != i_sampler_name or i_scheduler != p.scheduler: + logging.warning(f'Sampler Scheduler autocorrection: "{i_sampler_name}" -> "{p.sampler_name}", 
"{i_scheduler}" -> "{p.scheduler}"') + + set_samplers() From 7195c4d42cf410c53d4d2f7a74d7059715d357a7 Mon Sep 17 00:00:00 2001 From: Andray Date: Wed, 1 May 2024 22:50:46 +0400 Subject: [PATCH 033/201] two fingers press to open context menu --- javascript/contextMenus.js | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/javascript/contextMenus.js b/javascript/contextMenus.js index ccae242f2b6..a00c3de9f9e 100644 --- a/javascript/contextMenus.js +++ b/javascript/contextMenus.js @@ -107,16 +107,23 @@ var contextMenuInit = function() { oldMenu.remove(); } }); - gradioApp().addEventListener("contextmenu", function(e) { - let oldMenu = gradioApp().querySelector('#context-menu'); - if (oldMenu) { - oldMenu.remove(); - } - menuSpecs.forEach(function(v, k) { - if (e.composedPath()[0].matches(k)) { - showContextMenu(e, e.composedPath()[0], v); - e.preventDefault(); + ['contextmenu', 'touchstart'].forEach((eventType) => { + gradioApp().addEventListener(eventType, function(e) { + let ev = e; + if (eventType.startsWith('touch')) { + if (e.touches.length !== 2) return; + ev = e.touches[0]; + } + let oldMenu = gradioApp().querySelector('#context-menu'); + if (oldMenu) { + oldMenu.remove(); } + menuSpecs.forEach(function(v, k) { + if (e.composedPath()[0].matches(k)) { + showContextMenu(ev, e.composedPath()[0], v); + e.preventDefault(); + } + }); }); }); eventListenerApplied = true; From f12886aefa4f2ac5d8e64a206a6b4d6df9d85b6b Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 4 May 2024 23:42:37 +0900 Subject: [PATCH 034/201] use script_path for webui root in launch_utils --- modules/launch_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 5812b0e5855..e22da4ec64d 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -76,7 +76,7 @@ def git_tag(): except Exception: try: - changelog_md = os.path.join(os.path.dirname(os.path.dirname(__file__)), "CHANGELOG.md") + changelog_md = os.path.join(script_path, "CHANGELOG.md") with open(changelog_md, "r", encoding="utf-8") as file: line = next((line.strip() for line in file if line.strip()), "") line = line.replace("## ", "") @@ -231,7 +231,7 @@ def run_extension_installer(extension_dir): try: env = os.environ.copy() - env['PYTHONPATH'] = f"{os.path.abspath('.')}{os.pathsep}{env.get('PYTHONPATH', '')}" + env['PYTHONPATH'] = f"{script_path}{os.pathsep}{env.get('PYTHONPATH', '')}" stdout = run(f'"{python}" "{path_installer}"', errdesc=f"Error running install.py for extension {extension_dir}", custom_env=env).strip() if stdout: From dd93c47abfd9ed357f5d5827311d836ea399a236 Mon Sep 17 00:00:00 2001 From: bluelovers Date: Tue, 7 May 2024 19:53:18 +0800 Subject: [PATCH 035/201] Update imageviewer.js --- javascript/imageviewer.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js index d4d4f016ddd..a3f08ad16c8 100644 --- a/javascript/imageviewer.js +++ b/javascript/imageviewer.js @@ -51,14 +51,7 @@ function modalImageSwitch(offset) { var galleryButtons = all_gallery_buttons(); if (galleryButtons.length > 1) { - var currentButton = selected_gallery_button(); - - var result = -1; - galleryButtons.forEach(function(v, i) { - if (v == currentButton) { - result = i; - } - }); + var result = selected_gallery_index(); if (result != -1) { var nextButton = galleryButtons[negmod((result + offset), galleryButtons.length)]; From 
dbda59e58a7c90752ab9911a779dd1381ae530e1 Mon Sep 17 00:00:00 2001 From: Andray Date: Tue, 7 May 2024 19:26:16 +0400 Subject: [PATCH 036/201] fix context menu position --- javascript/contextMenus.js | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/javascript/contextMenus.js b/javascript/contextMenus.js index a00c3de9f9e..e01fd67e80e 100644 --- a/javascript/contextMenus.js +++ b/javascript/contextMenus.js @@ -8,9 +8,6 @@ var contextMenuInit = function() { }; function showContextMenu(event, element, menuEntries) { - let posx = event.clientX + document.body.scrollLeft + document.documentElement.scrollLeft; - let posy = event.clientY + document.body.scrollTop + document.documentElement.scrollTop; - let oldMenu = gradioApp().querySelector('#context-menu'); if (oldMenu) { oldMenu.remove(); @@ -23,10 +20,8 @@ var contextMenuInit = function() { contextMenu.style.background = baseStyle.background; contextMenu.style.color = baseStyle.color; contextMenu.style.fontFamily = baseStyle.fontFamily; - contextMenu.style.top = posy + 'px'; - contextMenu.style.left = posx + 'px'; - - + contextMenu.style.top = event.pageY + 'px'; + contextMenu.style.left = event.pageX + 'px'; const contextMenuList = document.createElement('ul'); contextMenuList.className = 'context-menu-items'; @@ -43,21 +38,6 @@ var contextMenuInit = function() { }); gradioApp().appendChild(contextMenu); - - let menuWidth = contextMenu.offsetWidth + 4; - let menuHeight = contextMenu.offsetHeight + 4; - - let windowWidth = window.innerWidth; - let windowHeight = window.innerHeight; - - if ((windowWidth - posx) < menuWidth) { - contextMenu.style.left = windowWidth - menuWidth + "px"; - } - - if ((windowHeight - posy) < menuHeight) { - contextMenu.style.top = windowHeight - menuHeight + "px"; - } - } function appendContextMenuOption(targetElementSelector, entryName, entryFunction) { From e736c3b36b5e450c3883719d1b73acf84bdf29f7 Mon Sep 17 00:00:00 2001 From: JLipnerPitt <122459494+JLipnerPitt@users.noreply.github.com> Date: Wed, 8 May 2024 05:22:12 -0400 Subject: [PATCH 037/201] Add files via upload Fixed an error (AttributeError: 'str' object has no attribute 'decode') coming from line 792 in images.py when trying to upscale certain images. --- modules/images.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/images.py b/modules/images.py index c0ff8a6306a..0f49caf7913 100644 --- a/modules/images.py +++ b/modules/images.py @@ -789,7 +789,10 @@ def read_info_from_image(image: Image.Image) -> tuple[str | None, dict]: if exif_comment: geninfo = exif_comment elif "comment" in items: # for gif - geninfo = items["comment"].decode('utf8', errors="ignore") + if isinstance(items["comment"], bytes): + geninfo = items["comment"].decode('utf8', errors="ignore") + else: + geninfo = items["comment"] for field in IGNORED_INFO_KEYS: items.pop(field, None) From f7e349cea49731b0e57cc2a2c1eb4904f1aea9b9 Mon Sep 17 00:00:00 2001 From: LoganBooker Date: Wed, 8 May 2024 21:23:18 +1000 Subject: [PATCH 038/201] Add AVIF MIME type support to mimetype definitions AVIF images will open, rather than download, as the default behaviour. 
--- modules/ui.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui.py b/modules/ui.py index 403425f2985..cface5002e0 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -41,6 +41,7 @@ # Likewise, add explicit content-type header for certain missing image types mimetypes.add_type('image/webp', '.webp') +mimetypes.add_type('image/avif', '.avif') if not cmd_opts.share and not cmd_opts.listen: # fix gradio phoning home From 5fbac49791d9a4a6af85c8236ba9179d7415e0f9 Mon Sep 17 00:00:00 2001 From: MarcusNyne <69087098+MarcusNyne@users.noreply.github.com> Date: Wed, 8 May 2024 16:48:10 -0400 Subject: [PATCH 039/201] Added --models-dir option The --model-dir option overrides the location of the models directory for stable diffusion, so that models can be shared across multiple installations. When --data-dir is specified alone, both the extensions and models folders are present in this folder. --models-dir can be used independently, but when used with --data-dir, then the models folder is specified by --models-dir, and extensions are found in the --data-dir. --- modules/cmd_args.py | 1 + modules/paths_internal.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/cmd_args.py b/modules/cmd_args.py index 016a33d1057..a683c99e87c 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -20,6 +20,7 @@ parser.add_argument("--loglevel", type=str, help="log level; one of: CRITICAL, ERROR, WARNING, INFO, DEBUG", default=None) parser.add_argument("--do-not-download-clip", action='store_true', help="do not download CLIP model even if it's not included in the checkpoint") parser.add_argument("--data-dir", type=normalized_filepath, default=os.path.dirname(os.path.dirname(os.path.realpath(__file__))), help="base path where all user data is stored") +parser.add_argument("--models-dir", type=normalized_filepath, default=None, help="base path where models are stored; overrides --data-dir") parser.add_argument("--config", type=normalized_filepath, default=sd_default_config, help="path to config which constructs model",) parser.add_argument("--ckpt", type=normalized_filepath, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",) parser.add_argument("--ckpt-dir", type=normalized_filepath, default=None, help="Path to directory with stable diffusion checkpoints") diff --git a/modules/paths_internal.py b/modules/paths_internal.py index cf9da45ab43..884984c9c0a 100644 --- a/modules/paths_internal.py +++ b/modules/paths_internal.py @@ -24,11 +24,13 @@ # Parse the --data-dir flag first so we can use it as a base for our other argument default values parser_pre = argparse.ArgumentParser(add_help=False) parser_pre.add_argument("--data-dir", type=str, default=os.path.dirname(modules_path), help="base path where all user data is stored", ) +parser_pre.add_argument("--models-dir", type=str, default=None, help="base path where models are stored; overrides --data-dir", ) cmd_opts_pre = parser_pre.parse_known_args()[0] data_path = cmd_opts_pre.data_dir +models_override = cmd_opts_pre.models_dir -models_path = os.path.join(data_path, "models") +models_path = models_override if models_override else os.path.join(data_path, "models") extensions_dir = os.path.join(data_path, "extensions") extensions_builtin_dir = os.path.join(script_path, "extensions-builtin") config_states_dir = os.path.join(script_path, "config_states") From d2cc8ccb11558f1dbdb27a2351e34155c3a24ccf Mon Sep 17 00:00:00 2001 
From: MarcusNyne <69087098+MarcusNyne@users.noreply.github.com> Date: Thu, 9 May 2024 17:16:53 -0400 Subject: [PATCH 040/201] When creating a virtual environment, upgrade pip Pip will be upgraded upon immediately creating the virtual environment. If the pip upgrade fails, this should not cause the script to fail (treat as a warning). After the environment is created, it will not attempt further updates to pip. --- webui.bat | 7 ++++++- webui.sh | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/webui.bat b/webui.bat index e2c9079d2fb..a8d479b05e2 100644 --- a/webui.bat +++ b/webui.bat @@ -37,10 +37,15 @@ if %ERRORLEVEL% == 0 goto :activate_venv for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i" echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME% %PYTHON_FULLNAME% -m venv "%VENV_DIR%" >tmp/stdout.txt 2>tmp/stderr.txt -if %ERRORLEVEL% == 0 goto :activate_venv +if %ERRORLEVEL% == 0 goto :upgrade_pip echo Unable to create venv in directory "%VENV_DIR%" goto :show_stdout_stderr +:upgrade_pip +"%VENV_DIR%\Scripts\Python.exe" -m pip install --upgrade pip +if %ERRORLEVEL% == 0 goto :activate_venv +echo Warning: Failed to upgrade PIP version + :activate_venv set PYTHON="%VENV_DIR%\Scripts\Python.exe" echo venv %PYTHON% diff --git a/webui.sh b/webui.sh index c7c4bee9878..7acea902cfc 100755 --- a/webui.sh +++ b/webui.sh @@ -210,6 +210,7 @@ then if [[ ! -d "${venv_dir}" ]] then "${python_cmd}" -m venv "${venv_dir}" + "${venv_dir}"/bin/python -m pip install --upgrade pip first_launch=1 fi # shellcheck source=/dev/null From 73d1caf8f28a387f2db5a77a8892edad8ed505a0 Mon Sep 17 00:00:00 2001 From: Logan Date: Fri, 10 May 2024 12:38:10 +1000 Subject: [PATCH 041/201] Add Align Your Steps to available schedulers * Include both SDXL and SD 1.5 variants (https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html) --- modules/sd_schedulers.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 75eb3ac032f..2131eae46cc 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -4,6 +4,7 @@ import k_diffusion +import numpy as np @dataclasses.dataclass class Scheduler: @@ -30,6 +31,35 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): sigs += [0.0] return torch.FloatTensor(sigs).to(device) +def get_align_your_steps_sigmas(n, device, sigma_id): + # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html + def loglinear_interp(t_steps, num_steps): + """ + Performs log-linear interpolation of a given array of decreasing numbers. 
+ """ + xs = np.linspace(0, 1, len(t_steps)) + ys = np.log(t_steps[::-1]) + + new_xs = np.linspace(0, 1, num_steps) + new_ys = np.interp(new_xs, xs, ys) + + interped_ys = np.exp(new_ys)[::-1].copy() + return interped_ys + + if sigma_id == "sdxl": + sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029] + elif sigma_id == "sd15": + sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] + else: + print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.') + sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] + + if n != len(sigmas): + sigmas = np.append(loglinear_interp(sigmas, n), [0.0]) + else: + sigmas.append(0.0) + + return torch.FloatTensor(sigmas).to(device) schedulers = [ Scheduler('automatic', 'Automatic', None), @@ -38,6 +68,8 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), + Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")), + Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From d6b4444069d36cf7554eb9932061ecf43e9b1335 Mon Sep 17 00:00:00 2001 From: Logan Date: Fri, 10 May 2024 18:05:45 +1000 Subject: [PATCH 042/201] Use shared.sd_model.is_sdxl to determine base AYS sigmas --- modules/sd_schedulers.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 2131eae46cc..0ac1f7a21f8 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -6,6 +6,8 @@ import numpy as np +from modules import shared + @dataclasses.dataclass class Scheduler: name: str @@ -31,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): sigs += [0.0] return torch.FloatTensor(sigs).to(device) -def get_align_your_steps_sigmas(n, device, sigma_id): +def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device): # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html def loglinear_interp(t_steps, num_steps): """ @@ -46,12 +48,10 @@ def loglinear_interp(t_steps, num_steps): interped_ys = np.exp(new_ys)[::-1].copy() return interped_ys - if sigma_id == "sdxl": + if shared.sd_model.is_sdxl: sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029] - elif sigma_id == "sd15": - sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] else: - print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.') + # Default to SD 1.5 sigmas. 
sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] if n != len(sigmas): @@ -68,8 +68,7 @@ def loglinear_interp(t_steps, num_steps): Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), - Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")), - Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")), + Scheduler('align_your_steps', 'Align Your Steps', get_align_your_steps_sigmas), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From d44f241317d63095176543839bc111b731069629 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 11 May 2024 13:13:39 +0900 Subject: [PATCH 043/201] use relative path for webui-assets css --- style.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/style.css b/style.css index f6a89b8f90d..8eefda56df5 100644 --- a/style.css +++ b/style.css @@ -1,6 +1,6 @@ /* temporary fix to load default gradio font in frontend instead of backend */ -@import url('/webui-assets/css/sourcesanspro.css'); +@import url('webui-assets/css/sourcesanspro.css'); /* temporary fix to hide gradio crop tool until it's fixed https://github.com/gradio-app/gradio/issues/3810 */ From ef7713fbb29fed183d669a5a081cda9ac1a8b629 Mon Sep 17 00:00:00 2001 From: elf-mouse Date: Tue, 14 May 2024 15:39:05 +0800 Subject: [PATCH 044/201] chore: sync v1.8.0 packages according to changelog, fix warning --- webui-macos-env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webui-macos-env.sh b/webui-macos-env.sh index db7e8b1a05b..4126005ae9e 100644 --- a/webui-macos-env.sh +++ b/webui-macos-env.sh @@ -11,7 +11,7 @@ fi export install_dir="$HOME" export COMMANDLINE_ARGS="--skip-torch-cuda-test --upcast-sampling --no-half-vae --use-cpu interrogate" -export TORCH_COMMAND="pip install torch==2.1.0 torchvision==0.16.0" +export TORCH_COMMAND="pip install torch==2.1.2 torchvision==0.16.2" export PYTORCH_ENABLE_MPS_FALLBACK=1 #################################################################### From 5ab7d08a0a99c88a60a13885e564fd7d2d05cfc1 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 15 May 2024 17:27:05 +0900 Subject: [PATCH 045/201] fix extention update when not on main branch --- modules/extensions.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index 5ad934b4df3..24de766eb90 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -191,8 +191,9 @@ def list_files(self, subdir, extension): def check_updates(self): repo = Repo(self.path) + branch_name = f'{repo.remote().name}/{self.branch}' for fetch in repo.remote().fetch(dry_run=True): - if self.branch and fetch.name != f'{repo.remote().name}/{self.branch}': + if self.branch and fetch.name != branch_name: continue if fetch.flags != fetch.HEAD_UPTODATE: self.can_update = True @@ -200,7 +201,7 @@ def check_updates(self): return try: - origin = repo.rev_parse('origin') + origin = repo.rev_parse(branch_name) if repo.head.commit != origin: self.can_update = True 
self.status = "behind HEAD" @@ -213,8 +214,10 @@ def check_updates(self): self.can_update = False self.status = "latest" - def fetch_and_reset_hard(self, commit='origin'): + def fetch_and_reset_hard(self, commit=None): repo = Repo(self.path) + if commit is None: + commit = f'{repo.remote().name}/{self.branch}' # Fix: `error: Your local changes to the following files would be overwritten by merge`, # because WSL2 Docker set 755 file permissions instead of 644, this results to the error. repo.git.fetch(all=True) From 022d835565f253841f7f9272ba320bb0cec4770d Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 15:20:40 -0400 Subject: [PATCH 046/201] use_checkpoint = False --- configs/alt-diffusion-inference.yaml | 2 +- configs/alt-diffusion-m18-inference.yaml | 2 +- configs/instruct-pix2pix.yaml | 2 +- configs/sd_xl_inpaint.yaml | 2 +- configs/v1-inference.yaml | 2 +- configs/v1-inpainting-inference.yaml | 2 +- modules/sd_hijack_checkpoint.py | 9 ++++++--- modules/sd_models_config.py | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) diff --git a/configs/alt-diffusion-inference.yaml b/configs/alt-diffusion-inference.yaml index cfbee72d71b..4944ab5c8dc 100644 --- a/configs/alt-diffusion-inference.yaml +++ b/configs/alt-diffusion-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/alt-diffusion-m18-inference.yaml b/configs/alt-diffusion-m18-inference.yaml index 41a031d55f0..c60dca8c7b3 100644 --- a/configs/alt-diffusion-m18-inference.yaml +++ b/configs/alt-diffusion-m18-inference.yaml @@ -41,7 +41,7 @@ model: use_linear_in_transformer: True transformer_depth: 1 context_dim: 1024 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/instruct-pix2pix.yaml b/configs/instruct-pix2pix.yaml index 4e896879dd7..564e50ae246 100644 --- a/configs/instruct-pix2pix.yaml +++ b/configs/instruct-pix2pix.yaml @@ -45,7 +45,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/sd_xl_inpaint.yaml b/configs/sd_xl_inpaint.yaml index 3bad372186f..f40f45e3316 100644 --- a/configs/sd_xl_inpaint.yaml +++ b/configs/sd_xl_inpaint.yaml @@ -21,7 +21,7 @@ model: params: adm_in_channels: 2816 num_classes: sequential - use_checkpoint: True + use_checkpoint: False in_channels: 9 out_channels: 4 model_channels: 320 diff --git a/configs/v1-inference.yaml b/configs/v1-inference.yaml index d4effe569e8..25c4d9ed066 100644 --- a/configs/v1-inference.yaml +++ b/configs/v1-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml index f9eec37d24b..68c199f99c3 100644 --- a/configs/v1-inpainting-inference.yaml +++ b/configs/v1-inpainting-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/modules/sd_hijack_checkpoint.py b/modules/sd_hijack_checkpoint.py index 2604d969f91..b2f05bbdcf0 100644 --- a/modules/sd_hijack_checkpoint.py +++ b/modules/sd_hijack_checkpoint.py @@ -4,16 +4,19 @@ import 
ldm.modules.diffusionmodules.openaimodel +# Setting flag=False so that torch skips checking parameters. +# parameters checking is expensive in frequent operations. + def BasicTransformerBlock_forward(self, x, context=None): - return checkpoint(self._forward, x, context) + return checkpoint(self._forward, x, context, flag=False) def AttentionBlock_forward(self, x): - return checkpoint(self._forward, x) + return checkpoint(self._forward, x, flag=False) def ResBlock_forward(self, x, emb): - return checkpoint(self._forward, x, emb) + return checkpoint(self._forward, x, emb, flag=False) stored = [] diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index b38137eb5a9..9cec4f13dc2 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -35,7 +35,7 @@ def is_using_v_parameterization_for_sd2(state_dict): with sd_disable_initialization.DisableInitialization(): unet = ldm.modules.diffusionmodules.openaimodel.UNetModel( - use_checkpoint=True, + use_checkpoint=False, use_fp16=False, image_size=32, in_channels=4, From 0e98529365477a4f240b2ac67d94ff59235144c5 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 15:46:53 -0400 Subject: [PATCH 047/201] Replace einops.rearrange with torch native --- modules/sd_hijack_optimizations.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 7f9e328d05a..4c2dc56d45d 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -486,7 +486,19 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs): k_in = self.to_k(context_k) v_in = self.to_v(context_v) - q, k, v = (rearrange(t, 'b n (h d) -> b n h d', h=h) for t in (q_in, k_in, v_in)) + def _reshape(t): + """rearrange(t, 'b n (h d) -> b n h d', h=h). + Using torch native operations to avoid overhead as this function is + called frequently. (70 times/it for SDXL) + """ + b, n, _ = t.shape # Get the batch size (b) and sequence length (n) + d = t.shape[2] // h # Determine the depth per head + return t.reshape(b, n, h, d) + + q = _reshape(q_in) + k = _reshape(k_in) + v = _reshape(v_in) + del q_in, k_in, v_in dtype = q.dtype @@ -497,7 +509,9 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs): out = out.to(dtype) - out = rearrange(out, 'b n h d -> b n (h d)', h=h) + # out = rearrange(out, 'b n h d -> b n (h d)', h=h) + b, n, h, d = out.shape + out = out.reshape(b, n, h * d) return self.to_out(out) From 9eb2f786316c0f7e94c3df5f5e8bda203e6b875d Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 16:32:29 -0400 Subject: [PATCH 048/201] Precompute is_sdxl_inpaint flag --- modules/processing.py | 28 +++++++++++----------------- modules/sd_models.py | 7 +++++++ modules/sd_models_xl.py | 9 ++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index 76557dd7f5e..d82cb24fb95 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -115,20 +115,17 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device) else: - sd = sd_model.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - # The "masked-image" in this case will just be all 0.5 since the entire image is masked. 
- image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 - image_conditioning = images_tensor_to_samples(image_conditioning, - approximation_indexes.get(opts.sd_vae_encode_method)) + if sd_model.model.is_sdxl_inpaint: + # The "masked-image" in this case will just be all 0.5 since the entire image is masked. + image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 + image_conditioning = images_tensor_to_samples(image_conditioning, + approximation_indexes.get(opts.sd_vae_encode_method)) - # Add the fake full 1s mask to the first dimension. - image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) - image_conditioning = image_conditioning.to(x.dtype) + # Add the fake full 1s mask to the first dimension. + image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) + image_conditioning = image_conditioning.to(x.dtype) - return image_conditioning + return image_conditioning # Dummy zero conditioning if we're not using inpainting or unclip models. # Still takes up a bit of memory, but no encoder call. @@ -390,11 +387,8 @@ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) - sd = self.sampler.model_wrap.inner_model.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) + if self.sampler.model_wrap.inner_model.model.is_sdxl_inpaint: + return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) # Dummy zero conditioning if we're not using inpainting or depth model. return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..62e74d27ae0 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -380,6 +380,13 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer model.is_sd2 = not model.is_sdxl and hasattr(model.cond_stage_model, 'model') model.is_sd1 = not model.is_sdxl and not model.is_sd2 model.is_ssd = model.is_sdxl and 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys() + # Set is_sdxl_inpaint flag. + diffusion_model_input = state_dict.get('diffusion_model.input_blocks.0.0.weight', None) + model.is_sdxl_inpaint = ( + model.is_sdxl and + diffusion_model_input is not None and + diffusion_model_input.shape[1] == 9 + ) if model.is_sdxl: sd_models_xl.extend_sdxl(model) diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index 94ff973fb84..35e21f6e470 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -35,11 +35,10 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond): - sd = self.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - x = torch.cat([x] + cond['c_concat'], dim=1) + """WARNING: This function is called once per denoising iteration. DO NOT add + expensive functionc calls such as `model.state_dict`. 
""" + if self.model.is_sdxl_inpaint: + x = torch.cat([x] + cond['c_concat'], dim=1) return self.model(x, t, cond) From 6a48476502d6cdd19cb3d0c7f2a0b92aacd7c01f Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 16:54:26 -0400 Subject: [PATCH 049/201] Fix flag check for SD15 --- modules/processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index d82cb24fb95..fff2595e70a 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -115,7 +115,7 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device) else: - if sd_model.model.is_sdxl_inpaint: + if getattr(sd_model.model, "is_sdxl_inpaint", False): # The "masked-image" in this case will just be all 0.5 since the entire image is masked. image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 image_conditioning = images_tensor_to_samples(image_conditioning, @@ -387,7 +387,7 @@ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) - if self.sampler.model_wrap.inner_model.model.is_sdxl_inpaint: + if getattr(self.sampler.model_wrap.inner_model.model, "is_sdxl_inpaint", False): return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) # Dummy zero conditioning if we're not using inpainting or depth model. From 3e20b36e8f1b26f24db0c149732fb5479bff68bc Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 17:27:01 -0400 Subject: [PATCH 050/201] Fix attr access --- modules/sd_models_xl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index 35e21f6e470..1242a59369f 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -37,7 +37,7 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond): """WARNING: This function is called once per denoising iteration. DO NOT add expensive functionc calls such as `model.state_dict`. 
""" - if self.model.is_sdxl_inpaint: + if self.is_sdxl_inpaint: x = torch.cat([x] + cond['c_concat'], dim=1) return self.model(x, t, cond) From 9c8075ba8e538f695ef25f85e6513227b58b71ce Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:16:50 +0900 Subject: [PATCH 051/201] torch_utils.float64 return torch.float64 if device is not mps or xpu, else return torch.float32 --- modules/torch_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/torch_utils.py b/modules/torch_utils.py index e5b52393ec8..a07e02853b1 100644 --- a/modules/torch_utils.py +++ b/modules/torch_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import torch.nn +import torch def get_param(model) -> torch.nn.Parameter: @@ -15,3 +16,11 @@ def get_param(model) -> torch.nn.Parameter: return param raise ValueError(f"No parameters found in model {model!r}") + + +def float64(t: torch.Tensor): + """return torch.float64 if device is not mps or xpu, else return torch.float32""" + match t.device.type: + case 'mps', 'xpu': + return torch.float32 + return torch.float64 From 41f66849c7feac1efd0b9eb6884209be382e9e74 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:18:20 +0900 Subject: [PATCH 052/201] mps, xpu compatibility --- .../soft-inpainting/scripts/soft_inpainting.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py index f56e1e2266d..0e629963af4 100644 --- a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py +++ b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py @@ -3,6 +3,7 @@ import math from modules.ui_components import InputAccordion import modules.scripts as scripts +from modules.torch_utils import float64 class SoftInpaintingSettings: @@ -79,13 +80,11 @@ def latent_blend(settings, a, b, t): # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) + current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(float64(image_interp)).add_(0.00001) # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). 
- a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_( - settings.inpaint_detail_preservation) * one_minus_t3 - b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_( - settings.inpaint_detail_preservation) * t3 + a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(float64(a)).pow_(settings.inpaint_detail_preservation) * one_minus_t3 + b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(float64(b)).pow_(settings.inpaint_detail_preservation) * t3 desired_magnitude = a_magnitude desired_magnitude.add_(b_magnitude).pow_(1 / settings.inpaint_detail_preservation) del a_magnitude, b_magnitude, t3, one_minus_t3 From f015b94176d6df372ce153eddc018cb3b08c03ba Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:19:06 +0900 Subject: [PATCH 053/201] use torch_utils.float64 --- modules/sd_samplers_timesteps_impl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py index 930a64af590..84867d6ee65 100644 --- a/modules/sd_samplers_timesteps_impl.py +++ b/modules/sd_samplers_timesteps_impl.py @@ -5,13 +5,14 @@ from modules import shared from modules.models.diffusion.uni_pc import uni_pc +from modules.torch_utils import float64 @torch.no_grad() def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0): alphas_cumprod = model.inner_model.inner_model.alphas_cumprod alphas = alphas_cumprod[timesteps] - alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32) + alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(float64(x)) sqrt_one_minus_alphas = torch.sqrt(1 - alphas) sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy())) @@ -43,7 +44,7 @@ def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta= def plms(model, x, timesteps, extra_args=None, callback=None, disable=None): alphas_cumprod = model.inner_model.inner_model.alphas_cumprod alphas = alphas_cumprod[timesteps] - alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32) + alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(float64(x)) sqrt_one_minus_alphas = torch.sqrt(1 - alphas) extra_args = {} if extra_args is None else extra_args From 51b13a8c54854104f1510956b920399226a932f1 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 11:39:01 -0400 Subject: [PATCH 054/201] Prevent uncessary bias backup --- extensions-builtin/Lora/networks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 42b14dc239d..360455f8712 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -378,7 +378,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn self.network_weights_backup = weights_backup bias_backup = getattr(self, "network_bias_backup", None) - if bias_backup is None: + if bias_backup is None and wanted_names != (): + if current_names != (): + raise RuntimeError("no backup bias found and current bias are not unchanged") + if isinstance(self, torch.nn.MultiheadAttention) and 
self.out_proj.bias is not None: bias_backup = self.out_proj.bias.to(devices.cpu, copy=True) elif getattr(self, 'bias', None) is not None: From b2ae4490b9c225ff020941bcbf36c8975760deba Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 14:45:00 -0400 Subject: [PATCH 055/201] Fix LoRA bias error --- extensions-builtin/Lora/networks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 360455f8712..aee4e9d9ca7 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -379,15 +379,17 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn bias_backup = getattr(self, "network_bias_backup", None) if bias_backup is None and wanted_names != (): - if current_names != (): - raise RuntimeError("no backup bias found and current bias are not unchanged") - if isinstance(self, torch.nn.MultiheadAttention) and self.out_proj.bias is not None: bias_backup = self.out_proj.bias.to(devices.cpu, copy=True) elif getattr(self, 'bias', None) is not None: bias_backup = self.bias.to(devices.cpu, copy=True) else: bias_backup = None + + # Unlike weight which always has value, some modules don't have bias. + # Only report if bias is not None and current bias are not unchanged. + if bias_backup is not None and current_names != (): + raise RuntimeError("no backup bias found and current bias are not unchanged") self.network_bias_backup = bias_backup if current_names != wanted_names: From 221ac0b9abd2e39ccc6f1969a434f05dcd72b29a Mon Sep 17 00:00:00 2001 From: Andray Date: Thu, 16 May 2024 23:08:24 +0400 Subject: [PATCH 056/201] img2img batch upload method --- modules/img2img.py | 20 +++++++++++++++----- modules/ui.py | 31 ++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/modules/img2img.py b/modules/img2img.py index a1d042c2123..24f869f5c6a 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -17,11 +17,14 @@ import modules.scripts -def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None): +def process_batch(p, input, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None): output_dir = output_dir.strip() processing.fix_seed(p) - batch_images = list(shared.walk_files(input_dir, allowed_extensions=(".png", ".jpg", ".jpeg", ".webp", ".tif", ".tiff"))) + if isinstance(input, str): + batch_images = list(shared.walk_files(input, allowed_extensions=(".png", ".jpg", ".jpeg", ".webp", ".tif", ".tiff"))) + else: + batch_images = [os.path.abspath(x.name) for x in input] is_inpaint_batch = False if inpaint_mask_dir: @@ -146,7 +149,7 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=Fal return batch_results -def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, mask_blur: int, mask_alpha: float, inpainting_fill: int, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, selected_scale_tab: int, height: int, width: int, scale_by: float, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: 
str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, img2img_batch_use_png_info: bool, img2img_batch_png_info_props: list, img2img_batch_png_info_dir: str, *args): +def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, mask_blur: int, mask_alpha: float, inpainting_fill: int, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, selected_scale_tab: int, height: int, width: int, scale_by: float, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, img2img_batch_use_png_info: bool, img2img_batch_png_info_props: list, img2img_batch_png_info_dir: str, img2img_batch_source_type: str, img2img_batch_upload: list, *args): override_settings = create_override_settings_dict(override_settings_texts) is_batch = mode == 5 @@ -221,8 +224,15 @@ def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_ with closing(p): if is_batch: - assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled" - processed = process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=img2img_batch_png_info_dir) + if img2img_batch_source_type == "upload": + assert isinstance(img2img_batch_upload, list) and img2img_batch_upload + output_dir = "" + inpaint_mask_dir = "" + png_info_dir = img2img_batch_png_info_dir if not shared.cmd_opts.hide_ui_dir_config else "" + processed = process_batch(p, img2img_batch_upload, output_dir, inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=png_info_dir) + else: # "from dir" + assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled" + processed = process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=img2img_batch_png_info_dir) if processed is None: processed = Processed(p, [], p.seed, "") diff --git a/modules/ui.py b/modules/ui.py index 403425f2985..f3ac4236774 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -566,18 +566,25 @@ def update_orig(image, state): init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", image_mode="RGBA", elem_id="img_inpaint_mask") with gr.TabItem('Batch', id='batch', elem_id="img2img_batch_tab") as tab_batch: - hidden = '
<br>Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' - gr.HTML( - "<br><p>Process images in a directory on the same machine where the server is running." + - "<br>Use an empty output directory to save pictures normally instead of writing to the output directory." + - f"<br>Add inpaint batch mask directory to enable inpaint batch processing." - f"{hidden}</p>" - )
- img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir") - img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir") - img2img_batch_inpaint_mask_dir = gr.Textbox(label="Inpaint batch mask directory (required for inpaint batch processing only)", **shared.hide_dirs, elem_id="img2img_batch_inpaint_mask_dir") + with gr.Tabs(elem_id="img2img_batch_source"): + img2img_batch_source_type = gr.Textbox(visible=False, value="upload") + with gr.TabItem('Upload', id='batch_upload', elem_id="img2img_batch_upload_tab") as tab_batch_upload: + img2img_batch_upload = gr.Files(label="Files", interactive=True, elem_id="img2img_batch_upload") + with gr.TabItem('From directory', id='batch_from_dir', elem_id="img2img_batch_from_dir_tab") as tab_batch_from_dir:
+ hidden = '<br>Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' + gr.HTML( + "<br><p>Process images in a directory on the same machine where the server is running." + + "<br>Use an empty output directory to save pictures normally instead of writing to the output directory." + + f"<br>Add inpaint batch mask directory to enable inpaint batch processing." + f"{hidden}</p>
" + ) + img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir") + img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir") + img2img_batch_inpaint_mask_dir = gr.Textbox(label="Inpaint batch mask directory (required for inpaint batch processing only)", **shared.hide_dirs, elem_id="img2img_batch_inpaint_mask_dir") + tab_batch_upload.select(fn=lambda: "upload", inputs=[], outputs=[img2img_batch_source_type]) + tab_batch_from_dir.select(fn=lambda: "from dir", inputs=[], outputs=[img2img_batch_source_type]) with gr.Accordion("PNG info", open=False): - img2img_batch_use_png_info = gr.Checkbox(label="Append png info to prompts", **shared.hide_dirs, elem_id="img2img_batch_use_png_info") + img2img_batch_use_png_info = gr.Checkbox(label="Append png info to prompts", elem_id="img2img_batch_use_png_info") img2img_batch_png_info_dir = gr.Textbox(label="PNG info directory", **shared.hide_dirs, placeholder="Leave empty to use input directory", elem_id="img2img_batch_png_info_dir") img2img_batch_png_info_props = gr.CheckboxGroup(["Prompt", "Negative prompt", "Seed", "CFG scale", "Sampler", "Steps", "Model hash"], label="Parameters to take from png info", info="Prompts from png info will be appended to prompts set in ui.") @@ -759,6 +766,8 @@ def select_img2img_tab(tab): img2img_batch_use_png_info, img2img_batch_png_info_props, img2img_batch_png_info_dir, + img2img_batch_source_type, + img2img_batch_upload, ] + custom_inputs, outputs=[ output_panel.gallery, From 58eec83a546b8d61500c7b801cb0bdbe7650f6a6 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 16:39:02 -0400 Subject: [PATCH 057/201] Fully prevent use_checkpoint --- modules/sd_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..a33fa7c33f8 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -551,6 +551,11 @@ def repair_config(sd_config): karlo_path = os.path.join(paths.models_path, 'karlo') sd_config.model.params.noise_aug_config.params.clip_stats_path = sd_config.model.params.noise_aug_config.params.clip_stats_path.replace("checkpoints/karlo_models", karlo_path) + # Do not use checkpoint for inference. + # This helps prevent extra performance overhead on checking parameters. + # The perf overhead is about 100ms/it on 4090. 
+ sd_config.model.params.network_config.params.use_checkpoint = False + def rescale_zero_terminal_snr_abar(alphas_cumprod): alphas_bar_sqrt = alphas_cumprod.sqrt() From 2a8a60c2c50473f0ece5804d4a2cde0d1ff3d35e Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 19:50:06 -0400 Subject: [PATCH 058/201] Add --precision half cmd option --- modules/cmd_args.py | 2 +- modules/devices.py | 24 ++++++++++++++++++++++++ modules/sd_hijack_unet.py | 29 ++++++++++++++++++++++------- modules/sd_hijack_utils.py | 26 +++++++++++++++----------- modules/sd_models.py | 1 + modules/shared_init.py | 8 ++++++++ 6 files changed, 71 insertions(+), 19 deletions(-) diff --git a/modules/cmd_args.py b/modules/cmd_args.py index 016a33d1057..58c5e5d5b10 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -41,7 +41,7 @@ parser.add_argument("--lowram", action='store_true', help="load stable diffusion checkpoint weights to VRAM instead of RAM") parser.add_argument("--always-batch-cond-uncond", action='store_true', help="does not do anything") parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.") -parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast") +parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "half", "autocast"], default="autocast") parser.add_argument("--upcast-sampling", action='store_true', help="upcast sampling. No effect with --no-half. Usually produces similar results to --no-half with better performance while using less memory.") parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site") parser.add_argument("--ngrok", type=str, help="ngrok authtoken, alternative to gradio --share", default=None) diff --git a/modules/devices.py b/modules/devices.py index e4f671ac659..7de34ac51c3 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -114,6 +114,9 @@ def enable_tf32(): cpu: torch.device = torch.device("cpu") fp8: bool = False +# Force fp16 for all models in inference. No casting during inference. +# This flag is controlled by "--precision half" command line arg. +force_fp16: bool = False device: torch.device = None device_interrogate: torch.device = None device_gfpgan: torch.device = None @@ -127,6 +130,8 @@ def enable_tf32(): def cond_cast_unet(input): + if force_fp16: + return input.to(torch.float16) return input.to(dtype_unet) if unet_needs_upcast else input @@ -206,6 +211,11 @@ def autocast(disable=False): if disable: return contextlib.nullcontext() + if force_fp16: + # No casting during inference if force_fp16 is enabled. + # All tensor dtype conversion happens before inference. + return contextlib.nullcontext() + if fp8 and device==cpu: return torch.autocast("cpu", dtype=torch.bfloat16, enabled=True) @@ -269,3 +279,17 @@ def first_time_calculation(): x = torch.zeros((1, 1, 3, 3)).to(device, dtype) conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype) conv2d(x) + + +def force_model_fp16(): + """ + ldm and sgm has modules.diffusionmodules.util.GroupNorm32.forward, which + force conversion of input to float32. If force_fp16 is enabled, we need to + prevent this casting. 
+ """ + assert force_fp16 + import sgm.modules.diffusionmodules.util as sgm_util + import ldm.modules.diffusionmodules.util as ldm_util + sgm_util.GroupNorm32 = torch.nn.GroupNorm + ldm_util.GroupNorm32 = torch.nn.GroupNorm + print("ldm/sgm GroupNorm32 replaced with normal torch.nn.GroupNorm due to `--precision half`.") diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 2101f1a0415..41955313a31 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -36,7 +36,7 @@ def cat(self, tensors, *args, **kwargs): # Below are monkey patches to enable upcasting a float16 UNet for float32 sampling def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): - + """Always make sure inputs to unet are in correct dtype.""" if isinstance(cond, dict): for y in cond.keys(): if isinstance(cond[y], list): @@ -45,7 +45,11 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y] with devices.autocast(): - return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float() + result = orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs) + if devices.unet_needs_upcast: + return result.float() + else: + return result class GELUHijack(torch.nn.GELU, torch.nn.Module): @@ -64,12 +68,11 @@ def hijack_ddpm_edit(): if not ddpm_edit_hijack: CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond) CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) - ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) + ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model) unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast -CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) -CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) + if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast) @@ -81,5 +84,17 @@ def hijack_ddpm_edit(): CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond) -CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model, unet_needs_upcast) -CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) +CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model) 
+CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model) + + +def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs): + if devices.unet_needs_upcast and timesteps.dtype == torch.int64: + dtype = torch.float32 + else: + dtype = devices.dtype_unet + return orig_func(timesteps, *args, **kwargs).to(dtype=dtype) + + +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) +CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) diff --git a/modules/sd_hijack_utils.py b/modules/sd_hijack_utils.py index 79bf6e46862..546f2eda4ec 100644 --- a/modules/sd_hijack_utils.py +++ b/modules/sd_hijack_utils.py @@ -1,7 +1,11 @@ import importlib + +always_true_func = lambda *args, **kwargs: True + + class CondFunc: - def __new__(cls, orig_func, sub_func, cond_func): + def __new__(cls, orig_func, sub_func, cond_func=always_true_func): self = super(CondFunc, cls).__new__(cls) if isinstance(orig_func, str): func_path = orig_func.split('.') @@ -20,13 +24,13 @@ def __new__(cls, orig_func, sub_func, cond_func): print(f"Warning: Failed to resolve {orig_func} for CondFunc hijack") pass self.__init__(orig_func, sub_func, cond_func) - return lambda *args, **kwargs: self(*args, **kwargs) - def __init__(self, orig_func, sub_func, cond_func): - self.__orig_func = orig_func - self.__sub_func = sub_func - self.__cond_func = cond_func - def __call__(self, *args, **kwargs): - if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs): - return self.__sub_func(self.__orig_func, *args, **kwargs) - else: - return self.__orig_func(*args, **kwargs) + return lambda *args, **kwargs: self(*args, **kwargs) + def __init__(self, orig_func, sub_func, cond_func): + self.__orig_func = orig_func + self.__sub_func = sub_func + self.__cond_func = cond_func + def __call__(self, *args, **kwargs): + if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs): + return self.__sub_func(self.__orig_func, *args, **kwargs) + else: + return self.__orig_func(*args, **kwargs) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a668..9c59091683a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -403,6 +403,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer model.float() model.alphas_cumprod_original = model.alphas_cumprod devices.dtype_unet = torch.float32 + assert shared.cmd_opts.precision != "half", "Cannot use --precision half with --no-half" timer.record("apply float()") else: vae = model.first_stage_model diff --git a/modules/shared_init.py b/modules/shared_init.py index 935e3a21cf2..a6ad0433d6f 100644 --- a/modules/shared_init.py +++ b/modules/shared_init.py @@ -31,6 +31,14 @@ def initialize(): devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16 devices.dtype_inference = torch.float32 if cmd_opts.precision == 'full' else devices.dtype + if cmd_opts.precision == "half": + msg = "--no-half and --no-half-vae conflict with --precision half" + assert devices.dtype == torch.float16, msg + assert devices.dtype_vae == torch.float16, msg + assert devices.dtype_inference == torch.float16, msg + devices.force_fp16 = True + devices.force_model_fp16() + shared.device = devices.device shared.weight_load_location = None if cmd_opts.lowram else "cpu" From 47f1d42a7e77259e2e7418ae8f941718c55cfd25 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 20:06:04 -0400 
Subject: [PATCH 059/201] Fix for SD15 models --- modules/sd_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index a33fa7c33f8..cda142bdd3c 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -553,8 +553,11 @@ def repair_config(sd_config): # Do not use checkpoint for inference. # This helps prevent extra performance overhead on checking parameters. - # The perf overhead is about 100ms/it on 4090. - sd_config.model.params.network_config.params.use_checkpoint = False + # The perf overhead is about 100ms/it on 4090 for SDXL. + if hasattr(sd_config.model.params, "network_config"): + sd_config.model.params.network_config.params.use_checkpoint = False + if hasattr(sd_config.model.params, "unet_config"): + sd_config.model.params.unet_config.params.use_checkpoint = False def rescale_zero_terminal_snr_abar(alphas_cumprod): From 01491d303ce216820513c5cee998801359b8cbba Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Fri, 17 May 2024 10:36:08 -0400 Subject: [PATCH 060/201] Keep sigmas on CPU --- modules/sd_samplers_kdiffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index b45f85b07a8..228de49449e 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -115,7 +115,7 @@ def get_sigmas(self, p, steps): if scheduler.need_inner_model: sigmas_kwargs['inner_model'] = self.model_wrap - sigmas = scheduler.function(n=steps, **sigmas_kwargs, device=shared.device) + sigmas = scheduler.function(n=steps, **sigmas_kwargs) if discard_next_to_last_sigma: sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) From 10f2407f48fa3a8bbd299068e5f67108f272b87d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 00:44:02 +0900 Subject: [PATCH 061/201] xyz csv skipinitialspace --- scripts/xyz_grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe54b..d416e4c0759 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -212,7 +212,7 @@ def list_to_csv_string(data_list): def csv_string_to_list_strip(data_str): - return list(map(str.strip, chain.from_iterable(csv.reader(StringIO(data_str))))) + return list(map(str.strip, chain.from_iterable(csv.reader(StringIO(data_str), skipinitialspace=True)))) class AxisOption: From 53d67088ee0fb190c3ae1330c2b876dedb16dd8b Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Fri, 17 May 2024 12:12:57 -0400 Subject: [PATCH 062/201] Patch timestep embedding to create tensor on-device --- modules/sd_hijack_unet.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 2101f1a0415..0dabbe0e43d 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -1,5 +1,7 @@ import torch from packaging import version +from einops import repeat +import math from modules import devices from modules.sd_hijack_utils import CondFunc @@ -48,6 +50,30 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float() +# Monkey patch to create timestep embed tensor on device, avoiding a block. 
+def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False): + """ + Create sinusoidal timestep embeddings. + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half + ) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = repeat(timesteps, 'b -> b d', d=dim) + return embedding + + class GELUHijack(torch.nn.GELU, torch.nn.Module): def __init__(self, *args, **kwargs): torch.nn.GELU.__init__(self, *args, **kwargs) @@ -69,6 +95,7 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) From cc9ca67664ef72931af9a4dced88a8434c5d4f16 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Fri, 17 May 2024 13:14:26 -0400 Subject: [PATCH 063/201] Add transformer forward patch --- modules/sd_hijack_unet.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 0dabbe0e43d..c680367ebb3 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -74,6 +74,30 @@ def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False): return embedding +# Monkey patch to SpatialTransformer removing unnecessary contiguous calls. 
+# Prevents a lot of unnecessary aten::copy_ calls +def spatial_transformer_forward(_, self, x: torch.Tensor, context=None): + # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + if not self.use_linear: + x = self.proj_in(x) + x = x.permute(0, 2, 3, 1).reshape(b, h * w, c) + if self.use_linear: + x = self.proj_in(x) + for i, block in enumerate(self.transformer_blocks): + x = block(x, context=context[i]) + if self.use_linear: + x = self.proj_out(x) + x = x.view(b, h, w, c).permute(0, 3, 1, 2) + if not self.use_linear: + x = self.proj_out(x) + return x + x_in + + class GELUHijack(torch.nn.GELU, torch.nn.Module): def __init__(self, *args, **kwargs): torch.nn.GELU.__init__(self, *args, **kwargs) @@ -95,7 +119,8 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) -CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding) +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding, lambda *args, **kwargs: True) +CondFunc('ldm.modules.attention.SpatialTransformer.forward', spatial_transformer_forward, lambda *args, **kwargs: True) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) From dca9007ac7a9852752d91d34d2ed1feaef6a03f2 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Fri, 17 May 2024 13:23:12 -0400 Subject: [PATCH 064/201] Fix SD15 dtype --- modules/sd_models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/sd_models.py b/modules/sd_models.py index 9c59091683a..7d4ab0fd89a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -733,6 +733,10 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): sd_model = instantiate_from_config(sd_config.model) sd_model.used_config = checkpoint_config + # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite + # UnetModel.dtype, it will be the default dtype from config. + # sgm's Unet is not using dtype for casting. The value will be ignored. 
+ sd_model.model.diffusion_model.dtype = devices.dtype_unet timer.record("create model") From b57a70f37322142939f7429f287599e027108bfc Mon Sep 17 00:00:00 2001 From: huchenlei Date: Fri, 17 May 2024 13:34:04 -0400 Subject: [PATCH 065/201] Proper fix of SD15 dtype --- modules/sd_models.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 7d4ab0fd89a..26a5127cd04 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -541,7 +541,7 @@ def repair_config(sd_config): if hasattr(sd_config.model.params, 'unet_config'): if shared.cmd_opts.no_half: sd_config.model.params.unet_config.params.use_fp16 = False - elif shared.cmd_opts.upcast_sampling: + elif shared.cmd_opts.upcast_sampling or shared.cmd_opts.precision == "half": sd_config.model.params.unet_config.params.use_fp16 = True if getattr(sd_config.model.params.first_stage_config.params.ddconfig, "attn_type", None) == "vanilla-xformers" and not shared.xformers_available: @@ -733,10 +733,6 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): sd_model = instantiate_from_config(sd_config.model) sd_model.used_config = checkpoint_config - # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite - # UnetModel.dtype, it will be the default dtype from config. - # sgm's Unet is not using dtype for casting. The value will be ignored. - sd_model.model.diffusion_model.dtype = devices.dtype_unet timer.record("create model") From 1d7448281751ea3223c681a82de8219a6fbe1d22 Mon Sep 17 00:00:00 2001 From: Logan Date: Sat, 18 May 2024 09:09:57 +1000 Subject: [PATCH 066/201] Default device for sigma tensor to CPU * Consistent with implementations in k-diffusion. * Makes this compatible with https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15823 --- modules/sd_schedulers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 0ac1f7a21f8..4ddb778501a 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -33,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): sigs += [0.0] return torch.FloatTensor(sigs).to(device) -def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device): +def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device='cpu'): # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html def loglinear_interp(t_steps, num_steps): """ From 281e0a007b102c7fc9f6150fb88c95470dc25a17 Mon Sep 17 00:00:00 2001 From: Andray Date: Sat, 18 May 2024 09:13:16 +0400 Subject: [PATCH 067/201] scroll extensions table on overflow --- style.css | 2 ++ 1 file changed, 2 insertions(+) diff --git a/style.css b/style.css index f6a89b8f90d..5ec803a0411 100644 --- a/style.css +++ b/style.css @@ -807,6 +807,8 @@ table.popup-table .link{ #tab_extensions table{ border-collapse: collapse; + overflow-x: auto; + display: block; } #tab_extensions table td, #tab_extensions table th{ From feeb6802aa71fad190da2e051e50af84a94eda85 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 18 May 2024 01:22:31 -0400 Subject: [PATCH 068/201] fix case where first step skilled if skip early cond is 0 --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 082a4f63c63..d89ea2c8bfc 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ 
-212,7 +212,7 @@ def apply_blend(current_latent): uncond = denoiser_params.text_uncond skip_uncond = False - if self.step / self.total_steps <= shared.opts.skip_early_cond: + if shared.opts.skip_early_cond != 0. and self.step / self.total_steps <= shared.opts.skip_early_cond: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 501ac016da8c28ff4778219f142f0622083237ce Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 18:37:37 +0900 Subject: [PATCH 069/201] Reformat --- scripts/xyz_grid.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe54b..b23fd4770ef 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -162,12 +162,14 @@ def fun(p, x, xs): if boolean: x = True if x.lower() == "true" else False p.override_settings[field] = x + return fun def boolean_choice(reverse: bool = False): def choice(): return ["False", "True"] if reverse else ["True", "False"] + return choice @@ -572,7 +574,7 @@ def process_axis(opt, vals, vals_dropdown): mc = re_range_count.fullmatch(val) if m is not None: start = int(m.group(1)) - end = int(m.group(2))+1 + end = int(m.group(2)) + 1 step = int(m.group(3)) if m.group(3) is not None else 1 valslist_ext += list(range(start, end, step)) @@ -725,11 +727,11 @@ def cell(x, y, z, ix, iy, iz): ydim = len(ys) if vary_seeds_y else 1 if vary_seeds_x: - pc.seed += ix + pc.seed += ix if vary_seeds_y: - pc.seed += iy * xdim + pc.seed += iy * xdim if vary_seeds_z: - pc.seed += iz * xdim * ydim + pc.seed += iz * xdim * ydim try: res = process_images(pc) @@ -797,18 +799,18 @@ def cell(x, y, z, ix, iy, iz): z_count = len(zs) # Set the grid infotexts to the real ones with extra_generation_params (1 main grid + z_count sub-grids) - processed.infotexts[:1+z_count] = grid_infotext[:1+z_count] + processed.infotexts[:1 + z_count] = grid_infotext[:1 + z_count] if not include_lone_images: # Don't need sub-images anymore, drop from list: - processed.images = processed.images[:z_count+1] + processed.images = processed.images[:z_count + 1] if opts.grid_save: # Auto-save main and sub-grids: grid_count = z_count + 1 if z_count > 1 else 1 for g in range(grid_count): # TODO: See previous comment about intentional data misalignment. 
- adj_g = g-1 if g > 0 else g + adj_g = g - 1 if g > 0 else g images.save_image(processed.images[g], p.outpath_grids, "xyz_grid", info=processed.infotexts[g], extension=opts.grid_format, prompt=processed.all_prompts[adj_g], seed=processed.all_seeds[adj_g], grid=True, p=processed) if not include_sub_grids: # if not include_sub_grids then skip saving after the first grid break From 969a462ac9ea52eb61b8de9fd685cc477c8b8dac Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 18:27:34 +0900 Subject: [PATCH 070/201] xyz util confirm_range --- scripts/xyz_grid.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b23fd4770ef..81c7abe9562 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -95,6 +95,17 @@ def confirm_checkpoints_or_none(p, xs): raise RuntimeError(f"Unknown checkpoint: {x}") +def confirm_range(min_val, max_val, axis_label): + """Generates a AxisOption.confirm() function that checks all values are within the specified range.""" + + def confirm_range_fun(p, xs): + for x in xs: + if not (max_val >= x >= min_val): + raise ValueError(f'{axis_label} value "{x}" out of range [{min_val}, {max_val}]') + + return confirm_range_fun + + def apply_clip_skip(p, x, xs): opts.data["CLIP_stop_at_last_layers"] = x From 24a59ad3d2f9f44130746fdfe54f9f51ba74e77f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 15:36:49 +0900 Subject: [PATCH 071/201] fix Hypertile xyz grid --- .../hypertile/scripts/hypertile_script.py | 17 ++++++- .../hypertile/scripts/hypertile_xyz.py | 51 ------------------- 2 files changed, 15 insertions(+), 53 deletions(-) delete mode 100644 extensions-builtin/hypertile/scripts/hypertile_xyz.py diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py index 395d584b605..59e7f9907e5 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_script.py +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -1,6 +1,5 @@ import hypertile from modules import scripts, script_callbacks, shared -from scripts.hypertile_xyz import add_axis_options class ScriptHypertile(scripts.Script): @@ -93,7 +92,6 @@ def on_ui_settings(): "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile U-Net max depth").info("larger = more neural network layers affected; minor effect on performance"), "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-Net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile U-Net max tile size").info("larger = worse performance"), "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-Net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile U-Net swap size"), - "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE", infotext="Hypertile VAE").info("minimal change in the generated picture"), "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile VAE max depth"), "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile VAE max tile size"), @@ -105,5 +103,20 @@ def on_ui_settings(): shared.opts.add_option(name, opt) +def add_axis_options(): + xyz_grid = 
[x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module + xyz_grid.axis_options.extend([ + xyz_grid.AxisOption("[Hypertile] Unet First pass Enabled", str, xyz_grid.apply_override('hypertile_enable_unet', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Second pass Enabled", str, xyz_grid.apply_override('hypertile_enable_unet_secondpass', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, xyz_grid.apply_override("hypertile_max_depth_unet"), confirm=xyz_grid.confirm_range(0, 3, '[Hypertile] Unet Max Depth'), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, xyz_grid.apply_override("hypertile_max_tile_unet"), confirm=xyz_grid.confirm_range(0, 512, '[Hypertile] Unet Max Tile Size')), + xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, xyz_grid.apply_override("hypertile_swap_size_unet"), confirm=xyz_grid.confirm_range(0, 64, '[Hypertile] Unet Swap Size')), + xyz_grid.AxisOption("[Hypertile] VAE Enabled", str, xyz_grid.apply_override('hypertile_enable_vae', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, xyz_grid.apply_override("hypertile_max_depth_vae"), confirm=xyz_grid.confirm_range(0, 3, '[Hypertile] VAE Max Depth'), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, xyz_grid.apply_override("hypertile_max_tile_vae"), confirm=xyz_grid.confirm_range(0, 512, '[Hypertile] VAE Max Tile Size')), + xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, xyz_grid.apply_override("hypertile_swap_size_vae"), confirm=xyz_grid.confirm_range(0, 64, '[Hypertile] VAE Swap Size')), + ]) + + script_callbacks.on_ui_settings(on_ui_settings) script_callbacks.on_before_ui(add_axis_options) diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py deleted file mode 100644 index 9e96ae3c527..00000000000 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ /dev/null @@ -1,51 +0,0 @@ -from modules import scripts -from modules.shared import opts - -xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module - -def int_applier(value_name:str, min_range:int = -1, max_range:int = -1): - """ - Returns a function that applies the given value to the given value_name in opts.data. - """ - def validate(value_name:str, value:str): - value = int(value) - # validate value - if not min_range == -1: - assert value >= min_range, f"Value {value} for {value_name} must be greater than or equal to {min_range}" - if not max_range == -1: - assert value <= max_range, f"Value {value} for {value_name} must be less than or equal to {max_range}" - def apply_int(p, x, xs): - validate(value_name, x) - opts.data[value_name] = int(x) - return apply_int - -def bool_applier(value_name:str): - """ - Returns a function that applies the given value to the given value_name in opts.data. 
- """ - def validate(value_name:str, value:str): - assert value.lower() in ["true", "false"], f"Value {value} for {value_name} must be either true or false" - def apply_bool(p, x, xs): - validate(value_name, x) - value_boolean = x.lower() == "true" - opts.data[value_name] = value_boolean - return apply_bool - -def add_axis_options(): - extra_axis_options = [ - xyz_grid.AxisOption("[Hypertile] Unet First pass Enabled", str, bool_applier("hypertile_enable_unet"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] Unet Second pass Enabled", str, bool_applier("hypertile_enable_unet_secondpass"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, int_applier("hypertile_max_depth_unet", 0, 3), choices=lambda: [str(x) for x in range(4)]), - xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, int_applier("hypertile_max_tile_unet", 0, 512)), - xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, int_applier("hypertile_swap_size_unet", 0, 64)), - xyz_grid.AxisOption("[Hypertile] VAE Enabled", str, bool_applier("hypertile_enable_vae"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, int_applier("hypertile_max_depth_vae", 0, 3), choices=lambda: [str(x) for x in range(4)]), - xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), - xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), - ] - set_a = {opt.label for opt in xyz_grid.axis_options} - set_b = {opt.label for opt in extra_axis_options} - if set_a.intersection(set_b): - return - - xyz_grid.axis_options.extend(extra_axis_options) From 82884da18c8f183c4ce0e7237953303f26610370 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 04:55:45 +0900 Subject: [PATCH 072/201] use apply_override for Clip skip --- scripts/xyz_grid.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe54b..c7cb513338c 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -264,7 +264,7 @@ def __init__(self, *args, **kwargs): AxisOption("Schedule max sigma", float, apply_override("sigma_max")), AxisOption("Schedule rho", float, apply_override("rho")), AxisOption("Eta", float, apply_field("eta")), - AxisOption("Clip skip", int, apply_clip_skip), + AxisOption("Clip skip", int, apply_override('CLIP_stop_at_last_layers')), AxisOption("Denoising", float, apply_field("denoising_strength")), AxisOption("Initial noise multiplier", float, apply_field("initial_noise_multiplier")), AxisOption("Extra noise", float, apply_override("img2img_extra_noise")), @@ -399,7 +399,6 @@ def index(ix, iy, iz): class SharedSettingsStackHelper(object): def __enter__(self): - self.CLIP_stop_at_last_layers = opts.CLIP_stop_at_last_layers self.vae = opts.sd_vae self.uni_pc_order = opts.uni_pc_order @@ -409,8 +408,6 @@ def __exit__(self, exc_type, exc_value, tb): modules.sd_models.reload_model_weights() modules.sd_vae.reload_vae_weights() - opts.data["CLIP_stop_at_last_layers"] = self.CLIP_stop_at_last_layers - re_range = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\(([+-]\d+)\s*\))?\s*") re_range_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\(([+-]\d+(?:.\d*)?)\s*\))?\s*") From 1f392517f8938e0082e189fa0c28f4eb89fb0eb2 Mon Sep 17 00:00:00 2001 From: w-e-w 
<40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 04:59:05 +0900 Subject: [PATCH 073/201] use override for uni_pc_order --- scripts/xyz_grid.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index c7cb513338c..622cc43c3ca 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -140,7 +140,7 @@ def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _): def apply_uni_pc_order(p, x, xs): - opts.data["uni_pc_order"] = min(x, p.steps - 1) + p.override_settings['uni_pc_order'] = min(x, p.steps - 1) def apply_face_restore(p, opt, x): @@ -400,11 +400,9 @@ def index(ix, iy, iz): class SharedSettingsStackHelper(object): def __enter__(self): self.vae = opts.sd_vae - self.uni_pc_order = opts.uni_pc_order def __exit__(self, exc_type, exc_value, tb): opts.data["sd_vae"] = self.vae - opts.data["uni_pc_order"] = self.uni_pc_order modules.sd_models.reload_model_weights() modules.sd_vae.reload_vae_weights() From 1e696b028adbd449df8c30ed760103b120ec5546 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 05:14:32 +0900 Subject: [PATCH 074/201] use override of sd_vae --- scripts/xyz_grid.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 622cc43c3ca..4c83e92b24e 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -118,21 +118,16 @@ def apply_size(p, x: str, xs) -> None: def find_vae(name: str): - if name.lower() in ['auto', 'automatic']: - return modules.sd_vae.unspecified - if name.lower() == 'none': - return None - else: - choices = [x for x in sorted(modules.sd_vae.vae_dict, key=lambda x: len(x)) if name.lower().strip() in x.lower()] - if len(choices) == 0: - print(f"No VAE found for {name}; using automatic") - return modules.sd_vae.unspecified - else: - return modules.sd_vae.vae_dict[choices[0]] + match name := name.lower().strip(): + case 'auto', 'automatic': + return 'Automatic' + case 'none': + return 'None' + return next((k for k in modules.sd_vae.vae_dict if k.lower() == name), print(f'No VAE found for {name}; using Automatic') or 'Automatic') def apply_vae(p, x, xs): - modules.sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) + p.override_settings['sd_vae'] = find_vae(x) def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _): @@ -270,7 +265,7 @@ def __init__(self, *args, **kwargs): AxisOption("Extra noise", float, apply_override("img2img_extra_noise")), AxisOptionTxt2Img("Hires upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]), AxisOptionImg2Img("Cond. 
Image Mask Weight", float, apply_field("inpainting_mask_weight")), - AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['Automatic', 'None'] + list(sd_vae.vae_dict)), AxisOption("Styles", str, apply_styles, choices=lambda: list(shared.prompt_styles.styles)), AxisOption("UniPC Order", int, apply_uni_pc_order, cost=0.5), AxisOption("Face restore", str, apply_face_restore, format_value=format_value), @@ -399,10 +394,9 @@ def index(ix, iy, iz): class SharedSettingsStackHelper(object): def __enter__(self): - self.vae = opts.sd_vae + pass def __exit__(self, exc_type, exc_value, tb): - opts.data["sd_vae"] = self.vae modules.sd_models.reload_model_weights() modules.sd_vae.reload_vae_weights() From 51e7122f25c276b258a8f55a64e60e5b2265287f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 05:17:44 +0900 Subject: [PATCH 075/201] remove unused code --- scripts/xyz_grid.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 4c83e92b24e..23dafd47778 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -95,17 +95,6 @@ def confirm_checkpoints_or_none(p, xs): raise RuntimeError(f"Unknown checkpoint: {x}") -def apply_clip_skip(p, x, xs): - opts.data["CLIP_stop_at_last_layers"] = x - - -def apply_upscale_latent_space(p, x, xs): - if x.lower().strip() != '0': - opts.data["use_scale_latent_for_hires_fix"] = True - else: - opts.data["use_scale_latent_for_hires_fix"] = False - - def apply_size(p, x: str, xs) -> None: try: width, _, height = x.partition('x') From 5867be2914c303c2f8ba86ff23dba4b31aeafa79 Mon Sep 17 00:00:00 2001 From: viking1304 Date: Mon, 20 May 2024 23:44:17 +0200 Subject: [PATCH 076/201] Use different torch versions for Intel and ARM Macs --- webui-macos-env.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/webui-macos-env.sh b/webui-macos-env.sh index db7e8b1a05b..ad0736378c6 100644 --- a/webui-macos-env.sh +++ b/webui-macos-env.sh @@ -11,7 +11,12 @@ fi export install_dir="$HOME" export COMMANDLINE_ARGS="--skip-torch-cuda-test --upcast-sampling --no-half-vae --use-cpu interrogate" -export TORCH_COMMAND="pip install torch==2.1.0 torchvision==0.16.0" export PYTORCH_ENABLE_MPS_FALLBACK=1 +if [[ "$(sysctl -n machdep.cpu.brand_string)" =~ ^.*"Intel".*$ ]]; then + export TORCH_COMMAND="pip install torch==2.1.2 torchvision==0.16.2" +else + export TORCH_COMMAND="pip install torch==2.3.0 torchvision==0.18.0" +fi + #################################################################### From 344eda55d4550e91b1a3e95f8e669084a74c876f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 22 May 2024 23:06:07 +0900 Subject: [PATCH 077/201] ReloadUI backgroundColor --background-fill-primary --- javascript/ui.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ui.js b/javascript/ui.js index e0f5feebd6c..16faacebb8b 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -337,8 +337,8 @@ onOptionsChanged(function() { let txt2img_textarea, img2img_textarea = undefined; function restart_reload() { + document.body.style.backgroundColor = "var(--background-fill-primary)"; document.body.innerHTML = '

<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>

'; - var requestPing = function() { requestGet("./internal/ping", {}, function(data) { location.reload(); From 6dd53ce63dc70b3fcf7f25402d40b48f50abdf74 Mon Sep 17 00:00:00 2001 From: alcacode Date: Sun, 26 May 2024 15:36:55 +0200 Subject: [PATCH 078/201] Fix bug where file extension had an extra '.' under some circumstances Fix bug where under some circumstances an extra "." was inserted between the file base name and the file extension. The bug is triggered when the extension argument is one of "jpg", "jpeg", or "webp", and the image exceeds the format's dimension limit. Then the extension variable is set to ".png", resulting in the fullfn variable to evaluate to a string ending with "..png". --- modules/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/images.py b/modules/images.py index c0ff8a6306a..1be176cdfad 100644 --- a/modules/images.py +++ b/modules/images.py @@ -653,7 +653,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i # WebP and JPG formats have maximum dimension limits of 16383 and 65535 respectively. switch to PNG which has a much higher limit if (image.height > 65535 or image.width > 65535) and extension.lower() in ("jpg", "jpeg") or (image.height > 16383 or image.width > 16383) and extension.lower() == "webp": print('Image dimensions too large; saving as PNG') - extension = ".png" + extension = "png" if save_to_dirs is None: save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt) From 8d6f7417385d1cacfd827800bdf02a0e8dd8f092 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 29 May 2024 03:33:32 +0900 Subject: [PATCH 079/201] #15883 -> #15882 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c16b561132..596b1ec45a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ## 1.9.4 ### Bug Fixes: -* pin setuptools version to fix the startup error ([#15883](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15883)) +* pin setuptools version to fix the startup error ([#15882](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15882)) ## 1.9.3 From 10f8d0f84216e3642e960ea7118a5acc8a79546f Mon Sep 17 00:00:00 2001 From: eatmoreapple Date: Tue, 4 Jun 2024 15:02:13 +0800 Subject: [PATCH 080/201] feat: lora partial update precede full update. 
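The idea of this change, before the diff: when load_networks() is asked for names that are not yet in the in-memory registries, only those files are re-scanned from disk instead of waiting for a full list_available_networks() rebuild. A minimal sketch of that lookup-then-partial-refresh flow, with the registries reduced to plain dicts and the disk scan replaced by a placeholder (only the function and dict names mirror the patch; everything else is simplified):

```python
# Simplified sketch only -- in the real code the registries are populated by
# process_network_files() scanning the LoRA directories; here the scan is a stand-in.
available_networks = {}          # name  -> NetworkOnDisk
available_network_aliases = {}   # alias -> NetworkOnDisk

def update_available_networks_by_names(names):
    # stand-in for process_network_files(names): re-scan only files whose basename is in `names`
    for name in names:
        print(f"refreshing {name!r} from disk")

def load_networks(names):
    # partial update precedes full update: refresh just the missing entries first
    missing = [n for n in names if n not in available_networks and n not in available_network_aliases]
    if missing:
        update_available_networks_by_names(missing)
    # a full list_available_networks() rescan remains the fallback if a name still cannot be resolved

load_networks(["my_lora"])
```
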
--- extensions-builtin/Lora/networks.py | 40 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 42b14dc239d..18809364b61 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -260,6 +260,16 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No loaded_networks.clear() + unavailable_networks = [] + for name in names: + if name.lower() in forbidden_network_aliases and available_networks.get(name) is None: + unavailable_networks.append(name) + elif available_network_aliases.get(name) is None: + unavailable_networks.append(name) + + if unavailable_networks: + update_available_networks_by_names(unavailable_networks) + networks_on_disk = [available_networks.get(name, None) if name.lower() in forbidden_network_aliases else available_network_aliases.get(name, None) for name in names] if any(x is None for x in networks_on_disk): list_available_networks() @@ -566,22 +576,16 @@ def network_MultiheadAttention_load_state_dict(self, *args, **kwargs): return originals.MultiheadAttention_load_state_dict(self, *args, **kwargs) -def list_available_networks(): - available_networks.clear() - available_network_aliases.clear() - forbidden_network_aliases.clear() - available_network_hash_lookup.clear() - forbidden_network_aliases.update({"none": 1, "Addams": 1}) - - os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True) - +def process_network_files(names: list[str] | None = None): candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"])) candidates += list(shared.walk_files(shared.cmd_opts.lyco_dir_backcompat, allowed_extensions=[".pt", ".ckpt", ".safetensors"])) for filename in candidates: if os.path.isdir(filename): continue - name = os.path.splitext(os.path.basename(filename))[0] + # if names is provided, only load networks with names in the list + if names and name not in names: + continue try: entry = network.NetworkOnDisk(name, filename) except OSError: # should catch FileNotFoundError and PermissionError etc. @@ -597,6 +601,22 @@ def list_available_networks(): available_network_aliases[entry.alias] = entry +def update_available_networks_by_names(names: list[str]): + process_network_files(names) + + +def list_available_networks(): + available_networks.clear() + available_network_aliases.clear() + forbidden_network_aliases.clear() + available_network_hash_lookup.clear() + forbidden_network_aliases.update({"none": 1, "Addams": 1}) + + os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True) + + process_network_files() + + re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)") From 25bbf31f5701b85804908a54b2f6af38a1d50f1f Mon Sep 17 00:00:00 2001 From: NouberNou Date: Thu, 6 Jun 2024 16:22:49 -0700 Subject: [PATCH 081/201] Fix for grids without comprehensive infotexts When generating grids, some scripts such as img2img loopback and ultimate SD upscale do not pass infotexts for each image since they are the same prompt. If you attempt to save those images using the saved button in the UI it will fail because it will look for the selected image info text. This fixes those errors by replicating the infotext for as many images are passed into the image list if the infotext parameter is none. 
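Before the diff, a toy illustration of why the fallback has to replicate the shared infotext once per image; the values are invented, and only the names infotexts, info and images_list correspond to the patch:

```python
# Illustration only: scripts such as img2img loopback pass a single shared infotext.
infotexts = None                        # no per-image infotexts supplied
info = "shared infotext"
images_list = ["grid", "img1", "img2"]

before = infotexts or [info]                       # length 1: saving image index 2 from the UI fails
after = infotexts or [info] * len(images_list)     # length 3: any selected image can be saved
assert len(after) == len(images_list)
```
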
--- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/processing.py b/modules/processing.py index 76557dd7f5e..cb37a77dff2 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -569,7 +569,7 @@ def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt] self.all_seeds = all_seeds or p.all_seeds or [self.seed] self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed] - self.infotexts = infotexts or [info] + self.infotexts = infotexts or [info] * len(image_list) self.version = program_version() def js(self): From 53f62674ae55e84aff4d4c9ed104ba9dce8ae887 Mon Sep 17 00:00:00 2001 From: NouberNou Date: Thu, 6 Jun 2024 16:30:01 -0700 Subject: [PATCH 082/201] Typo on edit Edited in fix in Github editor and mistyped from local copy --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/processing.py b/modules/processing.py index cb37a77dff2..c22da41692f 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -569,7 +569,7 @@ def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt] self.all_seeds = all_seeds or p.all_seeds or [self.seed] self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed] - self.infotexts = infotexts or [info] * len(image_list) + self.infotexts = infotexts or [info] * len(images_list) self.version = program_version() def js(self): From 0769aa318a1896ccf74f57e6e943eb6b5fab5051 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 09:05:35 +0300 Subject: [PATCH 083/201] integrated edits as recommended in the PR #15804 --- modules/sd_hijack_optimizations.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 4c2dc56d45d..0269f1f5b4b 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -486,18 +486,7 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs): k_in = self.to_k(context_k) v_in = self.to_v(context_v) - def _reshape(t): - """rearrange(t, 'b n (h d) -> b n h d', h=h). - Using torch native operations to avoid overhead as this function is - called frequently. (70 times/it for SDXL) - """ - b, n, _ = t.shape # Get the batch size (b) and sequence length (n) - d = t.shape[2] // h # Determine the depth per head - return t.reshape(b, n, h, d) - - q = _reshape(q_in) - k = _reshape(k_in) - v = _reshape(v_in) + q, k, v = (t.reshape(t.shape[0], t.shape[1], h, -1) for t in (q_in, k_in, v_in)) del q_in, k_in, v_in @@ -509,7 +498,6 @@ def _reshape(t): out = out.to(dtype) - # out = rearrange(out, 'b n h d -> b n (h d)', h=h) b, n, h, d = out.shape out = out.reshape(b, n, h * d) return self.to_out(out) From 5429e4cff514df2f4cab242212ba347741eadc08 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 09:56:09 +0300 Subject: [PATCH 084/201] add proper infotext support for #15607 fix settings override not working for NGMI, s_churn, etc... 
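The mechanism of the fix, sketched before the diff: fields such as s_min_uncond (NGMS), s_churn, s_tmin, s_tmax and s_noise are no longer resolved from opts in __post_init__, but in a new fill_fields_from_opts() called from process_images_inner() after override_settings have been applied, so per-run overrides actually take effect. A toy version of that deferred-fill pattern, not the real classes:

```python
class Opts:
    s_churn = 0.0

opts = Opts()

class Processing:
    def __init__(self, s_churn=None):
        self.s_churn = s_churn           # None means "defer to the global option"

    def fill_fields_from_opts(self):
        # called only after override_settings have been written into opts
        self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn

p = Processing()
opts.s_churn = 0.5                       # e.g. applied from p.override_settings
p.fill_fields_from_opts()
assert p.s_churn == 0.5                  # resolved late, so the override is picked up
```
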
--- modules/processing.py | 14 ++++++++------ modules/sd_samplers_cfg_denoiser.py | 12 +++++++----- modules/shared_options.py | 6 +++--- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index c22da41692f..97a7162aae2 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -238,11 +238,6 @@ def __post_init__(self): self.styles = [] self.sampler_noise_scheduler_override = None - self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond - self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn - self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin - self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf') - self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise self.extra_generation_params = self.extra_generation_params or {} self.override_settings = self.override_settings or {} @@ -259,6 +254,13 @@ def __post_init__(self): self.cached_uc = StableDiffusionProcessing.cached_uc self.cached_c = StableDiffusionProcessing.cached_c + def fill_fields_from_opts(self): + self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond + self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn + self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin + self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf') + self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise + @property def sd_model(self): return shared.sd_model @@ -794,7 +796,6 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, "Init image hash": getattr(p, 'init_img_hash', None), "RNG": opts.randn_source if opts.randn_source != "GPU" else None, - "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond, "Tiling": "True" if p.tiling else None, **p.extra_generation_params, "Version": program_version() if opts.add_version_to_infotext else None, @@ -890,6 +891,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: modules.sd_hijack.model_hijack.apply_circular(p.tiling) modules.sd_hijack.model_hijack.clear_comments() + p.fill_fields_from_opts() p.setup_prompts() if isinstance(seed, list): diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index d89ea2c8bfc..f48f58a5089 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -214,12 +214,14 @@ def apply_blend(current_latent): if shared.opts.skip_early_cond != 0. 
and self.step / self.total_steps <= shared.opts.skip_early_cond: skip_uncond = True - x_in = x_in[:-batch_size] - sigma_in = sigma_in[:-batch_size] - - # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it - if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: + self.p.extra_generation_params["Skip Early CFG"] = shared.opts.skip_early_cond + elif (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: skip_uncond = True + self.p.extra_generation_params["NGMS"] = s_min_uncond + if shared.opts.s_min_uncond_all: + self.p.extra_generation_params["NGMS all steps"] = shared.opts.s_min_uncond_all + + if skip_uncond: x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] diff --git a/modules/shared_options.py b/modules/shared_options.py index c711fa5f609..05c3d939160 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -209,8 +209,8 @@ options_templates.update(options_section(('optimizations', "Optimizations", "sd"), { "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}), - "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"), - "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"), + "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}, infotext='NGMS').link("PR", "https://github.com/AUTOMATIC1111/stablediffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"), + "s_min_uncond_all": OptionInfo(False, "Negative Guidance minimum sigma all steps", infotext='NGMS all steps').info("By default, NGMS above skips every other step; this makes it skip all steps"), "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"), "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"), "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"), @@ -382,7 +382,7 @@ 'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"), 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), - 'skip_early_cond': OptionInfo(0, "Skip CFG during 
early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"), + 'skip_early_cond': OptionInfo(0.0, "Ignore negative prompt during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("disables CFG on a proportion of steps at the beginning of generation; 0=skip none; 1=skip all; can both improve sample diversity/quality and speed up sampling"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From cd9e9e404955df19a72c832d68888db44ab7b382 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 10:13:38 +0300 Subject: [PATCH 085/201] remove unneeded tabulation --- .../Lora/ui_extra_networks_lora.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index e35d90c6ea3..3e34d69dca4 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -60,19 +60,18 @@ def create_item(self, name, index=None, enable_filter=True): else: sd_version = lora_on_disk.sd_version - if shared.sd_model is not None: # still show LoRA in case an error occurs during initial model loading - if shared.opts.lora_show_all or not enable_filter: - pass - elif sd_version == network.SdVersion.Unknown: - model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 - if model_version.name in shared.opts.lora_hide_unknown_for_versions: - return None - elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: - return None - elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: - return None - elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + if shared.opts.lora_show_all or not enable_filter or not shared.sd_model: + pass + elif sd_version == network.SdVersion.Unknown: + model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 + if model_version.name in shared.opts.lora_hide_unknown_for_versions: return None + elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: + return None + elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: + return None + elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + return None return item From 510f025a01733f20ebe3997c1c3d159e6ac50148 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 4 Jun 2024 02:23:43 +0900 Subject: [PATCH 086/201] replace wsl-open with wslpath and explorer.exe --- modules/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/util.py b/modules/util.py index 0db13736cc7..768bf32d0bd 100644 --- a/modules/util.py +++ b/modules/util.py @@ -208,6 +208,6 @@ def open_folder(path): elif platform.system() == "Darwin": subprocess.Popen(["open", path]) elif "microsoft-standard-WSL2" in platform.uname().release: - subprocess.Popen(["wsl-open", path]) + subprocess.Popen(["explorer.exe", subprocess.check_output(["wslpath", "-w", path])]) else: subprocess.Popen(["xdg-open", path]) From 603509ec905a9c9ac1011e9531a9da180828fcc0 Mon Sep 17 00:00:00 2001 From: 
AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 10:54:41 +0300 Subject: [PATCH 087/201] as per wfjsw's suggestion, revert changes for sd_hijack_checkpoint.py --- modules/sd_hijack_checkpoint.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/sd_hijack_checkpoint.py b/modules/sd_hijack_checkpoint.py index b2f05bbdcf0..2604d969f91 100644 --- a/modules/sd_hijack_checkpoint.py +++ b/modules/sd_hijack_checkpoint.py @@ -4,19 +4,16 @@ import ldm.modules.diffusionmodules.openaimodel -# Setting flag=False so that torch skips checking parameters. -# parameters checking is expensive in frequent operations. - def BasicTransformerBlock_forward(self, x, context=None): - return checkpoint(self._forward, x, context, flag=False) + return checkpoint(self._forward, x, context) def AttentionBlock_forward(self, x): - return checkpoint(self._forward, x, flag=False) + return checkpoint(self._forward, x) def ResBlock_forward(self, x, emb): - return checkpoint(self._forward, x, emb, flag=False) + return checkpoint(self._forward, x, emb) stored = [] From 07cf95c76ef052c120fbf1cfb69e3018e1cb06f8 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 11:26:34 +0300 Subject: [PATCH 088/201] update pickle safe filenames --- modules/safe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/safe.py b/modules/safe.py index b1d08a7928e..af019ffd980 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -64,8 +64,8 @@ def find_class(self, module, name): raise Exception(f"global '{module}/{name}' is forbidden") -# Regular expression that accepts 'dirname/version', 'dirname/data.pkl', and 'dirname/data/' -allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|(data\.pkl))$") +# Regular expression that accepts 'dirname/version', 'dirname/byteorder', 'dirname/data.pkl', '.data/serialization_id', and 'dirname/data/' +allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|byteorder|.data/serialization_id|(data\.pkl))$") data_pkl_re = re.compile(r"^([^/]+)/data\.pkl$") def check_zip_filenames(filename, names): From 1a7ffa2c76b0e68cd647c1f7f07235bcf85c985d Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 11:35:45 +0300 Subject: [PATCH 089/201] remove extra local variable --- modules/paths_internal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/paths_internal.py b/modules/paths_internal.py index 884984c9c0a..67521f5cd5d 100644 --- a/modules/paths_internal.py +++ b/modules/paths_internal.py @@ -28,9 +28,8 @@ cmd_opts_pre = parser_pre.parse_known_args()[0] data_path = cmd_opts_pre.data_dir -models_override = cmd_opts_pre.models_dir -models_path = models_override if models_override else os.path.join(data_path, "models") +models_path = cmd_opts_pre.models_dir if cmd_opts_pre.models_dir else os.path.join(data_path, "models") extensions_dir = os.path.join(data_path, "extensions") extensions_builtin_dir = os.path.join(script_path, "extensions-builtin") config_states_dir = os.path.join(script_path, "config_states") From 547778b10f25def4e040b81942a2b23295567de3 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 12:41:28 +0300 Subject: [PATCH 090/201] possibly make NaN check cheaper --- modules/devices.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index 7de34ac51c3..d574975e51b 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ 
-243,22 +243,22 @@ def test_for_nans(x, where): if shared.cmd_opts.disable_nan_check: return - if not torch.all(torch.isnan(x)).item(): + if not torch.isnan(x[(0, ) * len(x.shape)]): return if where == "unet": - message = "A tensor with all NaNs was produced in Unet." + message = "A tensor with NaNs was produced in Unet." if not shared.cmd_opts.no_half: message += " This could be either because there's not enough precision to represent the picture, or because your video card does not support half type. Try setting the \"Upcast cross attention layer to float32\" option in Settings > Stable Diffusion or using the --no-half commandline argument to fix this." elif where == "vae": - message = "A tensor with all NaNs was produced in VAE." + message = "A tensor with NaNs was produced in VAE." if not shared.cmd_opts.no_half and not shared.cmd_opts.no_half_vae: message += " This could be because there's not enough precision to represent the picture. Try adding --no-half-vae commandline argument to fix this." else: - message = "A tensor with all NaNs was produced." + message = "A tensor with NaNs was produced." message += " Use --disable-nan-check commandline argument to disable this check." From 39a6d5655f6c162e2b8da024a1719d79304332a2 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 8 Jun 2024 18:55:07 -0400 Subject: [PATCH 091/201] patch k_diffusion to_d and strip device from schedulers --- modules/sd_schedulers.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 0c09af8d0b5..a2b9eb290a2 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -4,6 +4,12 @@ import k_diffusion +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / sigma + +k_diffusion.sampling.to_d = to_d + import numpy as np from modules import shared @@ -19,11 +25,11 @@ class Scheduler: aliases: list = None -def uniform(n, sigma_min, sigma_max, inner_model, device): +def uniform(n, sigma_min, sigma_max, inner_model): return inner_model.get_sigmas(n) -def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): +def sgm_uniform(n, sigma_min, sigma_max, inner_model): start = inner_model.sigma_to_t(torch.tensor(sigma_max)) end = inner_model.sigma_to_t(torch.tensor(sigma_min)) sigs = [ @@ -31,9 +37,9 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): for ts in torch.linspace(start, end, n + 1)[:-1] ] sigs += [0.0] - return torch.FloatTensor(sigs).to(device) + return torch.FloatTensor(sigs) -def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device='cpu'): +def get_align_your_steps_sigmas(n, sigma_min, sigma_max): # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html def loglinear_interp(t_steps, num_steps): """ @@ -59,12 +65,12 @@ def loglinear_interp(t_steps, num_steps): else: sigmas.append(0.0) - return torch.FloatTensor(sigmas).to(device) + return torch.FloatTensor(sigmas) -def kl_optimal(n, sigma_min, sigma_max, device): - alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) - alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) - step_indices = torch.arange(n + 1, device=device) +def kl_optimal(n, sigma_min, sigma_max): + alpha_min = torch.arctan(torch.tensor(sigma_min)) + alpha_max = torch.arctan(torch.tensor(sigma_max)) + step_indices = torch.arange(n + 1) sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) 
return sigmas From d52a1e1a22f19c941d581b92904a99d4dd7b22c1 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 8 Jun 2024 18:56:23 -0400 Subject: [PATCH 092/201] lint --- modules/sd_schedulers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index a2b9eb290a2..9916cf05ace 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -4,16 +4,16 @@ import k_diffusion +import numpy as np + +from modules import shared + def to_d(x, sigma, denoised): """Converts a denoiser output to a Karras ODE derivative.""" return (x - denoised) / sigma k_diffusion.sampling.to_d = to_d -import numpy as np - -from modules import shared - @dataclasses.dataclass class Scheduler: name: str From d875cda565171407e1e2dc087fb5c5140359c6ec Mon Sep 17 00:00:00 2001 From: huchenlei Date: Sat, 8 Jun 2024 22:11:11 -0400 Subject: [PATCH 093/201] Fix sdxl inpaint --- modules/processing.py | 4 ++-- modules/sd_models.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index 0ff6a45c0c5..dc538272116 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -115,7 +115,7 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device) else: - if getattr(sd_model.model, "is_sdxl_inpaint", False): + if sd_model.is_sdxl_inpaint: # The "masked-image" in this case will just be all 0.5 since the entire image is masked. image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 image_conditioning = images_tensor_to_samples(image_conditioning, @@ -389,7 +389,7 @@ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) - if getattr(self.sampler.model_wrap.inner_model.model, "is_sdxl_inpaint", False): + if self.sampler.model_wrap.inner_model.is_sdxl_inpaint: return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) # Dummy zero conditioning if we're not using inpainting or depth model. diff --git a/modules/sd_models.py b/modules/sd_models.py index 61bd15d8f05..93ff6c5fe9e 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -386,13 +386,6 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer model.is_sd2 = not model.is_sdxl and hasattr(model.cond_stage_model, 'model') model.is_sd1 = not model.is_sdxl and not model.is_sd2 model.is_ssd = model.is_sdxl and 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys() - # Set is_sdxl_inpaint flag. - diffusion_model_input = state_dict.get('diffusion_model.input_blocks.0.0.weight', None) - model.is_sdxl_inpaint = ( - model.is_sdxl and - diffusion_model_input is not None and - diffusion_model_input.shape[1] == 9 - ) if model.is_sdxl: sd_models_xl.extend_sdxl(model) @@ -408,6 +401,18 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer del state_dict + # Set is_sdxl_inpaint flag. + # Perform this check after model initialization to make sure state_dict + # structure is already known. 
+ diffusion_model_input = model.model.state_dict().get( + 'diffusion_model.input_blocks.0.0.weight' + ) + model.is_sdxl_inpaint = ( + model.is_sdxl and + diffusion_model_input is not None and + diffusion_model_input.shape[1] == 9 + ) + if shared.cmd_opts.opt_channelslast: model.to(memory_format=torch.channels_last) timer.record("apply channels_last") From f89b5dbbd282091fd6b3318f3ef20cf23cf9ea3a Mon Sep 17 00:00:00 2001 From: huchenlei Date: Sat, 8 Jun 2024 22:15:37 -0400 Subject: [PATCH 094/201] nit --- modules/sd_models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 93ff6c5fe9e..af35187cdb0 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -402,8 +402,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer del state_dict # Set is_sdxl_inpaint flag. - # Perform this check after model initialization to make sure state_dict - # structure is already known. + # Checks Unet structure to detect inpaint model. The inpaint model's + # checkpoint state_dict does not contain the key + # 'diffusion_model.input_blocks.0.0.weight'. diffusion_model_input = model.model.state_dict().get( 'diffusion_model.input_blocks.0.0.weight' ) From 6447ff49d335edd7dccc4b75e262615ce13e76ac Mon Sep 17 00:00:00 2001 From: bluelovers Date: Sun, 9 Jun 2024 19:07:32 +0800 Subject: [PATCH 095/201] feat: save pattern add `basename` `grid` or `xyz_grid` or `img` ```py 'basename': lambda self: 'img' if self.basename == '' else self.basename, ``` --- modules/images.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/images.py b/modules/images.py index 05432e3a7f6..90c65b740c8 100644 --- a/modules/images.py +++ b/modules/images.py @@ -377,6 +377,7 @@ def get_sampler_scheduler(p, sampler): class FilenameGenerator: replacements = { + 'basename': lambda self: 'img' if self.basename == '' else self.basename, 'seed': lambda self: self.seed if self.seed is not None else '', 'seed_first': lambda self: self.seed if self.p.batch_size == 1 else self.p.all_seeds[0], 'seed_last': lambda self: NOTHING_AND_SKIP_PREVIOUS_TEXT if self.p.batch_size == 1 else self.p.all_seeds[-1], @@ -413,12 +414,13 @@ class FilenameGenerator: } default_time_format = '%Y%m%d%H%M%S' - def __init__(self, p, seed, prompt, image, zip=False): + def __init__(self, p, seed, prompt, image, zip=False, basename=""): self.p = p self.seed = seed self.prompt = prompt self.image = image self.zip = zip + self.basename = basename def get_vae_filename(self): """Get the name of the VAE file.""" @@ -649,7 +651,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i txt_fullfn (`str` or None): If a text file is saved for this image, this will be its full path. Otherwise None. """ - namegen = FilenameGenerator(p, seed, prompt, image) + namegen = FilenameGenerator(p, seed, prompt, image, zip=False, basename=basename) # WebP and JPG formats have maximum dimension limits of 16383 and 65535 respectively. 
switch to PNG which has a much higher limit if (image.height > 65535 or image.width > 65535) and extension.lower() in ("jpg", "jpeg") or (image.height > 16383 or image.width > 16383) and extension.lower() == "webp": From 6214aa7d2a84aa2a12962706579a2dba3470fb51 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 16:24:04 +0300 Subject: [PATCH 096/201] performance: check for nans in unet only once, after all steps have been completed --- modules/processing.py | 5 +++++ modules/sd_samplers_cfg_denoiser.py | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index dc538272116..65e37db0a46 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -625,6 +625,9 @@ class DecodedSamples(list): def decode_latent_batch(model, batch, target_device=None, check_for_nans=False): samples = DecodedSamples() + if check_for_nans: + devices.test_for_nans(batch, "unet") + for i in range(batch.shape[0]): sample = decode_first_stage(model, batch[i:i + 1])[0] @@ -987,6 +990,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if getattr(samples_ddim, 'already_decoded', False): x_samples_ddim = samples_ddim else: + devices.test_for_nans(samples_ddim, "unet") + if opts.sd_vae_decode_method != 'Full': p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index f48f58a5089..06d2661f4f5 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -273,8 +273,6 @@ def apply_blend(current_latent): denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model) cfg_denoised_callback(denoised_params) - devices.test_for_nans(x_out, "unet") - if is_edit_model: denoised = self.combine_denoised_for_edit_model(x_out, cond_scale) elif skip_uncond: From e368cd2810af0c7a734c33b25549110beacdf53f Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 16:46:08 +0300 Subject: [PATCH 097/201] stylistic changes for #15978 --- modules/images.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/images.py b/modules/images.py index 90c65b740c8..3253847fed1 100644 --- a/modules/images.py +++ b/modules/images.py @@ -377,7 +377,7 @@ def get_sampler_scheduler(p, sampler): class FilenameGenerator: replacements = { - 'basename': lambda self: 'img' if self.basename == '' else self.basename, + 'basename': lambda self: self.basename or 'img', 'seed': lambda self: self.seed if self.seed is not None else '', 'seed_first': lambda self: self.seed if self.p.batch_size == 1 else self.p.all_seeds[0], 'seed_last': lambda self: NOTHING_AND_SKIP_PREVIOUS_TEXT if self.p.batch_size == 1 else self.p.all_seeds[-1], @@ -651,7 +651,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i txt_fullfn (`str` or None): If a text file is saved for this image, this will be its full path. Otherwise None. """ - namegen = FilenameGenerator(p, seed, prompt, image, zip=False, basename=basename) + namegen = FilenameGenerator(p, seed, prompt, image, basename=basename) # WebP and JPG formats have maximum dimension limits of 16383 and 65535 respectively. 
switch to PNG which has a much higher limit if (image.height > 65535 or image.width > 65535) and extension.lower() in ("jpg", "jpeg") or (image.height > 16383 or image.width > 16383) and extension.lower() == "webp": From aafbb5b403e524b94367c5893f76f834b98de26d Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 16:47:08 +0300 Subject: [PATCH 098/201] lint --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 06d2661f4f5..a86fa88eee9 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -1,5 +1,5 @@ import torch -from modules import prompt_parser, devices, sd_samplers_common +from modules import prompt_parser, sd_samplers_common from modules.shared import opts, state import modules.shared as shared From 57e6d05a43e4bdf4575e520f1a04c17e80fe58cc Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 21:18:36 +0300 Subject: [PATCH 099/201] added tool for profiling code --- modules/call_queue.py | 10 +++++++-- modules/processing.py | 5 +++-- modules/profiling.py | 46 +++++++++++++++++++++++++++++++++++++++ modules/shared_options.py | 16 ++++++++++++++ style.css | 6 ++++- 5 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 modules/profiling.py diff --git a/modules/call_queue.py b/modules/call_queue.py index b50931bcdb9..d22c23b317c 100644 --- a/modules/call_queue.py +++ b/modules/call_queue.py @@ -1,8 +1,9 @@ +import os.path from functools import wraps import html import time -from modules import shared, progress, errors, devices, fifo_lock +from modules import shared, progress, errors, devices, fifo_lock, profiling queue_lock = fifo_lock.FIFOLock() @@ -111,8 +112,13 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs): else: vram_html = '' + if shared.opts.profiling_enable and os.path.exists(shared.opts.profiling_filename): + profiling_html = f"

<p class='profile'> [ <a href='{profiling.webpath()}' download>Profile</a> ] </p>"
+        else:
+            profiling_html = ''
+
         # last item is always HTML
-        res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr><span class='measurement'>{elapsed_text}</span></p>{vram_html}</div>"
+        res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr><span class='measurement'>{elapsed_text}</span></p>{vram_html}{profiling_html}</div>
" return tuple(res) diff --git a/modules/processing.py b/modules/processing.py index 65e37db0a46..91cb94db1b9 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -16,7 +16,7 @@ from typing import Any import modules.sd_hijack -from modules import devices, prompt_parser, masking, sd_samplers, lowvram, infotext_utils, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng +from modules import devices, prompt_parser, masking, sd_samplers, lowvram, infotext_utils, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors, rng, profiling from modules.rng import slerp # noqa: F401 from modules.sd_hijack import model_hijack from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes @@ -843,7 +843,8 @@ def process_images(p: StableDiffusionProcessing) -> Processed: # backwards compatibility, fix sampler and scheduler if invalid sd_samplers.fix_p_invalid_sampler_and_scheduler(p) - res = process_images_inner(p) + with profiling.Profiler(): + res = process_images_inner(p) finally: sd_models.apply_token_merging(p.sd_model, 0) diff --git a/modules/profiling.py b/modules/profiling.py new file mode 100644 index 00000000000..95b59f71a20 --- /dev/null +++ b/modules/profiling.py @@ -0,0 +1,46 @@ +import torch + +from modules import shared, ui_gradio_extensions + + +class Profiler: + def __init__(self): + if not shared.opts.profiling_enable: + self.profiler = None + return + + activities = [] + if "CPU" in shared.opts.profiling_activities: + activities.append(torch.profiler.ProfilerActivity.CPU) + if "CUDA" in shared.opts.profiling_activities: + activities.append(torch.profiler.ProfilerActivity.CUDA) + + if not activities: + self.profiler = None + return + + self.profiler = torch.profiler.profile( + activities=activities, + record_shapes=shared.opts.profiling_record_shapes, + profile_memory=shared.opts.profiling_profile_memory, + with_stack=shared.opts.profiling_with_stack + ) + + def __enter__(self): + if self.profiler: + self.profiler.__enter__() + + return self + + def __exit__(self, exc_type, exc, exc_tb): + if self.profiler: + shared.state.textinfo = "Finishing profile..." + + self.profiler.__exit__(exc_type, exc, exc_tb) + + self.profiler.export_chrome_trace(shared.opts.profiling_filename) + + +def webpath(): + return ui_gradio_extensions.webpath(shared.opts.profiling_filename) + diff --git a/modules/shared_options.py b/modules/shared_options.py index e2e02094fef..104d8a54484 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -129,6 +129,22 @@ "dump_stacks_on_signal": OptionInfo(False, "Print stack traces before exiting the program with ctrl+c."), })) +options_templates.update(options_section(('profiler', "Profiler", "system"), { + "profiling_explanation": OptionHTML(""" +Those settings allow you to enable torch profiler when generating pictures. +Profiling allows you to see which code uses how much of computer's resources during generation. +Each generation writes its own profile to one file, overwriting previous. +The file can be viewed in Chrome, or on a Perfetto web site. +Warning: writing profile can take a lot of time, up to 30 seconds, and the file itelf can be around 500MB in size. 
+"""), + "profiling_enable": OptionInfo(False, "Enable profiling"), + "profiling_activities": OptionInfo(["CPU"], "Activities", gr.CheckboxGroup, {"choices": ["CPU", "CUDA"]}), + "profiling_record_shapes": OptionInfo(True, "Record shapes"), + "profiling_profile_memory": OptionInfo(True, "Profile memory"), + "profiling_with_stack": OptionInfo(True, "Include python stack"), + "profiling_filename": OptionInfo("trace.json", "Profile filename"), +})) + options_templates.update(options_section(('API', "API", "system"), { "api_enable_requests": OptionInfo(True, "Allow http:// and https:// URLs for input images in API", restrict_api=True), "api_forbid_local_requests": OptionInfo(True, "Forbid URLs to local resources", restrict_api=True), diff --git a/style.css b/style.css index 467c29cdf97..64ef61bad46 100644 --- a/style.css +++ b/style.css @@ -279,7 +279,7 @@ input[type="checkbox"].input-accordion-checkbox{ display: inline-block; } -.html-log .performance p.time, .performance p.vram, .performance p.time abbr, .performance p.vram abbr { +.html-log .performance p.time, .performance p.vram, .performance p.profile, .performance p.time abbr, .performance p.vram abbr { margin-bottom: 0; color: var(--block-title-text-color); } @@ -291,6 +291,10 @@ input[type="checkbox"].input-accordion-checkbox{ margin-left: auto; } +.html-log .performance p.profile { + margin-left: 0.5em; +} + .html-log .performance .measurement{ color: var(--body-text-color); font-weight: bold; From 99e65ec6182c4e1201d16713f58c899bf26ba2ac Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 21:23:53 +0300 Subject: [PATCH 100/201] undo some changes from #15823 and fix whitespace --- modules/sd_samplers_kdiffusion.py | 4 ++-- modules/sd_schedulers.py | 28 +++++++++++++++------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 228de49449e..64e14e0c2a3 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -1,7 +1,7 @@ import torch import inspect import k_diffusion.sampling -from modules import sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser, sd_schedulers +from modules import sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser, sd_schedulers, devices from modules.sd_samplers_cfg_denoiser import CFGDenoiser # noqa: F401 from modules.script_callbacks import ExtraNoiseParams, extra_noise_callback @@ -115,7 +115,7 @@ def get_sigmas(self, p, steps): if scheduler.need_inner_model: sigmas_kwargs['inner_model'] = self.model_wrap - sigmas = scheduler.function(n=steps, **sigmas_kwargs) + sigmas = scheduler.function(n=steps, **sigmas_kwargs, device=devices.cpu) if discard_next_to_last_sigma: sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 9916cf05ace..0165e6a0286 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -1,19 +1,19 @@ import dataclasses - import torch - import k_diffusion - import numpy as np from modules import shared + def to_d(x, sigma, denoised): """Converts a denoiser output to a Karras ODE derivative.""" return (x - denoised) / sigma + k_diffusion.sampling.to_d = to_d + @dataclasses.dataclass class Scheduler: name: str @@ -25,11 +25,11 @@ class Scheduler: aliases: list = None -def uniform(n, sigma_min, sigma_max, inner_model): - return inner_model.get_sigmas(n) +def uniform(n, sigma_min, sigma_max, inner_model, device): + return 
inner_model.get_sigmas(n).to(device) -def sgm_uniform(n, sigma_min, sigma_max, inner_model): +def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): start = inner_model.sigma_to_t(torch.tensor(sigma_max)) end = inner_model.sigma_to_t(torch.tensor(sigma_min)) sigs = [ @@ -37,9 +37,10 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model): for ts in torch.linspace(start, end, n + 1)[:-1] ] sigs += [0.0] - return torch.FloatTensor(sigs) + return torch.FloatTensor(sigs).to(device) -def get_align_your_steps_sigmas(n, sigma_min, sigma_max): + +def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device): # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html def loglinear_interp(t_steps, num_steps): """ @@ -65,12 +66,13 @@ def loglinear_interp(t_steps, num_steps): else: sigmas.append(0.0) - return torch.FloatTensor(sigmas) + return torch.FloatTensor(sigmas).to(device) + -def kl_optimal(n, sigma_min, sigma_max): - alpha_min = torch.arctan(torch.tensor(sigma_min)) - alpha_max = torch.arctan(torch.tensor(sigma_max)) - step_indices = torch.arange(n + 1) +def kl_optimal(n, sigma_min, sigma_max, device): + alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) + alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) + step_indices = torch.arange(n + 1, device=device) sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) return sigmas From d2097dbdd99aa528d8459ad7b62d3a2230a14e65 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 9 Jun 2024 21:33:32 +0300 Subject: [PATCH 101/201] added onOptionsAvailable callback for javascript for --- javascript/ui.js | 1 + script.js | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/javascript/ui.js b/javascript/ui.js index 16faacebb8b..ff6f8974b0e 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -299,6 +299,7 @@ onAfterUiUpdate(function() { var jsdata = textarea.value; opts = JSON.parse(jsdata); + executeCallbacks(optionsAvailableCallbacks); /*global optionsAvailableCallbacks*/ executeCallbacks(optionsChangedCallbacks); /*global optionsChangedCallbacks*/ Object.defineProperty(textarea, 'value', { diff --git a/script.js b/script.js index f069b1ef002..de1a9000d4f 100644 --- a/script.js +++ b/script.js @@ -29,6 +29,7 @@ var uiAfterUpdateCallbacks = []; var uiLoadedCallbacks = []; var uiTabChangeCallbacks = []; var optionsChangedCallbacks = []; +var optionsAvailableCallbacks = []; var uiAfterUpdateTimeout = null; var uiCurrentTab = null; @@ -77,6 +78,20 @@ function onOptionsChanged(callback) { optionsChangedCallbacks.push(callback); } +/** + * Register callback to be called when the options (in opts global variable) are available. + * The callback receives no arguments. + * If you register the callback after the options are available, it's just immediately called. 
+ */ +function onOptionsAvailable(callback) { + if (Object.keys(opts).length != 0) { + callback(); + return; + } + + optionsAvailableCallbacks.push(callback); +} + function executeCallbacks(queue, arg) { for (const callback of queue) { try { From 74ee8fd1e32d06a45e669dd5cce6a2bff786d3a4 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 10 Jun 2024 04:35:11 +0900 Subject: [PATCH 102/201] .gitignore trace.json --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 519b4a53dcb..96cfe22dbd1 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ notification.mp3 /.coverage* /test/test_outputs /cache +trace.json From 17e846150c49395e44550e18cbe5120fcf64c173 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Tue, 28 May 2024 19:35:35 -0400 Subject: [PATCH 103/201] Add process_before_every_sampling hook --- modules/processing.py | 24 ++++++++++++++++++++++++ modules/scripts.py | 15 +++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/modules/processing.py b/modules/processing.py index 91cb94db1b9..79a3f0a726c 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1330,6 +1330,15 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs # here we generate an image normally x = self.rng.next() + if self.scripts is not None: + self.scripts.process_before_every_sampling( + p=self, + x=x, + noise=x, + c=conditioning, + uc=unconditional_conditioning + ) + samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x)) del x @@ -1430,6 +1439,13 @@ def save_intermediate(image, index): if self.scripts is not None: self.scripts.before_hr(self) + self.scripts.process_before_every_sampling( + p=self, + x=samples, + noise=noise, + c=self.hr_c, + uc=self.hr_uc, + ) samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) @@ -1743,6 +1759,14 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier x *= self.initial_noise_multiplier + if self.scripts is not None: + self.scripts.process_before_every_sampling( + p=self, + x=self.init_latent, + noise=x, + c=conditioning, + uc=unconditional_conditioning + ) samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning) if self.mask is not None: diff --git a/modules/scripts.py b/modules/scripts.py index 70ccfbe46b1..8eca396b140 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -187,6 +187,13 @@ def after_extra_networks_activate(self, p, *args, **kwargs): """ pass + def process_before_every_sampling(self, p, *args, **kwargs): + """ + Similar to process(), called before every sampling. + If you use high-res fix, this will be called two times. + """ + pass + def process_batch(self, p, *args, **kwargs): """ Same as process(), but called for every batch. 
@@ -826,6 +833,14 @@ def process(self, p): except Exception: errors.report(f"Error running process: {script.filename}", exc_info=True) + def process_before_every_sampling(self, p, **kwargs): + for script in self.ordered_scripts('process_before_every_sampling'): + try: + script_args = p.script_args[script.args_from:script.args_to] + script.process_before_every_sampling(p, *script_args, **kwargs) + except Exception: + errors.report(f"Error running process_before_every_sampling: {script.filename}", exc_info=True) + def before_process_batch(self, p, **kwargs): for script in self.ordered_scripts('before_process_batch'): try: From abacb735f4f20a301e11a36442950b55b19626fe Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 10 Jun 2024 20:47:12 +0900 Subject: [PATCH 104/201] multi size grid --- modules/images.py | 9 ++++++--- scripts/xyz_grid.py | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/modules/images.py b/modules/images.py index 3253847fed1..cfdfb338446 100644 --- a/modules/images.py +++ b/modules/images.py @@ -54,11 +54,14 @@ def image_grid(imgs, batch_size=1, rows=None): params = script_callbacks.ImageGridLoopParams(imgs, cols, rows) script_callbacks.image_grid_callback(params) - w, h = imgs[0].size - grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color='black') + w, h = map(max, zip(*(img.size for img in imgs))) + grid_background_color = ImageColor.getcolor(opts.grid_background_color, 'RGB') + grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color=grid_background_color) for i, img in enumerate(params.imgs): - grid.paste(img, box=(i % params.cols * w, i // params.cols * h)) + img_w, img_h = img.size + w_offset, h_offset = 0 if img_w == w else (w - img_w) // 2, 0 if img_h == h else (h - img_h) // 2 + grid.paste(img, box=(i % params.cols * w + w_offset, i // params.cols * h + h_offset)) return grid diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 52e343c4fb8..606d72d42af 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -375,16 +375,18 @@ def index(ix, iy, iz): end_index = start_index + len(xs) * len(ys) grid = images.image_grid(processed_result.images[start_index:end_index], rows=len(ys)) if draw_legend: - grid = images.draw_grid_annotations(grid, processed_result.images[start_index].size[0], processed_result.images[start_index].size[1], hor_texts, ver_texts, margin_size) + grid_max_w, grid_max_h = map(max, zip(*(img.size for img in processed_result.images[start_index:end_index]))) + grid = images.draw_grid_annotations(grid, grid_max_w, grid_max_h, hor_texts, ver_texts, margin_size) processed_result.images.insert(i, grid) processed_result.all_prompts.insert(i, processed_result.all_prompts[start_index]) processed_result.all_seeds.insert(i, processed_result.all_seeds[start_index]) processed_result.infotexts.insert(i, processed_result.infotexts[start_index]) - sub_grid_size = processed_result.images[0].size + # sub_grid_size = processed_result.images[0].size z_grid = images.image_grid(processed_result.images[:z_count], rows=1) + z_sub_grid_max_w, z_sub_grid_max_h = map(max, zip(*(img.size for img in processed_result.images[:z_count]))) if draw_legend: - z_grid = images.draw_grid_annotations(z_grid, sub_grid_size[0], sub_grid_size[1], title_texts, [[images.GridAnnotation()]]) + z_grid = images.draw_grid_annotations(z_grid, z_sub_grid_max_w, z_sub_grid_max_h, title_texts, [[images.GridAnnotation()]]) processed_result.images.insert(0, z_grid) # TODO: Deeper aspects of the 
program rely on grid info being misaligned between metadata arrays, which is not ideal. # processed_result.all_prompts.insert(0, processed_result.all_prompts[0]) From 00e09382cd85c77ac35545c7264855823dfc2eb1 Mon Sep 17 00:00:00 2001 From: Silver Date: Mon, 10 Jun 2024 22:11:11 +0200 Subject: [PATCH 105/201] Add option to enable clip skip for clip L on SDXL --- modules/sd_hijack_clip.py | 2 ++ modules/shared_options.py | 1 + 2 files changed, 3 insertions(+) diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 6ef10ac7cd8..3db047dfd96 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -355,6 +355,8 @@ def encode_with_transformers(self, tokens): if self.wrapped.layer == "last": z = outputs.last_hidden_state + elif opts.sdxl_clip_l_skip is True: + z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers] else: z = outputs.hidden_states[self.wrapped.layer_idx] diff --git a/modules/shared_options.py b/modules/shared_options.py index 104d8a54484..51d51d8ee66 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -176,6 +176,7 @@ "emphasis": OptionInfo("Original", "Emphasis mode", gr.Radio, lambda: {"choices": [x.name for x in sd_emphasis.options]}, infotext="Emphasis").info("makes it possible to make model to pay (more:1.1) or (less:0.9) attention to text when you use the syntax in prompt; " + sd_emphasis.get_options_descriptions()), "enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"), "comma_padding_backtrack": OptionInfo(20, "Prompt word wrap length limit", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1}).info("in tokens - for texts shorter than specified, if they don't fit into 75 token limit, move them to the next 75 token chunk"), + "sdxl_clip_l_skip": OptionInfo(False, "Clip skip SDXL", gr.Checkbox).info("Enable Clip skip for the secondary clip model in sdxl. 
Has no effect on SD 1.5 or SD 2.0/2.1."), "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}, infotext="Clip skip").link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"), "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"), "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"), From 91ecc750bebcd25c5ad970a7bddc8f7603a136d7 Mon Sep 17 00:00:00 2001 From: Silver <65376327+silveroxides@users.noreply.github.com> Date: Tue, 11 Jun 2024 00:40:26 +0200 Subject: [PATCH 106/201] Update sd_hijack_clip.py --- modules/sd_hijack_clip.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 3db047dfd96..355df3d30d1 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -353,10 +353,10 @@ def __init__(self, wrapped, hijack): def encode_with_transformers(self, tokens): outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=self.wrapped.layer == "hidden") - if self.wrapped.layer == "last": - z = outputs.last_hidden_state - elif opts.sdxl_clip_l_skip is True: + if opts.sdxl_clip_l_skip is True: z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers] + elif self.wrapped.layer == "last": + z = outputs.last_hidden_state else: z = outputs.hidden_states[self.wrapped.layer_idx] From 1f8f3a6e8bdbc687bc449aa3ce39bf4bb668f6f1 Mon Sep 17 00:00:00 2001 From: YSH Date: Tue, 11 Jun 2024 16:50:00 -0700 Subject: [PATCH 107/201] feat: prevent screen sleep during generation --- javascript/progressbar.js | 25 +++++++++++++++++++++++++ modules/shared_options.py | 1 + 2 files changed, 26 insertions(+) diff --git a/javascript/progressbar.js b/javascript/progressbar.js index f068bac6aba..00b0429bba9 100644 --- a/javascript/progressbar.js +++ b/javascript/progressbar.js @@ -76,6 +76,28 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre var dateStart = new Date(); var wasEverActive = false; var parentProgressbar = progressbarContainer.parentNode; + var wakeLock = null; + + var requestWakeLock = async function() { + if (!opts.prevent_screen_sleep_during_generation) return; + try { + wakeLock = await navigator.wakeLock.request('screen'); + console.log('Wake Lock is active.'); + } catch (err) { + console.log('Wake Lock is not supported.'); + } + }; + + var releaseWakeLock = async function() { + if (!opts.prevent_screen_sleep_during_generation || !wakeLock) return; + try { + await wakeLock.release(); + console.log('Wake Lock is released.'); + wakeLock = null; + } catch (err) { + console.error('Wake Lock release failed', err); + } + }; var divProgress = document.createElement('div'); divProgress.className = 'progressDiv'; @@ -89,6 +111,7 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre var livePreview = null; var removeProgressBar = function() { + releaseWakeLock(); if (!divProgress) return; setTitle(""); @@ -100,6 +123,8 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre }; var funProgress = function(id_task) { + // Request the wake lock at the start of the progress + requestWakeLock(); 
request("./internal/progress", {id_task: id_task, live_preview: false}, function(res) { if (res.completed) { removeProgressBar(); diff --git a/modules/shared_options.py b/modules/shared_options.py index 326a317e030..3741cf1f594 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -359,6 +359,7 @@ "live_preview_refresh_period": OptionInfo(1000, "Progressbar and preview update period").info("in milliseconds"), "live_preview_fast_interrupt": OptionInfo(False, "Return image with chosen live preview method on interrupt").info("makes interrupts faster"), "js_live_preview_in_modal_lightbox": OptionInfo(False, "Show Live preview in full page image viewer"), + "prevent_screen_sleep_during_generation": OptionInfo(True, "Prevent screen sleep during generation"), })) options_templates.update(options_section(('sampler-params', "Sampler parameters", "sd"), { From c803e11505cae54c7e8e467cd773b2053c2bfc38 Mon Sep 17 00:00:00 2001 From: YSH Date: Tue, 11 Jun 2024 18:14:32 -0700 Subject: [PATCH 108/201] fix: prevent create multiple wake lock --- javascript/progressbar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/progressbar.js b/javascript/progressbar.js index 00b0429bba9..751fc096c53 100644 --- a/javascript/progressbar.js +++ b/javascript/progressbar.js @@ -79,7 +79,7 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre var wakeLock = null; var requestWakeLock = async function() { - if (!opts.prevent_screen_sleep_during_generation) return; + if (!opts.prevent_screen_sleep_during_generation || wakeLock) return; try { wakeLock = await navigator.wakeLock.request('screen'); console.log('Wake Lock is active.'); From f1e0bfebfc9418f14f36ea255162ed1eaba3a62f Mon Sep 17 00:00:00 2001 From: YSH Date: Tue, 11 Jun 2024 22:33:11 -0700 Subject: [PATCH 109/201] ci: remove comments and console logs --- javascript/progressbar.js | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/javascript/progressbar.js b/javascript/progressbar.js index 751fc096c53..23dea64ceda 100644 --- a/javascript/progressbar.js +++ b/javascript/progressbar.js @@ -82,9 +82,8 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre if (!opts.prevent_screen_sleep_during_generation || wakeLock) return; try { wakeLock = await navigator.wakeLock.request('screen'); - console.log('Wake Lock is active.'); } catch (err) { - console.log('Wake Lock is not supported.'); + console.error('Wake Lock is not supported.'); } }; @@ -92,7 +91,6 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre if (!opts.prevent_screen_sleep_during_generation || !wakeLock) return; try { await wakeLock.release(); - console.log('Wake Lock is released.'); wakeLock = null; } catch (err) { console.error('Wake Lock release failed', err); @@ -123,7 +121,6 @@ function requestProgress(id_task, progressbarContainer, gallery, atEnd, onProgre }; var funProgress = function(id_task) { - // Request the wake lock at the start of the progress requestWakeLock(); request("./internal/progress", {id_task: id_task, live_preview: false}, function(res) { if (res.completed) { From 9e0f6d2012bfdd11c0b8a4bd42e176e034c4848c Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 11 Jun 2024 21:06:36 +0900 Subject: [PATCH 110/201] remove commented code --- scripts/xyz_grid.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 606d72d42af..b184721bebc 100644 --- 
a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -382,7 +382,6 @@ def index(ix, iy, iz): processed_result.all_seeds.insert(i, processed_result.all_seeds[start_index]) processed_result.infotexts.insert(i, processed_result.infotexts[start_index]) - # sub_grid_size = processed_result.images[0].size z_grid = images.image_grid(processed_result.images[:z_count], rows=1) z_sub_grid_max_w, z_sub_grid_max_h = map(max, zip(*(img.size for img in processed_result.images[:z_count]))) if draw_legend: From a7116aa9a11c7d7d10df81aa52ad5ec33f4b6db6 Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Sun, 16 Jun 2024 07:13:57 +0300 Subject: [PATCH 111/201] add SD3 reference implementation from https://github.com/mcmonkey4eva/sd3-ref/ --- modules/models/sd3/mmdit.py | 619 ++++++++++++++++++++++++++++++ modules/models/sd3/other_impls.py | 492 ++++++++++++++++++++++++ modules/models/sd3/sd3_impls.py | 371 ++++++++++++++++++ 3 files changed, 1482 insertions(+) create mode 100644 modules/models/sd3/mmdit.py create mode 100644 modules/models/sd3/other_impls.py create mode 100644 modules/models/sd3/sd3_impls.py diff --git a/modules/models/sd3/mmdit.py b/modules/models/sd3/mmdit.py new file mode 100644 index 00000000000..6d8b65bdf45 --- /dev/null +++ b/modules/models/sd3/mmdit.py @@ -0,0 +1,619 @@ +### This file contains impls for MM-DiT, the core model component of SD3 + +import math +from typing import Dict, Optional +import numpy as np +import torch +import torch.nn as nn +from einops import rearrange, repeat +from other_impls import attention, Mlp + +class PatchEmbed(nn.Module): + """ 2D Image to Patch Embedding""" + def __init__( + self, + img_size: Optional[int] = 224, + patch_size: int = 16, + in_chans: int = 3, + embed_dim: int = 768, + flatten: bool = True, + bias: bool = True, + strict_img_size: bool = True, + dynamic_img_pad: bool = False, + dtype=None, + device=None, + ): + super().__init__() + self.patch_size = (patch_size, patch_size) + if img_size is not None: + self.img_size = (img_size, img_size) + self.grid_size = tuple([s // p for s, p in zip(self.img_size, self.patch_size)]) + self.num_patches = self.grid_size[0] * self.grid_size[1] + else: + self.img_size = None + self.grid_size = None + self.num_patches = None + + # flatten spatial dim and transpose to channels last, kept for bwd compat + self.flatten = flatten + self.strict_img_size = strict_img_size + self.dynamic_img_pad = dynamic_img_pad + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias, dtype=dtype, device=device) + + def forward(self, x): + B, C, H, W = x.shape + x = self.proj(x) + if self.flatten: + x = x.flatten(2).transpose(1, 2) # NCHW -> NLC + return x + + +def modulate(x, shift, scale): + if shift is None: + shift = torch.zeros_like(scale) + return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1) + + +################################################################################# +# Sine/Cosine Positional Embedding Functions # +################################################################################# + + +def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0, scaling_factor=None, offset=None): + """ + grid_size: int of the grid height and width + return: + pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) + """ + grid_h = np.arange(grid_size, dtype=np.float32) + grid_w = np.arange(grid_size, dtype=np.float32) + grid = np.meshgrid(grid_w, grid_h) # here w goes first + grid = 
np.stack(grid, axis=0) + if scaling_factor is not None: + grid = grid / scaling_factor + if offset is not None: + grid = grid - offset + grid = grid.reshape([2, 1, grid_size, grid_size]) + pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) + if cls_token and extra_tokens > 0: + pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) + return pos_embed + + +def get_2d_sincos_pos_embed_from_grid(embed_dim, grid): + assert embed_dim % 2 == 0 + # use half of dimensions to encode grid_h + emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2) + emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2) + emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D) + return emb + + +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): + """ + embed_dim: output dimension for each position + pos: a list of positions to be encoded: size (M,) + out: (M, D) + """ + assert embed_dim % 2 == 0 + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000**omega # (D/2,) + pos = pos.reshape(-1) # (M,) + out = np.einsum("m,d->md", pos, omega) # (M, D/2), outer product + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + return np.concatenate([emb_sin, emb_cos], axis=1) # (M, D) + + +################################################################################# +# Embedding Layers for Timesteps and Class Labels # +################################################################################# + + +class TimestepEmbedder(nn.Module): + """Embeds scalar timesteps into vector representations.""" + + def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None): + super().__init__() + self.mlp = nn.Sequential( + nn.Linear(frequency_embedding_size, hidden_size, bias=True, dtype=dtype, device=device), + nn.SiLU(), + nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device), + ) + self.frequency_embedding_size = frequency_embedding_size + + @staticmethod + def timestep_embedding(t, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + :param t: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an (N, D) Tensor of positional embeddings. 
+ """ + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) + * torch.arange(start=0, end=half, dtype=torch.float32) + / half + ).to(device=t.device) + args = t[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + if torch.is_floating_point(t): + embedding = embedding.to(dtype=t.dtype) + return embedding + + def forward(self, t, dtype, **kwargs): + t_freq = self.timestep_embedding(t, self.frequency_embedding_size).to(dtype) + t_emb = self.mlp(t_freq) + return t_emb + + +class VectorEmbedder(nn.Module): + """Embeds a flat vector of dimension input_dim""" + + def __init__(self, input_dim: int, hidden_size: int, dtype=None, device=None): + super().__init__() + self.mlp = nn.Sequential( + nn.Linear(input_dim, hidden_size, bias=True, dtype=dtype, device=device), + nn.SiLU(), + nn.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.mlp(x) + + +################################################################################# +# Core DiT Model # +################################################################################# + + +def split_qkv(qkv, head_dim): + qkv = qkv.reshape(qkv.shape[0], qkv.shape[1], 3, -1, head_dim).movedim(2, 0) + return qkv[0], qkv[1], qkv[2] + +def optimized_attention(qkv, num_heads): + return attention(qkv[0], qkv[1], qkv[2], num_heads) + +class SelfAttention(nn.Module): + ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug") + + def __init__( + self, + dim: int, + num_heads: int = 8, + qkv_bias: bool = False, + qk_scale: Optional[float] = None, + attn_mode: str = "xformers", + pre_only: bool = False, + qk_norm: Optional[str] = None, + rmsnorm: bool = False, + dtype=None, + device=None, + ): + super().__init__() + self.num_heads = num_heads + self.head_dim = dim // num_heads + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device) + if not pre_only: + self.proj = nn.Linear(dim, dim, dtype=dtype, device=device) + assert attn_mode in self.ATTENTION_MODES + self.attn_mode = attn_mode + self.pre_only = pre_only + + if qk_norm == "rms": + self.ln_q = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + self.ln_k = RMSNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + elif qk_norm == "ln": + self.ln_q = nn.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + self.ln_k = nn.LayerNorm(self.head_dim, elementwise_affine=True, eps=1.0e-6, dtype=dtype, device=device) + elif qk_norm is None: + self.ln_q = nn.Identity() + self.ln_k = nn.Identity() + else: + raise ValueError(qk_norm) + + def pre_attention(self, x: torch.Tensor): + B, L, C = x.shape + qkv = self.qkv(x) + q, k, v = split_qkv(qkv, self.head_dim) + q = self.ln_q(q).reshape(q.shape[0], q.shape[1], -1) + k = self.ln_k(k).reshape(q.shape[0], q.shape[1], -1) + return (q, k, v) + + def post_attention(self, x: torch.Tensor) -> torch.Tensor: + assert not self.pre_only + x = self.proj(x) + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + (q, k, v) = self.pre_attention(x) + x = attention(q, k, v, self.num_heads) + x = self.post_attention(x) + return x + + +class RMSNorm(torch.nn.Module): + def __init__( + self, dim: int, elementwise_affine: bool = False, eps: float = 1e-6, device=None, dtype=None + ): + """ + Initialize 
the RMSNorm normalization layer. + Args: + dim (int): The dimension of the input tensor. + eps (float, optional): A small value added to the denominator for numerical stability. Default is 1e-6. + Attributes: + eps (float): A small value added to the denominator for numerical stability. + weight (nn.Parameter): Learnable scaling parameter. + """ + super().__init__() + self.eps = eps + self.learnable_scale = elementwise_affine + if self.learnable_scale: + self.weight = nn.Parameter(torch.empty(dim, device=device, dtype=dtype)) + else: + self.register_parameter("weight", None) + + def _norm(self, x): + """ + Apply the RMSNorm normalization to the input tensor. + Args: + x (torch.Tensor): The input tensor. + Returns: + torch.Tensor: The normalized tensor. + """ + return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) + + def forward(self, x): + """ + Forward pass through the RMSNorm layer. + Args: + x (torch.Tensor): The input tensor. + Returns: + torch.Tensor: The output tensor after applying RMSNorm. + """ + x = self._norm(x) + if self.learnable_scale: + return x * self.weight.to(device=x.device, dtype=x.dtype) + else: + return x + + +class SwiGLUFeedForward(nn.Module): + def __init__( + self, + dim: int, + hidden_dim: int, + multiple_of: int, + ffn_dim_multiplier: Optional[float] = None, + ): + """ + Initialize the FeedForward module. + + Args: + dim (int): Input dimension. + hidden_dim (int): Hidden dimension of the feedforward layer. + multiple_of (int): Value to ensure hidden dimension is a multiple of this value. + ffn_dim_multiplier (float, optional): Custom multiplier for hidden dimension. Defaults to None. + + Attributes: + w1 (ColumnParallelLinear): Linear transformation for the first layer. + w2 (RowParallelLinear): Linear transformation for the second layer. + w3 (ColumnParallelLinear): Linear transformation for the third layer. 
+ + """ + super().__init__() + hidden_dim = int(2 * hidden_dim / 3) + # custom dim factor multiplier + if ffn_dim_multiplier is not None: + hidden_dim = int(ffn_dim_multiplier * hidden_dim) + hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) + + self.w1 = nn.Linear(dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, dim, bias=False) + self.w3 = nn.Linear(dim, hidden_dim, bias=False) + + def forward(self, x): + return self.w2(nn.functional.silu(self.w1(x)) * self.w3(x)) + + +class DismantledBlock(nn.Module): + """A DiT block with gated adaptive layer norm (adaLN) conditioning.""" + + ATTENTION_MODES = ("xformers", "torch", "torch-hb", "math", "debug") + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + attn_mode: str = "xformers", + qkv_bias: bool = False, + pre_only: bool = False, + rmsnorm: bool = False, + scale_mod_only: bool = False, + swiglu: bool = False, + qk_norm: Optional[str] = None, + dtype=None, + device=None, + **block_kwargs, + ): + super().__init__() + assert attn_mode in self.ATTENTION_MODES + if not rmsnorm: + self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + else: + self.norm1 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, attn_mode=attn_mode, pre_only=pre_only, qk_norm=qk_norm, rmsnorm=rmsnorm, dtype=dtype, device=device) + if not pre_only: + if not rmsnorm: + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + else: + self.norm2 = RMSNorm(hidden_size, elementwise_affine=False, eps=1e-6) + mlp_hidden_dim = int(hidden_size * mlp_ratio) + if not pre_only: + if not swiglu: + self.mlp = Mlp(in_features=hidden_size, hidden_features=mlp_hidden_dim, act_layer=nn.GELU(approximate="tanh"), dtype=dtype, device=device) + else: + self.mlp = SwiGLUFeedForward(dim=hidden_size, hidden_dim=mlp_hidden_dim, multiple_of=256) + self.scale_mod_only = scale_mod_only + if not scale_mod_only: + n_mods = 6 if not pre_only else 2 + else: + n_mods = 4 if not pre_only else 1 + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, n_mods * hidden_size, bias=True, dtype=dtype, device=device)) + self.pre_only = pre_only + + def pre_attention(self, x: torch.Tensor, c: torch.Tensor): + assert x is not None, "pre_attention called with None input" + if not self.pre_only: + if not self.scale_mod_only: + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(c).chunk(6, dim=1) + else: + shift_msa = None + shift_mlp = None + scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(c).chunk(4, dim=1) + qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) + return qkv, (x, gate_msa, shift_mlp, scale_mlp, gate_mlp) + else: + if not self.scale_mod_only: + shift_msa, scale_msa = self.adaLN_modulation(c).chunk(2, dim=1) + else: + shift_msa = None + scale_msa = self.adaLN_modulation(c) + qkv = self.attn.pre_attention(modulate(self.norm1(x), shift_msa, scale_msa)) + return qkv, None + + def post_attention(self, attn, x, gate_msa, shift_mlp, scale_mlp, gate_mlp): + assert not self.pre_only + x = x + gate_msa.unsqueeze(1) * self.attn.post_attention(attn) + x = x + gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp)) + return x + + def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: + assert not self.pre_only + (q, k, 
v), intermediates = self.pre_attention(x, c) + attn = attention(q, k, v, self.attn.num_heads) + return self.post_attention(attn, *intermediates) + + +def block_mixing(context, x, context_block, x_block, c): + assert context is not None, "block_mixing called with None context" + context_qkv, context_intermediates = context_block.pre_attention(context, c) + + x_qkv, x_intermediates = x_block.pre_attention(x, c) + + o = [] + for t in range(3): + o.append(torch.cat((context_qkv[t], x_qkv[t]), dim=1)) + q, k, v = tuple(o) + + attn = attention(q, k, v, x_block.attn.num_heads) + context_attn, x_attn = (attn[:, : context_qkv[0].shape[1]], attn[:, context_qkv[0].shape[1] :]) + + if not context_block.pre_only: + context = context_block.post_attention(context_attn, *context_intermediates) + else: + context = None + x = x_block.post_attention(x_attn, *x_intermediates) + return context, x + + +class JointBlock(nn.Module): + """just a small wrapper to serve as a fsdp unit""" + + def __init__(self, *args, **kwargs): + super().__init__() + pre_only = kwargs.pop("pre_only") + qk_norm = kwargs.pop("qk_norm", None) + self.context_block = DismantledBlock(*args, pre_only=pre_only, qk_norm=qk_norm, **kwargs) + self.x_block = DismantledBlock(*args, pre_only=False, qk_norm=qk_norm, **kwargs) + + def forward(self, *args, **kwargs): + return block_mixing(*args, context_block=self.context_block, x_block=self.x_block, **kwargs) + + +class FinalLayer(nn.Module): + """ + The final layer of DiT. + """ + + def __init__(self, hidden_size: int, patch_size: int, out_channels: int, total_out_channels: Optional[int] = None, dtype=None, device=None): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.linear = ( + nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device) + if (total_out_channels is None) + else nn.Linear(hidden_size, total_out_channels, bias=True, dtype=dtype, device=device) + ) + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device)) + + def forward(self, x: torch.Tensor, c: torch.Tensor) -> torch.Tensor: + shift, scale = self.adaLN_modulation(c).chunk(2, dim=1) + x = modulate(self.norm_final(x), shift, scale) + x = self.linear(x) + return x + + +class MMDiT(nn.Module): + """Diffusion model with a Transformer backbone.""" + + def __init__( + self, + input_size: int = 32, + patch_size: int = 2, + in_channels: int = 4, + depth: int = 28, + mlp_ratio: float = 4.0, + learn_sigma: bool = False, + adm_in_channels: Optional[int] = None, + context_embedder_config: Optional[Dict] = None, + register_length: int = 0, + attn_mode: str = "torch", + rmsnorm: bool = False, + scale_mod_only: bool = False, + swiglu: bool = False, + out_channels: Optional[int] = None, + pos_embed_scaling_factor: Optional[float] = None, + pos_embed_offset: Optional[float] = None, + pos_embed_max_size: Optional[int] = None, + num_patches = None, + qk_norm: Optional[str] = None, + qkv_bias: bool = True, + dtype = None, + device = None, + ): + super().__init__() + print(f"mmdit initializing with: {input_size=}, {patch_size=}, {in_channels=}, {depth=}, {mlp_ratio=}, {learn_sigma=}, {adm_in_channels=}, {context_embedder_config=}, {register_length=}, {attn_mode=}, {rmsnorm=}, {scale_mod_only=}, {swiglu=}, {out_channels=}, {pos_embed_scaling_factor=}, {pos_embed_offset=}, {pos_embed_max_size=}, {num_patches=}, {qk_norm=}, {qkv_bias=}, {dtype=}, 
{device=}") + self.dtype = dtype + self.learn_sigma = learn_sigma + self.in_channels = in_channels + default_out_channels = in_channels * 2 if learn_sigma else in_channels + self.out_channels = out_channels if out_channels is not None else default_out_channels + self.patch_size = patch_size + self.pos_embed_scaling_factor = pos_embed_scaling_factor + self.pos_embed_offset = pos_embed_offset + self.pos_embed_max_size = pos_embed_max_size + + # apply magic --> this defines a head_size of 64 + hidden_size = 64 * depth + num_heads = depth + + self.num_heads = num_heads + + self.x_embedder = PatchEmbed(input_size, patch_size, in_channels, hidden_size, bias=True, strict_img_size=self.pos_embed_max_size is None, dtype=dtype, device=device) + self.t_embedder = TimestepEmbedder(hidden_size, dtype=dtype, device=device) + + if adm_in_channels is not None: + assert isinstance(adm_in_channels, int) + self.y_embedder = VectorEmbedder(adm_in_channels, hidden_size, dtype=dtype, device=device) + + self.context_embedder = nn.Identity() + if context_embedder_config is not None: + if context_embedder_config["target"] == "torch.nn.Linear": + self.context_embedder = nn.Linear(**context_embedder_config["params"], dtype=dtype, device=device) + + self.register_length = register_length + if self.register_length > 0: + self.register = nn.Parameter(torch.randn(1, register_length, hidden_size, dtype=dtype, device=device)) + + # num_patches = self.x_embedder.num_patches + # Will use fixed sin-cos embedding: + # just use a buffer already + if num_patches is not None: + self.register_buffer( + "pos_embed", + torch.zeros(1, num_patches, hidden_size, dtype=dtype, device=device), + ) + else: + self.pos_embed = None + + self.joint_blocks = nn.ModuleList( + [ + JointBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, attn_mode=attn_mode, pre_only=i == depth - 1, rmsnorm=rmsnorm, scale_mod_only=scale_mod_only, swiglu=swiglu, qk_norm=qk_norm, dtype=dtype, device=device) + for i in range(depth) + ] + ) + + self.final_layer = FinalLayer(hidden_size, patch_size, self.out_channels, dtype=dtype, device=device) + + def cropped_pos_embed(self, hw): + assert self.pos_embed_max_size is not None + p = self.x_embedder.patch_size[0] + h, w = hw + # patched size + h = h // p + w = w // p + assert h <= self.pos_embed_max_size, (h, self.pos_embed_max_size) + assert w <= self.pos_embed_max_size, (w, self.pos_embed_max_size) + top = (self.pos_embed_max_size - h) // 2 + left = (self.pos_embed_max_size - w) // 2 + spatial_pos_embed = rearrange( + self.pos_embed, + "1 (h w) c -> 1 h w c", + h=self.pos_embed_max_size, + w=self.pos_embed_max_size, + ) + spatial_pos_embed = spatial_pos_embed[:, top : top + h, left : left + w, :] + spatial_pos_embed = rearrange(spatial_pos_embed, "1 h w c -> 1 (h w) c") + return spatial_pos_embed + + def unpatchify(self, x, hw=None): + """ + x: (N, T, patch_size**2 * C) + imgs: (N, H, W, C) + """ + c = self.out_channels + p = self.x_embedder.patch_size[0] + if hw is None: + h = w = int(x.shape[1] ** 0.5) + else: + h, w = hw + h = h // p + w = w // p + assert h * w == x.shape[1] + + x = x.reshape(shape=(x.shape[0], h, w, p, p, c)) + x = torch.einsum("nhwpqc->nchpwq", x) + imgs = x.reshape(shape=(x.shape[0], c, h * p, w * p)) + return imgs + + def forward_core_with_concat(self, x: torch.Tensor, c_mod: torch.Tensor, context: Optional[torch.Tensor] = None) -> torch.Tensor: + if self.register_length > 0: + context = torch.cat((repeat(self.register, "1 ... 
-> b ...", b=x.shape[0]), context if context is not None else torch.Tensor([]).type_as(x)), 1) + + # context is B, L', D + # x is B, L, D + for block in self.joint_blocks: + context, x = block(context, x, c=c_mod) + + x = self.final_layer(x, c_mod) # (N, T, patch_size ** 2 * out_channels) + return x + + def forward(self, x: torch.Tensor, t: torch.Tensor, y: Optional[torch.Tensor] = None, context: Optional[torch.Tensor] = None) -> torch.Tensor: + """ + Forward pass of DiT. + x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) + t: (N,) tensor of diffusion timesteps + y: (N,) tensor of class labels + """ + hw = x.shape[-2:] + x = self.x_embedder(x) + self.cropped_pos_embed(hw) + c = self.t_embedder(t, dtype=x.dtype) # (N, D) + if y is not None: + y = self.y_embedder(y) # (N, D) + c = c + y # (N, D) + + context = self.context_embedder(context) + + x = self.forward_core_with_concat(x, c, context) + + x = self.unpatchify(x, hw=hw) # (N, out_channels, H, W) + return x diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py new file mode 100644 index 00000000000..2c76e1cb64f --- /dev/null +++ b/modules/models/sd3/other_impls.py @@ -0,0 +1,492 @@ +### This file contains impls for underlying related models (CLIP, T5, etc) + +import torch, math +from torch import nn +from transformers import CLIPTokenizer, T5TokenizerFast + + +################################################################################################# +### Core/Utility +################################################################################################# + + +def attention(q, k, v, heads, mask=None): + """Convenience wrapper around a basic attention operation""" + b, _, dim_head = q.shape + dim_head //= heads + q, k, v = map(lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2), (q, k, v)) + out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) + return out.transpose(1, 2).reshape(b, -1, heads * dim_head) + + +class Mlp(nn.Module): + """ MLP as used in Vision Transformer, MLP-Mixer and related networks""" + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, bias=True, dtype=None, device=None): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + + self.fc1 = nn.Linear(in_features, hidden_features, bias=bias, dtype=dtype, device=device) + self.act = act_layer + self.fc2 = nn.Linear(hidden_features, out_features, bias=bias, dtype=dtype, device=device) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.fc2(x) + return x + + +################################################################################################# +### CLIP +################################################################################################# + + +class CLIPAttention(torch.nn.Module): + def __init__(self, embed_dim, heads, dtype, device): + super().__init__() + self.heads = heads + self.q_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True, dtype=dtype, device=device) + + def forward(self, x, mask=None): + q = self.q_proj(x) + k = self.k_proj(x) + v = self.v_proj(x) + out = attention(q, k, v, self.heads, mask) + return 
self.out_proj(out) + + +ACTIVATIONS = { + "quick_gelu": lambda a: a * torch.sigmoid(1.702 * a), + "gelu": torch.nn.functional.gelu, +} + +class CLIPLayer(torch.nn.Module): + def __init__(self, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device): + super().__init__() + self.layer_norm1 = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + self.self_attn = CLIPAttention(embed_dim, heads, dtype, device) + self.layer_norm2 = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + #self.mlp = CLIPMLP(embed_dim, intermediate_size, intermediate_activation, dtype, device) + self.mlp = Mlp(embed_dim, intermediate_size, embed_dim, act_layer=ACTIVATIONS[intermediate_activation], dtype=dtype, device=device) + + def forward(self, x, mask=None): + x += self.self_attn(self.layer_norm1(x), mask) + x += self.mlp(self.layer_norm2(x)) + return x + + +class CLIPEncoder(torch.nn.Module): + def __init__(self, num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device): + super().__init__() + self.layers = torch.nn.ModuleList([CLIPLayer(embed_dim, heads, intermediate_size, intermediate_activation, dtype, device) for i in range(num_layers)]) + + def forward(self, x, mask=None, intermediate_output=None): + if intermediate_output is not None: + if intermediate_output < 0: + intermediate_output = len(self.layers) + intermediate_output + intermediate = None + for i, l in enumerate(self.layers): + x = l(x, mask) + if i == intermediate_output: + intermediate = x.clone() + return x, intermediate + + +class CLIPEmbeddings(torch.nn.Module): + def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None): + super().__init__() + self.token_embedding = torch.nn.Embedding(vocab_size, embed_dim, dtype=dtype, device=device) + self.position_embedding = torch.nn.Embedding(num_positions, embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens): + return self.token_embedding(input_tokens) + self.position_embedding.weight + + +class CLIPTextModel_(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + num_layers = config_dict["num_hidden_layers"] + embed_dim = config_dict["hidden_size"] + heads = config_dict["num_attention_heads"] + intermediate_size = config_dict["intermediate_size"] + intermediate_activation = config_dict["hidden_act"] + super().__init__() + self.embeddings = CLIPEmbeddings(embed_dim, dtype=torch.float32, device=device) + self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device) + self.final_layer_norm = nn.LayerNorm(embed_dim, dtype=dtype, device=device) + + def forward(self, input_tokens, intermediate_output=None, final_layer_norm_intermediate=True): + x = self.embeddings(input_tokens) + causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1) + x, i = self.encoder(x, mask=causal_mask, intermediate_output=intermediate_output) + x = self.final_layer_norm(x) + if i is not None and final_layer_norm_intermediate: + i = self.final_layer_norm(i) + pooled_output = x[torch.arange(x.shape[0], device=x.device), input_tokens.to(dtype=torch.int, device=x.device).argmax(dim=-1),] + return x, i, pooled_output + + +class CLIPTextModel(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + super().__init__() + self.num_layers = config_dict["num_hidden_layers"] + self.text_model = CLIPTextModel_(config_dict, dtype, device) + embed_dim = config_dict["hidden_size"] + self.text_projection = 
nn.Linear(embed_dim, embed_dim, bias=False, dtype=dtype, device=device) + self.text_projection.weight.copy_(torch.eye(embed_dim)) + self.dtype = dtype + + def get_input_embeddings(self): + return self.text_model.embeddings.token_embedding + + def set_input_embeddings(self, embeddings): + self.text_model.embeddings.token_embedding = embeddings + + def forward(self, *args, **kwargs): + x = self.text_model(*args, **kwargs) + out = self.text_projection(x[2]) + return (x[0], x[1], out, x[2]) + + +class SDTokenizer: + def __init__(self, max_length=77, pad_with_end=True, tokenizer=None, has_start_token=True, pad_to_max_length=True, min_length=None): + self.tokenizer = tokenizer + self.max_length = max_length + self.min_length = min_length + empty = self.tokenizer('')["input_ids"] + if has_start_token: + self.tokens_start = 1 + self.start_token = empty[0] + self.end_token = empty[1] + else: + self.tokens_start = 0 + self.start_token = None + self.end_token = empty[0] + self.pad_with_end = pad_with_end + self.pad_to_max_length = pad_to_max_length + vocab = self.tokenizer.get_vocab() + self.inv_vocab = {v: k for k, v in vocab.items()} + self.max_word_length = 8 + + + def tokenize_with_weights(self, text:str): + """Tokenize the text, with weight values - presume 1.0 for all and ignore other features here. The details aren't relevant for a reference impl, and weights themselves has weak effect on SD3.""" + if self.pad_with_end: + pad_token = self.end_token + else: + pad_token = 0 + batch = [] + if self.start_token is not None: + batch.append((self.start_token, 1.0)) + to_tokenize = text.replace("\n", " ").split(' ') + to_tokenize = [x for x in to_tokenize if x != ""] + for word in to_tokenize: + batch.extend([(t, 1) for t in self.tokenizer(word)["input_ids"][self.tokens_start:-1]]) + batch.append((self.end_token, 1.0)) + if self.pad_to_max_length: + batch.extend([(pad_token, 1.0)] * (self.max_length - len(batch))) + if self.min_length is not None and len(batch) < self.min_length: + batch.extend([(pad_token, 1.0)] * (self.min_length - len(batch))) + return [batch] + + +class SDXLClipGTokenizer(SDTokenizer): + def __init__(self, tokenizer): + super().__init__(pad_with_end=False, tokenizer=tokenizer) + + +class SD3Tokenizer: + def __init__(self): + clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + self.clip_l = SDTokenizer(tokenizer=clip_tokenizer) + self.clip_g = SDXLClipGTokenizer(clip_tokenizer) + self.t5xxl = T5XXLTokenizer() + + def tokenize_with_weights(self, text:str): + out = {} + out["g"] = self.clip_g.tokenize_with_weights(text) + out["l"] = self.clip_l.tokenize_with_weights(text) + out["t5xxl"] = self.t5xxl.tokenize_with_weights(text) + return out + + +class ClipTokenWeightEncoder: + def encode_token_weights(self, token_weight_pairs): + tokens = list(map(lambda a: a[0], token_weight_pairs[0])) + out, pooled = self([tokens]) + if pooled is not None: + first_pooled = pooled[0:1].cpu() + else: + first_pooled = pooled + output = [out[0:1]] + return torch.cat(output, dim=-2).cpu(), first_pooled + + +class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): + """Uses the CLIP transformer encoder for text (from huggingface)""" + LAYERS = ["last", "pooled", "hidden"] + def __init__(self, device="cpu", max_length=77, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=CLIPTextModel, + special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, return_projected_pooled=True): + super().__init__() + assert layer 
in self.LAYERS + self.transformer = model_class(textmodel_json_config, dtype, device) + self.num_layers = self.transformer.num_layers + self.max_length = max_length + self.transformer = self.transformer.eval() + for param in self.parameters(): + param.requires_grad = False + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.layer_norm_hidden_state = layer_norm_hidden_state + self.return_projected_pooled = return_projected_pooled + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.set_clip_options({"layer": layer_idx}) + self.options_default = (self.layer, self.layer_idx, self.return_projected_pooled) + + def set_clip_options(self, options): + layer_idx = options.get("layer", self.layer_idx) + self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled) + if layer_idx is None or abs(layer_idx) > self.num_layers: + self.layer = "last" + else: + self.layer = "hidden" + self.layer_idx = layer_idx + + def forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = torch.LongTensor(tokens).to(device) + outputs = self.transformer(tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state) + self.transformer.set_input_embeddings(backup_embeds) + if self.layer == "last": + z = outputs[0] + else: + z = outputs[1] + pooled_output = None + if len(outputs) >= 3: + if not self.return_projected_pooled and len(outputs) >= 4 and outputs[3] is not None: + pooled_output = outputs[3].float() + elif outputs[2] is not None: + pooled_output = outputs[2].float() + return z.float(), pooled_output + + +class SDXLClipG(SDClipModel): + """Wraps the CLIP-G model into the SD-CLIP-Model interface""" + def __init__(self, config, device="cpu", layer="penultimate", layer_idx=None, dtype=None): + if layer == "penultimate": + layer="hidden" + layer_idx=-2 + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) + + +class T5XXLModel(SDClipModel): + """Wraps the T5-XXL model into the SD-CLIP-Model interface for convenience""" + def __init__(self, config, device="cpu", layer="last", layer_idx=None, dtype=None): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=T5) + + +################################################################################################# +### T5 implementation, for the T5-XXL text encoder portion, largely pulled from upstream impl +################################################################################################# + + +class T5XXLTokenizer(SDTokenizer): + """Wraps the T5 Tokenizer from HF into the SDTokenizer interface""" + def __init__(self): + super().__init__(pad_with_end=False, tokenizer=T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl"), has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=77) + + +class T5LayerNorm(torch.nn.Module): + def __init__(self, hidden_size, eps=1e-6, dtype=None, device=None): + super().__init__() + self.weight = torch.nn.Parameter(torch.ones(hidden_size, dtype=dtype, device=device)) + self.variance_epsilon = eps + + def forward(self, x): + variance = x.pow(2).mean(-1, keepdim=True) + 
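        # note: this is RMS normalization - the input is scaled by its root-mean-square only,
        # with no mean subtraction and no bias term, unlike torch.nn.LayerNorm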
x = x * torch.rsqrt(variance + self.variance_epsilon) + return self.weight.to(device=x.device, dtype=x.dtype) * x + + +class T5DenseGatedActDense(torch.nn.Module): + def __init__(self, model_dim, ff_dim, dtype, device): + super().__init__() + self.wi_0 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wi_1 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wo = nn.Linear(ff_dim, model_dim, bias=False, dtype=dtype, device=device) + + def forward(self, x): + hidden_gelu = torch.nn.functional.gelu(self.wi_0(x), approximate="tanh") + hidden_linear = self.wi_1(x) + x = hidden_gelu * hidden_linear + x = self.wo(x) + return x + + +class T5LayerFF(torch.nn.Module): + def __init__(self, model_dim, ff_dim, dtype, device): + super().__init__() + self.DenseReluDense = T5DenseGatedActDense(model_dim, ff_dim, dtype, device) + self.layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, x): + forwarded_states = self.layer_norm(x) + forwarded_states = self.DenseReluDense(forwarded_states) + x += forwarded_states + return x + + +class T5Attention(torch.nn.Module): + def __init__(self, model_dim, inner_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + # Mesh TensorFlow initialization to avoid scaling before softmax + self.q = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.k = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.v = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.o = nn.Linear(inner_dim, model_dim, bias=False, dtype=dtype, device=device) + self.num_heads = num_heads + self.relative_attention_bias = None + if relative_attention_bias: + self.relative_attention_num_buckets = 32 + self.relative_attention_max_distance = 128 + self.relative_attention_bias = torch.nn.Embedding(self.relative_attention_num_buckets, self.num_heads, device=device) + + @staticmethod + def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128): + """ + Adapted from Mesh Tensorflow: + https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593 + + Translate relative position to a bucket number for relative attention. The relative position is defined as + memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to + position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for + small absolute relative_position and larger buckets for larger absolute relative_positions. All relative + positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same bucket. 
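        For example, assuming the defaults used here (bidirectional=True, num_buckets=32, max_distance=128),
        a relative position of -3 maps to bucket 3 while +3 maps to bucket 19, since the two directions use
        separate halves of the buckets; distant positions such as -100 and +100 fall into the coarser
        log-spaced buckets 15 and 31 respectively.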
+ This should allow for more graceful generalization to longer sequences than the model has been trained on + + Args: + relative_position: an int32 Tensor + bidirectional: a boolean - whether the attention is bidirectional + num_buckets: an integer + max_distance: an integer + + Returns: + a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets) + """ + relative_buckets = 0 + if bidirectional: + num_buckets //= 2 + relative_buckets += (relative_position > 0).to(torch.long) * num_buckets + relative_position = torch.abs(relative_position) + else: + relative_position = -torch.min(relative_position, torch.zeros_like(relative_position)) + # now relative_position is in the range [0, inf) + # half of the buckets are for exact increments in positions + max_exact = num_buckets // 2 + is_small = relative_position < max_exact + # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance + relative_position_if_large = max_exact + ( + torch.log(relative_position.float() / max_exact) + / math.log(max_distance / max_exact) + * (num_buckets - max_exact) + ).to(torch.long) + relative_position_if_large = torch.min(relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)) + relative_buckets += torch.where(is_small, relative_position, relative_position_if_large) + return relative_buckets + + def compute_bias(self, query_length, key_length, device): + """Compute binned relative position bias""" + context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None] + memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :] + relative_position = memory_position - context_position # shape (query_length, key_length) + relative_position_bucket = self._relative_position_bucket( + relative_position, # shape (query_length, key_length) + bidirectional=True, + num_buckets=self.relative_attention_num_buckets, + max_distance=self.relative_attention_max_distance, + ) + values = self.relative_attention_bias(relative_position_bucket) # shape (query_length, key_length, num_heads) + values = values.permute([2, 0, 1]).unsqueeze(0) # shape (1, num_heads, query_length, key_length) + return values + + def forward(self, x, past_bias=None): + q = self.q(x) + k = self.k(x) + v = self.v(x) + if self.relative_attention_bias is not None: + past_bias = self.compute_bias(x.shape[1], x.shape[1], x.device) + if past_bias is not None: + mask = past_bias + out = attention(q, k * ((k.shape[-1] / self.num_heads) ** 0.5), v, self.num_heads, mask) + return self.o(out), past_bias + + +class T5LayerSelfAttention(torch.nn.Module): + def __init__(self, model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + self.SelfAttention = T5Attention(model_dim, inner_dim, num_heads, relative_attention_bias, dtype, device) + self.layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, x, past_bias=None): + output, past_bias = self.SelfAttention(self.layer_norm(x), past_bias=past_bias) + x += output + return x, past_bias + + +class T5Block(torch.nn.Module): + def __init__(self, model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device): + super().__init__() + self.layer = torch.nn.ModuleList() + self.layer.append(T5LayerSelfAttention(model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias, dtype, device)) + self.layer.append(T5LayerFF(model_dim, ff_dim, dtype, device)) + + def forward(self, 
x, past_bias=None): + x, past_bias = self.layer[0](x, past_bias) + x = self.layer[-1](x) + return x, past_bias + + +class T5Stack(torch.nn.Module): + def __init__(self, num_layers, model_dim, inner_dim, ff_dim, num_heads, vocab_size, dtype, device): + super().__init__() + self.embed_tokens = torch.nn.Embedding(vocab_size, model_dim, device=device) + self.block = torch.nn.ModuleList([T5Block(model_dim, inner_dim, ff_dim, num_heads, relative_attention_bias=(i == 0), dtype=dtype, device=device) for i in range(num_layers)]) + self.final_layer_norm = T5LayerNorm(model_dim, dtype=dtype, device=device) + + def forward(self, input_ids, intermediate_output=None, final_layer_norm_intermediate=True): + intermediate = None + x = self.embed_tokens(input_ids) + past_bias = None + for i, l in enumerate(self.block): + x, past_bias = l(x, past_bias) + if i == intermediate_output: + intermediate = x.clone() + x = self.final_layer_norm(x) + if intermediate is not None and final_layer_norm_intermediate: + intermediate = self.final_layer_norm(intermediate) + return x, intermediate + + +class T5(torch.nn.Module): + def __init__(self, config_dict, dtype, device): + super().__init__() + self.num_layers = config_dict["num_layers"] + self.encoder = T5Stack(self.num_layers, config_dict["d_model"], config_dict["d_model"], config_dict["d_ff"], config_dict["num_heads"], config_dict["vocab_size"], dtype, device) + self.dtype = dtype + + def get_input_embeddings(self): + return self.encoder.embed_tokens + + def set_input_embeddings(self, embeddings): + self.encoder.embed_tokens = embeddings + + def forward(self, *args, **kwargs): + return self.encoder(*args, **kwargs) diff --git a/modules/models/sd3/sd3_impls.py b/modules/models/sd3/sd3_impls.py new file mode 100644 index 00000000000..6e9d0a4db7c --- /dev/null +++ b/modules/models/sd3/sd3_impls.py @@ -0,0 +1,371 @@ +### Impls of the SD3 core diffusion model and VAE + +import torch, math, einops +from mmdit import MMDiT +from PIL import Image + + +################################################################################################# +### MMDiT Model Wrapping +################################################################################################# + + +class ModelSamplingDiscreteFlow(torch.nn.Module): + """Helper for sampler scheduling (ie timestep/sigma calculations) for Discrete Flow models""" + def __init__(self, shift=1.0): + super().__init__() + self.shift = shift + timesteps = 1000 + ts = self.sigma(torch.arange(1, timesteps + 1, 1)) + self.register_buffer('sigmas', ts) + + @property + def sigma_min(self): + return self.sigmas[0] + + @property + def sigma_max(self): + return self.sigmas[-1] + + def timestep(self, sigma): + return sigma * 1000 + + def sigma(self, timestep: torch.Tensor): + timestep = timestep / 1000.0 + if self.shift == 1.0: + return timestep + return self.shift * timestep / (1 + (self.shift - 1) * timestep) + + def calculate_denoised(self, sigma, model_output, model_input): + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + return model_input - model_output * sigma + + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + return sigma * noise + (1.0 - sigma) * latent_image + + +class BaseModel(torch.nn.Module): + """Wrapper around the core MM-DiT model""" + def __init__(self, shift=1.0, device=None, dtype=torch.float32, file=None, prefix=""): + super().__init__() + # Important configuration values can be quickly determined by checking shapes in the source file + # Some of these will vary 
between models (eg 2B vs 8B primarily differ in their depth, but also other details change) + patch_size = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[2] + depth = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[0] // 64 + num_patches = file.get_tensor(f"{prefix}pos_embed").shape[1] + pos_embed_max_size = round(math.sqrt(num_patches)) + adm_in_channels = file.get_tensor(f"{prefix}y_embedder.mlp.0.weight").shape[1] + context_shape = file.get_tensor(f"{prefix}context_embedder.weight").shape + context_embedder_config = { + "target": "torch.nn.Linear", + "params": { + "in_features": context_shape[1], + "out_features": context_shape[0] + } + } + self.diffusion_model = MMDiT(input_size=None, pos_embed_scaling_factor=None, pos_embed_offset=None, pos_embed_max_size=pos_embed_max_size, patch_size=patch_size, in_channels=16, depth=depth, num_patches=num_patches, adm_in_channels=adm_in_channels, context_embedder_config=context_embedder_config, device=device, dtype=dtype) + self.model_sampling = ModelSamplingDiscreteFlow(shift=shift) + + def apply_model(self, x, sigma, c_crossattn=None, y=None): + dtype = self.get_dtype() + timestep = self.model_sampling.timestep(sigma).float() + model_output = self.diffusion_model(x.to(dtype), timestep, context=c_crossattn.to(dtype), y=y.to(dtype)).float() + return self.model_sampling.calculate_denoised(sigma, model_output, x) + + def forward(self, *args, **kwargs): + return self.apply_model(*args, **kwargs) + + def get_dtype(self): + return self.diffusion_model.dtype + + +class CFGDenoiser(torch.nn.Module): + """Helper for applying CFG Scaling to diffusion outputs""" + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, x, timestep, cond, uncond, cond_scale): + # Run cond and uncond in a batch together + batched = self.model.apply_model(torch.cat([x, x]), torch.cat([timestep, timestep]), c_crossattn=torch.cat([cond["c_crossattn"], uncond["c_crossattn"]]), y=torch.cat([cond["y"], uncond["y"]])) + # Then split and apply CFG Scaling + pos_out, neg_out = batched.chunk(2) + scaled = neg_out + (pos_out - neg_out) * cond_scale + return scaled + + +class SD3LatentFormat: + """Latents are slightly shifted from center - this class must be called after VAE Decode to correct for the shift""" + def __init__(self): + self.scale_factor = 1.5305 + self.shift_factor = 0.0609 + + def process_in(self, latent): + return (latent - self.shift_factor) * self.scale_factor + + def process_out(self, latent): + return (latent / self.scale_factor) + self.shift_factor + + def decode_latent_to_preview(self, x0): + """Quick RGB approximate preview of sd3 latents""" + factors = torch.tensor([ + [-0.0645, 0.0177, 0.1052], [ 0.0028, 0.0312, 0.0650], + [ 0.1848, 0.0762, 0.0360], [ 0.0944, 0.0360, 0.0889], + [ 0.0897, 0.0506, -0.0364], [-0.0020, 0.1203, 0.0284], + [ 0.0855, 0.0118, 0.0283], [-0.0539, 0.0658, 0.1047], + [-0.0057, 0.0116, 0.0700], [-0.0412, 0.0281, -0.0039], + [ 0.1106, 0.1171, 0.1220], [-0.0248, 0.0682, -0.0481], + [ 0.0815, 0.0846, 0.1207], [-0.0120, -0.0055, -0.0867], + [-0.0749, -0.0634, -0.0456], [-0.1418, -0.1457, -0.1259] + ], device="cpu") + latent_image = x0[0].permute(1, 2, 0).cpu() @ factors + + latents_ubyte = (((latent_image + 1) / 2) + .clamp(0, 1) # change scale from -1..1 to 0..1 + .mul(0xFF) # to 0..255 + .byte()).cpu() + + return Image.fromarray(latents_ubyte.numpy()) + + +################################################################################################# +### K-Diffusion Sampling 
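# A minimal, hypothetical sketch (not part of the reference implementation) of how the wrappers
# above are meant to compose for a text-to-image denoise: `model` is a loaded BaseModel, `cond` and
# `uncond` are dicts holding "c_crossattn" and "y" tensors from the text encoders, and the step
# count, cfg_scale and the evenly spaced sigma schedule are arbitrary example choices.
def example_txt2img_sample(model, cond, uncond, width=1024, height=1024, steps=20, cfg_scale=5.0):
    denoiser = CFGDenoiser(model)
    # walk the noise levels from sigma_max down to sigma_min, ending on 0 for the final step
    sigmas = torch.linspace(float(model.model_sampling.sigma_max), float(model.model_sampling.sigma_min), steps)
    sigmas = torch.cat([sigmas, sigmas.new_zeros([1])])
    x = torch.randn(1, 16, height // 8, width // 8) * sigmas[0]  # SD3 latents have 16 channels
    x = sample_euler(denoiser, x, sigmas, extra_args={"cond": cond, "uncond": uncond, "cond_scale": cfg_scale})
    return SD3LatentFormat().process_out(x)  # undo the latent shift/scale before handing to the VAE decoder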
+################################################################################################# + + +def append_dims(x, target_dims): + """Appends dimensions to the end of a tensor until it has target_dims dimensions.""" + dims_to_append = target_dims - x.ndim + return x[(...,) + (None,) * dims_to_append] + + +def to_d(x, sigma, denoised): + """Converts a denoiser output to a Karras ODE derivative.""" + return (x - denoised) / append_dims(sigma, x.ndim) + + +@torch.no_grad() +@torch.autocast("cuda", dtype=torch.float16) +def sample_euler(model, x, sigmas, extra_args=None): + """Implements Algorithm 2 (Euler steps) from Karras et al. (2022).""" + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + for i in range(len(sigmas) - 1): + sigma_hat = sigmas[i] + denoised = model(x, sigma_hat * s_in, **extra_args) + d = to_d(x, sigma_hat, denoised) + dt = sigmas[i + 1] - sigma_hat + # Euler method + x = x + d * dt + return x + + +################################################################################################# +### VAE +################################################################################################# + + +def Normalize(in_channels, num_groups=32, dtype=torch.float32, device=None): + return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True, dtype=dtype, device=device) + + +class ResnetBlock(torch.nn.Module): + def __init__(self, *, in_channels, out_channels=None, dtype=torch.float32, device=None): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + + self.norm1 = Normalize(in_channels, dtype=dtype, device=device) + self.conv1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.norm2 = Normalize(out_channels, dtype=dtype, device=device) + self.conv2 = torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + if self.in_channels != self.out_channels: + self.nin_shortcut = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + else: + self.nin_shortcut = None + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, x): + hidden = x + hidden = self.norm1(hidden) + hidden = self.swish(hidden) + hidden = self.conv1(hidden) + hidden = self.norm2(hidden) + hidden = self.swish(hidden) + hidden = self.conv2(hidden) + if self.in_channels != self.out_channels: + x = self.nin_shortcut(x) + return x + hidden + + +class AttnBlock(torch.nn.Module): + def __init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.norm = Normalize(in_channels, dtype=dtype, device=device) + self.q = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.k = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.v = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + self.proj_out = torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0, dtype=dtype, device=device) + + def forward(self, x): + hidden = self.norm(x) + q = self.q(hidden) + k = self.k(hidden) + v = self.v(hidden) + b, c, h, w = q.shape + q, k, v = map(lambda x: einops.rearrange(x, "b c h w -> b 1 (h w) c").contiguous(), (q, k, v)) + hidden = 
torch.nn.functional.scaled_dot_product_attention(q, k, v) # scale is dim ** -0.5 per default + hidden = einops.rearrange(hidden, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b) + hidden = self.proj_out(hidden) + return x + hidden + + +class Downsample(torch.nn.Module): + def __init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0, dtype=dtype, device=device) + + def forward(self, x): + pad = (0,1,0,1) + x = torch.nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(torch.nn.Module): + def __init__(self, in_channels, dtype=torch.float32, device=None): + super().__init__() + self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + + def forward(self, x): + x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + return x + + +class VAEEncoder(torch.nn.Module): + def __init__(self, ch=128, ch_mult=(1,2,4,4), num_res_blocks=2, in_channels=3, z_channels=16, dtype=torch.float32, device=None): + super().__init__() + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + # downsampling + self.conv_in = torch.nn.Conv2d(in_channels, ch, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = torch.nn.ModuleList() + for i_level in range(self.num_resolutions): + block = torch.nn.ModuleList() + attn = torch.nn.ModuleList() + block_in = ch*in_ch_mult[i_level] + block_out = ch*ch_mult[i_level] + for i_block in range(num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) + block_in = block_out + down = torch.nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in, dtype=dtype, device=device) + self.down.append(down) + # middle + self.mid = torch.nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + self.mid.attn_1 = AttnBlock(block_in, dtype=dtype, device=device) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + # end + self.norm_out = Normalize(block_in, dtype=dtype, device=device) + self.conv_out = torch.nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, x): + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1]) + hs.append(h) + if i_level != self.num_resolutions-1: + hs.append(self.down[i_level].downsample(hs[-1])) + # middle + h = hs[-1] + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + # end + h = self.norm_out(h) + h = self.swish(h) + h = self.conv_out(h) + return h + + +class VAEDecoder(torch.nn.Module): + def __init__(self, ch=128, out_ch=3, ch_mult=(1, 2, 4, 4), num_res_blocks=2, resolution=256, z_channels=16, dtype=torch.float32, device=None): + super().__init__() + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** (self.num_resolutions - 1) + # z to block_in + self.conv_in = 
torch.nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + # middle + self.mid = torch.nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + self.mid.attn_1 = AttnBlock(block_in, dtype=dtype, device=device) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in, dtype=dtype, device=device) + # upsampling + self.up = torch.nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = torch.nn.ModuleList() + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) + block_in = block_out + up = torch.nn.Module() + up.block = block + if i_level != 0: + up.upsample = Upsample(block_in, dtype=dtype, device=device) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + # end + self.norm_out = Normalize(block_in, dtype=dtype, device=device) + self.conv_out = torch.nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1, dtype=dtype, device=device) + self.swish = torch.nn.SiLU(inplace=True) + + def forward(self, z): + # z to block_in + hidden = self.conv_in(z) + # middle + hidden = self.mid.block_1(hidden) + hidden = self.mid.attn_1(hidden) + hidden = self.mid.block_2(hidden) + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + hidden = self.up[i_level].block[i_block](hidden) + if i_level != 0: + hidden = self.up[i_level].upsample(hidden) + # end + hidden = self.norm_out(hidden) + hidden = self.swish(hidden) + hidden = self.conv_out(hidden) + return hidden + + +class SDVAE(torch.nn.Module): + def __init__(self, dtype=torch.float32, device=None): + super().__init__() + self.encoder = VAEEncoder(dtype=dtype, device=device) + self.decoder = VAEDecoder(dtype=dtype, device=device) + + @torch.autocast("cuda", dtype=torch.float16) + def decode(self, latent): + return self.decoder(latent) + + @torch.autocast("cuda", dtype=torch.float16) + def encode(self, image): + hidden = self.encoder(image) + mean, logvar = torch.chunk(hidden, 2, dim=1) + logvar = torch.clamp(logvar, -30.0, 20.0) + std = torch.exp(0.5 * logvar) + return mean + std * torch.randn_like(mean) From 5b2a60b8e2b7fb1221359047cbe9bc1f6cf0c51d Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 08:04:31 +0300 Subject: [PATCH 112/201] initial SD3 support --- README.md | 2 +- configs/sd3-inference.yaml | 5 + extensions-builtin/Lora/networks.py | 4 +- modules/models/sd3/mmdit.py | 3 +- modules/models/sd3/sd3_impls.py | 14 +-- modules/models/sd3/sd3_model.py | 166 ++++++++++++++++++++++++++++ modules/processing.py | 3 +- modules/sd_models.py | 87 ++++++++++++--- modules/sd_models_config.py | 7 +- modules/sd_models_types.py | 6 + modules/sd_samplers_common.py | 4 +- modules/sd_samplers_kdiffusion.py | 9 +- modules/sd_vae_approx.py | 27 ++++- modules/sd_vae_taesd.py | 40 +++++-- 14 files changed, 333 insertions(+), 44 deletions(-) create mode 100644 configs/sd3-inference.yaml create mode 100644 modules/models/sd3/sd3_model.py diff --git a/README.md b/README.md index bc08e7ad155..fc582e15ced 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ For the purposes of getting Google and other search engines to crawl the wiki, h ## Credits Licenses for borrowed code can be found in `Settings -> Licenses` screen, and also in 
`html/licenses.html` file. -- Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers +- Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers, https://github.com/mcmonkey4eva/sd3-ref - k-diffusion - https://github.com/crowsonkb/k-diffusion.git - Spandrel - https://github.com/chaiNNer-org/spandrel implementing - GFPGAN - https://github.com/TencentARC/GFPGAN.git diff --git a/configs/sd3-inference.yaml b/configs/sd3-inference.yaml new file mode 100644 index 00000000000..bccb69d2ea3 --- /dev/null +++ b/configs/sd3-inference.yaml @@ -0,0 +1,5 @@ +model: + target: modules.models.sd3.sd3_model.SD3Inferencer + params: + shift: 3 + state_dict: null diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 8869d2c82b2..63e8c946594 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -130,7 +130,9 @@ def assign_network_names_to_compvis_modules(sd_model): network_layer_mapping[network_name] = module module.network_layer_name = network_name else: - for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules(): + cond_stage_model = getattr(shared.sd_model.cond_stage_model, 'wrapped', shared.sd_model.cond_stage_model) + + for name, module in cond_stage_model.named_modules(): network_name = name.replace(".", "_") network_layer_mapping[network_name] = module module.network_layer_name = network_name diff --git a/modules/models/sd3/mmdit.py b/modules/models/sd3/mmdit.py index 6d8b65bdf45..5ec73c05483 100644 --- a/modules/models/sd3/mmdit.py +++ b/modules/models/sd3/mmdit.py @@ -6,7 +6,8 @@ import torch import torch.nn as nn from einops import rearrange, repeat -from other_impls import attention, Mlp +from modules.models.sd3.other_impls import attention, Mlp + class PatchEmbed(nn.Module): """ 2D Image to Patch Embedding""" diff --git a/modules/models/sd3/sd3_impls.py b/modules/models/sd3/sd3_impls.py index 6e9d0a4db7c..91dad66d096 100644 --- a/modules/models/sd3/sd3_impls.py +++ b/modules/models/sd3/sd3_impls.py @@ -1,7 +1,7 @@ ### Impls of the SD3 core diffusion model and VAE import torch, math, einops -from mmdit import MMDiT +from modules.models.sd3.mmdit import MMDiT from PIL import Image @@ -46,16 +46,16 @@ def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): class BaseModel(torch.nn.Module): """Wrapper around the core MM-DiT model""" - def __init__(self, shift=1.0, device=None, dtype=torch.float32, file=None, prefix=""): + def __init__(self, shift=1.0, device=None, dtype=torch.float32, state_dict=None, prefix=""): super().__init__() # Important configuration values can be quickly determined by checking shapes in the source file # Some of these will vary between models (eg 2B vs 8B primarily differ in their depth, but also other details change) - patch_size = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[2] - depth = file.get_tensor(f"{prefix}x_embedder.proj.weight").shape[0] // 64 - num_patches = file.get_tensor(f"{prefix}pos_embed").shape[1] + patch_size = state_dict[f"{prefix}x_embedder.proj.weight"].shape[2] + depth = state_dict[f"{prefix}x_embedder.proj.weight"].shape[0] // 64 + num_patches = state_dict[f"{prefix}pos_embed"].shape[1] pos_embed_max_size = round(math.sqrt(num_patches)) - adm_in_channels = file.get_tensor(f"{prefix}y_embedder.mlp.0.weight").shape[1] - context_shape = file.get_tensor(f"{prefix}context_embedder.weight").shape + adm_in_channels = 
state_dict[f"{prefix}y_embedder.mlp.0.weight"].shape[1] + context_shape = state_dict[f"{prefix}context_embedder.weight"].shape context_embedder_config = { "target": "torch.nn.Linear", "params": { diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py new file mode 100644 index 00000000000..8b828524480 --- /dev/null +++ b/modules/models/sd3/sd3_model.py @@ -0,0 +1,166 @@ +import contextlib +import os +from typing import Mapping + +import safetensors +import torch + +import k_diffusion +from modules.models.sd3.other_impls import SDClipModel, SDXLClipG, T5XXLModel, SD3Tokenizer +from modules.models.sd3.sd3_impls import BaseModel, SDVAE, SD3LatentFormat + +from modules import shared, modelloader, devices + +CLIPG_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/clip_g.safetensors" +CLIPG_CONFIG = { + "hidden_act": "gelu", + "hidden_size": 1280, + "intermediate_size": 5120, + "num_attention_heads": 20, + "num_hidden_layers": 32, +} + +CLIPL_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/clip_l.safetensors" +CLIPL_CONFIG = { + "hidden_act": "quick_gelu", + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, +} + +T5_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/t5xxl_fp16.safetensors" +T5_CONFIG = { + "d_ff": 10240, + "d_model": 4096, + "num_heads": 64, + "num_layers": 24, + "vocab_size": 32128, +} + + +class SafetensorsMapping(Mapping): + def __init__(self, file): + self.file = file + + def __len__(self): + return len(self.file.keys()) + + def __iter__(self): + for key in self.file.keys(): + yield key + + def __getitem__(self, key): + return self.file.get_tensor(key) + + +class SD3Cond(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.tokenizer = SD3Tokenizer() + + with torch.no_grad(): + self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=torch.float32) + self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=torch.float32, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) + self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=torch.float32) + + self.weights_loaded = False + + def forward(self, prompts: list[str]): + res = [] + + for prompt in prompts: + tokens = self.tokenizer.tokenize_with_weights(prompt) + l_out, l_pooled = self.clip_l.encode_token_weights(tokens["l"]) + g_out, g_pooled = self.clip_g.encode_token_weights(tokens["g"]) + t5_out, t5_pooled = self.t5xxl.encode_token_weights(tokens["t5xxl"]) + lg_out = torch.cat([l_out, g_out], dim=-1) + lg_out = torch.nn.functional.pad(lg_out, (0, 4096 - lg_out.shape[-1])) + lgt_out = torch.cat([lg_out, t5_out], dim=-2) + vector_out = torch.cat((l_pooled, g_pooled), dim=-1) + + res.append({ + 'crossattn': lgt_out[0].to(devices.device), + 'vector': vector_out[0].to(devices.device), + }) + + return res + + def load_weights(self): + if self.weights_loaded: + return + + clip_path = os.path.join(shared.models_path, "CLIP") + + clip_g_file = modelloader.load_file_from_url(CLIPG_URL, model_dir=clip_path, file_name="clip_g.safetensors") + with safetensors.safe_open(clip_g_file, framework="pt") as file: + self.clip_g.transformer.load_state_dict(SafetensorsMapping(file)) + + clip_l_file = modelloader.load_file_from_url(CLIPL_URL, model_dir=clip_path, file_name="clip_l.safetensors") + with 
safetensors.safe_open(clip_l_file, framework="pt") as file: + self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + + t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") + with safetensors.safe_open(t5_file, framework="pt") as file: + self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + + self.weights_loaded = True + + def encode_embedding_init_text(self, init_text, nvpt): + return torch.tensor([[0]], device=devices.device) # XXX + + +class SD3Denoiser(k_diffusion.external.DiscreteSchedule): + def __init__(self, inner_model, sigmas): + super().__init__(sigmas, quantize=shared.opts.enable_quantization) + self.inner_model = inner_model + + def forward(self, input, sigma, **kwargs): + return self.inner_model.apply_model(input, sigma, **kwargs) + + +class SD3Inferencer(torch.nn.Module): + def __init__(self, state_dict, shift=3, use_ema=False): + super().__init__() + + self.shift = shift + + with torch.no_grad(): + self.model = BaseModel(shift=shift, state_dict=state_dict, prefix="model.diffusion_model.", device="cpu", dtype=devices.dtype) + self.first_stage_model = SDVAE(device="cpu", dtype=devices.dtype_vae) + self.first_stage_model.dtype = self.model.diffusion_model.dtype + + self.alphas_cumprod = 1 / (self.model.model_sampling.sigmas ** 2 + 1) + + self.cond_stage_model = SD3Cond() + self.cond_stage_key = 'txt' + + self.parameterization = "eps" + self.model.conditioning_key = "crossattn" + + self.latent_format = SD3LatentFormat() + self.latent_channels = 16 + + def after_load_weights(self): + self.cond_stage_model.load_weights() + + def ema_scope(self): + return contextlib.nullcontext() + + def get_learned_conditioning(self, batch: list[str]): + return self.cond_stage_model(batch) + + def apply_model(self, x, t, cond): + return self.model.apply_model(x, t, c_crossattn=cond['crossattn'], y=cond['vector']) + + def decode_first_stage(self, latent): + latent = self.latent_format.process_out(latent) + return self.first_stage_model.decode(latent) + + def encode_first_stage(self, image): + latent = self.first_stage_model.encode(image) + return self.latent_format.process_in(latent) + + def create_denoiser(self): + return SD3Denoiser(self, self.model.model_sampling.sigmas) diff --git a/modules/processing.py b/modules/processing.py index 79a3f0a726c..d32a1811ec3 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -942,7 +942,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: p.seeds = p.all_seeds[n * p.batch_size:(n + 1) * p.batch_size] p.subseeds = p.all_subseeds[n * p.batch_size:(n + 1) * p.batch_size] - p.rng = rng.ImageRNG((opt_C, p.height // opt_f, p.width // opt_f), p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w) + latent_channels = getattr(shared.sd_model, 'latent_channels', opt_C) + p.rng = rng.ImageRNG((latent_channels, p.height // opt_f, p.width // opt_f), p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w) if p.scripts is not None: p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) diff --git a/modules/sd_models.py b/modules/sd_models.py index af35187cdb0..21a98c1de84 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1,7 +1,9 @@ import collections +import importlib import os import 
sys import threading +import enum import torch import re @@ -10,8 +12,6 @@ from urllib import request import ldm.modules.midas as midas -from ldm.util import instantiate_from_config - from modules import paths, shared, modelloader, devices, script_callbacks, sd_vae, sd_disable_initialization, errors, hashes, sd_models_config, sd_unet, sd_models_xl, cache, extra_networks, processing, lowvram, sd_hijack, patches from modules.timer import Timer from modules.shared import opts @@ -27,6 +27,14 @@ checkpoints_loaded = collections.OrderedDict() +class ModelType(enum.Enum): + SD1 = 1 + SD2 = 2 + SDXL = 3 + SSD = 4 + SD3 = 5 + + def replace_key(d, key, new_key, value): keys = list(d.keys()) @@ -368,6 +376,36 @@ def check_fp8(model): return enable_fp8 +def set_model_type(model, state_dict): + model.is_sd1 = False + model.is_sd2 = False + model.is_sdxl = False + model.is_ssd = False + model.is_ssd3 = False + + if "model.diffusion_model.x_embedder.proj.weight" in state_dict: + model.is_sd3 = True + model.model_type = ModelType.SD3 + elif hasattr(model, 'conditioner'): + model.is_sdxl = True + + if 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys(): + model.is_ssd = True + model.model_type = ModelType.SSD + else: + model.model_type = ModelType.SDXL + elif hasattr(model.cond_stage_model, 'model'): + model.is_sd2 = True + model.model_type = ModelType.SD2 + else: + model.is_sd1 = True + model.model_type = ModelType.SD1 + + +def set_model_fields(model): + if not hasattr(model, 'latent_channels'): + model.latent_channels = 4 + def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer): sd_model_hash = checkpoint_info.calculate_shorthash() timer.record("calculate hash") @@ -382,10 +420,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer if state_dict is None: state_dict = get_checkpoint_state_dict(checkpoint_info, timer) - model.is_sdxl = hasattr(model, 'conditioner') - model.is_sd2 = not model.is_sdxl and hasattr(model.cond_stage_model, 'model') - model.is_sd1 = not model.is_sdxl and not model.is_sd2 - model.is_ssd = model.is_sdxl and 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys() + set_model_type(model, state_dict) + set_model_fields(model) + if model.is_sdxl: sd_models_xl.extend_sdxl(model) @@ -552,8 +589,7 @@ def patched_register_schedule(*args, **kwargs): original_register_schedule = patches.patch(__name__, ldm.models.diffusion.ddpm.DDPM, 'register_schedule', patched_register_schedule) -def repair_config(sd_config): - +def repair_config(sd_config, state_dict=None): if not hasattr(sd_config.model.params, "use_ema"): sd_config.model.params.use_ema = False @@ -563,8 +599,9 @@ def repair_config(sd_config): elif shared.cmd_opts.upcast_sampling or shared.cmd_opts.precision == "half": sd_config.model.params.unet_config.params.use_fp16 = True - if getattr(sd_config.model.params.first_stage_config.params.ddconfig, "attn_type", None) == "vanilla-xformers" and not shared.xformers_available: - sd_config.model.params.first_stage_config.params.ddconfig.attn_type = "vanilla" + if hasattr(sd_config.model.params, 'first_stage_config'): + if getattr(sd_config.model.params.first_stage_config.params.ddconfig, "attn_type", None) == "vanilla-xformers" and not shared.xformers_available: + sd_config.model.params.first_stage_config.params.ddconfig.attn_type = "vanilla" # For UnCLIP-L, override the hardcoded karlo directory if hasattr(sd_config.model.params, 
"noise_aug_config") and hasattr(sd_config.model.params.noise_aug_config.params, "clip_stats_path"): @@ -580,6 +617,7 @@ def repair_config(sd_config): sd_config.model.params.unet_config.params.use_checkpoint = False + def rescale_zero_terminal_snr_abar(alphas_cumprod): alphas_bar_sqrt = alphas_cumprod.sqrt() @@ -715,6 +753,25 @@ def send_model_to_trash(m): devices.torch_gc() +def instantiate_from_config(config, state_dict=None): + constructor = get_obj_from_str(config["target"]) + + params = {**config.get("params", {})} + + if state_dict and "state_dict" in params and params["state_dict"] is None: + params["state_dict"] = state_dict + + return constructor(**params) + + +def get_obj_from_str(string, reload=False): + module, cls = string.rsplit(".", 1) + if reload: + module_imp = importlib.import_module(module) + importlib.reload(module_imp) + return getattr(importlib.import_module(module, package=None), cls) + + def load_model(checkpoint_info=None, already_loaded_state_dict=None): from modules import sd_hijack checkpoint_info = checkpoint_info or select_checkpoint() @@ -739,7 +796,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): timer.record("find config") sd_config = OmegaConf.load(checkpoint_config) - repair_config(sd_config) + repair_config(sd_config, state_dict) timer.record("load config") @@ -749,7 +806,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): try: with sd_disable_initialization.DisableInitialization(disable_clip=clip_is_included_into_sd or shared.cmd_opts.do_not_download_clip): with sd_disable_initialization.InitializeOnMeta(): - sd_model = instantiate_from_config(sd_config.model) + sd_model = instantiate_from_config(sd_config.model, state_dict) except Exception as e: errors.display(e, "creating model quickly", full_traceback=True) @@ -758,7 +815,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): print('Failed to create model quickly; will retry using slow method.', file=sys.stderr) with sd_disable_initialization.InitializeOnMeta(): - sd_model = instantiate_from_config(sd_config.model) + sd_model = instantiate_from_config(sd_config.model, state_dict) sd_model.used_config = checkpoint_config @@ -775,6 +832,10 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): with sd_disable_initialization.LoadStateDictOnMeta(state_dict, device=model_target_device(sd_model), weight_dtype_conversion=weight_dtype_conversion): load_model_weights(sd_model, checkpoint_info, state_dict, timer) + + if hasattr(sd_model, "after_load_weights"): + sd_model.after_load_weights() + timer.record("load weights from state dict") send_model_to_device(sd_model) diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 9cec4f13dc2..7cfeca67f71 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -23,6 +23,8 @@ config_instruct_pix2pix = os.path.join(sd_configs_path, "instruct-pix2pix.yaml") config_alt_diffusion = os.path.join(sd_configs_path, "alt-diffusion-inference.yaml") config_alt_diffusion_m18 = os.path.join(sd_configs_path, "alt-diffusion-m18-inference.yaml") +config_sd3 = os.path.join(sd_configs_path, "sd3-inference.yaml") + def is_using_v_parameterization_for_sd2(state_dict): """ @@ -71,11 +73,15 @@ def guess_model_config_from_state_dict(sd, filename): diffusion_model_input = sd.get('model.diffusion_model.input_blocks.0.0.weight', None) sd2_variations_weight = sd.get('embedder.model.ln_final.weight', None) + if "model.diffusion_model.x_embedder.proj.weight" in sd: + 
return config_sd3 + if sd.get('conditioner.embedders.1.model.ln_final.weight', None) is not None: if diffusion_model_input.shape[1] == 9: return config_sdxl_inpainting else: return config_sdxl + if sd.get('conditioner.embedders.0.model.ln_final.weight', None) is not None: return config_sdxl_refiner elif sd.get('depth_model.model.pretrained.act_postprocess3.0.project.0.bias', None) is not None: @@ -99,7 +105,6 @@ def guess_model_config_from_state_dict(sd, filename): if diffusion_model_input.shape[1] == 8: return config_instruct_pix2pix - if sd.get('cond_stage_model.roberta.embeddings.word_embeddings.weight', None) is not None: if sd.get('cond_stage_model.transformation.weight').size()[0] == 1024: return config_alt_diffusion_m18 diff --git a/modules/sd_models_types.py b/modules/sd_models_types.py index f911fbb68db..2fce2777b2f 100644 --- a/modules/sd_models_types.py +++ b/modules/sd_models_types.py @@ -32,3 +32,9 @@ class WebuiSdModel(LatentDiffusion): is_sd1: bool """True if the model's architecture is SD 1.x""" + + is_sd3: bool + """True if the model's architecture is SD 3""" + + latent_channels: int + """number of layer in latent image representation; will be 16 in SD3 and 4 in other version""" diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index bda578cc5b8..b584b68a962 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -54,7 +54,7 @@ def samples_to_images_tensor(sample, approximation=None, model=None): else: if model is None: model = shared.sd_model - with devices.without_autocast(): # fixes an issue with unstable VAEs that are flaky even in fp32 + with torch.no_grad(), devices.without_autocast(): # fixes an issue with unstable VAEs that are flaky even in fp32 x_sample = model.decode_first_stage(sample.to(model.first_stage_model.dtype)) return x_sample @@ -246,7 +246,7 @@ def __init__(self, funcname): self.eta_infotext_field = 'Eta' self.eta_default = 1.0 - self.conditioning_key = shared.sd_model.model.conditioning_key + self.conditioning_key = getattr(shared.sd_model.model, 'conditioning_key', 'crossattn') self.p = None self.model_wrap_cfg = None diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 64e14e0c2a3..cede0760ad6 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -53,8 +53,13 @@ class CFGDenoiserKDiffusion(sd_samplers_cfg_denoiser.CFGDenoiser): @property def inner_model(self): if self.model_wrap is None: - denoiser = k_diffusion.external.CompVisVDenoiser if shared.sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser - self.model_wrap = denoiser(shared.sd_model, quantize=shared.opts.enable_quantization) + denoiser_constructor = getattr(shared.sd_model, 'create_denoiser', None) + + if denoiser_constructor is not None: + self.model_wrap = denoiser_constructor() + else: + denoiser = k_diffusion.external.CompVisVDenoiser if shared.sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser + self.model_wrap = denoiser(shared.sd_model, quantize=shared.opts.enable_quantization) return self.model_wrap diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py index 3965e223e6f..c5dda7431f1 100644 --- a/modules/sd_vae_approx.py +++ b/modules/sd_vae_approx.py @@ -8,9 +8,9 @@ class VAEApprox(nn.Module): - def __init__(self): + def __init__(self, latent_channels=4): super(VAEApprox, self).__init__() - self.conv1 = nn.Conv2d(4, 8, (7, 7)) + self.conv1 = nn.Conv2d(latent_channels, 8, (7, 7)) self.conv2 = 
nn.Conv2d(8, 16, (5, 5)) self.conv3 = nn.Conv2d(16, 32, (3, 3)) self.conv4 = nn.Conv2d(32, 64, (3, 3)) @@ -40,7 +40,13 @@ def download_model(model_path, model_url): def model(): - model_name = "vaeapprox-sdxl.pt" if getattr(shared.sd_model, 'is_sdxl', False) else "model.pt" + if shared.sd_model.is_sd3: + model_name = "vaeapprox-sd3.pt" + elif shared.sd_model.is_sdxl: + model_name = "vaeapprox-sdxl.pt" + else: + model_name = "model.pt" + loaded_model = sd_vae_approx_models.get(model_name) if loaded_model is None: @@ -52,7 +58,7 @@ def model(): model_path = os.path.join(paths.models_path, "VAE-approx", model_name) download_model(model_path, 'https://github.com/AUTOMATIC1111/stable-diffusion-webui/releases/download/v1.0.0-pre/' + model_name) - loaded_model = VAEApprox() + loaded_model = VAEApprox(latent_channels=shared.sd_model.latent_channels) loaded_model.load_state_dict(torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' else None)) loaded_model.eval() loaded_model.to(devices.device, devices.dtype) @@ -64,7 +70,18 @@ def model(): def cheap_approximation(sample): # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2 - if shared.sd_model.is_sdxl: + if shared.sd_model.is_sd3: + coeffs = [ + [-0.0645, 0.0177, 0.1052], [ 0.0028, 0.0312, 0.0650], + [ 0.1848, 0.0762, 0.0360], [ 0.0944, 0.0360, 0.0889], + [ 0.0897, 0.0506, -0.0364], [-0.0020, 0.1203, 0.0284], + [ 0.0855, 0.0118, 0.0283], [-0.0539, 0.0658, 0.1047], + [-0.0057, 0.0116, 0.0700], [-0.0412, 0.0281, -0.0039], + [ 0.1106, 0.1171, 0.1220], [-0.0248, 0.0682, -0.0481], + [ 0.0815, 0.0846, 0.1207], [-0.0120, -0.0055, -0.0867], + [-0.0749, -0.0634, -0.0456], [-0.1418, -0.1457, -0.1259], + ] + elif shared.sd_model.is_sdxl: coeffs = [ [ 0.3448, 0.4168, 0.4395], [-0.1953, -0.0290, 0.0250], diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 808eb3624fd..d06253d2a88 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -34,9 +34,9 @@ def forward(self, x): return self.fuse(self.conv(x) + self.skip(x)) -def decoder(): +def decoder(latent_channels=4): return nn.Sequential( - Clamp(), conv(4, 64), nn.ReLU(), + Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), @@ -44,13 +44,13 @@ def decoder(): ) -def encoder(): +def encoder(latent_channels=4): return nn.Sequential( conv(3, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 4), + conv(64, latent_channels), ) @@ -58,10 +58,14 @@ class TAESDDecoder(nn.Module): latent_magnitude = 3 latent_shift = 0.5 - def __init__(self, decoder_path="taesd_decoder.pth"): + def __init__(self, decoder_path="taesd_decoder.pth", latent_channels=None): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.decoder = decoder() + + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(decoder_path) else 4 + + self.decoder = decoder(latent_channels) self.decoder.load_state_dict( torch.load(decoder_path, map_location='cpu' if devices.device.type != 'cuda' else 
None)) @@ -70,10 +74,14 @@ class TAESDEncoder(nn.Module): latent_magnitude = 3 latent_shift = 0.5 - def __init__(self, encoder_path="taesd_encoder.pth"): + def __init__(self, encoder_path="taesd_encoder.pth", latent_channels=None): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.encoder = encoder() + + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(encoder_path) else 4 + + self.encoder = encoder(latent_channels) self.encoder.load_state_dict( torch.load(encoder_path, map_location='cpu' if devices.device.type != 'cuda' else None)) @@ -87,7 +95,13 @@ def download_model(model_path, model_url): def decoder_model(): - model_name = "taesdxl_decoder.pth" if getattr(shared.sd_model, 'is_sdxl', False) else "taesd_decoder.pth" + if shared.sd_model.is_sd3: + model_name = "taesd3_decoder.pth" + elif shared.sd_model.is_sdxl: + model_name = "taesdxl_decoder.pth" + else: + model_name = "taesd_decoder.pth" + loaded_model = sd_vae_taesd_models.get(model_name) if loaded_model is None: @@ -106,7 +120,13 @@ def decoder_model(): def encoder_model(): - model_name = "taesdxl_encoder.pth" if getattr(shared.sd_model, 'is_sdxl', False) else "taesd_encoder.pth" + if shared.sd_model.is_sd3: + model_name = "taesd3_encoder.pth" + elif shared.sd_model.is_sdxl: + model_name = "taesdxl_encoder.pth" + else: + model_name = "taesd_encoder.pth" + loaded_model = sd_vae_taesd_models.get(model_name) if loaded_model is None: From 79de09c3df95a54723dbd0676444e9e4fa6f8990 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 08:13:23 +0300 Subject: [PATCH 113/201] linter --- modules/models/sd3/other_impls.py | 19 ++++++++++--------- modules/models/sd3/sd3_impls.py | 10 ++++++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index 2c76e1cb64f..cd10edc8d0e 100644 --- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -1,6 +1,7 @@ ### This file contains impls for underlying related models (CLIP, T5, etc) -import torch, math +import torch +import math from torch import nn from transformers import CLIPTokenizer, T5TokenizerFast @@ -14,7 +15,7 @@ def attention(q, k, v, heads, mask=None): """Convenience wrapper around a basic attention operation""" b, _, dim_head = q.shape dim_head //= heads - q, k, v = map(lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2), (q, k, v)) + q, k, v = [t.view(b, -1, heads, dim_head).transpose(1, 2) for t in (q, k, v)] out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) return out.transpose(1, 2).reshape(b, -1, heads * dim_head) @@ -89,8 +90,8 @@ def forward(self, x, mask=None, intermediate_output=None): if intermediate_output < 0: intermediate_output = len(self.layers) + intermediate_output intermediate = None - for i, l in enumerate(self.layers): - x = l(x, mask) + for i, layer in enumerate(self.layers): + x = layer(x, mask) if i == intermediate_output: intermediate = x.clone() return x, intermediate @@ -215,7 +216,7 @@ def tokenize_with_weights(self, text:str): class ClipTokenWeightEncoder: def encode_token_weights(self, token_weight_pairs): - tokens = list(map(lambda a: a[0], token_weight_pairs[0])) + tokens = [a[0] for a in token_weight_pairs[0]] out, pooled = self([tokens]) if pooled is not None: first_pooled = pooled[0:1].cpu() @@ -229,7 +230,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): 
"""Uses the CLIP transformer encoder for text (from huggingface)""" LAYERS = ["last", "pooled", "hidden"] def __init__(self, device="cpu", max_length=77, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=CLIPTextModel, - special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, return_projected_pooled=True): + special_tokens=None, layer_norm_hidden_state=True, return_projected_pooled=True): super().__init__() assert layer in self.LAYERS self.transformer = model_class(textmodel_json_config, dtype, device) @@ -240,7 +241,7 @@ def __init__(self, device="cpu", max_length=77, layer="last", layer_idx=None, te param.requires_grad = False self.layer = layer self.layer_idx = None - self.special_tokens = special_tokens + self.special_tokens = special_tokens if special_tokens is not None else {"start": 49406, "end": 49407, "pad": 49407} self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) self.layer_norm_hidden_state = layer_norm_hidden_state self.return_projected_pooled = return_projected_pooled @@ -465,8 +466,8 @@ def forward(self, input_ids, intermediate_output=None, final_layer_norm_intermed intermediate = None x = self.embed_tokens(input_ids) past_bias = None - for i, l in enumerate(self.block): - x, past_bias = l(x, past_bias) + for i, layer in enumerate(self.block): + x, past_bias = layer(x, past_bias) if i == intermediate_output: intermediate = x.clone() x = self.final_layer_norm(x) diff --git a/modules/models/sd3/sd3_impls.py b/modules/models/sd3/sd3_impls.py index 91dad66d096..e2f6cad5b52 100644 --- a/modules/models/sd3/sd3_impls.py +++ b/modules/models/sd3/sd3_impls.py @@ -1,6 +1,8 @@ ### Impls of the SD3 core diffusion model and VAE -import torch, math, einops +import torch +import math +import einops from modules.models.sd3.mmdit import MMDiT from PIL import Image @@ -214,7 +216,7 @@ def forward(self, x): k = self.k(hidden) v = self.v(hidden) b, c, h, w = q.shape - q, k, v = map(lambda x: einops.rearrange(x, "b c h w -> b 1 (h w) c").contiguous(), (q, k, v)) + q, k, v = [einops.rearrange(x, "b c h w -> b 1 (h w) c").contiguous() for x in (q, k, v)] hidden = torch.nn.functional.scaled_dot_product_attention(q, k, v) # scale is dim ** -0.5 per default hidden = einops.rearrange(hidden, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b) hidden = self.proj_out(hidden) @@ -259,7 +261,7 @@ def __init__(self, ch=128, ch_mult=(1,2,4,4), num_res_blocks=2, in_channels=3, z attn = torch.nn.ModuleList() block_in = ch*in_ch_mult[i_level] block_out = ch*ch_mult[i_level] - for i_block in range(num_res_blocks): + for _ in range(num_res_blocks): block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) block_in = block_out down = torch.nn.Module() @@ -318,7 +320,7 @@ def __init__(self, ch=128, out_ch=3, ch_mult=(1, 2, 4, 4), num_res_blocks=2, res for i_level in reversed(range(self.num_resolutions)): block = torch.nn.ModuleList() block_out = ch * ch_mult[i_level] - for i_block in range(self.num_res_blocks + 1): + for _ in range(self.num_res_blocks + 1): block.append(ResnetBlock(in_channels=block_in, out_channels=block_out, dtype=dtype, device=device)) block_in = block_out up = torch.nn.Module() From 7ee2114cd9c401ceb390c141e604661055e0aaf4 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 08:18:05 +0300 Subject: [PATCH 114/201] typo --- modules/sd_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models.py 
b/modules/sd_models.py index 21a98c1de84..da083f71d6d 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -381,7 +381,7 @@ def set_model_type(model, state_dict): model.is_sd2 = False model.is_sdxl = False model.is_ssd = False - model.is_ssd3 = False + model.is_sd3 = False if "model.diffusion_model.x_embedder.proj.weight" in state_dict: model.is_sd3 = True From b443fdcf7670d2a8bbd02936b0770957838f5e1d Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 11:04:19 +0300 Subject: [PATCH 115/201] prevent accidental creation of CLIP models in float32 type when user wants float16 --- modules/models/sd3/sd3_model.py | 6 +++--- modules/sd_models.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 8b828524480..d60b04e4ee7 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -61,9 +61,9 @@ def __init__(self, *args, **kwargs): self.tokenizer = SD3Tokenizer() with torch.no_grad(): - self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=torch.float32) - self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=torch.float32, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) - self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=torch.float32) + self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=devices.dtype) + self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=devices.dtype, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) + self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=devices.dtype) self.weights_loaded = False diff --git a/modules/sd_models.py b/modules/sd_models.py index da083f71d6d..61fb881ba5c 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -406,6 +406,7 @@ def set_model_fields(model): if not hasattr(model, 'latent_channels'): model.latent_channels = 4 + def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer): sd_model_hash = checkpoint_info.calculate_shorthash() timer.record("calculate hash") From 80f618ea95f1f833f415fc53733d45f7f3d452db Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 12:52:03 +0300 Subject: [PATCH 116/201] add protobuf==3.20.0 to requirements --- requirements.txt | 1 + requirements_versions.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 9e2ecfe4d67..0d6bac600e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ omegaconf open-clip-torch piexif +protobuf==3.20.0 psutil pytorch_lightning requests diff --git a/requirements_versions.txt b/requirements_versions.txt index 3037a395bfc..d6b83e78af4 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -18,6 +18,7 @@ numpy==1.26.2 omegaconf==2.2.3 open-clip-torch==2.20.0 piexif==1.1.3 +protobuf==3.20.0 psutil==5.9.5 pytorch_lightning==1.9.4 resize-right==0.0.2 From 06d0a5ab4d44728943c799030b8a218f0af4f242 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 14:09:32 +0300 Subject: [PATCH 117/201] fix NaN issue when running without --precision half --- modules/models/sd3/other_impls.py | 3 +-- modules/models/sd3/sd3_model.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index cd10edc8d0e..6e4c5d10d33 100644 
--- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -262,8 +262,7 @@ def set_clip_options(self, options): def forward(self, tokens): backup_embeds = self.transformer.get_input_embeddings() - device = backup_embeds.weight.device - tokens = torch.LongTensor(tokens).to(device) + tokens = torch.asarray(tokens, dtype=torch.int64, device=backup_embeds.weight.device) outputs = self.transformer(tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state) self.transformer.set_input_embeddings(backup_embeds) if self.layer == "last": diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index d60b04e4ee7..bb3e6a3d047 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -149,7 +149,8 @@ def ema_scope(self): return contextlib.nullcontext() def get_learned_conditioning(self, batch: list[str]): - return self.cond_stage_model(batch) + with devices.without_autocast(): + return self.cond_stage_model(batch) def apply_model(self, x, t, cond): return self.model.apply_model(x, t, c_crossattn=cond['crossattn'], y=cond['vector']) From 58dc35a64ac3a5e172b77374c6fa3651fba5a70c Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 14:31:43 +0300 Subject: [PATCH 118/201] change CLIP links to allow anonymous downloading --- modules/models/sd3/sd3_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index bb3e6a3d047..2b98b8847bc 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -11,7 +11,7 @@ from modules import shared, modelloader, devices -CLIPG_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/clip_g.safetensors" +CLIPG_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_g.safetensors" CLIPG_CONFIG = { "hidden_act": "gelu", "hidden_size": 1280, @@ -20,7 +20,7 @@ "num_hidden_layers": 32, } -CLIPL_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/clip_l.safetensors" +CLIPL_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_l.safetensors" CLIPL_CONFIG = { "hidden_act": "quick_gelu", "hidden_size": 768, @@ -29,7 +29,7 @@ "num_hidden_layers": 12, } -T5_URL = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/text_encoders/t5xxl_fp16.safetensors" +T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" T5_CONFIG = { "d_ff": 10240, "d_model": 4096, From d4b814aed609878513f5d0caf60204dda35a9e5a Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 14:39:58 +0300 Subject: [PATCH 119/201] change t5xxl checkpoint to fp8 --- modules/models/sd3/sd3_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 2b98b8847bc..2095f4d2495 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -29,7 +29,7 @@ "num_hidden_layers": 12, } -T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" +T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors" T5_CONFIG = { "d_ff": 10240, "d_model": 4096, 
@@ -101,7 +101,7 @@ def load_weights(self): with safetensors.safe_open(clip_l_file, framework="pt") as file: self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") + t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp8_e4m3fn.safetensors") with safetensors.safe_open(t5_file, framework="pt") as file: self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) From 34b4443cc32de501d7035e5f2f171634a857e44e Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 16 Jun 2024 21:57:17 +0300 Subject: [PATCH 120/201] add an option (on by default) to disable T5 revert t5xxl back to fp16 --- modules/models/sd3/sd3_model.py | 22 ++++++++++++++++------ modules/shared_options.py | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 2095f4d2495..146ddf2e2aa 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -29,7 +29,7 @@ "num_hidden_layers": 12, } -T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors" +T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" T5_CONFIG = { "d_ff": 10240, "d_model": 4096, @@ -63,7 +63,11 @@ def __init__(self, *args, **kwargs): with torch.no_grad(): self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=devices.dtype) self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=devices.dtype, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) - self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=devices.dtype) + + if shared.opts.sd3_enable_t5: + self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=devices.dtype) + else: + self.t5xxl = None self.weights_loaded = False @@ -74,7 +78,12 @@ def forward(self, prompts: list[str]): tokens = self.tokenizer.tokenize_with_weights(prompt) l_out, l_pooled = self.clip_l.encode_token_weights(tokens["l"]) g_out, g_pooled = self.clip_g.encode_token_weights(tokens["g"]) - t5_out, t5_pooled = self.t5xxl.encode_token_weights(tokens["t5xxl"]) + + if self.t5xxl and shared.opts.sd3_enable_t5: + t5_out, t5_pooled = self.t5xxl.encode_token_weights(tokens["t5xxl"]) + else: + t5_out = torch.zeros(l_out.shape[0:2] + (4096,), dtype=l_out.dtype, device=l_out.device) + lg_out = torch.cat([l_out, g_out], dim=-1) lg_out = torch.nn.functional.pad(lg_out, (0, 4096 - lg_out.shape[-1])) lgt_out = torch.cat([lg_out, t5_out], dim=-2) @@ -101,9 +110,10 @@ def load_weights(self): with safetensors.safe_open(clip_l_file, framework="pt") as file: self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp8_e4m3fn.safetensors") - with safetensors.safe_open(t5_file, framework="pt") as file: - self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + if self.t5xxl: + t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") + with safetensors.safe_open(t5_file, framework="pt") as file: + self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) self.weights_loaded = True diff --git a/modules/shared_options.py b/modules/shared_options.py 
index 7bce04686b4..f40832c4067 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -191,6 +191,10 @@ "sdxl_refiner_high_aesthetic_score": OptionInfo(6.0, "SDXL high aesthetic score", gr.Number).info("used for refiner model prompt"), })) +options_templates.update(options_section(('sd3', "Stable Diffusion 3", "sd"), { + "sd3_enable_t5": OptionInfo(False, "Enable T5").info("load T5 text encoder; increases VRAM use by a lot, potentially improving quality of generation; requires model reload to apply"), +})) + options_templates.update(options_section(('vae', "VAE", "sd"), { "sd_vae_explanation": OptionHTML(""" VAE is a neural network that transforms a standard RGB From 663a4d80dfae5510257b362fd0015c8dc8b8bb5e Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Sun, 16 Jun 2024 17:47:21 -0700 Subject: [PATCH 121/201] add new sampler DDIM CFG++ --- modules/sd_samplers_cfg_denoiser.py | 10 ++++++++ modules/sd_samplers_timesteps.py | 1 + modules/sd_samplers_timesteps_impl.py | 37 +++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index a86fa88eee9..c8eeedad3c9 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -58,6 +58,8 @@ def __init__(self, sampler): self.model_wrap = None self.p = None + self.last_noise_uncond = None + # NOTE: masking before denoising can cause the original latents to be oversmoothed # as the original latents do not have noise self.mask_before_denoising = False @@ -160,6 +162,8 @@ def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): # so is_edit_model is set to False to support AND composition. is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0 + is_cfg_pp = 'CFG++' in self.sampler.config.name + conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) @@ -273,10 +277,16 @@ def apply_blend(current_latent): denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model) cfg_denoised_callback(denoised_params) + if is_cfg_pp: + self.last_noise_uncond = x_out[-uncond.shape[0]:] + self.last_noise_uncond = torch.clone(self.last_noise_uncond) + if is_edit_model: denoised = self.combine_denoised_for_edit_model(x_out, cond_scale) elif skip_uncond: denoised = self.combine_denoised(x_out, conds_list, uncond, 1.0) + elif is_cfg_pp: + denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale/12.5) # CFG++ scale of (0, 1) maps to (1.0, 12.5) else: denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) diff --git a/modules/sd_samplers_timesteps.py b/modules/sd_samplers_timesteps.py index 8cc7d3848aa..81edd67d6e4 100644 --- a/modules/sd_samplers_timesteps.py +++ b/modules/sd_samplers_timesteps.py @@ -10,6 +10,7 @@ samplers_timesteps = [ ('DDIM', sd_samplers_timesteps_impl.ddim, ['ddim'], {}), + ('DDIM CFG++', sd_samplers_timesteps_impl.ddim_cfgpp, ['ddim_cfgpp'], {}), ('PLMS', sd_samplers_timesteps_impl.plms, ['plms'], {}), ('UniPC', sd_samplers_timesteps_impl.unipc, ['unipc'], {}), ] diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py index 84867d6ee65..8896cfc9a85 100644 --- a/modules/sd_samplers_timesteps_impl.py +++ b/modules/sd_samplers_timesteps_impl.py @@ -40,6 +40,43 @@ def ddim(model, x, timesteps, 
extra_args=None, callback=None, disable=None, eta= return x +@torch.no_grad() +def ddim_cfgpp(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0): + """ Implements CFG++: Manifold-constrained Classifier Free Guidance For Diffusion Models (2024). + Uses the unconditional noise prediction instead of the conditional noise to guide the denoising direction. + The CFG scale is divided by 12.5 to map CFG from [0.0, 12.5] to [0, 1.0]. + """ + alphas_cumprod = model.inner_model.inner_model.alphas_cumprod + alphas = alphas_cumprod[timesteps] + alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(float64(x)) + sqrt_one_minus_alphas = torch.sqrt(1 - alphas) + sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy())) + + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones((x.shape[0])) + s_x = x.new_ones((x.shape[0], 1, 1, 1)) + for i in tqdm.trange(len(timesteps) - 1, disable=disable): + index = len(timesteps) - 1 - i + + e_t = model(x, timesteps[index].item() * s_in, **extra_args) + last_noise_uncond = model.last_noise_uncond + + a_t = alphas[index].item() * s_x + a_prev = alphas_prev[index].item() * s_x + sigma_t = sigmas[index].item() * s_x + sqrt_one_minus_at = sqrt_one_minus_alphas[index].item() * s_x + + pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() + dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * last_noise_uncond + noise = sigma_t * k_diffusion.sampling.torch.randn_like(x) + x = a_prev.sqrt() * pred_x0 + dir_xt + noise + + if callback is not None: + callback({'x': x, 'i': i, 'sigma': 0, 'sigma_hat': 0, 'denoised': pred_x0}) + + return x + + @torch.no_grad() def plms(model, x, timesteps, extra_args=None, callback=None, disable=None): alphas_cumprod = model.inner_model.inner_model.alphas_cumprod From a772fd9804944cc19c4d6a03ccfbaa6066ce62a8 Mon Sep 17 00:00:00 2001 From: viking1304 Date: Thu, 20 Jun 2024 23:57:59 +0200 Subject: [PATCH 122/201] Update torch for ARM Macs to 2.3.1 --- webui-macos-env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webui-macos-env.sh b/webui-macos-env.sh index ad0736378c6..f390e4d34bc 100644 --- a/webui-macos-env.sh +++ b/webui-macos-env.sh @@ -16,7 +16,7 @@ export PYTORCH_ENABLE_MPS_FALLBACK=1 if [[ "$(sysctl -n machdep.cpu.brand_string)" =~ ^.*"Intel".*$ ]]; then export TORCH_COMMAND="pip install torch==2.1.2 torchvision==0.16.2" else - export TORCH_COMMAND="pip install torch==2.3.0 torchvision==0.18.0" + export TORCH_COMMAND="pip install torch==2.3.1 torchvision==0.18.1" fi #################################################################### From 13f22974a42df9f7a491fefe2912cdf72dcbebaf Mon Sep 17 00:00:00 2001 From: snoppy Date: Fri, 21 Jun 2024 09:52:02 +0800 Subject: [PATCH 123/201] chore: fix typos Signed-off-by: snoppy --- extensions-builtin/LDSR/sd_hijack_ddpm_v1.py | 2 +- modules/api/api.py | 2 +- modules/devices.py | 2 +- modules/models/diffusion/uni_pc/uni_pc.py | 2 +- modules/shared.py | 2 +- modules/util.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py index 9a1e0778f24..51ab1821282 100644 --- a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py +++ b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py @@ -572,7 +572,7 @@ def delta_border(self, h, w): :param h: height :param w: width :return: normalized distance to image border, - wtith min distance = 0 at border and max dist = 0.5 
at image center + with min distance = 0 at border and max dist = 0.5 at image center """ lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2) arr = self.meshgrid(h, w) / lower_right_corner diff --git a/modules/api/api.py b/modules/api/api.py index f468c385275..3d208b711e1 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -372,7 +372,7 @@ def apply_infotext(self, request, tabname, *, script_runner=None, mentioned_scri return {} possible_fields = infotext_utils.paste_fields[tabname]["fields"] - set_fields = request.model_dump(exclude_unset=True) if hasattr(request, "request") else request.dict(exclude_unset=True) # pydantic v1/v2 have differenrt names for this + set_fields = request.model_dump(exclude_unset=True) if hasattr(request, "request") else request.dict(exclude_unset=True) # pydantic v1/v2 have different names for this params = infotext_utils.parse_generation_parameters(request.infotext) def get_field_value(field, params): diff --git a/modules/devices.py b/modules/devices.py index e4f671ac659..d4cf2bc68c8 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -258,7 +258,7 @@ def test_for_nans(x, where): @lru_cache def first_time_calculation(): """ - just do any calculation with pytorch layers - the first time this is done it allocaltes about 700MB of memory and + just do any calculation with pytorch layers - the first time this is done it allocates about 700MB of memory and spends about 2.7 seconds doing that, at least with NVidia. """ diff --git a/modules/models/diffusion/uni_pc/uni_pc.py b/modules/models/diffusion/uni_pc/uni_pc.py index d257a7286fc..3333bc808d0 100644 --- a/modules/models/diffusion/uni_pc/uni_pc.py +++ b/modules/models/diffusion/uni_pc/uni_pc.py @@ -323,7 +323,7 @@ def cond_grad_fn(x, t_input, condition): def model_fn(x, t_continuous, condition, unconditional_condition): """ - The noise predicition model function that is used for DPM-Solver. + The noise prediction model function that is used for DPM-Solver. """ if t_continuous.reshape((-1,)).shape[0] == 1: t_continuous = t_continuous.expand((x.shape[0])) diff --git a/modules/shared.py b/modules/shared.py index a41cd457cef..2a3787f990d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -47,7 +47,7 @@ sd_model: sd_models_types.WebuiSdModel = None settings_components: dict = None -"""assigned from ui.py, a mapping on setting names to gradio components repsponsible for those settings""" +"""assigned from ui.py, a mapping on setting names to gradio components responsible for those settings""" tab_names = [] diff --git a/modules/util.py b/modules/util.py index 0db13736cc7..1fd736c7c32 100644 --- a/modules/util.py +++ b/modules/util.py @@ -156,7 +156,7 @@ def update_file_entry(self, path): def topological_sort(dependencies): """Accepts a dictionary mapping name to its dependencies, returns a list of names ordered according to dependencies. 
- Ignores errors relating to missing dependeencies or circular dependencies + Ignores errors relating to missing dependencies or circular dependencies """ visited = {} From bd85b3f19b3546ce12bcdf1c13a885f94c9e1b6c Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:53:44 +0900 Subject: [PATCH 124/201] remove dont_fix_second_order_samplers_schedule --- modules/shared_options.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 7bce04686b4..b1a484adf86 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -245,7 +245,6 @@ "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."), "no_dpmpp_sde_batch_determinism": OptionInfo(False, "Do not make DPM++ SDE deterministic across different batch sizes."), "use_old_hires_fix_width_height": OptionInfo(False, "For hires fix, use width/height sliders to set final resolution rather than first pass (disables Upscale by, Resize width/height to)."), - "dont_fix_second_order_samplers_schedule": OptionInfo(False, "Do not fix prompt schedule for second order samplers."), "hires_fix_use_firstpass_conds": OptionInfo(False, "For hires fix, calculate conds of second pass using extra networks of first pass."), "use_old_scheduling": OptionInfo(False, "Use old prompt editing timelines.", infotext="Old prompt editing timelines").info("For [red:green:N]; old: If N < 1, it's a fraction of steps (and hires fix uses range from 0 to 1), if N >= 1, it's an absolute number of steps; new: If N has a decimal point in it, it's a fraction of steps (and hires fix uses range from 1 to 2), othewrwise it's an absolute number of steps"), "use_downcasted_alpha_bar": OptionInfo(False, "Downcast model alphas_cumprod to fp16 before sampling. 
For reproducing old seeds.", infotext="Downcast alphas_cumprod"), From 0f40c4b9b116a16b7e81e75a4e78b98a310e414d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 20 Jun 2024 20:02:59 +0900 Subject: [PATCH 125/201] fix Sampler Scheduler autocorrection warning --- modules/sd_samplers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index b8abac4a998..963da5be0bf 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -98,7 +98,7 @@ def get_hr_scheduler_from_infotext(d: dict): @functools.cache -def get_sampler_and_scheduler(sampler_name, scheduler_name): +def get_sampler_and_scheduler(sampler_name, scheduler_name, *, convert_automatic=True): default_sampler = samplers[0] found_scheduler = sd_schedulers.schedulers_map.get(scheduler_name, sd_schedulers.schedulers[0]) @@ -116,7 +116,7 @@ def get_sampler_and_scheduler(sampler_name, scheduler_name): sampler = all_samplers_map.get(name, default_sampler) # revert back to Automatic if it's the default scheduler for the selected sampler - if sampler.options.get('scheduler', None) == found_scheduler.name: + if convert_automatic and sampler.options.get('scheduler', None) == found_scheduler.name: found_scheduler = sd_schedulers.schedulers[0] return sampler.name, found_scheduler.label @@ -124,7 +124,7 @@ def get_sampler_and_scheduler(sampler_name, scheduler_name): def fix_p_invalid_sampler_and_scheduler(p): i_sampler_name, i_scheduler = p.sampler_name, p.scheduler - p.sampler_name, p.scheduler = get_sampler_and_scheduler(p.sampler_name, p.scheduler) + p.sampler_name, p.scheduler = get_sampler_and_scheduler(p.sampler_name, p.scheduler, convert_automatic=False) if p.sampler_name != i_sampler_name or i_scheduler != p.scheduler: logging.warning(f'Sampler Scheduler autocorrection: "{i_sampler_name}" -> "{p.sampler_name}", "{i_scheduler}" -> "{p.scheduler}"') From 109bbda70959c47d26d8a7f9e26bf61030dc162a Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:47:56 +0900 Subject: [PATCH 126/201] fix infotext Lora hashes fro hires fix different lora --- .../Lora/extra_networks_lora.py | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/extensions-builtin/Lora/extra_networks_lora.py b/extensions-builtin/Lora/extra_networks_lora.py index 005ff32cbe3..17a620f77e3 100644 --- a/extensions-builtin/Lora/extra_networks_lora.py +++ b/extensions-builtin/Lora/extra_networks_lora.py @@ -9,6 +9,8 @@ def __init__(self): self.errors = {} """mapping of network names to the number of errors the network had during operation""" + remove_symbols = str.maketrans('', '', ":,") + def activate(self, p, params_list): additional = shared.opts.sd_lora @@ -43,22 +45,15 @@ def activate(self, p, params_list): networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) if shared.opts.lora_add_hashes_to_infotext: - network_hashes = [] - for item in networks.loaded_networks: - shorthash = item.network_on_disk.shorthash - if not shorthash: - continue - - alias = item.mentioned_name - if not alias: - continue + if not getattr(p, "is_hr_pass", False) or not hasattr(p, "lora_hashes"): + p.lora_hashes = {} - alias = alias.replace(":", "").replace(",", "") - - network_hashes.append(f"{alias}: {shorthash}") + for item in networks.loaded_networks: + if item.network_on_disk.shorthash and item.mentioned_name: + p.lora_hashes[item.mentioned_name.translate(self.remove_symbols)] = 
item.network_on_disk.shorthash - if network_hashes: - p.extra_generation_params["Lora hashes"] = ", ".join(network_hashes) + if p.lora_hashes: + p.extra_generation_params["Lora hashes"] = ', '.join(f'{k}: {v}' for k, v in p.lora_hashes.items()) def deactivate(self, p): if self.errors: From 775fa7696b90f0a41852fee604655d4b19de3c4a Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Fri, 21 Jun 2024 20:26:09 +0900 Subject: [PATCH 127/201] ToggleLivePriview in image viewer --- javascript/imageviewer.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js index a3f08ad16c8..9b23f4700b3 100644 --- a/javascript/imageviewer.js +++ b/javascript/imageviewer.js @@ -6,6 +6,8 @@ function closeModal() { function showModal(event) { const source = event.target || event.srcElement; const modalImage = gradioApp().getElementById("modalImage"); + const modalToggleLivePreviewBtn = gradioApp().getElementById("modal_toggle_live_preview"); + modalToggleLivePreviewBtn.innerHTML = opts.js_live_preview_in_modal_lightbox ? "🗇" : "🗆"; const lb = gradioApp().getElementById("lightboxModal"); modalImage.src = source.src; if (modalImage.style.display === 'none') { @@ -152,6 +154,13 @@ function modalZoomToggle(event) { event.stopPropagation(); } +function modalLivePreviewToggle(event) { + const modalToggleLivePreview = gradioApp().getElementById("modal_toggle_live_preview"); + opts.js_live_preview_in_modal_lightbox = !opts.js_live_preview_in_modal_lightbox; + modalToggleLivePreview.innerHTML = opts.js_live_preview_in_modal_lightbox ? "🗇" : "🗆"; + event.stopPropagation(); +} + function modalTileImageToggle(event) { const modalImage = gradioApp().getElementById("modalImage"); const modal = gradioApp().getElementById("lightboxModal"); @@ -209,6 +218,14 @@ document.addEventListener("DOMContentLoaded", function() { modalSave.title = "Save Image(s)"; modalControls.appendChild(modalSave); + const modalToggleLivePreview = document.createElement('span'); + modalToggleLivePreview.className = 'modalToggleLivePreview cursor'; + modalToggleLivePreview.id = "modal_toggle_live_preview"; + modalToggleLivePreview.innerHTML = "🗆"; + modalToggleLivePreview.onclick = modalLivePreviewToggle; + modalToggleLivePreview.title = "Toggle live preview"; + modalControls.appendChild(modalToggleLivePreview); + const modalClose = document.createElement('span'); modalClose.className = 'modalClose cursor'; modalClose.innerHTML = '×'; From c3ef381cd8c85ff05eba7d1d1d6295bcb105137d Mon Sep 17 00:00:00 2001 From: huchenlei Date: Sun, 23 Jun 2024 11:19:04 -0400 Subject: [PATCH 128/201] Fix SD2 loading --- modules/sd_hijack_unet.py | 2 ++ modules/sd_models_config.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index b4f03b138a4..6d657511985 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -138,6 +138,7 @@ def hijack_ddpm_edit(): CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond) +# Always make sure inputs to unet are in correct dtype CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model) CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model) @@ -150,5 +151,6 @@ def 
timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs): return orig_func(timesteps, *args, **kwargs).to(dtype=dtype) +# Always make sure timestep calculation is in correct dtype CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 9cec4f13dc2..928beb57e83 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -54,14 +54,19 @@ def is_using_v_parameterization_for_sd2(state_dict): unet.eval() with torch.no_grad(): + unet_dtype = torch.float + original_unet_dtype = devices.dtype_unet + unet_sd = {k.replace("model.diffusion_model.", ""): v for k, v in state_dict.items() if "model.diffusion_model." in k} unet.load_state_dict(unet_sd, strict=True) - unet.to(device=device, dtype=torch.float) + unet.to(device=device, dtype=unet_dtype) + devices.dtype_unet = unet_dtype test_cond = torch.ones((1, 2, 1024), device=device) * 0.5 x_test = torch.ones((1, 4, 8, 8), device=device) * 0.5 out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().item() + devices.dtype_unet = original_unet_dtype return out < -1 From 731eb7277454fe16b094f54750e05d20e05a8be8 Mon Sep 17 00:00:00 2001 From: Andray Date: Sun, 23 Jun 2024 21:16:48 +0400 Subject: [PATCH 129/201] fix sd2 switching --- modules/sd_models_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 9cec4f13dc2..733d70afd46 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -31,7 +31,7 @@ def is_using_v_parameterization_for_sd2(state_dict): import ldm.modules.diffusionmodules.openaimodel - device = devices.cpu + device = devices.device with sd_disable_initialization.DisableInitialization(): unet = ldm.modules.diffusionmodules.openaimodel.UNetModel( From a65dd315adcab0467d652160b26a95604573530c Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 24 Jun 2024 09:06:10 +0300 Subject: [PATCH 130/201] fix T5 --- modules/models/sd3/other_impls.py | 38 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index 6e4c5d10d33..d7b9b262114 100644 --- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -11,6 +11,18 @@ ################################################################################################# +class AutocastLinear(nn.Linear): + """Same as usual linear layer, but casts its weights to whatever the parameter type is. + + This is different from torch.autocast in a way that float16 layer processing float32 input + will return float16 with autocast on, and float32 with this. T5 seems to be fucked + if you do it in full float16 (returning almost all zeros in the final output). 
+ """ + + def forward(self, x): + return torch.nn.functional.linear(x, self.weight.to(x.dtype), self.bias.to(x.dtype) if self.bias is not None else None) + + def attention(q, k, v, heads, mask=None): """Convenience wrapper around a basic attention operation""" b, _, dim_head = q.shape @@ -27,9 +39,9 @@ def __init__(self, in_features, hidden_features=None, out_features=None, act_lay out_features = out_features or in_features hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features, bias=bias, dtype=dtype, device=device) + self.fc1 = AutocastLinear(in_features, hidden_features, bias=bias, dtype=dtype, device=device) self.act = act_layer - self.fc2 = nn.Linear(hidden_features, out_features, bias=bias, dtype=dtype, device=device) + self.fc2 = AutocastLinear(hidden_features, out_features, bias=bias, dtype=dtype, device=device) def forward(self, x): x = self.fc1(x) @@ -297,7 +309,6 @@ def __init__(self, config, device="cpu", layer="last", layer_idx=None, dtype=Non ### T5 implementation, for the T5-XXL text encoder portion, largely pulled from upstream impl ################################################################################################# - class T5XXLTokenizer(SDTokenizer): """Wraps the T5 Tokenizer from HF into the SDTokenizer interface""" def __init__(self): @@ -319,9 +330,9 @@ def forward(self, x): class T5DenseGatedActDense(torch.nn.Module): def __init__(self, model_dim, ff_dim, dtype, device): super().__init__() - self.wi_0 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) - self.wi_1 = nn.Linear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) - self.wo = nn.Linear(ff_dim, model_dim, bias=False, dtype=dtype, device=device) + self.wi_0 = AutocastLinear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wi_1 = AutocastLinear(model_dim, ff_dim, bias=False, dtype=dtype, device=device) + self.wo = AutocastLinear(ff_dim, model_dim, bias=False, dtype=dtype, device=device) def forward(self, x): hidden_gelu = torch.nn.functional.gelu(self.wi_0(x), approximate="tanh") @@ -348,10 +359,10 @@ class T5Attention(torch.nn.Module): def __init__(self, model_dim, inner_dim, num_heads, relative_attention_bias, dtype, device): super().__init__() # Mesh TensorFlow initialization to avoid scaling before softmax - self.q = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) - self.k = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) - self.v = nn.Linear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) - self.o = nn.Linear(inner_dim, model_dim, bias=False, dtype=dtype, device=device) + self.q = AutocastLinear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.k = AutocastLinear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.v = AutocastLinear(model_dim, inner_dim, bias=False, dtype=dtype, device=device) + self.o = AutocastLinear(inner_dim, model_dim, bias=False, dtype=dtype, device=device) self.num_heads = num_heads self.relative_attention_bias = None if relative_attention_bias: @@ -421,11 +432,16 @@ def forward(self, x, past_bias=None): q = self.q(x) k = self.k(x) v = self.v(x) + if self.relative_attention_bias is not None: past_bias = self.compute_bias(x.shape[1], x.shape[1], x.device) if past_bias is not None: mask = past_bias - out = attention(q, k * ((k.shape[-1] / self.num_heads) ** 0.5), v, self.num_heads, mask) + else: + mask = None + + out = attention(q, k * ((k.shape[-1] / self.num_heads) ** 0.5), v, 
self.num_heads, mask.to(x.dtype) if mask is not None else None) + return self.o(out), past_bias From a8fba9af35a081ada8c563fc6c76212dd131be01 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 24 Jun 2024 10:15:46 +0300 Subject: [PATCH 131/201] medvram support for SD3 --- modules/lowvram.py | 28 +++++++++++++++++++++++----- modules/models/sd3/mmdit.py | 1 - modules/models/sd3/sd3_model.py | 12 +++++++++++- modules/sd_samplers_common.py | 2 +- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/modules/lowvram.py b/modules/lowvram.py index 45701046b54..00aad477bb8 100644 --- a/modules/lowvram.py +++ b/modules/lowvram.py @@ -1,9 +1,12 @@ +from collections import namedtuple + import torch from modules import devices, shared module_in_gpu = None cpu = torch.device("cpu") +ModuleWithParent = namedtuple('ModuleWithParent', ['module', 'parent'], defaults=['None']) def send_everything_to_cpu(): global module_in_gpu @@ -75,13 +78,14 @@ def first_stage_model_decode_wrap(z): (sd_model, 'depth_model'), (sd_model, 'embedder'), (sd_model, 'model'), - (sd_model, 'embedder'), ] is_sdxl = hasattr(sd_model, 'conditioner') is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model') - if is_sdxl: + if hasattr(sd_model, 'medvram_fields'): + to_remain_in_cpu = sd_model.medvram_fields() + elif is_sdxl: to_remain_in_cpu.append((sd_model, 'conditioner')) elif is_sd2: to_remain_in_cpu.append((sd_model.cond_stage_model, 'model')) @@ -103,7 +107,21 @@ def first_stage_model_decode_wrap(z): setattr(obj, field, module) # register hooks for those the first three models - if is_sdxl: + if hasattr(sd_model.cond_stage_model, "medvram_modules"): + for module in sd_model.cond_stage_model.medvram_modules(): + if isinstance(module, ModuleWithParent): + parent = module.parent + module = module.module + else: + parent = None + + if module: + module.register_forward_pre_hook(send_me_to_gpu) + + if parent: + parents[module] = parent + + elif is_sdxl: sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu) elif is_sd2: sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu) @@ -117,9 +135,9 @@ def first_stage_model_decode_wrap(z): sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu) sd_model.first_stage_model.encode = first_stage_model_encode_wrap sd_model.first_stage_model.decode = first_stage_model_decode_wrap - if sd_model.depth_model: + if hasattr(sd_model, 'depth_model'): sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu) - if sd_model.embedder: + if hasattr(sd_model, 'embedder'): sd_model.embedder.register_forward_pre_hook(send_me_to_gpu) if use_medvram: diff --git a/modules/models/sd3/mmdit.py b/modules/models/sd3/mmdit.py index 5ec73c05483..4d2b855512b 100644 --- a/modules/models/sd3/mmdit.py +++ b/modules/models/sd3/mmdit.py @@ -492,7 +492,6 @@ def __init__( device = None, ): super().__init__() - print(f"mmdit initializing with: {input_size=}, {patch_size=}, {in_channels=}, {depth=}, {mlp_ratio=}, {learn_sigma=}, {adm_in_channels=}, {context_embedder_config=}, {register_length=}, {attn_mode=}, {rmsnorm=}, {scale_mod_only=}, {swiglu=}, {out_channels=}, {pos_embed_scaling_factor=}, {pos_embed_offset=}, {pos_embed_max_size=}, {num_patches=}, {qk_norm=}, {qkv_bias=}, {dtype=}, {device=}") self.dtype = dtype self.learn_sigma = learn_sigma self.in_channels = in_channels diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 146ddf2e2aa..309a7f863f5 100644 --- a/modules/models/sd3/sd3_model.py +++ 
b/modules/models/sd3/sd3_model.py @@ -120,6 +120,9 @@ def load_weights(self): def encode_embedding_init_text(self, init_text, nvpt): return torch.tensor([[0]], device=devices.device) # XXX + def medvram_modules(self): + return [self.clip_g, self.clip_l, self.t5xxl] + class SD3Denoiser(k_diffusion.external.DiscreteSchedule): def __init__(self, inner_model, sigmas): @@ -163,7 +166,7 @@ def get_learned_conditioning(self, batch: list[str]): return self.cond_stage_model(batch) def apply_model(self, x, t, cond): - return self.model.apply_model(x, t, c_crossattn=cond['crossattn'], y=cond['vector']) + return self.model(x, t, c_crossattn=cond['crossattn'], y=cond['vector']) def decode_first_stage(self, latent): latent = self.latent_format.process_out(latent) @@ -175,3 +178,10 @@ def encode_first_stage(self, image): def create_denoiser(self): return SD3Denoiser(self, self.model.model_sampling.sigmas) + + def medvram_fields(self): + return [ + (self, 'first_stage_model'), + (self, 'cond_stage_model'), + (self, 'model'), + ] diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index b584b68a962..c060cccb24b 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -163,7 +163,7 @@ def apply_refiner(cfg_denoiser, sigma=None): else: # torch.max(sigma) only to handle rare case where we might have different sigmas in the same batch try: - timestep = torch.argmin(torch.abs(cfg_denoiser.inner_model.sigmas - torch.max(sigma))) + timestep = torch.argmin(torch.abs(cfg_denoiser.inner_model.sigmas.to(sigma.device) - torch.max(sigma))) except AttributeError: # for samplers that don't use sigmas (DDIM) sigma is actually the timestep timestep = torch.max(sigma).to(dtype=int) completed_ratio = (999 - timestep) / 1000 From 5d9f1e6a431fb3396ee3bb9000188941a48801f2 Mon Sep 17 00:00:00 2001 From: Andray Date: Tue, 25 Jun 2024 05:33:07 +0400 Subject: [PATCH 132/201] stopping generation in extras --- modules/postprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/postprocessing.py b/modules/postprocessing.py index 8ec122b7c9d..a413d1027c7 100644 --- a/modules/postprocessing.py +++ b/modules/postprocessing.py @@ -51,7 +51,7 @@ def get_images(extras_mode, image, image_folder, input_dir): shared.state.textinfo = name shared.state.skipped = False - if shared.state.interrupted: + if shared.state.interrupted or shared.state.stopping_generation: break if isinstance(image_placeholder, str): From 9e60cdbc3f392b39143ee11b53cfec37c563682b Mon Sep 17 00:00:00 2001 From: cuba3 Date: Tue, 25 Jun 2024 15:24:46 +0800 Subject: [PATCH 133/201] Maintaining Project Compatibility for Python 3.9 Users Without Upgrade Requirements. Sole usage of Python 3.10's match-case in the project hinders quick-start for beginners; consider replacing with if-else for improved accessibility.
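A minimal sketch of the if-else form, for illustration only (the helper name and annotation below are illustrative, mirroring the function this change touches rather than quoting it):

    import torch

    def float64(t: torch.Tensor) -> torch.dtype:
        # Python 3.9-compatible membership test; matches either device-type string.
        # Aside: in a match statement, "case 'mps', 'xpu':" is a sequence (tuple)
        # pattern, not an or-pattern ("case 'mps' | 'xpu':"), so it would not
        # match a plain string like "mps".
        if t.device.type in ('mps', 'xpu'):
            return torch.float32
        return torch.float64

Besides restoring Python 3.9 compatibility, the plain membership test sidesteps the pattern-matching subtlety noted in the comment above.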
--- modules/torch_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/torch_utils.py b/modules/torch_utils.py index a07e02853b1..5ea3da094c5 100644 --- a/modules/torch_utils.py +++ b/modules/torch_utils.py @@ -20,7 +20,6 @@ def get_param(model) -> torch.nn.Parameter: def float64(t: torch.Tensor): """return torch.float64 if device is not mps or xpu, else return torch.float32""" - match t.device.type: - case 'mps', 'xpu': - return torch.float32 + if t.device.type in ['mps', 'xpu']: + return torch.float32 return torch.float64 From ec3c31e7a19f3240bfba072787399eb02b88dc9e Mon Sep 17 00:00:00 2001 From: viking1304 Date: Tue, 25 Jun 2024 21:01:33 +0200 Subject: [PATCH 134/201] Try to use specified python version on linux and mac, with fallback --- webui-macos-env.sh | 5 ----- webui.sh | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/webui-macos-env.sh b/webui-macos-env.sh index ad0736378c6..ae84f1e4abf 100644 --- a/webui-macos-env.sh +++ b/webui-macos-env.sh @@ -4,11 +4,6 @@ # Please modify webui-user.sh to change these instead of this file # #################################################################### -if [[ -x "$(command -v python3.10)" ]] -then - python_cmd="python3.10" -fi - export install_dir="$HOME" export COMMANDLINE_ARGS="--skip-torch-cuda-test --upcast-sampling --no-half-vae --use-cpu interrogate" export PYTORCH_ENABLE_MPS_FALLBACK=1 diff --git a/webui.sh b/webui.sh index 7acea902cfc..4a1bebd00fe 100755 --- a/webui.sh +++ b/webui.sh @@ -44,7 +44,11 @@ fi # python3 executable if [[ -z "${python_cmd}" ]] then - python_cmd="python3" + python_cmd="python3.10" +fi +if [[ ! -x "$(command -v "${python_cmd}")" ]] +then + python_cmd="python3" fi # git executable From d686e73daa6cca399fe68976922cabde681f69f1 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Wed, 26 Jun 2024 23:22:00 +0300 Subject: [PATCH 135/201] support for SD3: infinite prompt length, token counting --- modules/models/sd3/sd3_cond.py | 225 ++++++++++++++++++++++++++++++++ modules/models/sd3/sd3_model.py | 119 +---------------- modules/prompt_parser.py | 2 +- modules/sd_hijack.py | 5 +- modules/sd_hijack_clip.py | 59 ++++++--- modules/sd_models.py | 7 +- 6 files changed, 278 insertions(+), 139 deletions(-) create mode 100644 modules/models/sd3/sd3_cond.py diff --git a/modules/models/sd3/sd3_cond.py b/modules/models/sd3/sd3_cond.py new file mode 100644 index 00000000000..c61ae0fe641 --- /dev/null +++ b/modules/models/sd3/sd3_cond.py @@ -0,0 +1,225 @@ +import os +import safetensors +import torch +import typing + +from transformers import CLIPTokenizer, T5TokenizerFast + +from modules import shared, devices, modelloader, sd_hijack_clip, prompt_parser +from modules.models.sd3.other_impls import SDClipModel, SDXLClipG, T5XXLModel, SD3Tokenizer + + +class SafetensorsMapping(typing.Mapping): + def __init__(self, file): + self.file = file + + def __len__(self): + return len(self.file.keys()) + + def __iter__(self): + for key in self.file.keys(): + yield key + + def __getitem__(self, key): + return self.file.get_tensor(key) + + +CLIPL_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_l.safetensors" +CLIPL_CONFIG = { + "hidden_act": "quick_gelu", + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12, +} + +CLIPG_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_g.safetensors" +CLIPG_CONFIG = { + 
"hidden_act": "gelu", + "hidden_size": 1280, + "intermediate_size": 5120, + "num_attention_heads": 20, + "num_hidden_layers": 32, +} + +T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" +T5_CONFIG = { + "d_ff": 10240, + "d_model": 4096, + "num_heads": 64, + "num_layers": 24, + "vocab_size": 32128, +} + + +class Sd3ClipLG(sd_hijack_clip.TextConditionalModel): + def __init__(self, clip_l, clip_g): + super().__init__() + + self.clip_l = clip_l + self.clip_g = clip_g + + self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + + empty = self.tokenizer('')["input_ids"] + self.id_start = empty[0] + self.id_end = empty[1] + self.id_pad = empty[1] + + self.return_pooled = True + + def tokenize(self, texts): + return self.tokenizer(texts, truncation=False, add_special_tokens=False)["input_ids"] + + def encode_with_transformers(self, tokens): + tokens_g = tokens.clone() + + for batch_pos in range(tokens_g.shape[0]): + index = tokens_g[batch_pos].cpu().tolist().index(self.id_end) + tokens_g[batch_pos, index+1:tokens_g.shape[1]] = 0 + + l_out, l_pooled = self.clip_l(tokens) + g_out, g_pooled = self.clip_g(tokens_g) + + lg_out = torch.cat([l_out, g_out], dim=-1) + lg_out = torch.nn.functional.pad(lg_out, (0, 4096 - lg_out.shape[-1])) + + vector_out = torch.cat((l_pooled, g_pooled), dim=-1) + + lg_out.pooled = vector_out + return lg_out + + def encode_embedding_init_text(self, init_text, nvpt): + return torch.zeros((nvpt, 768+1280), device=devices.device) # XXX + + +class Sd3T5(torch.nn.Module): + def __init__(self, t5xxl): + super().__init__() + + self.t5xxl = t5xxl + self.tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl") + + empty = self.tokenizer('', padding='max_length', max_length=2)["input_ids"] + self.id_end = empty[0] + self.id_pad = empty[1] + + def tokenize(self, texts): + return self.tokenizer(texts, truncation=False, add_special_tokens=False)["input_ids"] + + def tokenize_line(self, line, *, target_token_count=None): + if shared.opts.emphasis != "None": + parsed = prompt_parser.parse_prompt_attention(line) + else: + parsed = [[line, 1.0]] + + tokenized = self.tokenize([text for text, _ in parsed]) + + tokens = [] + multipliers = [] + + for text_tokens, (text, weight) in zip(tokenized, parsed): + if text == 'BREAK' and weight == -1: + continue + + tokens += text_tokens + multipliers += [weight] * len(text_tokens) + + tokens += [self.id_end] + multipliers += [1.0] + + if target_token_count is not None: + if len(tokens) < target_token_count: + tokens += [self.id_pad] * (target_token_count - len(tokens)) + multipliers += [1.0] * (target_token_count - len(tokens)) + else: + tokens = tokens[0:target_token_count] + multipliers = multipliers[0:target_token_count] + + return tokens, multipliers + + def forward(self, texts, *, token_count): + if not self.t5xxl or not shared.opts.sd3_enable_t5: + return torch.zeros((len(texts), token_count, 4096), device=devices.device, dtype=devices.dtype) + + tokens_batch = [] + + for text in texts: + tokens, multipliers = self.tokenize_line(text, target_token_count=token_count) + tokens_batch.append(tokens) + + t5_out, t5_pooled = self.t5xxl(tokens_batch) + + return t5_out + + def encode_embedding_init_text(self, init_text, nvpt): + return torch.zeros((nvpt, 4096), device=devices.device) # XXX + + +class SD3Cond(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.tokenizer = SD3Tokenizer() + + with 
torch.no_grad(): + self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=devices.dtype) + self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=devices.dtype, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) + + if shared.opts.sd3_enable_t5: + self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=devices.dtype) + else: + self.t5xxl = None + + self.model_lg = Sd3ClipLG(self.clip_l, self.clip_g) + self.model_t5 = Sd3T5(self.t5xxl) + + self.weights_loaded = False + + def forward(self, prompts: list[str]): + lg_out, vector_out = self.model_lg(prompts) + + token_count = lg_out.shape[1] + + t5_out = self.model_t5(prompts, token_count=token_count) + lgt_out = torch.cat([lg_out, t5_out], dim=-2) + + return { + 'crossattn': lgt_out, + 'vector': vector_out, + } + + def load_weights(self): + if self.weights_loaded: + return + + clip_path = os.path.join(shared.models_path, "CLIP") + + clip_g_file = modelloader.load_file_from_url(CLIPG_URL, model_dir=clip_path, file_name="clip_g.safetensors") + with safetensors.safe_open(clip_g_file, framework="pt") as file: + self.clip_g.transformer.load_state_dict(SafetensorsMapping(file)) + + clip_l_file = modelloader.load_file_from_url(CLIPL_URL, model_dir=clip_path, file_name="clip_l.safetensors") + with safetensors.safe_open(clip_l_file, framework="pt") as file: + self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + + if self.t5xxl: + t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") + with safetensors.safe_open(t5_file, framework="pt") as file: + self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + + self.weights_loaded = True + + def encode_embedding_init_text(self, init_text, nvpt): + return torch.tensor([[0]], device=devices.device) # XXX + + def medvram_modules(self): + return [self.clip_g, self.clip_l, self.t5xxl] + + def get_token_count(self, text): + _, token_count = self.model_lg.process_texts([text]) + + return token_count + + def get_target_prompt_token_count(self, token_count): + return self.model_lg.get_target_prompt_token_count(token_count) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 309a7f863f5..10209f82aa7 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -1,127 +1,12 @@ import contextlib -import os -from typing import Mapping -import safetensors import torch import k_diffusion -from modules.models.sd3.other_impls import SDClipModel, SDXLClipG, T5XXLModel, SD3Tokenizer from modules.models.sd3.sd3_impls import BaseModel, SDVAE, SD3LatentFormat +from modules.models.sd3.sd3_cond import SD3Cond -from modules import shared, modelloader, devices - -CLIPG_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_g.safetensors" -CLIPG_CONFIG = { - "hidden_act": "gelu", - "hidden_size": 1280, - "intermediate_size": 5120, - "num_attention_heads": 20, - "num_hidden_layers": 32, -} - -CLIPL_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/clip_l.safetensors" -CLIPL_CONFIG = { - "hidden_act": "quick_gelu", - "hidden_size": 768, - "intermediate_size": 3072, - "num_attention_heads": 12, - "num_hidden_layers": 12, -} - -T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" -T5_CONFIG = { - "d_ff": 10240, - "d_model": 4096, - "num_heads": 64, - 
"num_layers": 24, - "vocab_size": 32128, -} - - -class SafetensorsMapping(Mapping): - def __init__(self, file): - self.file = file - - def __len__(self): - return len(self.file.keys()) - - def __iter__(self): - for key in self.file.keys(): - yield key - - def __getitem__(self, key): - return self.file.get_tensor(key) - - -class SD3Cond(torch.nn.Module): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.tokenizer = SD3Tokenizer() - - with torch.no_grad(): - self.clip_g = SDXLClipG(CLIPG_CONFIG, device="cpu", dtype=devices.dtype) - self.clip_l = SDClipModel(layer="hidden", layer_idx=-2, device="cpu", dtype=devices.dtype, layer_norm_hidden_state=False, return_projected_pooled=False, textmodel_json_config=CLIPL_CONFIG) - - if shared.opts.sd3_enable_t5: - self.t5xxl = T5XXLModel(T5_CONFIG, device="cpu", dtype=devices.dtype) - else: - self.t5xxl = None - - self.weights_loaded = False - - def forward(self, prompts: list[str]): - res = [] - - for prompt in prompts: - tokens = self.tokenizer.tokenize_with_weights(prompt) - l_out, l_pooled = self.clip_l.encode_token_weights(tokens["l"]) - g_out, g_pooled = self.clip_g.encode_token_weights(tokens["g"]) - - if self.t5xxl and shared.opts.sd3_enable_t5: - t5_out, t5_pooled = self.t5xxl.encode_token_weights(tokens["t5xxl"]) - else: - t5_out = torch.zeros(l_out.shape[0:2] + (4096,), dtype=l_out.dtype, device=l_out.device) - - lg_out = torch.cat([l_out, g_out], dim=-1) - lg_out = torch.nn.functional.pad(lg_out, (0, 4096 - lg_out.shape[-1])) - lgt_out = torch.cat([lg_out, t5_out], dim=-2) - vector_out = torch.cat((l_pooled, g_pooled), dim=-1) - - res.append({ - 'crossattn': lgt_out[0].to(devices.device), - 'vector': vector_out[0].to(devices.device), - }) - - return res - - def load_weights(self): - if self.weights_loaded: - return - - clip_path = os.path.join(shared.models_path, "CLIP") - - clip_g_file = modelloader.load_file_from_url(CLIPG_URL, model_dir=clip_path, file_name="clip_g.safetensors") - with safetensors.safe_open(clip_g_file, framework="pt") as file: - self.clip_g.transformer.load_state_dict(SafetensorsMapping(file)) - - clip_l_file = modelloader.load_file_from_url(CLIPL_URL, model_dir=clip_path, file_name="clip_l.safetensors") - with safetensors.safe_open(clip_l_file, framework="pt") as file: - self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - - if self.t5xxl: - t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") - with safetensors.safe_open(t5_file, framework="pt") as file: - self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - - self.weights_loaded = True - - def encode_embedding_init_text(self, init_text, nvpt): - return torch.tensor([[0]], device=devices.device) # XXX - - def medvram_modules(self): - return [self.clip_g, self.clip_l, self.t5xxl] +from modules import shared, devices class SD3Denoiser(k_diffusion.external.DiscreteSchedule): diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py index cba1345545d..4e393d2866f 100644 --- a/modules/prompt_parser.py +++ b/modules/prompt_parser.py @@ -268,7 +268,7 @@ def get_multicond_learned_conditioning(model, prompts, steps, hires_steps=None, class DictWithShape(dict): - def __init__(self, x, shape): + def __init__(self, x, shape=None): super().__init__() self.update(x) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index e139d9964cb..d5b2989f4e5 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -325,7 +325,10 @@ 
def get_prompt_lengths(self, text): if self.clip is None: return "-", "-" - _, token_count = self.clip.process_texts([text]) + if hasattr(self.clip, 'get_token_count'): + token_count = self.clip.get_token_count(text) + else: + _, token_count = self.clip.process_texts([text]) return token_count, self.clip.get_target_prompt_token_count(token_count) diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py index 355df3d30d1..a479148fc21 100644 --- a/modules/sd_hijack_clip.py +++ b/modules/sd_hijack_clip.py @@ -27,24 +27,21 @@ def __init__(self): are applied by sd_hijack.EmbeddingsWithFixes's forward function.""" -class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module): - """A pytorch module that is a wrapper for FrozenCLIPEmbedder module. it enhances FrozenCLIPEmbedder, making it possible to - have unlimited prompt length and assign weights to tokens in prompt. - """ - - def __init__(self, wrapped, hijack): +class TextConditionalModel(torch.nn.Module): + def __init__(self): super().__init__() - self.wrapped = wrapped - """Original FrozenCLIPEmbedder module; can also be FrozenOpenCLIPEmbedder or xlmr.BertSeriesModelWithTransformation, - depending on model.""" - - self.hijack: sd_hijack.StableDiffusionModelHijack = hijack + self.hijack = sd_hijack.model_hijack self.chunk_length = 75 - self.is_trainable = getattr(wrapped, 'is_trainable', False) - self.input_key = getattr(wrapped, 'input_key', 'txt') - self.legacy_ucg_val = None + self.is_trainable = False + self.input_key = 'txt' + self.return_pooled = False + + self.comma_token = None + self.id_start = None + self.id_end = None + self.id_pad = None def empty_chunk(self): """creates an empty PromptChunk and returns it""" @@ -210,10 +207,6 @@ def forward(self, texts): is when you do prompt editing: "a picture of a [cat:dog:0.4] eating ice cream" """ - if opts.use_old_emphasis_implementation: - import modules.sd_hijack_clip_old - return modules.sd_hijack_clip_old.forward_old(self, texts) - batch_chunks, token_count = self.process_texts(texts) used_embeddings = {} @@ -252,7 +245,7 @@ def forward(self, texts): if any(x for x in texts if "(" in x or "[" in x) and opts.emphasis != "Original": self.hijack.extra_generation_params["Emphasis"] = opts.emphasis - if getattr(self.wrapped, 'return_pooled', False): + if self.return_pooled: return torch.hstack(zs), zs[0].pooled else: return torch.hstack(zs) @@ -292,6 +285,34 @@ def process_tokens(self, remade_batch_tokens, batch_multipliers): return z +class FrozenCLIPEmbedderWithCustomWordsBase(TextConditionalModel): + """A pytorch module that is a wrapper for FrozenCLIPEmbedder module. it enhances FrozenCLIPEmbedder, making it possible to + have unlimited prompt length and assign weights to tokens in prompt. 
+    """
+
+    def __init__(self, wrapped, hijack):
+        super().__init__()
+
+        self.hijack = hijack
+
+        self.wrapped = wrapped
+        """Original FrozenCLIPEmbedder module; can also be FrozenOpenCLIPEmbedder or xlmr.BertSeriesModelWithTransformation,
+        depending on model."""
+
+        self.is_trainable = getattr(wrapped, 'is_trainable', False)
+        self.input_key = getattr(wrapped, 'input_key', 'txt')
+        self.return_pooled = getattr(self.wrapped, 'return_pooled', False)
+
+        self.legacy_ucg_val = None  # for sgm codebase
+
+    def forward(self, texts):
+        if opts.use_old_emphasis_implementation:
+            import modules.sd_hijack_clip_old
+            return modules.sd_hijack_clip_old.forward_old(self, texts)
+
+        return super().forward(texts)
+
+
 class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
     def __init__(self, wrapped, hijack):
         super().__init__(wrapped, hijack)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 61fb881ba5c..45575e4405b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -722,7 +722,12 @@ def get_empty_cond(sd_model):
         d = sd_model.get_learned_conditioning([""])
         return d['crossattn']
     else:
-        return sd_model.cond_stage_model([""])
+        d = sd_model.cond_stage_model([""])
+
+        if isinstance(d, dict):
+            d = d['crossattn']
+
+        return d

 def send_model_to_cpu(m):

From 42ca30d6c1b7dff737f49ca20409281947b0b110 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Thu, 27 Jun 2024 07:35:53 +0300
Subject: [PATCH 136/201] fix medvram for SD1/SDXL

---
 modules/lowvram.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/lowvram.py b/modules/lowvram.py
index 00aad477bb8..6728c337b64 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -107,7 +107,7 @@ def first_stage_model_decode_wrap(z):
             setattr(obj, field, module)

     # register hooks for those the first three models
-    if hasattr(sd_model.cond_stage_model, "medvram_modules"):
+    if hasattr(sd_model, "cond_stage_model") and hasattr(sd_model.cond_stage_model, "medvram_modules"):
         for module in sd_model.cond_stage_model.medvram_modules():
             if isinstance(module, ModuleWithParent):
                 parent = module.parent
@@ -135,9 +135,9 @@ def first_stage_model_decode_wrap(z):
         sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
         sd_model.first_stage_model.encode = first_stage_model_encode_wrap
         sd_model.first_stage_model.decode = first_stage_model_decode_wrap
-    if hasattr(sd_model, 'depth_model'):
+    if getattr(sd_model, 'depth_model', None) is not None:
         sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
-    if hasattr(sd_model, 'embedder'):
+    if getattr(sd_model, 'embedder', None) is not None:
         sd_model.embedder.register_forward_pre_hook(send_me_to_gpu)

     if use_medvram:

From afaf120bc2677110a8676ff183054d68393cf192 Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine
Date: Thu, 27 Jun 2024 17:44:12 +0900
Subject: [PATCH 137/201] docs: update bug_report.yml

occured -> occurred
---
 .github/ISSUE_TEMPLATE/bug_report.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 5876e941085..c86bd8a680b 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -91,7 +91,7 @@ body:
     id: logs
     attributes:
       label: Console logs
-      description: Please provide **full** cmd/terminal logs from the moment you started UI to the end of it, after the bug occured. If it's very long, provide a link to pastebin or similar service.
+ description: Please provide **full** cmd/terminal logs from the moment you started UI to the end of it, after the bug occurred. If it's very long, provide a link to pastebin or similar service. render: Shell validations: required: true From 06fe174c74fe99093a20eba87aff2bd4e7edadf9 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 07:51:30 +0300 Subject: [PATCH 138/201] get deepbooru to run with --precision-half --- modules/deepbooru.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 547e1b4c67a..fb043feb296 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -57,7 +57,7 @@ def tag_multi(self, pil_image, force_disable_ranks=False): a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255 with torch.no_grad(), devices.autocast(): - x = torch.from_numpy(a).to(devices.device) + x = torch.from_numpy(a).to(devices.device, devices.dtype) y = self.model(x)[0].detach().cpu().numpy() probability_dict = {} From fc8b126673bdfed65b2b98ee747368d35c1b42ba Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 08:10:19 +0300 Subject: [PATCH 139/201] get T5 to work both with and without --precision half --- modules/models/sd3/other_impls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index d7b9b262114..002fe4832eb 100644 --- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -479,7 +479,7 @@ def __init__(self, num_layers, model_dim, inner_dim, ff_dim, num_heads, vocab_si def forward(self, input_ids, intermediate_output=None, final_layer_norm_intermediate=True): intermediate = None - x = self.embed_tokens(input_ids) + x = self.embed_tokens(input_ids).to(torch.float32) # needs float32 or else T5 returns all zeroes past_bias = None for i, layer in enumerate(self.block): x, past_bias = layer(x, past_bias) From 0c7bdcc1b130130bdaec425a9c3537ba098e0098 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 08:10:32 +0300 Subject: [PATCH 140/201] add the missing get_first_stage_encoding function --- modules/models/sd3/sd3_model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 10209f82aa7..f8a4c96cc8f 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -61,6 +61,9 @@ def encode_first_stage(self, image): latent = self.first_stage_model.encode(image) return self.latent_format.process_in(latent) + def get_first_stage_encoding(self, x): + return x + def create_denoiser(self): return SD3Denoiser(self, self.model.model_sampling.sigmas) From 0b64633584e95e94ea2023bbe5640250f9e23984 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 09:23:41 +0300 Subject: [PATCH 141/201] fix img2img --- modules/models/sd3/sd3_model.py | 3 +++ modules/processing.py | 6 +++--- modules/sd_samplers_kdiffusion.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index f8a4c96cc8f..c17fd97e99c 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -73,3 +73,6 @@ def medvram_fields(self): (self, 'cond_stage_model'), (self, 'model'), ] + + def add_noise_to_latent(self, x, noise, amount): + return x * (1 - amount) + noise * amount diff --git a/modules/processing.py 
b/modules/processing.py index d32a1811ec3..c3ce975eaf6 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -1737,10 +1737,10 @@ def init(self, all_prompts, all_seeds, all_subseeds): latmask = latmask[0] if self.mask_round: latmask = np.around(latmask) - latmask = np.tile(latmask[None], (4, 1, 1)) + latmask = np.tile(latmask[None], (self.init_latent.shape[1], 1, 1)) - self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype) - self.nmask = torch.asarray(latmask).to(shared.device).type(self.sd_model.dtype) + self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(devices.dtype) + self.nmask = torch.asarray(latmask).to(shared.device).type(devices.dtype) # this needs to be fixed to be done in sample() using actual seeds for batches if self.inpainting_fill == 2: diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index cede0760ad6..8398299f87d 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -133,7 +133,10 @@ def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, sigmas = self.get_sigmas(p, steps) sigma_sched = sigmas[steps - t_enc - 1:] - xi = x + noise * sigma_sched[0] + if hasattr(shared.sd_model, 'add_noise_to_latent'): + xi = shared.sd_model.add_noise_to_latent(x, noise, sigma_sched[0]) + else: + xi = x + noise * sigma_sched[0] if opts.img2img_extra_noise > 0: p.extra_generation_params["Extra noise"] = opts.img2img_extra_noise From 179ae47d642d9d28184bc0a6cacda00d67b81744 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 11:15:34 +0300 Subject: [PATCH 142/201] fix the problem with infinite prompts where empty cond would be calculated incorrectly --- modules/models/sd3/sd3_cond.py | 9 +++++---- modules/models/sd3/sd3_model.py | 3 +-- modules/sd_models.py | 9 ++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/modules/models/sd3/sd3_cond.py b/modules/models/sd3/sd3_cond.py index c61ae0fe641..e25ba1b63eb 100644 --- a/modules/models/sd3/sd3_cond.py +++ b/modules/models/sd3/sd3_cond.py @@ -177,12 +177,13 @@ def __init__(self, *args, **kwargs): self.weights_loaded = False def forward(self, prompts: list[str]): - lg_out, vector_out = self.model_lg(prompts) + with devices.without_autocast(): + lg_out, vector_out = self.model_lg(prompts) - token_count = lg_out.shape[1] + token_count = lg_out.shape[1] - t5_out = self.model_t5(prompts, token_count=token_count) - lgt_out = torch.cat([lg_out, t5_out], dim=-2) + t5_out = self.model_t5(prompts, token_count=token_count) + lgt_out = torch.cat([lg_out, t5_out], dim=-2) return { 'crossattn': lgt_out, diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index c17fd97e99c..336e8d2d499 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -47,8 +47,7 @@ def ema_scope(self): return contextlib.nullcontext() def get_learned_conditioning(self, batch: list[str]): - with devices.without_autocast(): - return self.cond_stage_model(batch) + return self.cond_stage_model(batch) def apply_model(self, x, t, cond): return self.model(x, t, c_crossattn=cond['crossattn'], y=cond['vector']) diff --git a/modules/sd_models.py b/modules/sd_models.py index 45575e4405b..681030442c6 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -718,16 +718,15 @@ def get_empty_cond(sd_model): p = processing.StableDiffusionProcessingTxt2Img() extra_networks.activate(p, {}) - if hasattr(sd_model, 'conditioner'): + if 
hasattr(sd_model, 'get_learned_conditioning'): d = sd_model.get_learned_conditioning([""]) - return d['crossattn'] else: d = sd_model.cond_stage_model([""]) - if isinstance(d, dict): - d = d['crossattn'] + if isinstance(d, dict): + d = d['crossattn'] - return d + return d def send_model_to_cpu(m): From d67348a0a54a4d4c612673d3622ef0a617036646 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Fri, 28 Jun 2024 18:06:49 +0300 Subject: [PATCH 143/201] allow generation to be started with any dimensions specified --- modules/models/sd3/sd3_model.py | 3 +++ modules/processing.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 336e8d2d499..2d66b80f106 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -75,3 +75,6 @@ def medvram_fields(self): def add_noise_to_latent(self, x, noise, amount): return x * (1 - amount) + noise * amount + + def fix_dimensions(self, width, height): + return width // 16 * 16, height // 16 * 16 diff --git a/modules/processing.py b/modules/processing.py index c3ce975eaf6..7535b56e18c 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -884,6 +884,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if p.refiner_checkpoint_info is None: raise Exception(f'Could not find checkpoint with name {p.refiner_checkpoint}') + if hasattr(shared.sd_model, 'fix_dimensions'): + p.width, p.height = shared.sd_model.fix_dimensions(p.width, p.height) + p.sd_model_name = shared.sd_model.sd_checkpoint_info.name_for_extra p.sd_model_hash = shared.sd_model.sd_model_hash p.sd_vae_name = sd_vae.get_loaded_vae_name() From 7e4b06fcd0b9b47fcc4f0a1261039358114bbacd Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 29 Jun 2024 00:38:52 +0300 Subject: [PATCH 144/201] support loading clip/t5 from the main model checkpoint --- modules/models/sd3/sd3_cond.py | 30 +++++++++++------------------- modules/models/sd3/sd3_model.py | 10 +++++++--- modules/sd_models.py | 9 ++++++--- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/modules/models/sd3/sd3_cond.py b/modules/models/sd3/sd3_cond.py index e25ba1b63eb..bade90ba1a8 100644 --- a/modules/models/sd3/sd3_cond.py +++ b/modules/models/sd3/sd3_cond.py @@ -174,15 +174,10 @@ def __init__(self, *args, **kwargs): self.model_lg = Sd3ClipLG(self.clip_l, self.clip_g) self.model_t5 = Sd3T5(self.t5xxl) - self.weights_loaded = False - def forward(self, prompts: list[str]): with devices.without_autocast(): lg_out, vector_out = self.model_lg(prompts) - - token_count = lg_out.shape[1] - - t5_out = self.model_t5(prompts, token_count=token_count) + t5_out = self.model_t5(prompts, token_count=lg_out.shape[1]) lgt_out = torch.cat([lg_out, t5_out], dim=-2) return { @@ -190,27 +185,24 @@ def forward(self, prompts: list[str]): 'vector': vector_out, } - def load_weights(self): - if self.weights_loaded: - return - + def before_load_weights(self, state_dict): clip_path = os.path.join(shared.models_path, "CLIP") - clip_g_file = modelloader.load_file_from_url(CLIPG_URL, model_dir=clip_path, file_name="clip_g.safetensors") - with safetensors.safe_open(clip_g_file, framework="pt") as file: - self.clip_g.transformer.load_state_dict(SafetensorsMapping(file)) + if 'text_encoders.clip_g.transformer.text_model.embeddings.position_embedding.weight' not in state_dict: + clip_g_file = modelloader.load_file_from_url(CLIPG_URL, model_dir=clip_path, file_name="clip_g.safetensors") + with 
safetensors.safe_open(clip_g_file, framework="pt") as file: + self.clip_g.transformer.load_state_dict(SafetensorsMapping(file)) - clip_l_file = modelloader.load_file_from_url(CLIPL_URL, model_dir=clip_path, file_name="clip_l.safetensors") - with safetensors.safe_open(clip_l_file, framework="pt") as file: - self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) + if 'text_encoders.clip_l.transformer.text_model.embeddings.position_embedding.weight' not in state_dict: + clip_l_file = modelloader.load_file_from_url(CLIPL_URL, model_dir=clip_path, file_name="clip_l.safetensors") + with safetensors.safe_open(clip_l_file, framework="pt") as file: + self.clip_l.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - if self.t5xxl: + if self.t5xxl and 'text_encoders.t5xxl.transformer.encoder.embed_tokens.weight' not in state_dict: t5_file = modelloader.load_file_from_url(T5_URL, model_dir=clip_path, file_name="t5xxl_fp16.safetensors") with safetensors.safe_open(t5_file, framework="pt") as file: self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) - self.weights_loaded = True - def encode_embedding_init_text(self, init_text, nvpt): return torch.tensor([[0]], device=devices.device) # XXX diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 2d66b80f106..98470cdabfc 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -31,7 +31,7 @@ def __init__(self, state_dict, shift=3, use_ema=False): self.alphas_cumprod = 1 / (self.model.model_sampling.sigmas ** 2 + 1) - self.cond_stage_model = SD3Cond() + self.text_encoders = SD3Cond() self.cond_stage_key = 'txt' self.parameterization = "eps" @@ -40,8 +40,12 @@ def __init__(self, state_dict, shift=3, use_ema=False): self.latent_format = SD3LatentFormat() self.latent_channels = 16 - def after_load_weights(self): - self.cond_stage_model.load_weights() + @property + def cond_stage_model(self): + return self.text_encoders + + def before_load_weights(self, state_dict): + self.cond_stage_model.before_load_weights(state_dict) def ema_scope(self): return contextlib.nullcontext() diff --git a/modules/sd_models.py b/modules/sd_models.py index 681030442c6..55bd9ca5e43 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -434,9 +434,15 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer # cache newly loaded model checkpoints_loaded[checkpoint_info] = state_dict.copy() + if hasattr(model, "before_load_weights"): + model.before_load_weights(state_dict) + model.load_state_dict(state_dict, strict=False) timer.record("apply weights to model") + if hasattr(model, "after_load_weights"): + model.after_load_weights(state_dict) + del state_dict # Set is_sdxl_inpaint flag. 
@@ -838,9 +844,6 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): with sd_disable_initialization.LoadStateDictOnMeta(state_dict, device=model_target_device(sd_model), weight_dtype_conversion=weight_dtype_conversion): load_model_weights(sd_model, checkpoint_info, state_dict, timer) - if hasattr(sd_model, "after_load_weights"): - sd_model.after_load_weights() - timer.record("load weights from state dict") send_model_to_device(sd_model) From 1394ecaf36da365e6bdd16ff20a4ef661e94623f Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 29 Jun 2024 08:05:35 +0300 Subject: [PATCH 145/201] do sampler calculations on CPU --- modules/sd_samplers_kdiffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 8398299f87d..95a354dacb6 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -125,7 +125,7 @@ def get_sigmas(self, p, steps): if discard_next_to_last_sigma: sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) - return sigmas + return sigmas.cpu() def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) From ebe8be9028b1a01cc21ff1c49a7fce7bd5138f1b Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 29 Jun 2024 08:05:55 +0300 Subject: [PATCH 146/201] remove AutocastLinear from SD3's MLP --- modules/models/sd3/other_impls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index 002fe4832eb..f992db9bdfc 100644 --- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -39,9 +39,9 @@ def __init__(self, in_features, hidden_features=None, out_features=None, act_lay out_features = out_features or in_features hidden_features = hidden_features or in_features - self.fc1 = AutocastLinear(in_features, hidden_features, bias=bias, dtype=dtype, device=device) + self.fc1 = nn.Linear(in_features, hidden_features, bias=bias, dtype=dtype, device=device) self.act = act_layer - self.fc2 = AutocastLinear(hidden_features, out_features, bias=bias, dtype=dtype, device=device) + self.fc2 = nn.Linear(hidden_features, out_features, bias=bias, dtype=dtype, device=device) def forward(self, x): x = self.fc1(x) From 9e404c315432ca9aca2b9805e462c2360b19f5ae Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 30 Jun 2024 07:06:28 +0300 Subject: [PATCH 147/201] fix --medvram --- modules/models/sd3/sd3_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index 98470cdabfc..dbec8168fe9 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -73,7 +73,7 @@ def create_denoiser(self): def medvram_fields(self): return [ (self, 'first_stage_model'), - (self, 'cond_stage_model'), + (self, 'text_encoders'), (self, 'model'), ] From 6ddcd8914ba7bead27cc72964959076b4e24fc9b Mon Sep 17 00:00:00 2001 From: viking1304 Date: Sun, 30 Jun 2024 11:44:06 +0200 Subject: [PATCH 148/201] ensure use of python from venv --- webui.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webui.sh b/webui.sh index 7acea902cfc..a101e80f71e 100755 --- a/webui.sh +++ b/webui.sh @@ -217,6 +217,8 @@ then if [[ -f "${venv_dir}"/bin/activate ]] then source "${venv_dir}"/bin/activate + # 
ensure use of python from venv + python_cmd="${venv_dir}"/bin/python else printf "\n%s\n" "${delimiter}" printf "\e[1m\e[31mERROR: Cannot activate python venv, aborting...\e[0m" From 957185f7eb5a9aecea5853517f30dfe8f9c4ca58 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 30 Jun 2024 20:01:58 +0900 Subject: [PATCH 149/201] fix Replace preview fix broken Replace preview for extra networks tabs edit metadata caused by #11808 --- javascript/ui.js | 8 ++++++++ modules/ui_extra_networks_user_metadata.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/javascript/ui.js b/javascript/ui.js index ff6f8974b0e..20309634fb6 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -26,6 +26,14 @@ function selected_gallery_index() { return all_gallery_buttons().findIndex(elem => elem.classList.contains('selected')); } +function gallery_container_buttons(gallery_container) { + return gradioApp().querySelectorAll(`#${gallery_container} .thumbnail-item.thumbnail-small`); +} + +function selected_gallery_index_id(gallery_container) { + return Array.from(gallery_container_buttons(gallery_container)).findIndex(elem => elem.classList.contains('selected')); +} + function extract_image_from_gallery(gallery) { if (gallery.length == 0) { return [null]; diff --git a/modules/ui_extra_networks_user_metadata.py b/modules/ui_extra_networks_user_metadata.py index fde093700b8..3a07db10542 100644 --- a/modules/ui_extra_networks_user_metadata.py +++ b/modules/ui_extra_networks_user_metadata.py @@ -194,7 +194,7 @@ def save_preview(self, index, gallery, name): def setup_ui(self, gallery): self.button_replace_preview.click( fn=self.save_preview, - _js="function(x, y, z){return [selected_gallery_index(), y, z]}", + _js=f"function(x, y, z){{return [selected_gallery_index_id('{self.tabname + '_gallery_container'}'), y, z]}}", inputs=[self.edit_name_input, gallery, self.edit_name_input], outputs=[self.html_preview, self.html_status] ).then( From fd16393465847acc07ce49df51f23683d7c690ec Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 30 Jun 2024 21:19:25 +0900 Subject: [PATCH 150/201] defunct --max-batch-count --- modules/cmd_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cmd_args.py b/modules/cmd_args.py index 61ec1866b57..d71982b2c12 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -30,7 +30,7 @@ parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats") parser.add_argument("--no-half-vae", action='store_true', help="do not switch the VAE model to 16-bit floats") parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)") -parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI") +parser.add_argument("--max-batch-count", type=int, default=16, help="does not do anything") parser.add_argument("--embeddings-dir", type=normalized_filepath, default=os.path.join(data_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)") parser.add_argument("--textual-inversion-templates-dir", type=normalized_filepath, default=os.path.join(script_path, 'textual_inversion_templates'), help="directory with textual inversion templates") parser.add_argument("--hypernetwork-dir", type=normalized_filepath, default=os.path.join(models_path, 
'hypernetworks'), help="hypernetwork directory") From 3971c015624cff2de7ea74f98bf14c53797c0568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?I=CC=87brahim=20Su=CC=88ren?= Date: Wed, 3 Jul 2024 17:33:25 +0300 Subject: [PATCH 151/201] Return http 400 instead of 404 on invalid sampler --- modules/api/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/api/api.py b/modules/api/api.py index f468c385275..06cc854fbcc 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -43,7 +43,7 @@ def script_name_to_index(name, scripts): def validate_sampler_name(name): config = sd_samplers.all_samplers_map.get(name, None) if config is None: - raise HTTPException(status_code=404, detail="Sampler not found") + raise HTTPException(status_code=400, detail="Sampler not found") return name From 32fdf18203cee09b558aa3e299fa4f9c0c69c8e0 Mon Sep 17 00:00:00 2001 From: Andrey Efremov <50556416+AndreyRGW@users.noreply.github.com> Date: Thu, 4 Jul 2024 00:56:18 +0300 Subject: [PATCH 152/201] Add Simple Scheduler --- modules/sd_schedulers.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 0165e6a0286..59098d6211c 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -76,6 +76,14 @@ def kl_optimal(n, sigma_min, sigma_max, device): sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) return sigmas +def simple_scheduler(n, sigma_min, sigma_max, inner_model, device): + sigs = [] + ss = len(inner_model.sigmas) / n + for x in range(n): + sigs += [float(inner_model.sigmas[-(1 + int(x * ss))])] + sigs += [0.0] + return torch.FloatTensor(sigs).to(device) + schedulers = [ Scheduler('automatic', 'Automatic', None), @@ -86,6 +94,7 @@ def kl_optimal(n, sigma_min, sigma_max, device): Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), Scheduler('kl_optimal', 'KL Optimal', kl_optimal), Scheduler('align_your_steps', 'Align Your Steps', get_align_your_steps_sigmas), + Scheduler('simple', 'Simple', simple_scheduler, need_inner_model=True), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From f8fb74b93ab3f78dcec05e52e669b3b89b3a3b26 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Thu, 4 Jul 2024 08:43:48 +0300 Subject: [PATCH 153/201] Bump Spandrel to 0.3.4; add spandrel-extra-arches for CodeFormer --- modules/gfpgan_model.py | 4 +--- modules/modelloader.py | 32 +++++++++++++++++++++++++++++--- requirements_versions.txt | 3 ++- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/modules/gfpgan_model.py b/modules/gfpgan_model.py index 445b040925e..01ef899e4a6 100644 --- a/modules/gfpgan_model.py +++ b/modules/gfpgan_model.py @@ -36,13 +36,11 @@ def load_net(self) -> torch.Module: ext_filter=['.pth'], ): if 'GFPGAN' in os.path.basename(model_path): - model = modelloader.load_spandrel_model( + return modelloader.load_spandrel_model( model_path, device=self.get_device(), expected_architecture='GFPGAN', ).model - model.different_w = True # see https://github.com/chaiNNer-org/spandrel/pull/81 - return model raise ValueError("No GFPGAN model found") def restore(self, np_image): diff --git a/modules/modelloader.py b/modules/modelloader.py index 5421e59b013..36e7415af43 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -139,6 +139,27 @@ def load_upscalers(): key=lambda x: x.name.lower() if not isinstance(x.scaler, (UpscalerNone, UpscalerLanczos, UpscalerNearest)) else "" ) +# 
None: not loaded, False: failed to load, True: loaded +_spandrel_extra_init_state = None + + +def _init_spandrel_extra_archs() -> None: + """ + Try to initialize `spandrel_extra_archs` (exactly once). + """ + global _spandrel_extra_init_state + if _spandrel_extra_init_state is not None: + return + + try: + import spandrel + import spandrel_extra_arches + spandrel.MAIN_REGISTRY.add(*spandrel_extra_arches.EXTRA_REGISTRY) + _spandrel_extra_init_state = True + except Exception: + logger.warning("Failed to load spandrel_extra_arches", exc_info=True) + _spandrel_extra_init_state = False + def load_spandrel_model( path: str | os.PathLike, @@ -148,11 +169,16 @@ def load_spandrel_model( dtype: str | torch.dtype | None = None, expected_architecture: str | None = None, ) -> spandrel.ModelDescriptor: + global _spandrel_extra_init_state + import spandrel + _init_spandrel_extra_archs() + model_descriptor = spandrel.ModelLoader(device=device).load_from_file(str(path)) - if expected_architecture and model_descriptor.architecture != expected_architecture: + arch = model_descriptor.architecture + if expected_architecture and arch.name != expected_architecture: logger.warning( - f"Model {path!r} is not a {expected_architecture!r} model (got {model_descriptor.architecture!r})", + f"Model {path!r} is not a {expected_architecture!r} model (got {arch.name!r})", ) half = False if prefer_half: @@ -166,6 +192,6 @@ def load_spandrel_model( model_descriptor.model.eval() logger.debug( "Loaded %s from %s (device=%s, half=%s, dtype=%s)", - model_descriptor, path, device, half, dtype, + arch, path, device, half, dtype, ) return model_descriptor diff --git a/requirements_versions.txt b/requirements_versions.txt index 3037a395bfc..050b6d1fb50 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -23,7 +23,8 @@ pytorch_lightning==1.9.4 resize-right==0.0.2 safetensors==0.4.2 scikit-image==0.21.0 -spandrel==0.1.6 +spandrel==0.3.4 +spandrel-extra-arches==0.1.1 tomesd==0.1.3 torch torchdiffeq==0.2.3 From f8640662c56db75d7c16fe9224ada4e2fa81981e Mon Sep 17 00:00:00 2001 From: Andrey Efremov <50556416+AndreyRGW@users.noreply.github.com> Date: Thu, 4 Jul 2024 19:27:08 +0300 Subject: [PATCH 154/201] Add Normal and DDIM Schedulers --- modules/sd_schedulers.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 0165e6a0286..118beea5df1 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -76,6 +76,33 @@ def kl_optimal(n, sigma_min, sigma_max, device): sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) return sigmas +def normal_scheduler(n, sigma_min, sigma_max, inner_model, device, sgm=False, floor=False): + start = inner_model.sigma_to_t(torch.tensor(sigma_max)) + end = inner_model.sigma_to_t(torch.tensor(sigma_min)) + + if sgm: + timesteps = torch.linspace(start, end, n + 1)[:-1] + else: + timesteps = torch.linspace(start, end, n) + + sigs = [] + for x in range(len(timesteps)): + ts = timesteps[x] + sigs.append(inner_model.t_to_sigma(ts)) + sigs += [0.0] + return torch.FloatTensor(sigs).to(device) + +def ddim_scheduler(n, sigma_min, sigma_max, inner_model, device): + sigs = [] + ss = max(len(inner_model.sigmas) // n, 1) + x = 1 + while x < len(inner_model.sigmas): + sigs += [float(inner_model.sigmas[x])] + x += ss + sigs = sigs[::-1] + sigs += [0.0] + return torch.FloatTensor(sigs).to(device) + schedulers = [ Scheduler('automatic', 'Automatic', None), @@ -86,6 +113,8 @@ 
def kl_optimal(n, sigma_min, sigma_max, device):
     Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]),
     Scheduler('kl_optimal', 'KL Optimal', kl_optimal),
     Scheduler('align_your_steps', 'Align Your Steps', get_align_your_steps_sigmas),
+    Scheduler('normal', 'Normal', normal_scheduler, need_inner_model=True),
+    Scheduler('ddim', 'DDIM', ddim_scheduler, need_inner_model=True),
 ]

 schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}}

From bfbca310744e29d502e56c11dc212323bc9158ab Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 5 Jul 2024 18:56:39 +0800
Subject: [PATCH 155/201] possible fix of wrong scale

https://github.com/comfyanonymous/ComfyUI/pull/3922
---
 extensions-builtin/Lora/network.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py
index 20f8df3d4a8..3c39c49d7f8 100644
--- a/extensions-builtin/Lora/network.py
+++ b/extensions-builtin/Lora/network.py
@@ -204,10 +204,12 @@ def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
         if ex_bias is not None:
             ex_bias = ex_bias * self.multiplier()

+        updown = updown * self.calc_scale()
+
         if self.dora_scale is not None:
             updown = self.apply_weight_decompose(updown, orig_weight)

-        return updown * self.calc_scale() * self.multiplier(), ex_bias
+        return updown * self.multiplier(), ex_bias

     def calc_updown(self, target):
         raise NotImplementedError()

From b82caf132274aa3cd9b087ed9dc671e8987a4686 Mon Sep 17 00:00:00 2001
From: Andray
Date: Fri, 5 Jul 2024 19:28:16 +0400
Subject: [PATCH 156/201] fix ui flashing on reloading and fast scrolling

---
 modules/ui_gradio_extensions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/ui_gradio_extensions.py b/modules/ui_gradio_extensions.py
index 18fbd6777e2..c895b3b6392 100644
--- a/modules/ui_gradio_extensions.py
+++ b/modules/ui_gradio_extensions.py
@@ -41,6 +41,8 @@ def stylesheet(fn):
         if os.path.exists(user_css):
             head += stylesheet(user_css)

+    head += ''
+
     return head

From 0a6628bad0615a640efd99937eb6d10d6d648975 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 6 Jul 2024 10:31:08 +0300
Subject: [PATCH 157/201] remove mentions of specific samplers from CFG denoiser code

---
 modules/sd_samplers_cfg_denoiser.py   | 16 +++++++---------
 modules/sd_samplers_timesteps_impl.py |  3 +++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index c8eeedad3c9..b6fbf337243 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -58,6 +58,9 @@ def __init__(self, sampler):
         self.model_wrap = None
         self.p = None
+        self.cond_scale_miltiplier = 1.0
+
+        self.need_last_noise_uncond = False
         self.last_noise_uncond = None

         # NOTE: masking before denoising can cause the original latents to be oversmoothed
@@ -162,8 +165,6 @@ def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
         # so is_edit_model is set to False to support AND composition.
is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0 - is_cfg_pp = 'CFG++' in self.sampler.config.name - conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step) uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step) @@ -277,18 +278,15 @@ def apply_blend(current_latent): denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model) cfg_denoised_callback(denoised_params) - if is_cfg_pp: - self.last_noise_uncond = x_out[-uncond.shape[0]:] - self.last_noise_uncond = torch.clone(self.last_noise_uncond) + if self.need_last_noise_uncond: + self.last_noise_uncond = torch.clone(x_out[-uncond.shape[0]:]) if is_edit_model: - denoised = self.combine_denoised_for_edit_model(x_out, cond_scale) + denoised = self.combine_denoised_for_edit_model(x_out, cond_scale * self.cond_scale_miltiplier) elif skip_uncond: denoised = self.combine_denoised(x_out, conds_list, uncond, 1.0) - elif is_cfg_pp: - denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale/12.5) # CFG++ scale of (0, 1) maps to (1.0, 12.5) else: - denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) + denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale * self.cond_scale_miltiplier) # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py index 8896cfc9a85..180e4389988 100644 --- a/modules/sd_samplers_timesteps_impl.py +++ b/modules/sd_samplers_timesteps_impl.py @@ -52,6 +52,9 @@ def ddim_cfgpp(model, x, timesteps, extra_args=None, callback=None, disable=None sqrt_one_minus_alphas = torch.sqrt(1 - alphas) sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy())) + model.cond_scale_miltiplier = 1 / 12.5 + model.need_last_noise_uncond = True + extra_args = {} if extra_args is None else extra_args s_in = x.new_ones((x.shape[0])) s_x = x.new_ones((x.shape[0], 1, 1, 1)) From ffead92d4e36a5082fa6ac5dd54c88477c9b524e Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 6 Jul 2024 10:40:48 +0300 Subject: [PATCH 158/201] Revert "Merge pull request #16078 from huchenlei/fix_sd2" This reverts commit 4cc3add770b10cb8e8f7aa980c0d50e5b637ab2b, reversing changes made to 50514ce414ee4fad9aa4780ef0b97116c7d7c970. 
--- modules/sd_hijack_unet.py | 2 -- modules/sd_models_config.py | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 6d657511985..b4f03b138a4 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -138,7 +138,6 @@ def hijack_ddpm_edit(): CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond) -# Always make sure inputs to unet are in correct dtype CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model) CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model) @@ -151,6 +150,5 @@ def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs): return orig_func(timesteps, *args, **kwargs).to(dtype=dtype) -# Always make sure timestep calculation is in correct dtype CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index e9a80ebafa8..7cfeca67f71 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -56,19 +56,14 @@ def is_using_v_parameterization_for_sd2(state_dict): unet.eval() with torch.no_grad(): - unet_dtype = torch.float - original_unet_dtype = devices.dtype_unet - unet_sd = {k.replace("model.diffusion_model.", ""): v for k, v in state_dict.items() if "model.diffusion_model." in k} unet.load_state_dict(unet_sd, strict=True) - unet.to(device=device, dtype=unet_dtype) - devices.dtype_unet = unet_dtype + unet.to(device=device, dtype=torch.float) test_cond = torch.ones((1, 2, 1024), device=device) * 0.5 x_test = torch.ones((1, 4, 8, 8), device=device) * 0.5 out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().item() - devices.dtype_unet = original_unet_dtype return out < -1 From 74069addc31e6cb24a5fb394419aef87b43a8b2c Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 6 Jul 2024 11:00:22 +0300 Subject: [PATCH 159/201] SD2 v autodetection fix --- modules/sd_models_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index 599153c2df3..fb44c5a8d98 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -58,12 +58,13 @@ def is_using_v_parameterization_for_sd2(state_dict): with torch.no_grad(): unet_sd = {k.replace("model.diffusion_model.", ""): v for k, v in state_dict.items() if "model.diffusion_model." 
in k}
         unet.load_state_dict(unet_sd, strict=True)
-        unet.to(device=device, dtype=torch.float)
+        unet.to(device=device, dtype=devices.dtype_unet)

         test_cond = torch.ones((1, 2, 1024), device=device) * 0.5
         x_test = torch.ones((1, 4, 8, 8), device=device) * 0.5

-        out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().item()
+        with devices.autocast():
+            out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().cpu().item()

     return out < -1

From 340a9108ca800774e274b61a70517d1938b39ead Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 6 Jul 2024 11:26:14 +0300
Subject: [PATCH 160/201] update changelog

---
 CHANGELOG.md | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 596b1ec45a4..368b29538a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,100 @@
+## 1.10.0
+
+### Features:
+* A lot of performance improvements (see below in Performance section)
+* Stable Diffusion 3 support ([#16030](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16030))
+    * Recommended Euler sampler; DDIM and other timestamp samplers currently not supported
+    * T5 text model is disabled by default, enable it in settings
+* New schedulers:
+    * Align Your Steps ([#15751](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15751))
+    * KL Optimal ([#15608](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15608))
+    * Normal ([#16149](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16149))
+    * DDIM ([#16149](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16149))
+    * Simple ([#16142](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16142))
+* New sampler: DDIM CFG++ ([#16035](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16035))
+
+### Minor:
+* Option to skip CFG on early steps ([#15607](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15607))
+* Add --models-dir option ([#15742](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15742))
+* Allow mobile users to open context menu by using two fingers press ([#15682](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15682))
+* Infotext: add Lora name as TI hashes for bundled Textual Inversion ([#15679](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15679))
+* Check model's hash after downloading it to prevent corrupted downloads ([#15602](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15602))
+* More extension tag filtering options ([#15627](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15627))
+* When saving AVIF, use JPEG's quality setting ([#15610](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15610))
+* Add filename pattern: `[basename]` ([#15978](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15978))
+* Add option to enable clip skip for clip L on SDXL ([#15992](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15992))
+* Option to prevent screen sleep during generation ([#16001](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16001))
+* ToggleLivePreview button in image viewer ([#16065](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16065))
+
+### Extensions and API:
+* Add process_before_every_sampling hook ([#15984](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15984))
+* Return HTTP 400 instead of 404 on invalid sampler error ([#16140](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16140))
+
+### Performance:
+* [Performance 1/6] use_checkpoint = False ([#15803](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15803))
+* [Performance 2/6] Replace einops.rearrange with torch native ops ([#15804](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15804))
+* [Performance 4/6] Precompute is_sdxl_inpaint flag ([#15806](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15806))
+* [Performance 5/6] Prevent unnecessary extra networks bias backup ([#15816](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15816))
+* [Performance 6/6] Add --precision half option to avoid casting during inference ([#15820](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15820))
+* [Performance] LDM optimization patches ([#15824](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15824))
+* [Performance] Keep sigmas on CPU ([#15823](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15823))
+* Check for nans in unet only once, after all steps have been completed
+* Added option to run torch profiler for image generation
+
+### Bug Fixes:
+* Fix for grids without comprehensive infotexts ([#15958](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15958))
+* feat: lora partial update precede full update ([#15943](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15943))
+* Fix bug where file extension had an extra '.' under some circumstances ([#15893](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15893))
+* Fix corrupt model initial load loop ([#15600](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15600))
+* Allow old sampler names in API ([#15656](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15656))
+* more old sampler scheduler compatibility ([#15681](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15681))
+* Fix Hypertile xyz ([#15831](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15831))
+* XYZ CSV skipinitialspace ([#15832](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15832))
+* fix soft inpainting on mps and xpu, torch_utils.float64 ([#15815](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15815))
+* fix extension update when not on main branch ([#15797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15797))
+* update pickle safe filenames
+* use relative path for webui-assets css ([#15757](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15757))
+* When creating a virtual environment, upgrade pip in webui.bat/webui.sh ([#15750](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15750))
+* Fix AttributeError ([#15738](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15738))
+* use script_path for webui root in launch_utils ([#15705](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15705))
+* fix extra batch mode P Transparency ([#15664](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15664))
+* use gradio theme colors in css ([#15680](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15680))
+* Fix dragging text within prompt input ([#15657](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15657))
+* Add correct mimetype for .mjs files ([#15654](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15654))
+* QOL Items - handle metadata issues more cleanly for SD models, Loras and embeddings ([#15632](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15632))
+* replace wsl-open with wslpath and explorer.exe ([#15968](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15968))
+* Fix SDXL Inpaint ([#15976](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15976))
+* multi size grid ([#15988](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15988))
+* fix Replace preview ([#16118](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16118))
+* Possible fix of wrong scale in weight decomposition ([#16151](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16151))
+* Ensure use of python from venv on Mac and Linux ([#16116](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16116))
+* Prioritize python3.10 over python3 if both are available on Linux and Mac (with fallback) ([#16092](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16092))
+* stopping generation extras ([#16085](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16085))
+* Fix SD2 loading ([#16078](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16078), [#16079](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16079))
+* fix infotext Lora hashes for hires fix different lora ([#16062](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16062))
+* Fix sampler scheduler autocorrection warning ([#16054](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16054))
+
+### Other:
+* fix changelog #15883 -> #15882 ([#15907](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15907))
+* ReloadUI backgroundColor --background-fill-primary ([#15864](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15864))
+* Use different torch versions for Intel and ARM Macs ([#15851](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15851))
+* XYZ override rework ([#15836](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15836))
+* scroll extensions table on overflow ([#15830](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15830))
+* img2img batch upload method ([#15817](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15817))
+* chore: sync v1.8.0 packages according to changelog ([#15783](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15783))
+* Add AVIF MIME type support to mimetype definitions ([#15739](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15739))
+* Update imageviewer.js ([#15730](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15730))
+* no-referrer ([#15641](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15641))
+* .gitignore trace.json ([#15980](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15980))
+* Bump spandrel to 0.3.4 ([#16144](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16144))
+* Defunct --max-batch-count ([#16119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16119))
+* docs: update bug_report.yml ([#16102](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16102))
+* Maintaining Project Compatibility for Python 3.9 Users Without Upgrade Requirements.
([#16088](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16088)) +* Update torch for ARM Macs to 2.3.1 ([#16059](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16059)) +* remove deprecated setting dont_fix_second_order_samplers_schedule ([#16061](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16061)) +* chore: fix typos ([#16060](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16060)) + + ## 1.9.4 ### Bug Fixes: From ec580374e54cb45cce504b9c8455a748fa1f991d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 7 Jul 2024 00:22:27 +0900 Subject: [PATCH 161/201] background-color: background_fill_primary --- modules/shared_gradio_themes.py | 41 +++++++++++++++++++++++++++++++++ modules/ui_gradio_extensions.py | 5 +++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/modules/shared_gradio_themes.py b/modules/shared_gradio_themes.py index b6dc31450bc..b4e3f32bc9f 100644 --- a/modules/shared_gradio_themes.py +++ b/modules/shared_gradio_themes.py @@ -69,3 +69,44 @@ def reload_gradio_theme(theme_name=None): # append additional values gradio_theme shared.gradio_theme.sd_webui_modal_lightbox_toolbar_opacity = shared.opts.sd_webui_modal_lightbox_toolbar_opacity shared.gradio_theme.sd_webui_modal_lightbox_icon_opacity = shared.opts.sd_webui_modal_lightbox_icon_opacity + + +def resolve_var(name: str, gradio_theme=None, history=None): + """ + Attempt to resolve a theme variable name to its value + + Parameters: + name (str): The name of the theme variable + ie "background_fill_primary", "background_fill_primary_dark" + spaces and asterisk (*) prefix is removed from name before lookup + gradio_theme (gradio.themes.ThemeClass): The theme object to resolve the variable from + blank to use the webui default shared.gradio_theme + history (list): A list of previously resolved variables to prevent circular references + for regular use leave blank + Returns: + str: The resolved value + + Error handling: + return either #000000 or #ffffff depending on initial name ending with "_dark" + """ + try: + if history is None: + history = [] + if gradio_theme is None: + gradio_theme = shared.gradio_theme + + name = name.strip() + name = name[1:] if name.startswith("*") else name + + if name in history: + raise ValueError(f'Circular references: name "{name}" in {history}') + + if value := getattr(gradio_theme, name, None): + return resolve_var(value, gradio_theme, history + [name]) + else: + return name + + except Exception: + name = history[0] if history else name + errors.report(f'resolve_color({name})', exc_info=True) + return '#000000' if name.endswith("_dark") else '#ffffff' diff --git a/modules/ui_gradio_extensions.py b/modules/ui_gradio_extensions.py index c895b3b6392..ed57c1e9896 100644 --- a/modules/ui_gradio_extensions.py +++ b/modules/ui_gradio_extensions.py @@ -41,7 +41,10 @@ def stylesheet(fn): if os.path.exists(user_css): head += stylesheet(user_css) - head += '' + from modules.shared_gradio_themes import resolve_var + light = resolve_var('background_fill_primary') + dark = resolve_var('background_fill_primary_dark') + head += f'' return head From b5481c619583bf2e6212234ab129a46a225229c5 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 7 Jul 2024 08:37:58 +0300 Subject: [PATCH 162/201] Merge pull request #16153 from light-and-ray/fix_ui_flashing_on_reload_and_fast_scrollong fix ui flashing on reloading and fast scrollong --- modules/shared_gradio_themes.py | 41 
+++++++++++++++++++++++++++++++++ modules/ui_gradio_extensions.py | 5 ++++ 2 files changed, 46 insertions(+) diff --git a/modules/shared_gradio_themes.py b/modules/shared_gradio_themes.py index b6dc31450bc..b4e3f32bc9f 100644 --- a/modules/shared_gradio_themes.py +++ b/modules/shared_gradio_themes.py @@ -69,3 +69,44 @@ def reload_gradio_theme(theme_name=None): # append additional values gradio_theme shared.gradio_theme.sd_webui_modal_lightbox_toolbar_opacity = shared.opts.sd_webui_modal_lightbox_toolbar_opacity shared.gradio_theme.sd_webui_modal_lightbox_icon_opacity = shared.opts.sd_webui_modal_lightbox_icon_opacity + + +def resolve_var(name: str, gradio_theme=None, history=None): + """ + Attempt to resolve a theme variable name to its value + + Parameters: + name (str): The name of the theme variable + ie "background_fill_primary", "background_fill_primary_dark" + spaces and asterisk (*) prefix is removed from name before lookup + gradio_theme (gradio.themes.ThemeClass): The theme object to resolve the variable from + blank to use the webui default shared.gradio_theme + history (list): A list of previously resolved variables to prevent circular references + for regular use leave blank + Returns: + str: The resolved value + + Error handling: + return either #000000 or #ffffff depending on initial name ending with "_dark" + """ + try: + if history is None: + history = [] + if gradio_theme is None: + gradio_theme = shared.gradio_theme + + name = name.strip() + name = name[1:] if name.startswith("*") else name + + if name in history: + raise ValueError(f'Circular references: name "{name}" in {history}') + + if value := getattr(gradio_theme, name, None): + return resolve_var(value, gradio_theme, history + [name]) + else: + return name + + except Exception: + name = history[0] if history else name + errors.report(f'resolve_color({name})', exc_info=True) + return '#000000' if name.endswith("_dark") else '#ffffff' diff --git a/modules/ui_gradio_extensions.py b/modules/ui_gradio_extensions.py index 18fbd6777e2..ed57c1e9896 100644 --- a/modules/ui_gradio_extensions.py +++ b/modules/ui_gradio_extensions.py @@ -41,6 +41,11 @@ def stylesheet(fn): if os.path.exists(user_css): head += stylesheet(user_css) + from modules.shared_gradio_themes import resolve_var + light = resolve_var('background_fill_primary') + dark = resolve_var('background_fill_primary_dark') + head += f'' + return head From 780c70f6eadc4f0d466f00fe7def2023eec3c8cf Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sun, 7 Jul 2024 08:40:19 +0300 Subject: [PATCH 163/201] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 368b29538a2..ac2b4ad66e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ * Add option to enable clip skip for clip L on SDXL ([#15992](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15992)) * Option to prevent screen sleep during generation ([#16001](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16001)) * ToggleLivePriview button in image viewer ([#16065](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16065)) +* Remove ui flashing on reloading and fast scrollong ([#16153](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16153)) ### Extensions and API: * Add process_before_every_sampling hook ([#15984](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15984)) From 11cfe0dd054926b5df81632f9e2b2a78738ccf95 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 
<16777216c@gmail.com> Date: Sun, 7 Jul 2024 16:36:53 +0300 Subject: [PATCH 164/201] sd3 TI support --- modules/models/sd3/other_impls.py | 8 +++++--- modules/models/sd3/sd3_cond.py | 6 +++++- modules/sd_hijack.py | 17 ++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py index f992db9bdfc..78c1dc68758 100644 --- a/modules/models/sd3/other_impls.py +++ b/modules/models/sd3/other_impls.py @@ -5,6 +5,8 @@ from torch import nn from transformers import CLIPTokenizer, T5TokenizerFast +from modules import sd_hijack + ################################################################################################# ### Core/Utility @@ -110,9 +112,9 @@ def forward(self, x, mask=None, intermediate_output=None): class CLIPEmbeddings(torch.nn.Module): - def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None): + def __init__(self, embed_dim, vocab_size=49408, num_positions=77, dtype=None, device=None, textual_inversion_key="clip_l"): super().__init__() - self.token_embedding = torch.nn.Embedding(vocab_size, embed_dim, dtype=dtype, device=device) + self.token_embedding = sd_hijack.TextualInversionEmbeddings(vocab_size, embed_dim, dtype=dtype, device=device, textual_inversion_key=textual_inversion_key) self.position_embedding = torch.nn.Embedding(num_positions, embed_dim, dtype=dtype, device=device) def forward(self, input_tokens): @@ -127,7 +129,7 @@ def __init__(self, config_dict, dtype, device): intermediate_size = config_dict["intermediate_size"] intermediate_activation = config_dict["hidden_act"] super().__init__() - self.embeddings = CLIPEmbeddings(embed_dim, dtype=torch.float32, device=device) + self.embeddings = CLIPEmbeddings(embed_dim, dtype=torch.float32, device=device, textual_inversion_key=config_dict.get('textual_inversion_key', 'clip_l')) self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device) self.final_layer_norm = nn.LayerNorm(embed_dim, dtype=dtype, device=device) diff --git a/modules/models/sd3/sd3_cond.py b/modules/models/sd3/sd3_cond.py index bade90ba1a8..325c512d594 100644 --- a/modules/models/sd3/sd3_cond.py +++ b/modules/models/sd3/sd3_cond.py @@ -40,6 +40,7 @@ def __getitem__(self, key): "intermediate_size": 5120, "num_attention_heads": 20, "num_hidden_layers": 32, + "textual_inversion_key": "clip_g", } T5_URL = "https://huggingface.co/AUTOMATIC/stable-diffusion-3-medium-text-encoders/resolve/main/t5xxl_fp16.safetensors" @@ -204,7 +205,10 @@ def before_load_weights(self, state_dict): self.t5xxl.transformer.load_state_dict(SafetensorsMapping(file), strict=False) def encode_embedding_init_text(self, init_text, nvpt): - return torch.tensor([[0]], device=devices.device) # XXX + return self.model_lg.encode_embedding_init_text(init_text, nvpt) + + def tokenize(self, texts): + return self.model_lg.tokenize(texts) def medvram_modules(self): return [self.clip_g, self.clip_l, self.t5xxl] diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index d5b2989f4e5..0de83054186 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -359,13 +359,28 @@ def forward(self, input_ids): vec = embedding.vec[self.textual_inversion_key] if isinstance(embedding.vec, dict) else embedding.vec emb = devices.cond_cast_unet(vec) emb_len = min(tensor.shape[0] - offset - 1, emb.shape[0]) - tensor = torch.cat([tensor[0:offset + 1], emb[0:emb_len], tensor[offset + 1 + emb_len:]]) + tensor = 
torch.cat([tensor[0:offset + 1], emb[0:emb_len], tensor[offset + 1 + emb_len:]]).to(dtype=inputs_embeds.dtype) vecs.append(tensor) return torch.stack(vecs) +class TextualInversionEmbeddings(torch.nn.Embedding): + def __init__(self, num_embeddings: int, embedding_dim: int, textual_inversion_key='clip_l', **kwargs): + super().__init__(num_embeddings, embedding_dim, **kwargs) + + self.embeddings = model_hijack + self.textual_inversion_key = textual_inversion_key + + @property + def wrapped(self): + return super().forward + + def forward(self, input_ids): + return EmbeddingsWithFixes.forward(self, input_ids) + + def add_circular_option_to_conv_2d(): conv2d_constructor = torch.nn.Conv2d.__init__ From 7b2917255a7d2065e7c956eb263b59b8262e97f3 Mon Sep 17 00:00:00 2001 From: Richard Tallent Date: Sun, 7 Jul 2024 11:18:17 -0500 Subject: [PATCH 165/201] Fix noisy DS_Store files for MacOS --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 96cfe22dbd1..091f779619f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__ *.ckpt *.safetensors *.pth +.DS_Store /ESRGAN/* /SwinIR/* /repositories From 21e72d1a5e6cac133c379cae62a4315fec81ee9d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:07:26 +0900 Subject: [PATCH 166/201] py 3.9 find_vae() --- scripts/xyz_grid.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b184721bebc..b702c74d821 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -118,11 +118,10 @@ def apply_size(p, x: str, xs) -> None: def find_vae(name: str): - match name := name.lower().strip(): - case 'auto', 'automatic': - return 'Automatic' - case 'none': - return 'None' + if name := name.strip().lower() in ('auto', 'automatic'): + return 'Automatic' + elif name == 'none': + return 'None' return next((k for k in modules.sd_vae.vae_dict if k.lower() == name), print(f'No VAE found for {name}; using Automatic') or 'Automatic') From c3d8b78b47dddb03bb4558d62c6eaffc167cc51b Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:17:51 +0900 Subject: [PATCH 167/201] py 3.9 compatibility --- extensions-builtin/Lora/networks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 63e8c946594..9ed8fa4359f 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -1,3 +1,4 @@ +from __future__ import annotations import gradio as gr import logging import os From 6ca7a453d40120f5a213a88e2576a9265a784573 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 8 Jul 2024 08:27:07 +0300 Subject: [PATCH 168/201] Merge pull request #16169 from AUTOMATIC1111/py-3.9-compatibility Py 3.9 compatibility --- extensions-builtin/Lora/networks.py | 1 + scripts/xyz_grid.py | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 63e8c946594..9ed8fa4359f 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -1,3 +1,4 @@ +from __future__ import annotations import gradio as gr import logging import os diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b184721bebc..b702c74d821 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -118,11 +118,10 @@ def apply_size(p, x: str, xs) -> None: def 
find_vae(name: str): - match name := name.lower().strip(): - case 'auto', 'automatic': - return 'Automatic' - case 'none': - return 'None' + if name := name.strip().lower() in ('auto', 'automatic'): + return 'Automatic' + elif name == 'none': + return 'None' return next((k for k in modules.sd_vae.vae_dict if k.lower() == name), print(f'No VAE found for {name}; using Automatic') or 'Automatic') From 1b0823db94ab491924fd1dc57287ef9dc5bff234 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:51:06 +0900 Subject: [PATCH 169/201] shlex.join launch args in console log --- modules/launch_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index e22da4ec64d..93890cd108c 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -9,6 +9,7 @@ import importlib.metadata import platform import json +import shlex from functools import lru_cache from modules import cmd_args, errors @@ -461,7 +462,7 @@ def configure_for_tests(): def start(): - print(f"Launching {'API server' if '--nowebui' in sys.argv else 'Web UI'} with arguments: {' '.join(sys.argv[1:])}") + print(f"Launching {'API server' if '--nowebui' in sys.argv else 'Web UI'} with arguments: {shlex.join(sys.argv[1:])}") import webui if '--nowebui' in sys.argv: webui.api_only() From 7d7f7f4b49cc4265b48ee08fe13b7e7b03cecf98 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 15:45:45 +0900 Subject: [PATCH 170/201] sysinfo handle psutil not working --- modules/sysinfo.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index f336251e445..614334661d1 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -5,7 +5,6 @@ import platform import hashlib import pkg_resources -import psutil import re import launch @@ -69,9 +68,27 @@ def check(x): return h.hexdigest() == m.group(1) -def get_dict(): - ram = psutil.virtual_memory() +def get_cpu_info(): + cpu_info = {"model": platform.processor()} + try: + import psutil + cpu_info["count logical"] = psutil.cpu_count(logical=True) + cpu_info["count physical"] = psutil.cpu_count(logical=False) + except Exception as e: + cpu_info["error"] = str(e) + return cpu_info + +def get_ram_info(): + try: + import psutil + ram = psutil.virtual_memory() + return {x: pretty_bytes(getattr(ram, x, 0)) for x in ["total", "used", "free", "active", "inactive", "buffers", "cached", "shared"] if getattr(ram, x, 0) != 0} + except Exception as e: + return str(e) + + +def get_dict(): res = { "Platform": platform.platform(), "Python": platform.python_version(), @@ -84,14 +101,8 @@ def get_dict(): "Commandline": get_argv(), "Torch env info": get_torch_sysinfo(), "Exceptions": errors.get_exceptions(), - "CPU": { - "model": platform.processor(), - "count logical": psutil.cpu_count(logical=True), - "count physical": psutil.cpu_count(logical=False), - }, - "RAM": { - x: pretty_bytes(getattr(ram, x, 0)) for x in ["total", "used", "free", "active", "inactive", "buffers", "cached", "shared"] if getattr(ram, x, 0) != 0 - }, + "CPU": get_cpu_info(), + "RAM": get_ram_info(), "Extensions": get_extensions(enabled=True), "Inactive extensions": get_extensions(enabled=False), "Environment": get_environment(), @@ -123,6 +134,7 @@ def get_argv(): return res + re_newline = re.compile(r"\r*\n") From 48dd4d9eaeaeb600bfcd27a02a4070192c55b858 Mon Sep 17 00:00:00 2001 From: 
AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 8 Jul 2024 18:36:28 +0300 Subject: [PATCH 171/201] Merge pull request #16170 from AUTOMATIC1111/shlex.quote-launch-args-in-console-log shlex.join launch args in console log --- modules/launch_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index e22da4ec64d..93890cd108c 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -9,6 +9,7 @@ import importlib.metadata import platform import json +import shlex from functools import lru_cache from modules import cmd_args, errors @@ -461,7 +462,7 @@ def configure_for_tests(): def start(): - print(f"Launching {'API server' if '--nowebui' in sys.argv else 'Web UI'} with arguments: {' '.join(sys.argv[1:])}") + print(f"Launching {'API server' if '--nowebui' in sys.argv else 'Web UI'} with arguments: {shlex.join(sys.argv[1:])}") import webui if '--nowebui' in sys.argv: webui.api_only() From 11f827c58b276dff946dccf4167d8e11159eeba5 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:33:02 +0900 Subject: [PATCH 172/201] use pip freeze --all to get packages --- modules/sysinfo.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 614334661d1..65d4e3c9847 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -4,7 +4,6 @@ import platform import hashlib -import pkg_resources import re import launch @@ -88,6 +87,19 @@ def get_ram_info(): return str(e) +def get_packages(): + try: + import subprocess + return subprocess.check_output([sys.executable, '-m', 'pip', 'freeze', '--all']).decode("utf8").splitlines() + except Exception as pip_error: + try: + import importlib.metadata + packages = importlib.metadata.distributions() + return sorted([f"{package.metadata['Name']}=={package.version}" for package in packages]) + except Exception as e2: + return {'error pip': pip_error, 'error importlib': str(e2)} + + def get_dict(): res = { "Platform": platform.platform(), @@ -108,7 +120,7 @@ def get_dict(): "Environment": get_environment(), "Config": get_config(), "Startup": timer.startup_record, - "Packages": sorted([f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set]), + "Packages": get_packages(), } return res From 27947a79d619eac5ce40b3f2db62d422313d12f6 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:56:06 +0900 Subject: [PATCH 173/201] git status --- modules/launch_utils.py | 8 ++++++++ modules/sysinfo.py | 11 +++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index e22da4ec64d..0688f482630 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -85,6 +85,14 @@ def git_tag(): return "" +@lru_cache() +def git_status(): + try: + return subprocess.check_output([git, "-C", script_path, "status"], shell=False, encoding='utf8').strip() + except Exception as e: + return str(e) + + def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str: if desc is not None: print(desc) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 65d4e3c9847..52617573b1b 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -1,13 +1,12 @@ import json import os import sys - +import subprocess import platform import hashlib import re -import launch -from modules import paths_internal, timer, shared, extensions, errors 
+from modules import paths_internal, timer, shared, extensions, errors, launch_utils checksum_token = "DontStealMyGamePlz__WINNERS_DONT_USE_DRUGS__DONT_COPY_THAT_FLOPPY" environment_whitelist = { @@ -89,7 +88,6 @@ def get_ram_info(): def get_packages(): try: - import subprocess return subprocess.check_output([sys.executable, '-m', 'pip', 'freeze', '--all']).decode("utf8").splitlines() except Exception as pip_error: try: @@ -104,8 +102,9 @@ def get_dict(): res = { "Platform": platform.platform(), "Python": platform.python_version(), - "Version": launch.git_tag(), - "Commit": launch.commit_hash(), + "Version": launch_utils.git_tag(), + "Commit": launch_utils.commit_hash(), + "Git status": launch_utils.git_status(), "Script path": paths_internal.script_path, "Data path": paths_internal.data_path, "Extensions dir": paths_internal.extensions_dir, From dd4f798b97de3e32d0a6a1a18816b4cc28c6008e Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 19:20:49 +0900 Subject: [PATCH 174/201] fallback get_config() --- modules/sysinfo.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 52617573b1b..2c08dd226f9 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -179,5 +179,9 @@ def to_json(x: extensions.Extension): def get_config(): try: return shared.opts.data - except Exception as e: - return str(e) + except Exception as _: + try: + with open(shared.cmd_opts.ui_settings_file, 'r') as f: + return json.load(f) + except Exception as e: + return str(e) From 27d96fa608da81b334163835fe39c1bb32984a7c Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 20:31:50 +0900 Subject: [PATCH 175/201] fallback Extensions info --- modules/sysinfo.py | 56 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 2c08dd226f9..c2bb35cdf07 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -99,6 +99,7 @@ def get_packages(): def get_dict(): + config = get_config() res = { "Platform": platform.platform(), "Python": platform.python_version(), @@ -114,10 +115,10 @@ def get_dict(): "Exceptions": errors.get_exceptions(), "CPU": get_cpu_info(), "RAM": get_ram_info(), - "Extensions": get_extensions(enabled=True), - "Inactive extensions": get_extensions(enabled=False), + "Extensions": get_extensions(enabled=True, fallback_disabled_extensions=config.get('disabled_extensions', [])), + "Inactive extensions": get_extensions(enabled=False, fallback_disabled_extensions=config.get('disabled_extensions', [])), "Environment": get_environment(), - "Config": get_config(), + "Config": config, "Startup": timer.startup_record, "Packages": get_packages(), } @@ -159,19 +160,46 @@ def get_torch_sysinfo(): return str(e) -def get_extensions(*, enabled): +def run_git(path, *args): + try: + if os.path.isdir(os.path.join(path, '.git')): + return subprocess.check_output([launch_utils.git, '-C', path, *args], shell=False, encoding='utf8').strip() + return None + except Exception as e: + return str(e) + + +def get_info_from_repo_path(path): + return { + 'name': os.path.basename(path), + 'path': path, + 'version': run_git(path, 'rev-parse', 'HEAD'), + 'branch': run_git(path, 'branch', '--show-current'), + 'remote': run_git(path, 'remote', 'get-url', 'origin') + } + +def get_extensions(*, enabled, fallback_disabled_extensions=None): try: - def to_json(x: extensions.Extension): - 
return { - "name": x.name, - "path": x.path, - "version": x.version, - "branch": x.branch, - "remote": x.remote, - } - - return [to_json(x) for x in extensions.extensions if not x.is_builtin and x.enabled == enabled] + if extensions.extensions: + def to_json(x: extensions.Extension): + return { + "name": x.name, + "path": x.path, + "version": x.version, + "branch": x.branch, + "remote": x.remote, + } + return [to_json(x) for x in extensions.extensions if not x.is_builtin and x.enabled == enabled] + else: + extensions_list = [] + for extension_dirname in sorted(os.listdir(paths_internal.extensions_dir)): + path = os.path.join(paths_internal.extensions_dir, extension_dirname) + if enabled == (extension_dirname in fallback_disabled_extensions): + continue + if os.path.isdir(path): + extensions_list.append(get_info_from_repo_path(path)) + return extensions_list except Exception as e: return str(e) From 3f6dcda3e50594a6581bee68f482901cd9ba5d5b Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 20:33:15 +0900 Subject: [PATCH 176/201] Extensions info full commit hash --- modules/sysinfo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index c2bb35cdf07..13427af63f1 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -173,7 +173,7 @@ def get_info_from_repo_path(path): return { 'name': os.path.basename(path), 'path': path, - 'version': run_git(path, 'rev-parse', 'HEAD'), + 'commit': run_git(path, 'rev-parse', 'HEAD'), 'branch': run_git(path, 'branch', '--show-current'), 'remote': run_git(path, 'remote', 'get-url', 'origin') } @@ -186,7 +186,7 @@ def to_json(x: extensions.Extension): return { "name": x.name, "path": x.path, - "version": x.version, + "commit": x.commit_hash, "branch": x.branch, "remote": x.remote, } From 4debd4d3ef62449787d6d02943f82ead356bfe48 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 9 Jul 2024 01:02:11 +0900 Subject: [PATCH 177/201] compact get_info_from_repo_path --- modules/sysinfo.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 13427af63f1..aa4328ed1be 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -5,6 +5,7 @@ import platform import hashlib import re +from pathlib import Path from modules import paths_internal, timer, shared, extensions, errors, launch_utils @@ -162,20 +163,19 @@ def get_torch_sysinfo(): def run_git(path, *args): try: - if os.path.isdir(os.path.join(path, '.git')): - return subprocess.check_output([launch_utils.git, '-C', path, *args], shell=False, encoding='utf8').strip() - return None + return subprocess.check_output([launch_utils.git, '-C', path, *args], shell=False, encoding='utf8').strip() except Exception as e: return str(e) -def get_info_from_repo_path(path): +def get_info_from_repo_path(path: Path): + is_repo = (path / '.git').is_dir() return { - 'name': os.path.basename(path), - 'path': path, - 'commit': run_git(path, 'rev-parse', 'HEAD'), - 'branch': run_git(path, 'branch', '--show-current'), - 'remote': run_git(path, 'remote', 'get-url', 'origin') + 'name': path.name, + 'path': str(path), + 'commit': run_git(path, 'rev-parse', 'HEAD') if is_repo else None, + 'branch': run_git(path, 'branch', '--show-current') if is_repo else None, + 'remote': run_git(path, 'remote', 'get-url', 'origin') if is_repo else None, } @@ -192,14 +192,7 @@ def to_json(x: extensions.Extension): } 
return [to_json(x) for x in extensions.extensions if not x.is_builtin and x.enabled == enabled] else: - extensions_list = [] - for extension_dirname in sorted(os.listdir(paths_internal.extensions_dir)): - path = os.path.join(paths_internal.extensions_dir, extension_dirname) - if enabled == (extension_dirname in fallback_disabled_extensions): - continue - if os.path.isdir(path): - extensions_list.append(get_info_from_repo_path(path)) - return extensions_list + return [get_info_from_repo_path(d) for d in Path(paths_internal.extensions_dir).iterdir() if d.is_dir() and enabled != (str(d.name) in fallback_disabled_extensions)] except Exception as e: return str(e) From 72cfa2829d9595b6c92d554035aec6e5d92a6602 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 9 Jul 2024 01:30:55 +0900 Subject: [PATCH 178/201] safer Imports --- modules/sysinfo.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/sysinfo.py b/modules/sysinfo.py index aa4328ed1be..2faa5075730 100644 --- a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -7,7 +7,7 @@ import re from pathlib import Path -from modules import paths_internal, timer, shared, extensions, errors, launch_utils +from modules import paths_internal, timer, shared_cmd_options, errors, launch_utils checksum_token = "DontStealMyGamePlz__WINNERS_DONT_USE_DRUGS__DONT_COPY_THAT_FLOPPY" environment_whitelist = { @@ -135,11 +135,11 @@ def get_argv(): res = [] for v in sys.argv: - if shared.cmd_opts.gradio_auth and shared.cmd_opts.gradio_auth == v: + if shared_cmd_options.cmd_opts.gradio_auth and shared_cmd_options.cmd_opts.gradio_auth == v: res.append("") continue - if shared.cmd_opts.api_auth and shared.cmd_opts.api_auth == v: + if shared_cmd_options.cmd_opts.api_auth and shared_cmd_options.cmd_opts.api_auth == v: res.append("") continue @@ -181,6 +181,7 @@ def get_info_from_repo_path(path: Path): def get_extensions(*, enabled, fallback_disabled_extensions=None): try: + from modules import extensions if extensions.extensions: def to_json(x: extensions.Extension): return { @@ -199,10 +200,11 @@ def to_json(x: extensions.Extension): def get_config(): try: + from modules import shared return shared.opts.data except Exception as _: try: - with open(shared.cmd_opts.ui_settings_file, 'r') as f: + with open(shared_cmd_options.cmd_opts.ui_settings_file, 'r') as f: return json.load(f) except Exception as e: return str(e) From 6a7042fe2fe2974b61e7f6271bd8dad3fedd9dd1 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 9 Jul 2024 01:51:47 +0900 Subject: [PATCH 179/201] move git_status to sysinfo --- modules/launch_utils.py | 9 --------- modules/sysinfo.py | 7 ++++++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 0688f482630..b2cc7127732 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -85,14 +85,6 @@ def git_tag(): return "" -@lru_cache() -def git_status(): - try: - return subprocess.check_output([git, "-C", script_path, "status"], shell=False, encoding='utf8').strip() - except Exception as e: - return str(e) - - def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str: if desc is not None: print(desc) @@ -453,7 +445,6 @@ def prepare_environment(): exit(0) - def configure_for_tests(): if "--api" not in sys.argv: sys.argv.append("--api") diff --git a/modules/sysinfo.py b/modules/sysinfo.py index 2faa5075730..e9a83d74e03 100644 --- 
a/modules/sysinfo.py +++ b/modules/sysinfo.py @@ -106,7 +106,7 @@ def get_dict(): "Python": platform.python_version(), "Version": launch_utils.git_tag(), "Commit": launch_utils.commit_hash(), - "Git status": launch_utils.git_status(), + "Git status": git_status(paths_internal.script_path), "Script path": paths_internal.script_path, "Data path": paths_internal.data_path, "Extensions dir": paths_internal.extensions_dir, @@ -168,6 +168,11 @@ def run_git(path, *args): return str(e) +def git_status(path): + if (Path(path) / '.git').is_dir(): + return run_git(paths_internal.script_path, 'status') + + def get_info_from_repo_path(path: Path): is_repo = (path / '.git').is_dir() return { From 5a5fe7494ad6e4ace6b31bcb00f1e606c5755909 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:32:20 +0900 Subject: [PATCH 180/201] .gitignore sysinfo.json --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 96cfe22dbd1..40f659d3c24 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ notification.mp3 /test/test_outputs /cache trace.json +/sysinfo-????-??-??-??-??.json From 9cc7142dd7c89cc0105e27ecdcf2125b43349bf3 Mon Sep 17 00:00:00 2001 From: Andray Date: Tue, 9 Jul 2024 14:07:12 +0400 Subject: [PATCH 181/201] update installation guide linux --- README.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fc582e15ced..60d4d7399cf 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ A web interface for Stable Diffusion, implemented using Gradio library. - Clip skip - Hypernetworks - Loras (same as Hypernetworks but more pretty) -- A separate UI where you can choose, with preview, which embeddings, hypernetworks or Loras to add to your prompt +- A separate UI where you can choose, with preview, which embeddings, hypernetworks or Loras to add to your prompt - Can select to load a different VAE from settings screen - Estimated completion time in progress bar - API @@ -122,16 +122,35 @@ Alternatively, use online services (like Google Colab): # Debian-based: sudo apt install wget git python3 python3-venv libgl1 libglib2.0-0 # Red Hat-based: -sudo dnf install wget git python3 gperftools-libs libglvnd-glx +sudo dnf install wget git python3 gperftools-libs libglvnd-glx # openSUSE-based: sudo zypper install wget git python3 libtcmalloc4 libglvnd # Arch-based: sudo pacman -S wget git python3 ``` +If your system is very new, you need to install python3.11 or python3.10: +```bash +# Ubuntu 24.04 +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt update +sudo apt install python3.11 + +# Manjaro/Arch +sudo pacman -S yay +yay -S python311 # do not confuse with python3.11 package + +# Then set up env variable in launch script (only for 3.11) +export python_cmd="python3.11" +``` 2. Navigate to the directory you would like the webui to be installed and execute the following command: ```bash wget -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh ``` +Or just clone the repo wherever you want: +```bash +git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui +``` + 3. Run `webui.sh`. 4. Check `webui-user.sh` for options. 
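For reference, a minimal `webui-user.sh` could look like the sketch below. Only `python_cmd` is described in the steps above; the other variable names and flags are assumed to match the default template shipped with the repository.
```bash
# webui-user.sh (sketch, assuming the default template's variable names)

# use the newer interpreter installed above
python_cmd="python3.11"

# extra flags passed to the launcher, for example to listen on the
# local network and reduce VRAM usage
export COMMANDLINE_ARGS="--listen --medvram"
```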
### Installation on Apple Silicon From 26cccd8faab7ade582458611b083cd168993ade0 Mon Sep 17 00:00:00 2001 From: Andray Date: Tue, 9 Jul 2024 14:22:08 +0400 Subject: [PATCH 182/201] update --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 60d4d7399cf..bc62945c0c5 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,11 @@ sudo apt install python3.11 sudo pacman -S yay yay -S python311 # do not confuse with python3.11 package -# Then set up env variable in launch script (only for 3.11) +# Only for 3.11 +# Then set up env variable in launch script export python_cmd="python3.11" +# or in webui-user.sh +python_cmd="python3.11" ``` 2. Navigate to the directory you would like the webui to be installed and execute the following command: ```bash From d57ff884edd5fe3e813dbb65adb45a8966dd15b2 Mon Sep 17 00:00:00 2001 From: Andray Date: Tue, 9 Jul 2024 16:12:39 +0400 Subject: [PATCH 183/201] do not send image size on paste inpaint --- modules/infotext_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/infotext_utils.py b/modules/infotext_utils.py index f1e8f54ba5e..32dbafa6518 100644 --- a/modules/infotext_utils.py +++ b/modules/infotext_utils.py @@ -146,18 +146,19 @@ def connect_paste_params_buttons(): destination_height_component = next(iter([field for field, name in fields if name == "Size-2"] if fields else []), None) if binding.source_image_component and destination_image_component: + need_send_dementions = destination_width_component and binding.tabname != 'inpaint' if isinstance(binding.source_image_component, gr.Gallery): - func = send_image_and_dimensions if destination_width_component else image_from_url_text + func = send_image_and_dimensions if need_send_dementions else image_from_url_text jsfunc = "extract_image_from_gallery" else: - func = send_image_and_dimensions if destination_width_component else lambda x: x + func = send_image_and_dimensions if need_send_dementions else lambda x: x jsfunc = None binding.paste_button.click( fn=func, _js=jsfunc, inputs=[binding.source_image_component], - outputs=[destination_image_component, destination_width_component, destination_height_component] if destination_width_component else [destination_image_component], + outputs=[destination_image_component, destination_width_component, destination_height_component] if need_send_dementions else [destination_image_component], show_progress=False, ) From b1695c1b68f0e52cfe8dc4b9ed28228bd3710336 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:45:13 +0900 Subject: [PATCH 184/201] fix #16169 Py 3.9 compatibility Co-Authored-By: SLAPaper Pang --- scripts/xyz_grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b702c74d821..5664932669c 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -118,7 +118,7 @@ def apply_size(p, x: str, xs) -> None: def find_vae(name: str): - if name := name.strip().lower() in ('auto', 'automatic'): + if (name := name.strip().lower()) in ('auto', 'automatic'): return 'Automatic' elif name == 'none': return 'None' From 3d2dbefcde4091ce4e6d915b3eda16ca964097f2 Mon Sep 17 00:00:00 2001 From: Andray Date: Thu, 11 Jul 2024 23:54:25 +0400 Subject: [PATCH 185/201] fix OSError: cannot write mode P as JPEG --- modules/api/api.py | 2 +- modules/shared_state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/api/api.py 
b/modules/api/api.py index 307476bdd0b..97ec7514ea1 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -113,7 +113,7 @@ def encode_pil_to_base64(image): image.save(output_bytes, format="PNG", pnginfo=(metadata if use_metadata else None), quality=opts.jpeg_quality) elif opts.samples_format.lower() in ("jpg", "jpeg", "webp"): - if image.mode == "RGBA": + if image.mode in ("RGBA", "P"): image = image.convert("RGB") parameters = image.info.get('parameters', None) exif_bytes = piexif.dump({ diff --git a/modules/shared_state.py b/modules/shared_state.py index f74eafc5895..4cd53af6271 100644 --- a/modules/shared_state.py +++ b/modules/shared_state.py @@ -162,7 +162,7 @@ def do_set_current_image(self): errors.record_exception() def assign_current_image(self, image): - if shared.opts.live_previews_image_format == 'jpeg' and image.mode == 'RGBA': + if shared.opts.live_previews_image_format == 'jpeg' and image.mode in ('RGBA', 'P'): image = image.convert('RGB') self.current_image = image self.id_live_preview += 1 From 589dda3cf2954beaeef65928c063e2bb5c680209 Mon Sep 17 00:00:00 2001 From: Andray Date: Fri, 12 Jul 2024 16:08:36 +0400 Subject: [PATCH 186/201] do not break progressbar on non-job actions --- modules/call_queue.py | 25 +++++++++++++++++-------- modules/ui.py | 4 ++-- modules/ui_common.py | 4 ++-- modules/ui_extensions.py | 14 +++++++------- modules/ui_settings.py | 4 ++-- 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/modules/call_queue.py b/modules/call_queue.py index d22c23b317c..555c35312dd 100644 --- a/modules/call_queue.py +++ b/modules/call_queue.py @@ -47,6 +47,22 @@ def f(*args, **kwargs): def wrap_gradio_call(func, extra_outputs=None, add_stats=False): + @wraps(func) + def f(*args, **kwargs): + try: + res = func(*args, **kwargs) + finally: + shared.state.skipped = False + shared.state.interrupted = False + shared.state.stopping_generation = False + shared.state.job_count = 0 + shared.state.job = "" + return res + + return wrap_gradio_call_no_job(f, extra_outputs, add_stats) + + +def wrap_gradio_call_no_job(func, extra_outputs=None, add_stats=False): @wraps(func) def f(*args, extra_outputs_array=extra_outputs, **kwargs): run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats @@ -66,9 +82,6 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs): arg_str += f" (Argument list truncated at {max_debug_str_len}/{len(arg_str)} characters)" errors.report(f"{message}\n{arg_str}", exc_info=True) - shared.state.job = "" - shared.state.job_count = 0 - if extra_outputs_array is None: extra_outputs_array = [None, ''] @@ -77,11 +90,6 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs): devices.torch_gc() - shared.state.skipped = False - shared.state.interrupted = False - shared.state.stopping_generation = False - shared.state.job_count = 0 - if not add_stats: return tuple(res) @@ -123,3 +131,4 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs): return tuple(res) return f + diff --git a/modules/ui.py b/modules/ui.py index 5af34ecb0dc..8edce620f31 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -10,7 +10,7 @@ import gradio.utils import numpy as np from PIL import Image, PngImagePlugin # noqa: F401 -from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call +from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call, wrap_gradio_call_no_job # noqa: F401 from modules import gradio_extensons, sd_schedulers # noqa: F401 from modules import sd_hijack, 
sd_models, script_callbacks, ui_extensions, deepbooru, extra_networks, ui_common, ui_postprocessing, progress, ui_loadsave, shared_items, ui_settings, timer, sysinfo, ui_checkpoint_merger, scripts, sd_samplers, processing, ui_extra_networks, ui_toprow, launch_utils @@ -889,7 +889,7 @@ def select_img2img_tab(tab): )) image.change( - fn=wrap_gradio_call(modules.extras.run_pnginfo), + fn=wrap_gradio_call_no_job(modules.extras.run_pnginfo), inputs=[image], outputs=[html, generation_info, html2], ) diff --git a/modules/ui_common.py b/modules/ui_common.py index 48992a3c121..fb396770102 100644 --- a/modules/ui_common.py +++ b/modules/ui_common.py @@ -228,7 +228,7 @@ def open_folder(f, images=None, index=None): ) save.click( - fn=call_queue.wrap_gradio_call(save_files), + fn=call_queue.wrap_gradio_call_no_job(save_files), _js="(x, y, z, w) => [x, y, false, selected_gallery_index()]", inputs=[ res.generation_info, @@ -244,7 +244,7 @@ def open_folder(f, images=None, index=None): ) save_zip.click( - fn=call_queue.wrap_gradio_call(save_files), + fn=call_queue.wrap_gradio_call_no_job(save_files), _js="(x, y, z, w) => [x, y, true, selected_gallery_index()]", inputs=[ res.generation_info, diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py index 6b6403f23d6..23aff709627 100644 --- a/modules/ui_extensions.py +++ b/modules/ui_extensions.py @@ -624,37 +624,37 @@ def create_ui(): ) install_extension_button.click( - fn=modules.ui.wrap_gradio_call(install_extension_from_index, extra_outputs=[gr.update(), gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(install_extension_from_index, extra_outputs=[gr.update(), gr.update()]), inputs=[extension_to_install, selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, extensions_table, install_result], ) search_extensions_text.change( - fn=modules.ui.wrap_gradio_call(search_extensions, extra_outputs=[gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(search_extensions, extra_outputs=[gr.update()]), inputs=[search_extensions_text, selected_tags, showing_type, filtering_type, sort_column], outputs=[available_extensions_table, install_result], ) selected_tags.change( - fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) showing_type.change( - fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) filtering_type.change( - fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) sort_column.change( - fn=modules.ui.wrap_gradio_call(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(refresh_available_extensions_for_tags, extra_outputs=[gr.update()]), 
inputs=[selected_tags, showing_type, filtering_type, sort_column, search_extensions_text], outputs=[available_extensions_table, install_result] ) @@ -667,7 +667,7 @@ def create_ui(): install_result = gr.HTML(elem_id="extension_install_result") install_button.click( - fn=modules.ui.wrap_gradio_call(lambda *args: [gr.update(), *install_extension_from_url(*args)], extra_outputs=[gr.update(), gr.update()]), + fn=modules.ui.wrap_gradio_call_no_job(lambda *args: [gr.update(), *install_extension_from_url(*args)], extra_outputs=[gr.update(), gr.update()]), inputs=[install_dirname, install_url, install_branch], outputs=[install_url, extensions_table, install_result], ) diff --git a/modules/ui_settings.py b/modules/ui_settings.py index 087b91f3b3d..e53ad50f8f4 100644 --- a/modules/ui_settings.py +++ b/modules/ui_settings.py @@ -1,7 +1,7 @@ import gradio as gr from modules import ui_common, shared, script_callbacks, scripts, sd_models, sysinfo, timer, shared_items -from modules.call_queue import wrap_gradio_call +from modules.call_queue import wrap_gradio_call_no_job from modules.options import options_section from modules.shared import opts from modules.ui_components import FormRow @@ -295,7 +295,7 @@ def add_quicksettings(self): def add_functionality(self, demo): self.submit.click( - fn=wrap_gradio_call(lambda *args: self.run_settings(*args), extra_outputs=[gr.update()]), + fn=wrap_gradio_call_no_job(lambda *args: self.run_settings(*args), extra_outputs=[gr.update()]), inputs=self.components, outputs=[self.text_settings, self.result], ) From 7e5cdaab4b386621a186999e7348f6d0af7317a7 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 15 Jul 2024 08:31:55 +0300 Subject: [PATCH 187/201] SD3 lora support --- extensions-builtin/Lora/network.py | 6 +- extensions-builtin/Lora/network_lora.py | 10 ++- extensions-builtin/Lora/networks.py | 96 +++++++++++++++++++------ modules/models/sd3/mmdit.py | 5 +- modules/models/sd3/sd3_impls.py | 1 + modules/models/sd3/sd3_model.py | 12 ++++ 6 files changed, 106 insertions(+), 24 deletions(-) diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py index 3c39c49d7f8..98ff367fd8a 100644 --- a/extensions-builtin/Lora/network.py +++ b/extensions-builtin/Lora/network.py @@ -7,6 +7,7 @@ import torch.nn.functional as F from modules import sd_models, cache, errors, hashes, shared +import modules.models.sd3.mmdit NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module']) @@ -114,7 +115,10 @@ def __init__(self, net: Network, weights: NetworkWeights): self.sd_key = weights.sd_key self.sd_module = weights.sd_module - if hasattr(self.sd_module, 'weight'): + if isinstance(self.sd_module, modules.models.sd3.mmdit.QkvLinear): + s = self.sd_module.weight.shape + self.shape = (s[0] // 3, s[1]) + elif hasattr(self.sd_module, 'weight'): self.shape = self.sd_module.weight.shape elif isinstance(self.sd_module, nn.MultiheadAttention): # For now, only self-attn use Pytorch's MHA diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py index 4cc4029510f..a7a088949ea 100644 --- a/extensions-builtin/Lora/network_lora.py +++ b/extensions-builtin/Lora/network_lora.py @@ -1,6 +1,7 @@ import torch import lyco_helpers +import modules.models.sd3.mmdit import network from modules import devices @@ -10,6 +11,13 @@ def create_module(self, net: network.Network, weights: network.NetworkWeights): if all(x in weights.w for x in ["lora_up.weight", "lora_down.weight"]): return 
NetworkModuleLora(net, weights) + if all(x in weights.w for x in ["lora_A.weight", "lora_B.weight"]): + w = weights.w.copy() + weights.w.clear() + weights.w.update({"lora_up.weight": w["lora_B.weight"], "lora_down.weight": w["lora_A.weight"]}) + + return NetworkModuleLora(net, weights) + return None @@ -29,7 +37,7 @@ def create_module(self, weights, key, none_ok=False): if weight is None and none_ok: return None - is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention] + is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, modules.models.sd3.mmdit.QkvLinear] is_conv = type(self.sd_module) in [torch.nn.Conv2d] if is_linear: diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 9ed8fa4359f..4ad98714b45 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -20,6 +20,7 @@ from modules import shared, devices, sd_models, errors, scripts, sd_hijack import modules.textual_inversion.textual_inversion as textual_inversion +import modules.models.sd3.mmdit from lora_logger import logger @@ -166,12 +167,26 @@ def load_network(name, network_on_disk): keys_failed_to_match = {} is_sd2 = 'model_transformer_resblocks' in shared.sd_model.network_layer_mapping + if hasattr(shared.sd_model, 'diffusers_weight_map'): + diffusers_weight_map = shared.sd_model.diffusers_weight_map + elif hasattr(shared.sd_model, 'diffusers_weight_mapping'): + diffusers_weight_map = {} + for k, v in shared.sd_model.diffusers_weight_mapping(): + diffusers_weight_map[k] = v + shared.sd_model.diffusers_weight_map = diffusers_weight_map + else: + diffusers_weight_map = None matched_networks = {} bundle_embeddings = {} for key_network, weight in sd.items(): - key_network_without_network_parts, _, network_part = key_network.partition(".") + + if diffusers_weight_map: + key_network_without_network_parts, network_name, network_weight = key_network.rsplit(".", 2) + network_part = network_name + '.' 
+ network_weight + else: + key_network_without_network_parts, _, network_part = key_network.partition(".") if key_network_without_network_parts == "bundle_emb": emb_name, vec_name = network_part.split(".", 1) @@ -183,7 +198,11 @@ def load_network(name, network_on_disk): emb_dict[vec_name] = weight bundle_embeddings[emb_name] = emb_dict - key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2) + if diffusers_weight_map: + key = diffusers_weight_map.get(key_network_without_network_parts, key_network_without_network_parts) + else: + key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2) + sd_module = shared.sd_model.network_layer_mapping.get(key, None) if sd_module is None: @@ -347,6 +366,28 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No purge_networks_from_memory() +def allowed_layer_without_weight(layer): + if isinstance(layer, torch.nn.LayerNorm) and not layer.elementwise_affine: + return True + + return False + + +def store_weights_backup(weight): + if weight is None: + return None + + return weight.to(devices.cpu, copy=True) + + +def restore_weights_backup(obj, field, weight): + if weight is None: + setattr(obj, field, None) + return + + getattr(obj, field).copy_(weight) + + def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, torch.nn.MultiheadAttention]): weights_backup = getattr(self, "network_weights_backup", None) bias_backup = getattr(self, "network_bias_backup", None) @@ -356,21 +397,15 @@ def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Li if weights_backup is not None: if isinstance(self, torch.nn.MultiheadAttention): - self.in_proj_weight.copy_(weights_backup[0]) - self.out_proj.weight.copy_(weights_backup[1]) + restore_weights_backup(self, 'in_proj_weight', weights_backup[0]) + restore_weights_backup(self.out_proj, 'weight', weights_backup[0]) else: - self.weight.copy_(weights_backup) + restore_weights_backup(self, 'weight', weights_backup) - if bias_backup is not None: - if isinstance(self, torch.nn.MultiheadAttention): - self.out_proj.bias.copy_(bias_backup) - else: - self.bias.copy_(bias_backup) + if isinstance(self, torch.nn.MultiheadAttention): + restore_weights_backup(self.out_proj, 'bias', bias_backup) else: - if isinstance(self, torch.nn.MultiheadAttention): - self.out_proj.bias = None - else: - self.bias = None + restore_weights_backup(self, 'bias', bias_backup) def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, torch.nn.MultiheadAttention]): @@ -389,22 +424,22 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn weights_backup = getattr(self, "network_weights_backup", None) if weights_backup is None and wanted_names != (): - if current_names != (): - raise RuntimeError("no backup weights found and current weights are not unchanged") + if current_names != () and not allowed_layer_without_weight(self): + raise RuntimeError(f"{network_layer_name} - no backup weights found and current weights are not unchanged") if isinstance(self, torch.nn.MultiheadAttention): - weights_backup = (self.in_proj_weight.to(devices.cpu, copy=True), self.out_proj.weight.to(devices.cpu, copy=True)) + weights_backup = (store_weights_backup(self.in_proj_weight), store_weights_backup(self.out_proj.weight)) else: - weights_backup = self.weight.to(devices.cpu, copy=True) + weights_backup = 
store_weights_backup(self.weight) self.network_weights_backup = weights_backup bias_backup = getattr(self, "network_bias_backup", None) if bias_backup is None and wanted_names != (): if isinstance(self, torch.nn.MultiheadAttention) and self.out_proj.bias is not None: - bias_backup = self.out_proj.bias.to(devices.cpu, copy=True) + bias_backup = store_weights_backup(self.out_proj) elif getattr(self, 'bias', None) is not None: - bias_backup = self.bias.to(devices.cpu, copy=True) + bias_backup = store_weights_backup(self.bias) else: bias_backup = None @@ -412,6 +447,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn # Only report if bias is not None and current bias are not unchanged. if bias_backup is not None and current_names != (): raise RuntimeError("no backup bias found and current bias are not unchanged") + self.network_bias_backup = bias_backup if current_names != wanted_names: @@ -419,7 +455,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn for net in loaded_networks: module = net.modules.get(network_layer_name, None) - if module is not None and hasattr(self, 'weight'): + if module is not None and hasattr(self, 'weight') and not isinstance(module, modules.models.sd3.mmdit.QkvLinear): try: with torch.no_grad(): if getattr(self, 'fp16_weight', None) is None: @@ -479,6 +515,24 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn continue + if isinstance(self, modules.models.sd3.mmdit.QkvLinear) and module_q and module_k and module_v: + try: + with torch.no_grad(): + # Send "real" orig_weight into MHA's lora module + qw, kw, vw = self.weight.chunk(3, 0) + updown_q, _ = module_q.calc_updown(qw) + updown_k, _ = module_k.calc_updown(kw) + updown_v, _ = module_v.calc_updown(vw) + del qw, kw, vw + updown_qkv = torch.vstack([updown_q, updown_k, updown_v]) + self.weight += updown_qkv + + except RuntimeError as e: + logging.debug(f"Network {net.name} layer {network_layer_name}: {e}") + extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1 + + continue + if module is None: continue diff --git a/modules/models/sd3/mmdit.py b/modules/models/sd3/mmdit.py index 4d2b855512b..8ddf49a4e3e 100644 --- a/modules/models/sd3/mmdit.py +++ b/modules/models/sd3/mmdit.py @@ -175,6 +175,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: ################################################################################# +class QkvLinear(torch.nn.Linear): + pass + def split_qkv(qkv, head_dim): qkv = qkv.reshape(qkv.shape[0], qkv.shape[1], 3, -1, head_dim).movedim(2, 0) return qkv[0], qkv[1], qkv[2] @@ -202,7 +205,7 @@ def __init__( self.num_heads = num_heads self.head_dim = dim // num_heads - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device) + self.qkv = QkvLinear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device) if not pre_only: self.proj = nn.Linear(dim, dim, dtype=dtype, device=device) assert attn_mode in self.ATTENTION_MODES diff --git a/modules/models/sd3/sd3_impls.py b/modules/models/sd3/sd3_impls.py index e2f6cad5b52..59f11b2cbe1 100644 --- a/modules/models/sd3/sd3_impls.py +++ b/modules/models/sd3/sd3_impls.py @@ -67,6 +67,7 @@ def __init__(self, shift=1.0, device=None, dtype=torch.float32, state_dict=None, } self.diffusion_model = MMDiT(input_size=None, pos_embed_scaling_factor=None, pos_embed_offset=None, pos_embed_max_size=pos_embed_max_size, patch_size=patch_size, in_channels=16, depth=depth, num_patches=num_patches, 
adm_in_channels=adm_in_channels, context_embedder_config=context_embedder_config, device=device, dtype=dtype) self.model_sampling = ModelSamplingDiscreteFlow(shift=shift) + self.depth = depth def apply_model(self, x, sigma, c_crossattn=None, y=None): dtype = self.get_dtype() diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py index dbec8168fe9..37cf85eb36f 100644 --- a/modules/models/sd3/sd3_model.py +++ b/modules/models/sd3/sd3_model.py @@ -82,3 +82,15 @@ def add_noise_to_latent(self, x, noise, amount): def fix_dimensions(self, width, height): return width // 16 * 16, height // 16 * 16 + + def diffusers_weight_mapping(self): + for i in range(self.model.depth): + yield f"transformer.transformer_blocks.{i}.attn.to_q", f"diffusion_model_joint_blocks_{i}_x_block_attn_qkv_q_proj" + yield f"transformer.transformer_blocks.{i}.attn.to_k", f"diffusion_model_joint_blocks_{i}_x_block_attn_qkv_k_proj" + yield f"transformer.transformer_blocks.{i}.attn.to_v", f"diffusion_model_joint_blocks_{i}_x_block_attn_qkv_v_proj" + yield f"transformer.transformer_blocks.{i}.attn.to_out.0", f"diffusion_model_joint_blocks_{i}_x_block_attn_proj" + + yield f"transformer.transformer_blocks.{i}.attn.add_q_proj", f"diffusion_model_joint_blocks_{i}_context_block.attn_qkv_q_proj" + yield f"transformer.transformer_blocks.{i}.attn.add_k_proj", f"diffusion_model_joint_blocks_{i}_context_block.attn_qkv_k_proj" + yield f"transformer.transformer_blocks.{i}.attn.add_v_proj", f"diffusion_model_joint_blocks_{i}_context_block.attn_qkv_v_proj" + yield f"transformer.transformer_blocks.{i}.attn.add_out_proj.0", f"diffusion_model_joint_blocks_{i}_context_block_attn_proj" From f5866199c4787f905b229581a8adf765bc42da40 Mon Sep 17 00:00:00 2001 From: Haoming Date: Tue, 16 Jul 2024 11:07:22 +0800 Subject: [PATCH 188/201] add ids --- modules/ui.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 5af34ecb0dc..47d01b4a0c8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -622,8 +622,8 @@ def copy_image(img): with gr.Column(elem_id="img2img_column_size", scale=4): selected_scale_tab = gr.Number(value=0, visible=False) - with gr.Tabs(): - with gr.Tab(label="Resize to", elem_id="img2img_tab_resize_to") as tab_scale_to: + with gr.Tabs(elem_id="img2img_tabs_resize"): + with gr.Tab(label="Resize to", id="to", elem_id="img2img_tab_resize_to") as tab_scale_to: with FormRow(): with gr.Column(elem_id="img2img_column_size", scale=4): width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width") @@ -632,7 +632,7 @@ def copy_image(img): res_switch_btn = ToolButton(value=switch_values_symbol, elem_id="img2img_res_switch_btn", tooltip="Switch width/height") detect_image_size_btn = ToolButton(value=detect_image_size_symbol, elem_id="img2img_detect_image_size_btn", tooltip="Auto detect size from img2img") - with gr.Tab(label="Resize by", elem_id="img2img_tab_resize_by") as tab_scale_by: + with gr.Tab(label="Resize by", id="by", elem_id="img2img_tab_resize_by") as tab_scale_by: scale_by = gr.Slider(minimum=0.05, maximum=4.0, step=0.05, label="Scale", value=1.0, elem_id="img2img_scale") with FormRow(): From 2b50233f3ffa522d5183bacaee3411b9382cbe2c Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Tue, 16 Jul 2024 20:50:25 +0300 Subject: [PATCH 189/201] fix bugs in lora support --- extensions-builtin/Lora/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 4ad98714b45..67f9abe2a37 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -398,7 +398,7 @@ def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Li if weights_backup is not None: if isinstance(self, torch.nn.MultiheadAttention): restore_weights_backup(self, 'in_proj_weight', weights_backup[0]) - restore_weights_backup(self.out_proj, 'weight', weights_backup[0]) + restore_weights_backup(self.out_proj, 'weight', weights_backup[1]) else: restore_weights_backup(self, 'weight', weights_backup) @@ -437,7 +437,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn bias_backup = getattr(self, "network_bias_backup", None) if bias_backup is None and wanted_names != (): if isinstance(self, torch.nn.MultiheadAttention) and self.out_proj.bias is not None: - bias_backup = store_weights_backup(self.out_proj) + bias_backup = store_weights_backup(self.out_proj.bias) elif getattr(self, 'bias', None) is not None: bias_backup = store_weights_backup(self.bias) else: From 2abc628899aaddb7e28840d7f7a40d516cb5073d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 18 Jul 2024 23:49:49 +0900 Subject: [PATCH 190/201] bat activate venv --- webui.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/webui.bat b/webui.bat index a8d479b05e2..7b162ce27cc 100644 --- a/webui.bat +++ b/webui.bat @@ -48,6 +48,7 @@ echo Warning: Failed to upgrade PIP version :activate_venv set PYTHON="%VENV_DIR%\Scripts\Python.exe" +call "%VENV_DIR%\Scripts\activate.bat" echo venv %PYTHON% :skip_venv From a5f66b5003527508c2e2c49c79360ab20071fec2 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Thu, 18 Jul 2024 15:53:54 -0700 Subject: [PATCH 191/201] feature: beta scheduler --- modules/sd_schedulers.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 84b0abb6aca..19d294c1d77 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -2,6 +2,7 @@ import torch import k_diffusion import numpy as np +from scipy import stats from modules import shared @@ -115,6 +116,17 @@ def ddim_scheduler(n, sigma_min, sigma_max, inner_model, device): return torch.FloatTensor(sigs).to(device) +def beta_scheduler(n, sigma_min, sigma_max, inner_model, device): + # From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024) """ + alpha = 0.6 + beta = 0.6 + timesteps = 1 - np.linspace(0, 1, n) + timesteps = [stats.beta.ppf(x, alpha, beta) for x in timesteps] + sigmas = [sigma_min + ((x)*(sigma_max-sigma_min)) for x in timesteps] + [0.0] + sigmas = torch.FloatTensor(sigmas).to(device) + return sigmas + + schedulers = [ Scheduler('automatic', 'Automatic', None), Scheduler('uniform', 'Uniform', uniform, need_inner_model=True), @@ -127,6 +139,7 @@ def ddim_scheduler(n, sigma_min, sigma_max, inner_model, device): Scheduler('simple', 'Simple', simple_scheduler, need_inner_model=True), Scheduler('normal', 'Normal', normal_scheduler, need_inner_model=True), Scheduler('ddim', 'DDIM', ddim_scheduler, need_inner_model=True), + Scheduler('beta', 'Beta', beta_scheduler, need_inner_model=True), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From 964fc13a99d47263d023f4e3116ac2c220acec88 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 20 Jul 2024 04:00:54 +0900 Subject: [PATCH 192/201] fix upscale logic --- modules/upscaler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/upscaler.py b/modules/upscaler.py index 28c60cdcdfe..507881fede2 100644 --- a/modules/upscaler.py +++ b/modules/upscaler.py @@ -56,8 +56,8 @@ def upscale(self, img: PIL.Image, scale, selected_model: str = None): dest_w = int((img.width * scale) // 8 * 8) dest_h = int((img.height * scale) // 8 * 8) - for _ in range(3): - if img.width >= dest_w and img.height >= dest_h and scale != 1: + for i in range(3): + if img.width >= dest_w and img.height >= dest_h and (i > 0 or scale != 1): break if shared.state.interrupted: From 7e1bd3e3c30e1d7e95f719a925f11d9225251f7c Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 13:44:22 -0700 Subject: [PATCH 193/201] refactor: syntax and add 0.0 on new line --- modules/sd_schedulers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 19d294c1d77..6f2dc6308a7 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -122,9 +122,9 @@ def beta_scheduler(n, sigma_min, sigma_max, inner_model, device): beta = 0.6 timesteps = 1 - np.linspace(0, 1, n) timesteps = [stats.beta.ppf(x, alpha, beta) for x in timesteps] - sigmas = [sigma_min + ((x)*(sigma_max-sigma_min)) for x in timesteps] + [0.0] - sigmas = torch.FloatTensor(sigmas).to(device) - return sigmas + sigmas = [sigma_min + (x * (sigma_max-sigma_min)) for x in timesteps] + sigmas += [0.0] + return torch.FloatTensor(sigmas).to(device) schedulers = [ From 3a5a66775c4c9dd03ffbf3c1696ae0db64a71793 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:08:08 -0700 Subject: [PATCH 194/201] add new options 'beta_dist_alpha', 'beta_dist_beta' --- modules/shared_options.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/shared_options.py b/modules/shared_options.py index 096366e0aa6..203f7a32b55 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -404,6 +404,8 @@ 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), 'skip_early_cond': OptionInfo(0.0, "Ignore negative prompt 
during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("disables CFG on a proportion of steps at the beginning of generation; 0=skip none; 1=skip all; can both improve sample diversity/quality and speed up sampling"), + 'beta_dist_alpha': OptionInfo(0.6, "Beta scheduler - alpha", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler alpha').info('Default = 0.6; the alpha parameter of the beta distribution used in Beta sampling'), + 'beta_dist_beta': OptionInfo(0.6, "Beta scheduler - beta", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler beta').info('Default = 0.6; the beta parameter of the beta distribution used in Beta sampling'), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From f6f055a93df4fb1a59fa8f28e3b8f092fd7ba511 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:08:44 -0700 Subject: [PATCH 195/201] use configured alpha/beta values in Beta scheduling --- modules/sd_schedulers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 6f2dc6308a7..f4d16e309ff 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -118,8 +118,8 @@ def ddim_scheduler(n, sigma_min, sigma_max, inner_model, device): def beta_scheduler(n, sigma_min, sigma_max, inner_model, device): # From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024) """ - alpha = 0.6 - beta = 0.6 + alpha = shared.opts.beta_dist_alpha + beta = shared.opts.beta_dist_beta timesteps = 1 - np.linspace(0, 1, n) timesteps = [stats.beta.ppf(x, alpha, beta) for x in timesteps] sigmas = [sigma_min + (x * (sigma_max-sigma_min)) for x in timesteps] From e285af6e4817a34c86212d1b640aad7225f0c022 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:15:10 -0700 Subject: [PATCH 196/201] add beta schedule opts to xyz options --- scripts/xyz_grid.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 5664932669c..6a42a04d9a3 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -259,6 +259,8 @@ def __init__(self, *args, **kwargs): AxisOption("Schedule min sigma", float, apply_override("sigma_min")), AxisOption("Schedule max sigma", float, apply_override("sigma_max")), AxisOption("Schedule rho", float, apply_override("rho")), + AxisOption("Beta schedule alpha", float, apply_override("beta_dist_alpha")), + AxisOption("Beta schedule beta", float, apply_override("beta_dist_beta")), AxisOption("Eta", float, apply_field("eta")), AxisOption("Clip skip", int, apply_override('CLIP_stop_at_last_layers')), AxisOption("Denoising", float, apply_field("denoising_strength")), From 94275b115c2a6c3c273baa92caf0b5f4ff3cc43f Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:15:55 -0700 Subject: [PATCH 197/201] enforce beta_dist_alpha / beta_dist_beta > 0 to avoid nan --- modules/shared_options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 203f7a32b55..4ff7f51c071 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -404,8 +404,8 @@ 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), 'sd_noise_schedule': 
OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), 'skip_early_cond': OptionInfo(0.0, "Ignore negative prompt during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("disables CFG on a proportion of steps at the beginning of generation; 0=skip none; 1=skip all; can both improve sample diversity/quality and speed up sampling"), - 'beta_dist_alpha': OptionInfo(0.6, "Beta scheduler - alpha", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler alpha').info('Default = 0.6; the alpha parameter of the beta distribution used in Beta sampling'), - 'beta_dist_beta': OptionInfo(0.6, "Beta scheduler - beta", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler beta').info('Default = 0.6; the beta parameter of the beta distribution used in Beta sampling'), + 'beta_dist_alpha': OptionInfo(0.6, "Beta scheduler - alpha", gr.Slider, {"minimum": 0.01, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler alpha').info('Default = 0.6; the alpha parameter of the beta distribution used in Beta sampling'), + 'beta_dist_beta': OptionInfo(0.6, "Beta scheduler - beta", gr.Slider, {"minimum": 0.01, "maximum": 1.0, "step": 0.01}, infotext='Beta scheduler beta').info('Default = 0.6; the beta parameter of the beta distribution used in Beta sampling'), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From 9de7084884f4266a19e4ed3f503687927c845c4d Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:54:24 -0700 Subject: [PATCH 198/201] always add alpha/beta to extra_generation_params when schedule is Beta --- modules/sd_samplers_kdiffusion.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 95a354dacb6..4e5310bc6c9 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -119,6 +119,10 @@ def get_sigmas(self, p, steps): if scheduler.need_inner_model: sigmas_kwargs['inner_model'] = self.model_wrap + + if scheduler.label == 'Beta': + p.extra_generation_params["Beta schedule alpha"] = opts.beta_dist_alpha + p.extra_generation_params["Beta schedule beta"] = opts.beta_dist_beta sigmas = scheduler.function(n=steps, **sigmas_kwargs, device=devices.cpu) From 874954060297d847bf30cc5d220effe80ac18968 Mon Sep 17 00:00:00 2001 From: v0xie <28695009+v0xie@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:33:07 -0700 Subject: [PATCH 199/201] fix lint --- modules/sd_samplers_kdiffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index 4e5310bc6c9..0c94d100d25 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -119,7 +119,7 @@ def get_sigmas(self, p, steps): if scheduler.need_inner_model: sigmas_kwargs['inner_model'] = self.model_wrap - + if scheduler.label == 'Beta': p.extra_generation_params["Beta schedule alpha"] = opts.beta_dist_alpha p.extra_generation_params["Beta schedule beta"] = opts.beta_dist_beta From 24a23e1225244a5ee92acad8b96077e0af858761 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 20 Jul 2024 15:59:28 +0900 Subject: [PATCH 200/201] option to disable save button log.csv 
--- modules/shared_options.py | 1 + modules/ui_common.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 096366e0aa6..a482c7c6d06 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -64,6 +64,7 @@ "use_original_name_batch": OptionInfo(True, "Use original name for output filename during batch process in extras tab"), "use_upscaler_name_as_suffix": OptionInfo(False, "Use upscaler name as filename suffix in the extras tab"), "save_selected_only": OptionInfo(True, "When using 'Save' button, only save a single selected image"), + "save_write_log_csv": OptionInfo(True, "Write log.csv when saving images using 'Save' button"), "save_init_img": OptionInfo(False, "Save init images when using img2img"), "temp_dir": OptionInfo("", "Directory for temporary images; leave empty for default"), diff --git a/modules/ui_common.py b/modules/ui_common.py index 48992a3c121..af5857e61e4 100644 --- a/modules/ui_common.py +++ b/modules/ui_common.py @@ -3,6 +3,7 @@ import json import html import os +from contextlib import nullcontext import gradio as gr @@ -103,14 +104,15 @@ def __init__(self, d=None): # NOTE: ensure csv integrity when fields are added by # updating headers and padding with delimiters where needed - if os.path.exists(logfile_path): + if shared.opts.save_write_log_csv and os.path.exists(logfile_path): update_logfile(logfile_path, fields) - with open(logfile_path, "a", encoding="utf8", newline='') as file: - at_start = file.tell() == 0 - writer = csv.writer(file) - if at_start: - writer.writerow(fields) + with (open(logfile_path, "a", encoding="utf8", newline='') if shared.opts.save_write_log_csv else nullcontext()) as file: + if file: + at_start = file.tell() == 0 + writer = csv.writer(file) + if at_start: + writer.writerow(fields) for image_index, filedata in enumerate(images, start_index): image = image_from_url_text(filedata) @@ -130,7 +132,8 @@ def __init__(self, d=None): filenames.append(os.path.basename(txt_fullfn)) fullfns.append(txt_fullfn) - writer.writerow([parsed_infotexts[0]['Prompt'], parsed_infotexts[0]['Seed'], data["width"], data["height"], data["sampler_name"], data["cfg_scale"], data["steps"], filenames[0], parsed_infotexts[0]['Negative prompt'], data["sd_model_name"], data["sd_model_hash"]]) + if file: + writer.writerow([parsed_infotexts[0]['Prompt'], parsed_infotexts[0]['Seed'], data["width"], data["height"], data["sampler_name"], data["cfg_scale"], data["steps"], filenames[0], parsed_infotexts[0]['Negative prompt'], data["sd_model_name"], data["sd_model_hash"]]) # Make Zip if do_make_zip: From 8b3d98c5a580c3c72e82d03fdab2b643bf9a8edd Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 20 Jul 2024 11:54:14 +0300 Subject: [PATCH 201/201] update CHANGELOG --- CHANGELOG.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac2b4ad66e0..301bfd068d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ### Features: * A lot of performance improvements (see below in Performance section) -* Stable Diffusion 3 support ([#16030](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16030)) +* Stable Diffusion 3 support ([#16030](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16030), [#16164](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16164), [#16212](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16212)) * Recommended 
Euler sampler; DDIM and other timestamp samplers currently not supported * T5 text model is disabled by default, enable it in settings * New schedulers: @@ -11,6 +11,7 @@ * Normal ([#16149](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16149)) * DDIM ([#16149](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16149)) * Simple ([#16142](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16142)) + * Beta ([#16235](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16235)) * New sampler: DDIM CFG++ ([#16035](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16035)) ### Minor: @@ -26,6 +27,7 @@ * Option to prevent screen sleep during generation ([#16001](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16001)) * ToggleLivePriview button in image viewer ([#16065](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16065)) * Remove ui flashing on reloading and fast scrollong ([#16153](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16153)) +* option to disable save button log.csv ([#16242](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16242)) ### Extensions and API: * Add process_before_every_sampling hook ([#15984](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15984)) @@ -74,6 +76,10 @@ * Fix SD2 loading ([#16078](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16078), [#16079](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16079)) * fix infotext Lora hashes for hires fix different lora ([#16062](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16062)) * Fix sampler scheduler autocorrection warning ([#16054](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16054)) +* fix ui flashing on reloading and fast scrollong ([#16153](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16153)) +* fix upscale logic ([#16239](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16239)) +* [bug] do not break progressbar on non-job actions (add wrap_gradio_call_no_job) ([#16202](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16202)) +* fix OSError: cannot write mode P as JPEG ([#16194](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16194)) ### Other: * fix changelog #15883 -> #15882 ([#15907](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15907)) @@ -90,10 +96,17 @@ * Bump spandrel to 0.3.4 ([#16144](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16144)) * Defunct --max-batch-count ([#16119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16119)) * docs: update bug_report.yml ([#16102](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16102)) -* Maintaining Project Compatibility for Python 3.9 Users Without Upgrade Requirements. ([#16088](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16088)) +* Maintaining Project Compatibility for Python 3.9 Users Without Upgrade Requirements. 
([#16088](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16088), [#16169](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16169), [#16192](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16192)) * Update torch for ARM Macs to 2.3.1 ([#16059](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16059)) * remove deprecated setting dont_fix_second_order_samplers_schedule ([#16061](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16061)) * chore: fix typos ([#16060](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16060)) +* shlex.join launch args in console log ([#16170](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16170)) +* activate venv .bat ([#16231](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16231)) +* add ids to the resize tabs in img2img ([#16218](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16218)) +* update installation guide linux ([#16178](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16178)) +* Robust sysinfo ([#16173](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16173)) +* do not send image size on paste inpaint ([#16180](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16180)) +* Fix noisy DS_Store files for MacOS ([#16166](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/16166)) ## 1.9.4
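
For reference, below is a minimal standalone sketch of the Beta scheduler logic added to modules/sd_schedulers.py by the patches above (feature, refactor, and the beta_dist_alpha / beta_dist_beta options). It is not the webui code itself: the function name beta_sigmas, the plain-NumPy return value (the real scheduler returns a torch.FloatTensor moved to the target device and reads alpha/beta from shared.opts.beta_dist_alpha / beta_dist_beta), and the example sigma range in the usage line are illustrative assumptions.

```python
# Standalone sketch of the Beta scheduler logic from modules/sd_schedulers.py.
# Illustrative only: alpha/beta defaults and the example sigma range are assumptions.
import numpy as np
from scipy import stats


def beta_sigmas(n, sigma_min, sigma_max, alpha=0.6, beta=0.6):
    # Evenly spaced points reversed to 1.0 ... 0.0 so sigmas run from high to low noise.
    timesteps = 1 - np.linspace(0, 1, n)
    # Warp the points through the beta distribution's percent-point function (inverse CDF).
    timesteps = [stats.beta.ppf(x, alpha, beta) for x in timesteps]
    # Rescale into [sigma_min, sigma_max] and append the final 0.0 sigma, as in the webui scheduler.
    sigmas = [sigma_min + x * (sigma_max - sigma_min) for x in timesteps]
    sigmas += [0.0]
    return np.array(sigmas)


if __name__ == "__main__":
    # Example: a 10-step schedule over a typical SD1.x sigma range (illustrative values).
    print(beta_sigmas(10, 0.0292, 14.6146))
```

Because alpha = beta = 0.6 gives a U-shaped beta density, the inverse CDF clusters the resulting sigmas near both ends of the range, so the sampler spends proportionally more of its steps at very high and very low noise levels than a uniform schedule would.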