Layer Diffusion integration #1150

Draft · wants to merge 1 commit into main
1 change: 1 addition & 0 deletions ai_diffusion/api.py
@@ -157,6 +157,7 @@ class WorkflowInput:
     control_mode: ControlMode = ControlMode.reference
     batch_count: int = 1
     nsfw_filter: float = 0.0
+    use_transparency: bool = False
 
     @property
     def extent(self):
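
The new field defaults to False, so serialized inputs from older clients are unaffected. A minimal usage sketch (assuming `WorkflowInput` takes the workflow kind as its first constructor argument, as elsewhere in `api.py`):

    from ai_diffusion.api import WorkflowInput, WorkflowKind

    i = WorkflowInput(WorkflowKind.generate)
    assert i.use_transparency is False  # default: existing jobs behave as before
    i.use_transparency = True           # opt this job in to RGBA generation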
19 changes: 19 additions & 0 deletions ai_diffusion/comfy_workflow.py
@@ -720,3 +720,22 @@ def estimate_pose(self, image: Output, resolution: int):
         # use smaller model, but it requires onnxruntime, see #630
         mdls["bbox_detector"] = "yolo_nas_l_fp16.onnx"
         return self.add("DWPreprocessor", 1, image=image, resolution=resolution, **feat, **mdls)
+
+    def layer_diffuse_apply(self, model: Output, weight: float):
+        return self.add(
+            "LayeredDiffusionApply",
+            1,
+            model=model,
+            config="SDXL, Conv Injection",
+            weight=weight,
+        )
+
+    def layer_diffuse_decode(self, image: Output, latent_image: Output):
+        return self.add(
+            "LayeredDiffusionDecodeRGBA",
+            1,
+            samples=latent_image,
+            images=image,
+            sd_version="SDXL",
+            sub_batch_size=16,
+        )
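
These two wrappers bracket the sampler: `layer_diffuse_apply` patches the model before sampling, and `layer_diffuse_decode` consumes both the decoded pixels and the latent to reconstruct an RGBA image. A minimal sketch of the decode side (illustrative helper, not part of this diff; assumes a `ComfyWorkflow` instance with the methods above):

    from ai_diffusion.comfy_workflow import ComfyWorkflow, Output

    def decode_with_alpha(w: ComfyWorkflow, vae: Output, latent: Output, transparent: bool) -> Output:
        image = w.vae_decode(vae, latent)  # regular RGB decode
        if transparent:
            # LayeredDiffusionDecodeRGBA recovers the alpha channel from the latent
            image = w.layer_diffuse_decode(image, latent)
        return image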
3 changes: 3 additions & 0 deletions ai_diffusion/model.py
@@ -59,6 +59,7 @@ class Model(QObject, ObservableProperties):
     style = Property(Styles.list().default, setter="set_style", persist=True)
     strength = Property(1.0, persist=True)
     region_only = Property(False, persist=True)
+    use_transparency = Property(False, persist=True)
     batch_count = Property(1, persist=True)
     seed = Property(0, persist=True)
     fixed_seed = Property(False, persist=True)
@@ -77,6 +78,7 @@ class Model(QObject, ObservableProperties):
     style_changed = pyqtSignal(Style)
     strength_changed = pyqtSignal(float)
     region_only_changed = pyqtSignal(bool)
+    use_transparency_changed = pyqtSignal(bool)
     batch_count_changed = pyqtSignal(int)
     seed_changed = pyqtSignal(int)
     fixed_seed_changed = pyqtSignal(bool)
@@ -195,6 +197,7 @@ def _prepare_workflow(self, dryrun=False):
             mask=mask,
             strength=self.strength,
             inpaint=inpaint,
+            use_transparency=self.use_transparency,
         )
         job_params = JobParams(bounds, prompt, regions=job_regions)
         return input, job_params
7 changes: 4 additions & 3 deletions ai_diffusion/resolution.py
@@ -175,6 +175,7 @@ def prepare_diffusion_input(
     style: Style,
     perf: PerformanceSettings,
     downscale=True,
+    layer_diffusion=False,
 ):
     # Take settings into account to compute the desired resolution for diffusion.
     desired = apply_resolution_settings(extent, perf)
@@ -183,7 +184,7 @@
     mult = 8
     if sd_version is SDVersion.flux:
         mult = 16
-    if sd_version is SDVersion.sd3:
+    if sd_version is SDVersion.sd3 or layer_diffusion:
         mult = 64
     min_size, max_size, min_scale, max_scale = CheckpointResolution.compute(
         desired, sd_version, style
@@ -228,10 +229,10 @@ def prepare_extent(
 
 
 def prepare_image(
-    image: Image, sd_ver: SDVersion, style: Style, perf: PerformanceSettings, downscale=True
+    image: Image, sd_ver: SDVersion, style: Style, perf: PerformanceSettings, downscale=True, layer_diffusion=False
 ):
     scaled, out_image, batch = prepare_diffusion_input(
-        image.extent, image, sd_ver, style, perf, downscale
+        image.extent, image, sd_ver, style, perf, downscale, layer_diffusion
     )
     assert out_image is not None
     return ImageInput(scaled.as_input, out_image), batch
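
The bump from `mult = 8` to `mult = 64` is the load-bearing change here: with layer diffusion enabled, the diffusion extent is snapped to a multiple of 64 pixels (the same constraint applied to SD3) instead of the usual 8. An illustrative rounding, not the exact logic in `resolution.py`:

    def round_to_multiple(x: int, mult: int) -> int:
        return max(mult, (x // mult) * mult)

    print(round_to_multiple(1000, 8))   # 1000: already on the 8-pixel grid
    print(round_to_multiple(1000, 64))  # 960: layer diffusion forces the coarser grid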
7 changes: 7 additions & 0 deletions ai_diffusion/resources.py
@@ -49,6 +49,13 @@ class CustomNode(NamedTuple):
         "6ce66ff1b5ed4e5819b23ccf1feb976ef479528a",
         ["INPAINT_LoadFooocusInpaint", "INPAINT_ApplyFooocusInpaint", "INPAINT_ExpandMask"],
     ),
+    CustomNode(
+        "Layer Diffusion",
+        "ComfyUI-layerdiffuse",
+        "https://github.com/huchenlei/ComfyUI-layerdiffuse",
+        "6e4aeb2da78ba48c519367608a61bf47ea6249b4",
+        ["LayeredDiffusionApply", "LayeredDiffusionDecodeRGBA"],
+    ),
 ]
 
 
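
The entry follows the existing `CustomNode` convention: a pinned commit plus the node class names the plugin must find on the ComfyUI server before the feature can be offered. A hedged sketch of such an availability check (`available` stands in for whatever the client reports, e.g. from ComfyUI's `/object_info`; the helper is illustrative, not plugin API):

    def missing_nodes(required: list[str], available: set[str]) -> list[str]:
        return [n for n in required if n not in available]

    required = ["LayeredDiffusionApply", "LayeredDiffusionDecodeRGBA"]
    print(missing_nodes(required, {"LayeredDiffusionApply"}))
    # ['LayeredDiffusionDecodeRGBA'] -> ComfyUI-layerdiffuse missing or outdated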
10 changes: 10 additions & 0 deletions ai_diffusion/ui/generation.py
@@ -547,6 +547,15 @@ def __init__(self):
             _("Generate the active layer region only (use layer transparency as mask)")
         )
 
+        self.use_transparency_button = QCheckBox(self)
+        self.use_transparency_button.setText(_("Transparency"))
+        self.use_transparency_button.setToolTip(_("Generate content on transparent background"))
+
+        options_layout = QHBoxLayout()
+        options_layout.setContentsMargins(5, 0, 5, 0)
+        options_layout.addWidget(self.use_transparency_button)
+        layout.addLayout(options_layout)
+
         generate_layout = QHBoxLayout()
         generate_layout.setSpacing(0)
         generate_layout.addWidget(self.generate_button)
@@ -593,6 +602,7 @@ def model(self, model: Model):
             bind(model, "workspace", self.workspace_select, "value", Bind.one_way),
             bind(model, "style", self.style_select, "value"),
             bind(model, "strength", self.strength_slider, "value"),
+            bind_toggle(model, "use_transparency", self.use_transparency_button),
             bind_toggle(model, "region_only", self.region_mask_button),
             model.inpaint.mode_changed.connect(self.update_generate_button),
             model.strength_changed.connect(self.update_generate_button),
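
`bind_toggle` keeps the checkbox and the persisted `use_transparency` property in sync. A rough, runnable approximation of the UI-to-model direction it sets up (`FakeModel` is a stand-in; the real helper also syncs model-to-UI via the `use_transparency_changed` signal and supports unbinding):

    from PyQt5.QtWidgets import QApplication, QCheckBox

    app = QApplication([])

    class FakeModel:
        use_transparency = False

    model = FakeModel()
    checkbox = QCheckBox("Transparency")
    checkbox.toggled.connect(lambda checked: setattr(model, "use_transparency", checked))
    checkbox.setChecked(True)
    assert model.use_transparency is True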
21 changes: 16 additions & 5 deletions ai_diffusion/workflow.py
@@ -492,6 +492,7 @@ def scale_refine_and_decode(
     clip: Output,
     vae: Output,
     models: ModelDict,
+    use_transparency: bool = False,
 ):
     """Handles scaling images from `initial` to `desired` resolution.
     If it is a substantial upscale, runs a high-res SD refinement pass.
@@ -500,6 +501,8 @@
     mode = extent.refinement_scaling
     if mode in [ScaleMode.none, ScaleMode.resize, ScaleMode.upscale_fast]:
         decoded = w.vae_decode(vae, latent)
+        if use_transparency:
+            decoded = w.layer_diffuse_decode(decoded, latent)
         return scale(extent.initial, extent.desired, mode, w, decoded, models)
 
     model = apply_attention_mask(w, model, cond, clip, extent.desired)
@@ -523,6 +526,8 @@
     )
     result = w.sampler_custom_advanced(model, positive, negative, latent, models.version, **params)
     image = w.vae_decode(vae, result)
+    if use_transparency:
+        image = w.layer_diffuse_decode(image, result)
     return image
 
 
@@ -536,6 +541,7 @@ def ensure_minimum_extent(w: ComfyWorkflow, image: Output, extent: Extent, min_e
 class MiscParams(NamedTuple):
     batch_count: int
     nsfw_filter: float
+    use_transparency: bool
 
 
 def generate(
@@ -548,6 +554,8 @@ def generate(
     models: ModelDict,
 ):
     model, clip, vae = load_checkpoint_with_lora(w, checkpoint, models.all)
+    if misc.use_transparency:
+        model = w.layer_diffuse_apply(model, 1)
     model = apply_ip_adapter(w, model, cond.control, models)
     model_orig = copy(model)
     model = apply_attention_mask(w, model, cond, clip, extent.initial)
@@ -561,7 +569,8 @@
         model, positive, negative, latent, models.version, **_sampler_params(sampling)
     )
     out_image = scale_refine_and_decode(
-        extent, w, cond, sampling, out_latent, prompt_pos, prompt_neg, model_orig, clip, vae, models
+        extent, w, cond, sampling, out_latent, prompt_pos, prompt_neg, model_orig, clip, vae, models,
+        misc.use_transparency
     )
     out_image = w.nsfw_filter(out_image, sensitivity=misc.nsfw_filter)
     out_image = scale_to_target(extent, w, out_image, models)
@@ -1024,6 +1033,7 @@ def prepare(
     mask: Mask | None = None,
     strength: float = 1.0,
     inpaint: InpaintParams | None = None,
+    use_transparency: bool = False,
     upscale_factor: float = 1.0,
     upscale_model: str = "",
     is_live: bool = False,
@@ -1043,6 +1053,7 @@
     extra_loras += region_loras
     i.sampling = _sampling_from_style(style, strength, is_live)
     i.sampling.seed = seed
+    i.use_transparency = use_transparency
     i.models = style.get_models()
     i.conditioning.positive += _collect_lora_triggers(i.models.loras, files)
     i.models.loras = unique(i.models.loras + extra_loras, key=lambda l: l.name)
@@ -1066,7 +1077,7 @@
 
     elif kind is WorkflowKind.inpaint:
         assert isinstance(canvas, Image) and mask and inpaint and style
-        i.images, _ = resolution.prepare_image(canvas, sd_version, style, perf)
+        i.images, _ = resolution.prepare_image(canvas, sd_version, style, perf, layer_diffusion=use_transparency)
         i.images.hires_mask = mask.to_image(canvas.extent)
         upscale_extent, _ = resolution.prepare_extent(
             mask.bounds.extent, sd_version, style, perf, downscale=False
@@ -1085,15 +1096,15 @@
     elif kind is WorkflowKind.refine:
         assert isinstance(canvas, Image) and style
         i.images, i.batch_count = resolution.prepare_image(
-            canvas, sd_version, style, perf, downscale=False
+            canvas, sd_version, style, perf, downscale=False, layer_diffusion=use_transparency
         )
         downscale_all_control_images(i.conditioning, canvas.extent, i.images.extent.desired)
 
     elif kind is WorkflowKind.refine_region:
         assert isinstance(canvas, Image) and mask and inpaint and style
         allow_2pass = strength >= 0.7
         i.images, i.batch_count = resolution.prepare_image(
-            canvas, sd_version, style, perf, downscale=allow_2pass
+            canvas, sd_version, style, perf, downscale=allow_2pass, layer_diffusion=use_transparency
         )
         i.images.hires_mask = mask.to_image(canvas.extent)
         i.inpaint = inpaint
@@ -1152,7 +1163,7 @@ def create(i: WorkflowInput, models: ClientModels, comfy_mode=ComfyRunMode.serve
     This should be a pure function, the workflow is entirely defined by the input.
     """
     workflow = ComfyWorkflow(models.node_inputs, comfy_mode)
-    misc = MiscParams(i.batch_count, i.nsfw_filter)
+    misc = MiscParams(i.batch_count, i.nsfw_filter, i.use_transparency)
 
     if i.kind is WorkflowKind.generate:
         return generate(
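
Taken together, the flag threads through every layer of the plugin; a condensed trace of the path, with call signatures abbreviated to the relevant argument:

    # UI (generation.py):  Transparency checkbox -> bind_toggle(model, "use_transparency", ...)
    # Model (model.py):    Model.use_transparency -> prepare(..., use_transparency=...)
    # API (api.py):        WorkflowInput.use_transparency, serialized with the job
    # Graph (workflow.py): MiscParams.use_transparency
    #   -> generate(): w.layer_diffuse_apply(model, 1) before sampling
    #   -> scale_refine_and_decode(): w.layer_diffuse_decode(image, latent) after
    #      VAE decode, producing an RGBA result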