Add StableDiffusionXLControlNetPAGImg2ImgPipeline #8990
Add StableDiffusionXLControlNetPAGImg2ImgPipeline #8990 — yiyixuxu merged 19 commits into huggingface:main from
Conversation
|
Generation code:

import torch
import numpy as np
from PIL import Image
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from diffusers import ControlNetModel, AutoencoderKL, AutoPipelineForImage2Image
from diffusers.utils import load_image
# Depth estimator and its matching preprocessor; the estimator is moved to CUDA.
# NOTE(review): DPTFeatureExtractor appears to be deprecated in newer
# transformers releases in favour of DPTImageProcessor — confirm before upgrading.
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")

# Depth ControlNet for SDXL, loaded from the fp16 weight variant.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0-small",
    variant="fp16",
    # Boolean flag, matching the pipeline call below (was the string "True").
    use_safetensors=True,
    torch_dtype=torch.float16,
)

# Replacement VAE loaded in float16.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# SDXL img2img pipeline with the ControlNet attached and PAG switched on.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
    enable_pag=True,
)
pipe.enable_model_cpu_offload()
def get_depth_map(image, size=(1024, 1024)):
    """Estimate a depth map for ``image`` and return it as a 3-channel PIL image.

    The input is preprocessed with the module-level ``feature_extractor``, run
    through the module-level ``depth_estimator`` on CUDA, resized with bicubic
    interpolation, min-max normalised to ``[0, 1]`` per batch item and
    replicated across three channels.

    Args:
        image: Picture accepted by ``feature_extractor(images=...)``.
        size: ``(height, width)`` of the returned depth image. Defaults to
            ``(1024, 1024)``, the value the snippet originally hard-coded.

    Returns:
        A ``PIL.Image.Image`` created from the uint8 array of the first batch
        item.
    """
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")

    # NOTE(review): the extent of this ``with`` block was reconstructed, since
    # the pasted snippet lost its indentation — confirm against the original.
    with torch.no_grad(), torch.autocast("cuda"):
        depth_map = depth_estimator(pixel_values).predicted_depth

    # Add a channel axis so the map is 4-D before resizing.
    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1),
        size=size,
        mode="bicubic",
        align_corners=False,
    )

    # Per-item min-max normalisation over the channel and spatial axes.
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_range = depth_max - depth_min
    # A perfectly flat depth map would give 0 / 0 = NaN; divide by 1 instead so
    # the result is an all-zero image. Non-flat maps are unaffected.
    depth_range = torch.where(depth_range > 0, depth_range, torch.ones_like(depth_range))
    depth_map = (depth_map - depth_min) / depth_range

    # Replicate the single channel three times, then move channels last.
    stacked = torch.cat([depth_map] * 3, dim=1)
    pixels = stacked.permute(0, 2, 3, 1).cpu().numpy()[0]
    return Image.fromarray((pixels * 255.0).clip(0, 255).astype(np.uint8))
# Text prompt for the img2img generation.
prompt = "A robot, 4k photo"

# Source picture, downloaded and resized to 1024x1024.
image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/kandinsky/cat.png"
).resize((1024, 1024))

# Value passed to the pipeline's `controlnet_conditioning_scale` argument.
controlnet_conditioning_scale = 0.5

# Depth map of the source picture, used as the ControlNet conditioning image.
depth_image = get_depth_map(image)

# Run the pipeline and keep the list of generated images.
images = pipe(
    prompt,
    image=image,
    control_image=depth_image,
    strength=0.99,
    num_inference_steps=50,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
).images
images[0].save(f"robot_cat.png")

It works with Error: |
|
Any help would be nice. Thanks |
|
can you share the full stack trace? |
|
|
src/diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py
Outdated
Show resolved
Hide resolved
…img.py Co-authored-by: YiYi Xu <yixu310@gmail.com>
|
hi @yiyixuxu can you review this? |
|
sorry this PR got lost too |
src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
Outdated
Show resolved
Hide resolved
|
The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update. |
|
@asomoza |
|
@yiyixuxu Tested it and it seems OK. It's harder to see the difference here because the base image helps a lot even without PAG, but it still works similarly to the other ones.
|
|
@satani99 thank you! |
* Added PAG ControlNet SDXL img2img pipeline

---------

Co-authored-by: YiYi Xu <yixu310@gmail.com>


What does this PR do?
fix #8700
Before submitting
documentation guidelines, and
here are tips on formatting docstrings.
Who can review?
@yiyixuxu