
Loading InstantID with IP-Adapter on diffusers pipeline #282

@Fqlox


I wanted to load InstantID together with an IP-Adapter. It seems there is some kind of conflict within the pipeline.

So far I have:


# diffusers pipeline 
import diffusers
from diffusers.utils import load_image
from diffusers.models import ControlNetModel
from transformers import CLIPVisionModelWithProjection


# Custom diffusers implementation Instantid & insightface 
from insightface.app import FaceAnalysis
from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps

# Other dependencies
import cv2
import torch
import numpy as np
from PIL import Image

from compel import Compel, ReturnedEmbeddingsType


app_face = FaceAnalysis(name='antelopev2', root='./', providers=['CPUExecutionProvider'])  # or 'CUDAExecutionProvider' on GPU
app_face.prepare(ctx_id=0, det_size=(640, 640))

# InstantID weights (face adapter + IdentityNet ControlNet)
face_adapter = "./models/instantid/ip-adapter.bin"
controlnet_path = "./models/instantid/ControlNetModel/"

# load IdentityNet
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
pipe = StableDiffusionXLInstantIDPipeline.from_single_file(
    "./models/checkpoints/realvisxlV40_v40LightningBakedvae.safetensors",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
pipe.cuda()

# load the InstantID face adapter
pipe.load_ip_adapter_instantid(face_adapter)

# Load the CLIP image encoder used by the IP-Adapter
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "./models/ipadapters",
    subfolder="sdxl_models/image_encoder",
    torch_dtype=torch.float16,
    #weight_name="ip-adapter-plus_sdxl_vit-h.safetensors"
).to("cuda")

# Attach the image encoder to the pipe
pipe.image_encoder = image_encoder

pipe.load_ip_adapter("./models/ipadapters", subfolder="sdxl_models", weight_name="ip-adapter-plus_sdxl_vit-h.safetensors")
pipe.set_ip_adapter_scale(1.3)
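# As far as I can tell, load_ip_adapter() also switches the UNet config to
# encoder_hid_dim_type="ip_image_proj", so from this point on the UNet expects
# IP-Adapter image embeds under added_cond_kwargs on every forward pass.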

# Optimisation
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

image = Image.open("img1.png")

face_info = app_face.get(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]  # only use the maximum face
face_emb = face_info['embedding']


prompt = "prompt"

kps = Image.open("kps_standard.png")

ipadapter_image = Image.open("img2.png")

#encod = pipe.image_encoder(ipadapter_image)
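
# (Assumption: not part of the original snippet) compel_proc was presumably
# built with the standard Compel setup for SDXL's two text encoders:
compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)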


prompt_embed, pooled = compel_proc(prompt)

image = pipe(
    prompt,

    width=768,
    height=1024,

    image_embeds=face_emb,
    image=kps,
    generator=torch.Generator("cuda").manual_seed(42),  # `seed` is not a pipeline argument
    ip_adapter_image=ipadapter_image,
    
    controlnet_conditioning_scale=0.7,
    control_guidance_end=0.7,
    num_inference_steps=6,
    guidance_scale=3,
    
).images[0]

It gives me:

ValueError: <class 'diffusers.models.unet_2d_condition.UNet2DConditionModel'> has the config param `encoder_hid_dim_type` set to 'ip_image_proj' which requires the keyword argument `image_embeds` to be passed in  `added_conditions`
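
For what it's worth, here is how I understand the conflict (a minimal check, assuming it is the diffusers IP-Adapter loader that rewrites the UNet config):

# After pipe.load_ip_adapter(...), the UNet wants IP-Adapter embeds passed
# through added_cond_kwargs["image_embeds"]:
print(pipe.unet.config.encoder_hid_dim_type)  # should print "ip_image_proj"

# The InstantID pipeline's __call__, however, projects the face embedding with
# its own image_proj_model and never fills added_cond_kwargs["image_embeds"],
# which is where the ValueError above comes from.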

Is there a workaround?

It surely can work; I've already built a similar pipeline in ComfyUI.

Thanks

Edit: I've seen this response, but the Jupyter notebook link is broken.
