
Commit e94b2bd

Author: wjay
[ChronoEdit] support multiple loras
Parent: 67dc65e

File tree (2 files changed: +59, -3 lines)


docs/source/en/api/pipelines/chronoedit.md (58 additions, 3 deletions)
```diff
@@ -30,6 +30,10 @@
 
 The ChronoEdit pipeline is developed by the ChronoEdit Team. The original code is available on [GitHub](https://github.com/nv-tlabs/ChronoEdit), and pretrained models can be found in the [nvidia/ChronoEdit](https://huggingface.co/collections/nvidia/chronoedit) collection on Hugging Face.
 
+Available Models/LoRAs:
+- [nvidia/ChronoEdit-14B-Diffusers](https://huggingface.co/nvidia/ChronoEdit-14B-Diffusers)
+- [nvidia/ChronoEdit-14B-Diffusers-Upscaler-Lora](https://huggingface.co/nvidia/ChronoEdit-14B-Diffusers-Upscaler-Lora)
+- [nvidia/ChronoEdit-14B-Diffusers-Paint-Brush-Lora](https://huggingface.co/nvidia/ChronoEdit-14B-Diffusers-Paint-Brush-Lora)
 
 ### Image Editing
 
```
```diff
@@ -100,6 +104,7 @@ Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
 import torch
 import numpy as np
 from diffusers import AutoencoderKLWan, ChronoEditTransformer3DModel, ChronoEditPipeline
+from diffusers.schedulers import UniPCMultistepScheduler
 from diffusers.utils import export_to_video, load_image
 from transformers import CLIPVisionModel
 from PIL import Image
@@ -109,9 +114,8 @@ image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
 vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
 transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
 pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
-lora_path = hf_hub_download(repo_id=model_id, filename="lora/chronoedit_distill_lora.safetensors")
-pipe.load_lora_weights(lora_path)
-pipe.fuse_lora(lora_scale=1.0)
+pipe.load_lora_weights("nvidia/ChronoEdit-14B-Diffusers", weight_name="lora/chronoedit_distill_lora.safetensors", adapter_name="distill")
+pipe.fuse_lora(adapter_names=["distill"], lora_scale=1.0)
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=2.0)
 pipe.to("cuda")
 
```
````diff
@@ -145,6 +149,57 @@ export_to_video(output, "output.mp4", fps=16)
 Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output.png")
 ```
 
+### Inference with Multiple LoRAs
+
+```py
+import torch
+import numpy as np
+from diffusers import AutoencoderKLWan, ChronoEditTransformer3DModel, ChronoEditPipeline
+from diffusers.schedulers import UniPCMultistepScheduler
+from diffusers.utils import export_to_video, load_image
+from transformers import CLIPVisionModel
+from PIL import Image
+
+model_id = "nvidia/ChronoEdit-14B-Diffusers"
+image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
+vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+transformer = ChronoEditTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
+pipe = ChronoEditPipeline.from_pretrained(model_id, image_encoder=image_encoder, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
+pipe.load_lora_weights("nvidia/ChronoEdit-14B-Diffusers-Paint-Brush-Lora", weight_name="paintbrush_lora_diffusers.safetensors", adapter_name="paintbrush")
+pipe.load_lora_weights("nvidia/ChronoEdit-14B-Diffusers", weight_name="lora/chronoedit_distill_lora.safetensors", adapter_name="distill")
+pipe.fuse_lora(adapter_names=["paintbrush", "distill"], lora_scale=1.0)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=2.0)
+pipe.to("cuda")
+
+image = load_image(
+    "https://raw.githubusercontent.com/nv-tlabs/ChronoEdit/refs/heads/main/assets/images/input_paintbrush.png"
+)
+max_area = 720 * 1280
+aspect_ratio = image.height / image.width
+mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
+height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
+width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
+print("width", width, "height", height)
+image = image.resize((width, height))
+prompt = (
+    "Turn the pencil sketch in the image into an actual object that is consistent with the image’s content. The user wants to change the sketch to a crown and a hat."
+)
+
+output = pipe(
+    image=image,
+    prompt=prompt,
+    height=height,
+    width=width,
+    num_frames=5,
+    num_inference_steps=8,
+    guidance_scale=1.0,
+    enable_temporal_reasoning=False,
+    num_temporal_reasoning_steps=0,
+).frames[0]
+export_to_video(output, "output.mp4", fps=16)
+Image.fromarray((output[-1] * 255).clip(0, 255).astype("uint8")).save("output_1.png")
+```
+
 ## ChronoEditPipeline
 
 [[autodoc]] ChronoEditPipeline
````
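In the new section, both adapters are fused at one shared `lora_scale`. A hedged alternative (not shown in this commit) keeps them unfused so each can be weighted independently:

```py
# Sketch, assuming both load_lora_weights calls above ran and fuse_lora()
# is skipped; the 0.8 weight is illustrative, not from the commit.
pipe.set_adapters(["paintbrush", "distill"], adapter_weights=[1.0, 0.8])
```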

src/diffusers/loaders/peft.py (1 addition, 0 deletions)
```diff
@@ -61,6 +61,7 @@
     "HunyuanVideoFramepackTransformer3DModel": lambda model_cls, weights: weights,
     "WanVACETransformer3DModel": lambda model_cls, weights: weights,
     "ChromaTransformer2DModel": lambda model_cls, weights: weights,
+    "ChronoEditTransformer3DModel": lambda model_cls, weights: weights,
     "QwenImageTransformer2DModel": lambda model_cls, weights: weights,
 }
```
