
Commit 5568ac1 (1 parent: 978e7b9)

Add support for warp-ai/wuerstchen

2 files changed: +43 −21 lines


generator_process/actions/huggingface_hub.py

Lines changed: 8 additions & 0 deletions
@@ -63,6 +63,7 @@ def list_dir(cache_dir):
 def detect_model_type(snapshot_folder):
     unet_config = os.path.join(snapshot_folder, 'unet', 'config.json')
     config = os.path.join(snapshot_folder, 'config.json')
+    model_index = os.path.join(snapshot_folder, 'model_index.json')
     if os.path.exists(unet_config):
         with open(unet_config, 'r') as f:
             return ModelType(json.load(f)['in_channels'])
@@ -73,6 +74,13 @@ def detect_model_type(snapshot_folder):
             return ModelType.CONTROL_NET
         else:
             return ModelType.UNKNOWN
+    elif os.path.exists(model_index):
+        with open(model_index, 'r') as f:
+            model_index_dict = json.load(f)
+        if '_class_name' in model_index_dict and model_index_dict['_class_name'] == 'WuerstchenDecoderPipeline':
+            return ModelType.PROMPT_TO_IMAGE
+        else:
+            return ModelType.UNKNOWN
     else:
         return ModelType.UNKNOWN
 
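
Note: diffusers writes a model_index.json at the root of every pipeline snapshot, and its '_class_name' field names the pipeline class; that is the field the new elif branch reads. A minimal standalone sketch of the same detection (the helper name and return convention are illustrative, not part of this commit):

import json
import os

def is_wuerstchen_decoder_snapshot(snapshot_folder):
    # model_index.json sits at the root of a diffusers pipeline snapshot
    # and records the pipeline class under the '_class_name' key.
    model_index = os.path.join(snapshot_folder, 'model_index.json')
    if not os.path.exists(model_index):
        return False
    with open(model_index, 'r') as f:
        return json.load(f).get('_class_name') == 'WuerstchenDecoderPipeline'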

generator_process/actions/prompt_to_image.py

Lines changed: 35 additions & 21 deletions
@@ -47,6 +47,7 @@ def prompt_to_image(
     import diffusers
     import torch
     from PIL import Image, ImageOps
+    from diffusers.pipelines.wuerstchen import WuerstchenCombinedPipeline
 
     device = self.choose_device(optimizations)
 
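
Note: the new import exists so the call site further down can branch on pipeline type. When AutoPipelineForText2Image loads a Würstchen checkpoint, it should resolve to WuerstchenCombinedPipeline rather than a Stable Diffusion pipeline. A rough sketch of that behavior (model ID taken from the commit title; assumes a diffusers release that ships the Würstchen pipelines):

import diffusers

# AutoPipelineForText2Image picks its pipeline class from the checkpoint's
# model_index.json, so a Würstchen checkpoint comes back as the combined
# prior+decoder pipeline instead of StableDiffusionPipeline.
pipe = diffusers.AutoPipelineForText2Image.from_pretrained("warp-ai/wuerstchen")
print(type(pipe).__name__)  # expected: WuerstchenCombinedPipeline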
@@ -56,8 +57,8 @@ def prompt_to_image(
     else:
         pipe = self.load_model(diffusers.AutoPipelineForText2Image, model, optimizations, scheduler)
         refiner = None
-    height = height or pipe.unet.config.sample_size * pipe.vae_scale_factor
-    width = width or pipe.unet.config.sample_size * pipe.vae_scale_factor
+    height = height or ((pipe.unet.config.sample_size * pipe.vae_scale_factor) if hasattr(pipe, 'unet') and hasattr(pipe, 'vae_scale_factor') else 512)
+    width = width or ((pipe.unet.config.sample_size * pipe.vae_scale_factor) if hasattr(pipe, 'unet') and hasattr(pipe, 'vae_scale_factor') else 512)
 
     # Optimizations
     pipe = optimizations.apply(pipe, device)
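
Note: the old defaults assumed every pipeline exposes a UNet and a VAE scale factor; the Würstchen combined pipeline has neither attribute, so the guarded expressions fall back to 512. The same logic pulled out as a helper (a sketch; the function name and fixed fallback are illustrative):

def native_size(pipe, fallback=512):
    # Stable Diffusion pipelines derive their native resolution from the
    # UNet's sample size times the VAE scale factor; pipelines without
    # those attributes (e.g. Würstchen) get a fixed fallback instead.
    if hasattr(pipe, 'unet') and hasattr(pipe, 'vae_scale_factor'):
        return pipe.unet.config.sample_size * pipe.vae_scale_factor
    return fallback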
@@ -73,8 +74,16 @@ def prompt_to_image(
         generator = generator[0]
 
     # Seamless
-    _configure_model_padding(pipe.unet, seamless_axes)
-    _configure_model_padding(pipe.vae, seamless_axes)
+    if hasattr(pipe, 'unet'):
+        _configure_model_padding(pipe.unet, seamless_axes)
+    if hasattr(pipe, 'vae'):
+        _configure_model_padding(pipe.vae, seamless_axes)
+    if hasattr(pipe, 'prior_prior'):
+        _configure_model_padding(pipe.prior_prior, seamless_axes)
+    if hasattr(pipe, 'decoder'):
+        _configure_model_padding(pipe.decoder, seamless_axes)
+    if hasattr(pipe, 'vqgan'):
+        _configure_model_padding(pipe.vqgan, seamless_axes)
 
     # Inference
     with torch.inference_mode() if device not in ('mps', "dml") else nullcontext():
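
Note: seamless tiling now probes every submodule a supported pipeline might carry: unet and vae on Stable Diffusion, prior_prior, decoder, and vqgan on Würstchen. The chain of hasattr checks could equally be written as a loop; a sketch assuming the repo's existing _configure_model_padding helper:

def configure_seamless(pipe, seamless_axes):
    # Apply padding only to the submodules this pipeline actually has;
    # pipelines of other types simply match none of the names.
    for name in ('unet', 'vae', 'prior_prior', 'decoder', 'vqgan'):
        if hasattr(pipe, name):
            _configure_model_padding(getattr(pipe, name), seamless_axes)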
@@ -85,23 +94,28 @@ def callback(step, timestep, latents):
                 raise InterruptedError()
             future.add_response(ImageGenerationResult.step_preview(self, step_preview_mode, width, height, latents, generator, step))
         try:
-            result = pipe(
-                prompt=prompt,
-                height=height,
-                width=width,
-                num_inference_steps=steps,
-                guidance_scale=cfg_scale,
-                negative_prompt=negative_prompt if use_negative_prompt else None,
-                num_images_per_prompt=1,
-                eta=0.0,
-                generator=generator,
-                latents=None,
-                output_type=output_type,
-                return_dict=True,
-                callback=callback,
-                callback_steps=1,
-                #cfg_end=optimizations.cfg_end
-            )
+            pipe_kwargs = {
+                'prompt': prompt,
+                'height': height,
+                'width': width,
+                'num_inference_steps': steps,
+                'guidance_scale': cfg_scale,
+                'negative_prompt': negative_prompt if use_negative_prompt else None,
+                'num_images_per_prompt': 1,
+                'eta': 0.0,
+                'generator': generator,
+                'latents': None,
+                'output_type': output_type,
+                'return_dict': True,
+                'callback': callback,
+                'callback_steps': 1,
+            }
+            if isinstance(pipe, WuerstchenCombinedPipeline):
+                pipe_kwargs['prior_guidance_scale'] = pipe_kwargs.pop('guidance_scale')
+                del pipe_kwargs['eta']
+                del pipe_kwargs['callback']
+                del pipe_kwargs['callback_steps']
+            result = pipe(**pipe_kwargs)
             if is_sdxl and sdxl_refiner_model is not None and refiner is None:
                 # allow load_model() to garbage collect pipe
                 pipe = None
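
Note: the kwargs shuffle mirrors the combined Würstchen pipeline's call signature: it takes prior_guidance_scale in place of guidance_scale and, in the diffusers version this commit targets, accepts neither eta nor the callback/callback_steps pair, so those keys are dropped before calling. A hedged usage sketch (prompt and parameter values are illustrative):

from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained("warp-ai/wuerstchen")
result = pipe(
    prompt="a watercolor painting of a lighthouse",
    height=1024,
    width=1024,
    prior_guidance_scale=4.0,   # stands in for guidance_scale here
    num_inference_steps=12,     # decoder steps; the prior has its own count
    # no eta, callback, or callback_steps: the combined pipeline
    # does not accept them, hence the del statements above
)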
