Skip to content

Commit

Permalink
Removes SV3D video_decoder, keeps SV3D image_decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
Vikram Voleti committed Mar 18, 2024
1 parent b41860f commit 30e4d32
Show file tree
Hide file tree
Showing 11 changed files with 400 additions and 568 deletions.
11 changes: 5 additions & 6 deletions configs/inference/sv3d_p.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,17 +103,16 @@ model:
encoder_config:
target: torch.nn.Identity
decoder_config:
- target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
+ target: sgm.modules.diffusionmodules.model.Decoder
  params:
- attn_type: vanilla
+ attn_type: vanilla-xformers
  double_z: True
  z_channels: 4
  resolution: 256
  in_channels: 3
  out_ch: 3
  ch: 128
- ch_mult: [1, 2, 4, 4]
+ ch_mult: [ 1, 2, 4, 4 ]
  num_res_blocks: 2
- attn_resolutions: []
- dropout: 0.0
- video_kernel_size: [3, 1, 1]
+ attn_resolutions: [ ]
+ dropout: 0.0
118 changes: 0 additions & 118 deletions configs/inference/sv3d_p_image_decoder.yaml

This file was deleted.

11 changes: 5 additions & 6 deletions configs/inference/sv3d_u.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,16 @@ model:
encoder_config:
target: torch.nn.Identity
decoder_config:
- target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
+ target: sgm.modules.diffusionmodules.model.Decoder
  params:
- attn_type: vanilla
+ attn_type: vanilla-xformers
  double_z: True
  z_channels: 4
  resolution: 256
  in_channels: 3
  out_ch: 3
  ch: 128
- ch_mult: [1, 2, 4, 4]
+ ch_mult: [ 1, 2, 4, 4 ]
  num_res_blocks: 2
- attn_resolutions: []
- dropout: 0.0
- video_kernel_size: [3, 1, 1]
+ attn_resolutions: [ ]
+ dropout: 0.0
106 changes: 0 additions & 106 deletions configs/inference/sv3d_u_image_decoder.yaml

This file was deleted.

40 changes: 0 additions & 40 deletions scripts/demo/video_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,26 +109,6 @@
"decoding_t": 14,
},
},
- "sv3d_u_image_decoder": {
- "T": 21,
- "H": 576,
- "W": 576,
- "C": 4,
- "f": 8,
- "config": "configs/inference/sv3d_u_image_decoder.yaml",
- "ckpt": "checkpoints/sv3d_u_image_decoder.safetensors",
- "options": {
- "discretization": 1,
- "cfg": 2.5,
- "sigma_min": 0.002,
- "sigma_max": 700.0,
- "rho": 7.0,
- "guider": 3,
- "force_uc_zero_embeddings": ["cond_frames", "cond_frames_without_noise"],
- "num_steps": 50,
- "decoding_t": 14,
- },
- },
"sv3d_p": {
"T": 21,
"H": 576,
Expand All @@ -149,26 +129,6 @@
"decoding_t": 14,
},
},
- "sv3d_p_image_decoder": {
- "T": 21,
- "H": 576,
- "W": 576,
- "C": 4,
- "f": 8,
- "config": "configs/inference/sv3d_p_image_decoder.yaml",
- "ckpt": "checkpoints/sv3d_p_image_decoder.safetensors",
- "options": {
- "discretization": 1,
- "cfg": 2.5,
- "sigma_min": 0.002,
- "sigma_max": 700.0,
- "rho": 7.0,
- "guider": 3,
- "force_uc_zero_embeddings": ["cond_frames", "cond_frames_without_noise"],
- "num_steps": 50,
- "decoding_t": 14,
- },
- },
}


Expand Down
11 changes: 5 additions & 6 deletions scripts/sampling/configs/sv3d_p.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ model:
params:
scale_factor: 0.18215
disable_first_stage_autocast: True
- ckpt_path: checkpoints/sv3d_p.safetensors
+ ckpt_path: checkpoints/sv3d_p_image_decoder.safetensors

denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.Denoiser
Expand Down Expand Up @@ -104,20 +104,19 @@ model:
encoder_config:
target: torch.nn.Identity
decoder_config:
- target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
+ target: sgm.modules.diffusionmodules.model.Decoder
  params:
- attn_type: vanilla
+ attn_type: vanilla-xformers
  double_z: True
  z_channels: 4
  resolution: 256
  in_channels: 3
  out_ch: 3
  ch: 128
- ch_mult: [1, 2, 4, 4]
+ ch_mult: [ 1, 2, 4, 4 ]
  num_res_blocks: 2
- attn_resolutions: []
+ attn_resolutions: [ ]
  dropout: 0.0
- video_kernel_size: [3, 1, 1]

sampler_config:
target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
Expand Down
Loading

0 comments on commit 30e4d32

Please sign in to comment.