Commit
added repaint unet implementation
liopeer committed Nov 3, 2023
1 parent 9f93d91 commit 5f27288
Showing 14 changed files with 1,745 additions and 42 deletions.
4 changes: 4 additions & 0 deletions diffusion_models/models/diffusion.py
@@ -193,7 +193,11 @@ def forward(
            tuple of noise predictions and noise for random timesteps in the denoising process
        """
        timesteps = self._sample_timesteps(x.shape[0], device=x.device)
        if timesteps.dim() != 1:
            raise ValueError("Timesteps should only have batch dimension.", timesteps.shape)
        time_enc = self.time_encoder.get_pos_encoding(timesteps)
        if time_enc.dim() != 2:
            raise ValueError("Time encoding should be 2-dimensional.", time_enc.shape)
        # make (partially) noisy versions of batch, returns noisy version + applied noise
        x_t, noise = self.fwd_diff(x, timesteps)
        # predict the applied noise from the noisy version
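
The two added checks make the shape contract explicit before the forward-diffusion step: sampled timesteps must stay a 1-D batch vector, and the time encoding must stay a (batch, features) matrix. A minimal standalone sketch of the same pattern, with illustrative shapes and a hypothetical validate_shapes helper that is not part of the repository:

import torch

def validate_shapes(timesteps: torch.Tensor, time_enc: torch.Tensor) -> None:
    # one integer timestep per batch element -> shape (batch,)
    if timesteps.dim() != 1:
        raise ValueError("Timesteps should only have batch dimension.", timesteps.shape)
    # one encoding vector per batch element -> shape (batch, features)
    if time_enc.dim() != 2:
        raise ValueError("Time encoding should be 2-dimensional.", time_enc.shape)

timesteps = torch.randint(0, 1000, (8,))             # (batch,)
time_enc = torch.randn(8, 256)                       # (batch, features)
validate_shapes(timesteps, time_enc)                 # passes
validate_shapes(timesteps.unsqueeze(-1), time_enc)   # raises ValueError
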
2 changes: 2 additions & 0 deletions diffusion_models/models/positional_encoding.py
@@ -40,6 +40,8 @@ def get_pos_encoding(self, t: Int64[Tensor, "batch"]) -> Float[Tensor, "batch fe
        out
            positional encodings for batch
        """
        if (t.dim() != 1) or (t.shape[0] == 1):
            raise ValueError("Timesteps must be 1D with batch size > 1.", t.shape)
        x = self.pe[t]
        return x.squeeze()

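The stricter guard on t exists because of the squeeze() on the lookup result: with a batch of one, squeeze() would also drop the batch dimension and the encoding would come back 1-D. A standalone illustration of that pitfall (the table size and feature width below are made up for the example):

import torch

num_steps, features = 10, 4
pe = torch.randn(num_steps, features)    # stand-in for a precomputed encoding table

t = torch.tensor([3, 7])                 # batch of 2
print(pe[t].squeeze().shape)             # torch.Size([2, 4]) -- batch dim preserved
t = torch.tensor([3])                    # batch of 1
print(pe[t].squeeze().shape)             # torch.Size([4]) -- batch dim silently dropped
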
245 changes: 245 additions & 0 deletions diffusion_models/models/repaint_unet/fp16_util.py
@@ -0,0 +1,245 @@
# Copyright (c) 2022 Huawei Technologies Co., Ltd.
# Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
#
# The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license

"""
Helpers to train with 16-bit precision.
"""

import numpy as np
import torch as th
import torch.nn as nn
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors


INITIAL_LOG_LOSS_SCALE = 20.0


def convert_module_to_f16(l):
    """
    Convert primitive modules to float16.
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.half()
        if l.bias is not None:
            l.bias.data = l.bias.data.half()


def convert_module_to_f32(l):
    """
    Convert primitive modules to float32, undoing convert_module_to_f16().
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.float()
        if l.bias is not None:
            l.bias.data = l.bias.data.float()


def make_master_params(param_groups_and_shapes):
    """
    Copy model parameters into a (differently-shaped) list of full-precision
    parameters.
    """
    master_params = []
    for param_group, shape in param_groups_and_shapes:
        master_param = nn.Parameter(
            _flatten_dense_tensors(
                [param.detach().float() for (_, param) in param_group]
            ).view(shape)
        )
        master_param.requires_grad = True
        master_params.append(master_param)
    return master_params


def model_grads_to_master_grads(param_groups_and_shapes, master_params):
    """
    Copy the gradients from the model parameters into the master parameters
    from make_master_params().
    """
    for master_param, (param_group, shape) in zip(
        master_params, param_groups_and_shapes
    ):
        master_param.grad = _flatten_dense_tensors(
            [param_grad_or_zeros(param) for (_, param) in param_group]
        ).view(shape)


def master_params_to_model_params(param_groups_and_shapes, master_params):
    """
    Copy the master parameter data back into the model parameters.
    """
    # Without copying to a list, if a generator is passed, this will
    # silently not copy any parameters.
    for master_param, (param_group, _) in zip(master_params, param_groups_and_shapes):
        for (_, param), unflat_master_param in zip(
            param_group, unflatten_master_params(param_group, master_param.view(-1))
        ):
            param.detach().copy_(unflat_master_param)


def unflatten_master_params(param_group, master_param):
    return _unflatten_dense_tensors(master_param, [param for (_, param) in param_group])


def get_param_groups_and_shapes(named_model_params):
    named_model_params = list(named_model_params)
    scalar_vector_named_params = (
        [(n, p) for (n, p) in named_model_params if p.ndim <= 1],
        (-1),
    )
    matrix_named_params = (
        [(n, p) for (n, p) in named_model_params if p.ndim > 1],
        (1, -1),
    )
    return [scalar_vector_named_params, matrix_named_params]


def master_params_to_state_dict(
    model, param_groups_and_shapes, master_params, use_fp16
):
    if use_fp16:
        state_dict = model.state_dict()
        for master_param, (param_group, _) in zip(
            master_params, param_groups_and_shapes
        ):
            for (name, _), unflat_master_param in zip(
                param_group, unflatten_master_params(param_group, master_param.view(-1))
            ):
                assert name in state_dict
                state_dict[name] = unflat_master_param
    else:
        state_dict = model.state_dict()
        for i, (name, _value) in enumerate(model.named_parameters()):
            assert name in state_dict
            state_dict[name] = master_params[i]
    return state_dict


def state_dict_to_master_params(model, state_dict, use_fp16):
    if use_fp16:
        named_model_params = [
            (name, state_dict[name]) for name, _ in model.named_parameters()
        ]
        param_groups_and_shapes = get_param_groups_and_shapes(named_model_params)
        master_params = make_master_params(param_groups_and_shapes)
    else:
        master_params = [state_dict[name] for name, _ in model.named_parameters()]
    return master_params


def zero_master_grads(master_params):
    for param in master_params:
        param.grad = None


def zero_grad(model_params):
    for param in model_params:
        # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group
        if param.grad is not None:
            param.grad.detach_()
            param.grad.zero_()


def param_grad_or_zeros(param):
    if param.grad is not None:
        return param.grad.data.detach()
    else:
        return th.zeros_like(param)


class MixedPrecisionTrainer:
    def __init__(
        self,
        *,
        model,
        use_fp16=False,
        fp16_scale_growth=1e-3,
        initial_lg_loss_scale=INITIAL_LOG_LOSS_SCALE,
    ):
        self.model = model
        self.use_fp16 = use_fp16
        self.fp16_scale_growth = fp16_scale_growth

        self.model_params = list(self.model.parameters())
        self.master_params = self.model_params
        self.param_groups_and_shapes = None
        self.lg_loss_scale = initial_lg_loss_scale

        if self.use_fp16:
            self.param_groups_and_shapes = get_param_groups_and_shapes(
                self.model.named_parameters()
            )
            self.master_params = make_master_params(self.param_groups_and_shapes)
            self.model.convert_to_fp16()

    def zero_grad(self):
        zero_grad(self.model_params)

    def backward(self, loss: th.Tensor):
        if self.use_fp16:
            loss_scale = 2 ** self.lg_loss_scale
            (loss * loss_scale).backward()
        else:
            loss.backward()

    def optimize(self, opt: th.optim.Optimizer):
        if self.use_fp16:
            return self._optimize_fp16(opt)
        else:
            return self._optimize_normal(opt)

    def _optimize_fp16(self, opt: th.optim.Optimizer):
        model_grads_to_master_grads(self.param_groups_and_shapes, self.master_params)
        grad_norm, param_norm = self._compute_norms(grad_scale=2 ** self.lg_loss_scale)
        if check_overflow(grad_norm):
            self.lg_loss_scale -= 1
            zero_master_grads(self.master_params)
            return False

        for p in self.master_params:
            p.grad.mul_(1.0 / (2 ** self.lg_loss_scale))
        opt.step()
        zero_master_grads(self.master_params)
        master_params_to_model_params(self.param_groups_and_shapes, self.master_params)
        self.lg_loss_scale += self.fp16_scale_growth
        return True

    def _optimize_normal(self, opt: th.optim.Optimizer):
        grad_norm, param_norm = self._compute_norms()
        opt.step()
        return True

    def _compute_norms(self, grad_scale=1.0):
        grad_norm = 0.0
        param_norm = 0.0
        for p in self.master_params:
            with th.no_grad():
                param_norm += th.norm(p, p=2, dtype=th.float32).item() ** 2
                if p.grad is not None:
                    grad_norm += th.norm(p.grad, p=2, dtype=th.float32).item() ** 2
        return np.sqrt(grad_norm) / grad_scale, np.sqrt(param_norm)

    def master_params_to_state_dict(self, master_params):
        return master_params_to_state_dict(
            self.model, self.param_groups_and_shapes, master_params, self.use_fp16
        )

    def state_dict_to_master_params(self, state_dict):
        return state_dict_to_master_params(self.model, state_dict, self.use_fp16)


def check_overflow(value):
    return (value == float("inf")) or (value == -float("inf")) or (value != value)
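
For context, MixedPrecisionTrainer is driven the same way the upstream guided-diffusion training loop uses it: the optimizer is built on master_params, and every iteration goes through zero_grad, backward, optimize. A rough usage sketch, assuming a hypothetical model that exposes the convert_to_fp16() hook the trainer calls and a hypothetical data loader:

import torch as th

trainer = MixedPrecisionTrainer(model=model, use_fp16=True, fp16_scale_growth=1e-3)
opt = th.optim.AdamW(trainer.master_params, lr=1e-4)

for batch, timesteps in loader:               # hypothetical data loader
    trainer.zero_grad()
    loss = model(batch, timesteps).mean()     # placeholder loss
    trainer.backward(loss)                    # scales the loss by 2 ** lg_loss_scale
    took_step = trainer.optimize(opt)         # False if the scaled gradients overflowed

On overflow the trainer lowers lg_loss_scale by one and skips the step; otherwise it unscales the master gradients, steps the optimizer, copies the fp32 master weights back into the fp16 model, and grows the scale by fp16_scale_growth.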