Source code for modelzoo.vision.pytorch.dit.layers.GaussianDiffusion

# Copyright 2022 Cerebras Systems.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

from modelzoo.common.pytorch.run_utils import half_dtype_instance
from modelzoo.vision.pytorch.dit.layers.schedulers import (
    get_named_beta_schedule,
)


def index(arr, timestep):
    """Gather the entries of ``arr`` along dim 0 selected by ``timestep``.

    :param (Tensor) arr: Tensor to read from; rows are picked along dim 0.
    :param (Tensor) timestep: Tensor of positions into dim 0 of ``arr``.
        It is cast to ``long`` before the lookup, so any dtype that
        supports ``.long()`` is accepted.
    :returns: The selected entries of ``arr``, in the order given by
        ``timestep``.
    """
    positions = timestep.long()
    return arr.index_select(0, positions)
def extract(arr, timestep, broadcast_shape):
    """Look up per-sample values of ``arr`` and expand them to a full shape.

    :param (Tensor) arr: Table of values that is indexed along dim 0.
    :param (Tensor) timestep: Positions into ``arr``; the number of
        selected values must equal ``broadcast_shape[0]`` for the reshape
        below to succeed.
    :param broadcast_shape: Target shape of the result. Its first entry
        is kept as the leading dimension and every other dimension is
        filled by broadcasting.
    :returns: Tensor of shape ``broadcast_shape`` placed on
        ``timestep.device`` with dtype ``half_dtype_instance.half_dtype``.
    """
    # One looked-up value per leading entry, with singleton trailing dims:
    # (N, 1, 1, ...).
    trailing_ones = (1,) * (len(broadcast_shape) - 1)
    per_sample_shape = (broadcast_shape[0],) + trailing_ones
    per_sample = index(arr, timestep).view(per_sample_shape)

    # Adding a zero tensor of the target shape materializes the broadcast
    # into a real tensor of ``broadcast_shape``.
    filler = torch.zeros(broadcast_shape, device=arr.device)
    expanded = per_sample + filler

    return expanded.to(
        timestep.device, dtype=half_dtype_instance.half_dtype
    )
class GaussianDiffusion(torch.nn.Module):
    """Generate noisy images via Gaussian diffusion.

    Implements the forward (noising) process described in Step 5 of
    Algorithm 1 of the paper
    `"Denoising Diffusion Probabilistic Models" <https://arxiv.org/abs/2006.11239>`_.
    """

    def __init__(
        self,
        num_diffusion_steps,
        schedule_name,
        seed=None,
        beta_start=0.0001,
        beta_end=0.02,
    ):
        """
        :param (int) num_diffusion_steps: Number of diffusion steps. Must be
            strictly positive.
        :param schedule_name: Name of the variance schedule; forwarded
            unchanged to ``get_named_beta_schedule``.
        :param (int) seed: Random seed for reproducibility. When not
            ``None`` it is passed to ``torch.manual_seed``, which seeds the
            global torch RNG.
        :param (float) beta_start: Initial value of the variance schedule,
            i.e. beta_1 (default value according to Ho et al.
            https://arxiv.org/pdf/2006.11239.pdf: Section 4).
        :param (float) beta_end: Final value of the variance schedule,
            i.e. beta_T (default value according to Ho et al.
            https://arxiv.org/pdf/2006.11239.pdf: Section 4).

        :raises ValueError: If ``num_diffusion_steps`` is not positive.
        :raises AssertionError: If the generated betas are not 1-D or do
            not all lie in the interval (0, 1].
        """
        super().__init__()

        if num_diffusion_steps <= 0:
            raise ValueError("Number of diffusion steps must be positive.")
        if seed is not None:
            torch.manual_seed(seed)

        self.num_diffusion_steps = num_diffusion_steps
        self.schedule_name = schedule_name
        self.betas = get_named_beta_schedule(
            schedule_name,
            num_diffusion_steps,
            beta_start=beta_start,
            beta_end=beta_end,
        )

        # Sanity checks on the schedule before deriving constants from it.
        assert self.betas.dim() == 1, "betas must be 1-D"
        betas_in_range = torch.logical_and(
            self.betas > 0, self.betas <= 1
        ).all()
        assert betas_in_range

        # Running product of (1 - beta_t) over the timestep axis.
        alpha_bar = (1.0 - self.betas).cumprod(dim=0)
        signal_coeff = alpha_bar.sqrt().to(torch.float32)
        noise_coeff = (1 - alpha_bar).sqrt().to(torch.float32)

        # Stored as non-trainable parameters; registration order is kept
        # (signal coefficient first, then noise coefficient).
        self.sqrt_alphas_cum_prod = torch.nn.Parameter(
            signal_coeff, requires_grad=False
        )
        self.sqrt_one_minus_alphas_cum_prod = torch.nn.Parameter(
            noise_coeff, requires_grad=False
        )

    def forward(self, latent, noise, timestep):
        """Look up the alpha-related constants and create the noised sample.

        :param (Tensor) latent: Float tensor of size (B, C, H, W); only its
            rank (4) is validated here.
        :param (Tensor) noise: Noise tensor mixed into ``latent``. Its shape
            is the shape the per-timestep coefficients are expanded to
            (presumably identical to ``latent``'s shape; not checked here).
        :param (Tensor) timestep: Positions into the precomputed schedule
            constants, used to select one coefficient pair per lookup.

        :returns: A single tensor holding the noisy samples, cast to
            ``half_dtype_instance.half_dtype``.
        :raises ValueError: If ``latent`` is not 4-dimensional.
        """
        if latent.ndim != 4:
            raise ValueError(f"Samples ndim should be 4. Got {latent.ndim}")

        target_shape = noise.shape
        signal_scale = extract(
            self.sqrt_alphas_cum_prod, timestep, target_shape
        )
        noise_scale = extract(
            self.sqrt_one_minus_alphas_cum_prod, timestep, target_shape
        )

        # Scaled clean sample plus scaled noise.
        noisy_samples = signal_scale * latent + noise_scale * noise
        return noisy_samples.to(half_dtype_instance.half_dtype)

    def __repr__(self):
        """Return ``ClassName(schedule_name=..., num_diffusion_steps=...)``."""
        fields = (
            f"schedule_name={self.schedule_name}"
            f", num_diffusion_steps={self.num_diffusion_steps}"
        )
        return f"{self.__class__.__name__}({fields})"