Source code for cerebras_pytorch.sparse.init

# Copyright 2016-2023 Cerebras Systems
# SPDX-License-Identifier: BSD-3-Clause

"""
Sparsity mask initialization methods and helpers, invoked by
``BaseSparsityOptimizer``.
"""
import inspect
from typing import Callable, Optional, Union

import numpy as np
import torch

from cerebras_pytorch.utils.typing import signature_matches_type_hint

from .utils import ScoreShaper, make_mask_topk_sparsity

InitMethodCallable = Callable[
    [torch.nn.Parameter, torch.FloatTensor, Optional[ScoreShaper]],
    torch.BoolTensor,
]
InitMethodType = Union[str, InitMethodCallable]


[docs]def random(
    p: torch.nn.Parameter,
    sparsity: torch.FloatTensor,
    score_shaper: Optional[ScoreShaper] = None,
) -> torch.BoolTensor:
    """
    Uniformly random sparsity pattern.
    """
    score = torch.rand_like(p, device=sparsity.device)
    return make_mask_topk_sparsity(score, sparsity, score_shaper)


[docs]def topk(
    p: torch.nn.Parameter,
    sparsity: torch.FloatTensor,
    score_shaper: Optional[ScoreShaper] = None,
) -> torch.BoolTensor:
    """
    Prune lowest magnitude weights.
    """

    # We transfer the param to the sparsity device because for CSX this
    # involves reading the data back on CPU.
    score = p.to(sparsity.device).abs()
    return make_mask_topk_sparsity(score, sparsity, score_shaper)


[docs]def from_zeros(
    p: torch.nn.Parameter,
    sparsity: torch.FloatTensor,
    score_shaper: Optional[ScoreShaper] = None,
) -> torch.BoolTensor:
    """
    Any zeros currently in the weights represent pruned connections.
    NOTE: Doesn't actualy honor the configured sparsity.
    """
    return p.to(sparsity.device) != 0


[docs]def checkerboard(
    p: torch.nn.Parameter,
    sparsity: torch.FloatTensor,
    score_shaper: Optional[ScoreShaper] = None,
) -> torch.BoolTensor:
    """
    Mostly for stress and performance testing, creates a sparsity mask that is
    maximally distributed in a checkerboard across the weight.
    """
    density = 1 - sparsity.item()
    # Create a row with a uniformly distributed sparsity pattern
    col = p.shape[-1]
    # Alocate padding for potential rolling to still result in balance.
    padding = int(np.ceil(col / density + 1e-5))
    # [ 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 ]
    steps = np.floor(np.arange(col + padding) * density + 1e-5)
    # [ F,   F,   T,   F,   F,   T,   F,   F]
    mask = steps[1:] != steps[:-1]
    if len(p.shape) == 2:
        row = p.shape[0]
        # Now evenly distribute this over the rows as well by rolling each
        offset = -np.nonzero(mask)[0][0]
        mask = np.stack([np.roll(mask, x * offset) for x in range(row)])
    # Trim off padding columns
    mask = mask[..., :col]
    return torch.tensor(mask, device=sparsity.device)


def _noop_compile_only(
    p: torch.nn.Parameter,
    sparsity: torch.FloatTensor,
    score_shaper: Optional[ScoreShaper] = None,
) -> torch.BoolTensor:
    """
    "init" method that doesn't init to be used only with compile_only. This
    avoids computing masks on the CPU that aren't ultimately used.
    """
    return torch.empty_like(p, dtype=torch.bool)


def make_init_method(init_method: InitMethodType) -> InitMethodCallable:
    from cerebras_pytorch.backend import current_backend_impl

    if current_backend_impl().compile_only:
        return _noop_compile_only

    init_methods = {
        "random": random,
        "topk": topk,
        "from_zeros": from_zeros,
        "checkerboard": checkerboard,
    }
    init_method_error = (
        f'Unknown `init_method`: "{init_method}". Valid options are one '
        f'of the built-in {list(init_methods.keys())} or a function with '
        f'signature {InitMethodCallable}.'
    )
    if isinstance(init_method, str):
        if init_method not in init_methods:
            raise ValueError(init_method_error)
        init_method = init_methods[init_method]
    elif callable(init_method):
        signature = inspect.signature(init_method)
        if not signature_matches_type_hint(signature, InitMethodCallable):
            raise ValueError(init_method_error)
    else:
        raise ValueError(init_method_error)
    return init_method