# Source code for the cerebras.pytorch.sparse Group sparsity algorithm.

# Copyright 2016-2023 Cerebras Systems
# SPDX-License-Identifier: BSD-3-Clause

import fnmatch
import logging
import re
from dataclasses import dataclass
from typing import Callable, Dict, Optional, Union
from warnings import warn

import torch

import cerebras.pytorch as cstorch
from cerebras.pytorch.sparse.base import SparsityAlgorithm

class Group(SparsityAlgorithm):
    """
    Group sparsity algorithm. This algorithm allows for multiple sparsity
    algorithms to be applied to different groups of parameters.

    For example:

    .. code:: python

        sparsity = cstorch.sparse.Group({
            "fc1.*": cstorch.sparse.Static(sparsity=0.5),
            "fc2.*": cstorch.sparse.GMP(
                schedule=[0.3, 0.4, 0.5],
                update={"freq": 100}
            ),
        })
        sparsity.add("fc3.*", cstorch.sparse.RigL(sparsity=0.5))

        model.apply(sparsity)
        optimizer.apply(sparsity)

    The group sparsity algorithm will apply the sparsity algorithms to the
    parameters that match the filter. If a parameter name matches multiple
    filters, the first filter that matches will be used.
    """

    @dataclass
    class Filter:
        """Pairs a parameter predicate with the algorithm it selects."""

        # Called as ``filter(name, param)``; a truthy result selects the
        # parameter for ``algorithm``.
        filter: Callable[[str, torch.Tensor], bool]
        # The nested sparsity algorithm applied to matching parameters.
        algorithm: SparsityAlgorithm

    def __init__(self, groups: Optional[Dict[str, SparsityAlgorithm]] = None):
        """
        Args:
            groups: A dictionary of filter -> algorithm pairs.
                See :py:meth:`~cerebras.pytorch.sparse.Group.add` for
                more details.
        """
        super().__init__(sparsity=None)

        # Ordered list of Group.Filter entries; order matters because the
        # first matching filter wins in sparsify_parameter.
        self._groups = []

        if groups is not None:
            for group_filter, algorithm in groups.items():
                self.add(group_filter, algorithm)

    @property
    def num_sparse_params(self):
        """Total number of sparse params across all nested algorithms."""
        return sum(len(g.algorithm.sparse_params) for g in self._groups)

    @property
    def sparsity(self):
        """Not available on a Group; always raises NotImplementedError."""
        raise NotImplementedError(
            "Group sparsity algorithm does not have a sparsity level. "
            "You can access the sparsity of nested sparsity algorithms by "
            "indexing the Group object, i.e. group[0].sparsity"
        )

    def __getitem__(self, index) -> SparsityAlgorithm:
        """Returns the algorithm at the given index"""
        return self._groups[index].algorithm

    def add(
        self,
        filter: Union[str, list, tuple, Callable[[str, torch.Tensor], bool]],
        algorithm: SparsityAlgorithm,
    ):
        """
        Add a sparsity algorithm to the group.

        Args:
            filter: A string, list of strings, or callable that takes a
                parameter name and a parameter tensor and returns True if the
                parameter should be sparsified.

                If one or more strings are provided, the filter will match if
                any of the strings match the parameter name. The strings may
                contain glob patterns, e.g. "fc1.*" will match all parameters
                in the "fc1" module.

            algorithm: An instance of
                :py:class:`~cerebras.pytorch.sparse.SparsityAlgorithm`

        Raises:
            TypeError: If ``algorithm`` is not a SparsityAlgorithm, if it is
                itself a Group, or if ``filter`` is not a string, a list/tuple
                of strings, or a callable.
        """
        if not isinstance(algorithm, SparsityAlgorithm):
            raise TypeError(
                f"algorithm must be an instance of SparsityAlgorithm, got {type(algorithm)}"
            )
        if isinstance(algorithm, Group):
            raise TypeError(
                "algorithm must not be a Group sparsity algorithm. "
                "If you want to merge groups, use the extend method."
            )

        # Normalize a single glob string to the list form handled below.
        if isinstance(filter, str):
            filter = [filter]

        if isinstance(filter, (list, tuple)):
            patterns = []
            for f in filter:
                if not isinstance(f, str):
                    raise TypeError(
                        f"filter must be a string or list of strings, "
                        f"got {type(filter)}[{type(f)}]"
                    )
                # Translate the glob into a regex once, up front, so the
                # predicate only has to run pre-compiled patterns.
                patterns.append(re.compile(fnmatch.translate(f)))

            def predicate(name, _param):
                # Name-only match: the tensor argument is ignored.
                return any(p.match(name) is not None for p in patterns)

        elif callable(filter):
            predicate = filter
        else:
            raise TypeError(
                f"filter must be a string, list of strings, or callable, "
                f"got {type(filter)}"
            )

        self._groups.append(Group.Filter(predicate, algorithm))

    def extend(self, group: "Group"):
        """
        Extend the group with the filters and algorithms from another group.

        Args:
            group: An instance of :py:class:`~cerebras.pytorch.sparse.Group`

        Raises:
            TypeError: If ``group`` is not a Group instance.
        """
        if not isinstance(group, Group):
            raise TypeError(
                f"group must be an instance of Group, got {type(group)}"
            )

        # Stored filters are already callables, so add() takes them as-is.
        for g in group._groups:
            self.add(g.filter, g.algorithm)

    def sparsify_parameter(
        self, module: torch.nn.Module, name: str, param: torch.Tensor
    ) -> None:
        """
        Delegate sparsification of ``param`` to the first matching group.

        Parameters that already carry a ``_sparse_param`` attribute or that
        have a truthy ``requires_dense`` attribute are skipped.
        """
        if hasattr(param, "_sparse_param"):
            # Parameter is already sparsified
            return

        if getattr(param, "requires_dense", False):
            # Parameter has been marked as not sparsifiable
            return

        for group in self._groups:
            if group.filter(name, param):
                logging.debug("Sparsity filter matched: %s", name)
                group.algorithm.sparsify_parameter(module, name, param)
                # First match wins; later filters are not consulted.
                return

        # Only reached when no filter matched.
        logging.debug("Sparsity filter did *not* match: %s", name)

    def sparsify_module(self, module):
        """
        Sparsify ``module`` via the base class implementation.

        Raises:
            RuntimeError: If no groups have been added.

        Emits a warning if, after sparsification, none of the nested
        algorithms ended up with any sparse parameters.
        """
        if not self._groups:
            raise RuntimeError(
                "No groups were added to the Group sparsity algorithm"
            )

        super().sparsify_module(module)

        if self.num_sparse_params == 0:
            warn(
                "No parameters were sparsified in the module. "
                "This is likely due to the parameter filter not matching any "
                "parameters in the module"
            )

    def sparsify_optimizer(self, optimizer):
        """Sparsify ``optimizer`` on this group and each nested algorithm."""
        super().sparsify_optimizer(optimizer)

        # Call sparsify optimizer on each algorithm
        # so that it can apply any optimizer hooks
        for group in self._groups:
            group.algorithm.sparsify_optimizer(optimizer)

    def _forward_pre_hook(self, module, input):
        """Forward the pre-hook call to every nested algorithm."""
        for group in self._groups:
            group.algorithm._forward_pre_hook(module, input)

    def _annotate_sparse_params(self):
        """Forward the annotation call to every nested algorithm."""
        for group in self._groups:
            group.algorithm._annotate_sparse_params()

    def prune_weights(self):
        """Call ``prune_weights`` on every nested algorithm."""
        for group in self._groups:
            group.algorithm.prune_weights()

    def _ensure_sparsity_applied(self):
        """Forward the check to every nested algorithm."""
        for group in self._groups:
            group.algorithm._ensure_sparsity_applied()

    def update(self, optimizer: Optional[cstorch.optim.Optimizer] = None):
        """Call ``update(optimizer)`` on every nested algorithm."""
        for group in self._groups:
            group.algorithm.update(optimizer)

    def visit_state(self, f):
        """Call ``visit_state(f)`` on every nested algorithm."""
        for group in self._groups:
            group.algorithm.visit_state(f)

    def state_dict(self):
        """Return a list of the nested algorithms' state dicts, in order."""
        return [group.algorithm.state_dict() for group in self._groups]

    def load_state_dict(self, state_dict):
        """
        Load per-algorithm state produced by :py:meth:`state_dict`.

        Args:
            state_dict: A list with one state dict per nested algorithm, in
                group order. A single dict is treated as a one-element list.

        Raises:
            ValueError: If the number of state dicts does not match the
                number of groups.
        """
        if isinstance(state_dict, dict):
            state_dict = [state_dict]

        # NOTE(review): any value that is neither a dict nor a list is
        # silently ignored here — confirm whether callers rely on that.
        if isinstance(state_dict, list):
            if len(state_dict) != len(self._groups):
                raise ValueError(
                    f"Expected a list of {len(self._groups)} state_dicts for "
                    f"the Group sparsity algorithm but got {len(state_dict)}."
                )

            for s, group in zip(state_dict, self._groups):
                group.algorithm.load_state_dict(s)