modelzoo.transformers.pytorch.gpt2.gpt2_model.GPT2LMHeadModel

class modelzoo.transformers.pytorch.gpt2.gpt2_model.GPT2LMHeadModel

Bases: torch.nn.Module

GPT-2 model with a language modeling (LM) head.

Methods

compute_input_embeddings

forward

get_input_embeddings

get_output_embeddings

reset_parameters

tie_weights

__call__(*args: Any, **kwargs: Any) → Any

Call self as a function.

__init__(
    vocab_size=50257,
    max_position_embeddings=1024,
    embd_pdrop=0.1,
    position_embedding_type='learned',
    position_embedding_offset=0,
    hidden_size=768,
    share_embedding_weights=True,
    embedding_layer_norm=False,
    num_relative_attention_buckets=32,
    rotary_dim=None,
    rope_theta=10000,
    num_hidden_layers=12,
    dropout_rate=0.1,
    norm_type='layernorm',
    layer_norm_epsilon=1e-05,
    num_heads=12,
    attention_type='scaled_dot_product',
    attention_module='aiayn_attention',
    extra_attention_params={},
    use_projection_bias_in_attention=True,
    use_ffn_bias_in_attention=True,
    attention_dropout_rate=0.1,
    attention_softmax_fp32=True,
    fixed_sparse_attention=None,
    filter_size=3072,
    nonlinearity='gelu',
    use_ffn_bias=True,
    use_bias_in_output=False,
    initializer_range=0.02,
    embedding_initializer=None,
    initializer=None,
    output_layer_initializer=None,
    output_logits_scale=None,
    embeddings_scale=1.0,
    scale_qk_dot_by_d=False,
    alibi_trainable_slopes=False,
    pos_scaling_factor=1.0,
    scale_qk_dot_by_layer_idx=False,
)
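
Example: a minimal usage sketch. The constructor keywords come from the __init__ signature above and the import path follows the page heading; the forward-call arguments (token ids plus an attention mask) are assumed from common GPT-2 conventions and are not documented on this page, so verify them against the forward method before relying on them.

    import torch

    # Import path taken from the page heading; adjust to your installed package layout.
    from modelzoo.transformers.pytorch.gpt2.gpt2_model import GPT2LMHeadModel

    # Build a base-size GPT-2 model; only a handful of the documented constructor
    # arguments are stated explicitly, the rest keep their defaults.
    model = GPT2LMHeadModel(
        vocab_size=50257,
        max_position_embeddings=1024,
        hidden_size=768,
        num_hidden_layers=12,
        num_heads=12,
        filter_size=3072,
        share_embedding_weights=True,  # ties input and output embedding weights
    )
    model.eval()

    # Assumed forward arguments (token ids plus an attention mask), following common
    # GPT-2 conventions; check this class's forward method for the exact signature.
    input_ids = torch.randint(0, 50257, (1, 16))
    attention_mask = torch.ones_like(input_ids)
    with torch.no_grad():
        lm_logits = model(input_ids, attention_mask=attention_mask)

    # If forward returns per-token vocabulary logits, the expected shape is (1, 16, 50257).
    print(lm_logits.shape)
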
static __new__(cls, *args: Any, **kwargs: Any) → Any