cerebras.modelzoo.data.nlp.t5.config.T5DynamicDataProcessorConfig#

class cerebras.modelzoo.data.nlp.t5.config.T5DynamicDataProcessorConfig(batch_size: int = <object object>, shuffle: bool = True, shuffle_seed: int = 0, num_workers: int = 0, prefetch_factor: int = 10, persistent_workers: bool = True, src_data_dir: str = <object object>, src_vocab_file: str = <object object>, src_max_sequence_length: int = <object object>, tgt_max_sequence_length: int = <object object>, shuffle_buffer: Optional[int] = None, do_lower: bool = False, buckets: Optional[List[int]] = None, dynamic_loss_weight: Optional[bool] = None, pack_sequences: Optional[bool] = False, num_documents_to_concatenate: int = 128, drop_last: bool = True, oov_token: str = '<unk>', sos_token: str = '<s>', eos_token: str = '</s>', pad_token: str = '<pad>', extra_ids: Union[int, List[int]] = 0, labels_pad_id: int = 0, input_pad_id: int = 0)[source]#

src_data_dir: str = <object object>#

src_vocab_file: str = <object object>#

src_max_sequence_length: int = <object object>#

tgt_max_sequence_length: int = <object object>#

shuffle_buffer: Optional[int] = None#

do_lower: bool = False#

buckets: Optional[List[int]] = None#

dynamic_loss_weight: Optional[bool] = None#

pack_sequences: Optional[bool] = False#

num_documents_to_concatenate: int = 128#

num_workers: int = 0#
    The number of PyTorch processes used in the dataloader.

drop_last: bool = True#

prefetch_factor: int = 10#
    The number of batches to prefetch in the dataloader.

persistent_workers: bool = True#
    Whether or not to keep workers persistent between epochs.

oov_token: str = '<unk>'#

sos_token: str = '<s>'#

eos_token: str = '</s>'#

pad_token: str = '<pad>'#

extra_ids: Union[int, List[int]] = 0#

labels_pad_id: int = 0#

input_pad_id: int = 0#

batch_size: int = <object object>#
    Batch size to be used.

shuffle: bool = True#
    Whether or not to shuffle the dataset.

shuffle_seed: int = 0#
    Seed used for deterministic shuffling.

cerebras.modelzoo.data.nlp.t5.config

cerebras.modelzoo.data.nlp.t5.config.T5HDF5DataProcessorConfig